In [30]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

In [31]:
# Assuming you have a dataset with columns 'date', 'price', and 'temperature'
# Load your dataset here (replace 'your_dataset.csv' with your actual file)
# Example data loading:
# df = pd.read_csv('your_dataset.csv')

# Due to data ownership issue, here just create a dummy dataset

# Create a sample dataset for illustration purposes
np.random.seed(42)
n = 1000
date_rng = pd.date_range(start='2022-01-01', end='2024-09-27', freq='D')
df = pd.DataFrame({
    'date': date_rng[:n],
    'price': np.random.rand(n) * 100,
    'temperature': np.random.rand(n) * 30 + 20
})

# Set the 'date' column as the index
df.set_index('date', inplace=True)

df.head()

Unnamed: 0_level_0,price,temperature
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-01,37.454012,25.553988
2022-01-02,95.071431,36.257028
2022-01-03,73.199394,46.188375
2022-01-04,59.865848,41.966747
2022-01-05,15.601864,44.196834


In [32]:
# Feature engineering: Create sequences for input data
def create_sequences(data, sequence_length):
    sequences = []
    target = []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length]
        target_val = data.iloc[i+sequence_length]['price']  # Adjust this line if your target column has a different name
        sequences.append(seq.values)
        target.append(target_val)
    return np.array(sequences), np.array(target)

# Set sequence length (number of past days to consider)
sequence_length = 10

# Create sequences and target
sequences, target = create_sequences(df, sequence_length)

In [33]:
# Normalize data
scaler = MinMaxScaler()
sequences_scaled = scaler.fit_transform(sequences.reshape(-1, sequences.shape[-1])).reshape(sequences.shape)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(sequences_scaled, target, test_size=0.2, random_state=42)

In [46]:
# Build the RNN model
# Input shape is 3D (DNN: 2D)
model = Sequential()
model.add(SimpleRNN(units=250, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(SimpleRNN(units=150, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=1))  # Output layer

model.compile(optimizer='adam', loss='mean_squared_error')



In [47]:
# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x2ecb51390>

In [48]:
# Evaluate the model on the test set
loss = model.evaluate(X_test, y_test)
print(f'Mean Squared Error on Test Set: {loss}')


Mean Squared Error on Test Set: 912.4657592773438


In [49]:
# Make predictions
predictions = model.predict(X_test)



In [50]:
predictions.shape

(198, 1)

In [51]:
import plotly.express as px
import plotly.graph_objects as go

In [52]:
predictions.shape

(198, 1)

In [53]:
result_df = pd.DataFrame({'y_test':y_test, 'prediction':predictions.flatten()})
result_df.head()

Unnamed: 0,y_test,prediction
0,36.77158,34.163586
1,47.347177,43.109112
2,41.824304,42.622513
3,25.02429,52.616413
4,2.541913,46.683868


In [54]:
fig = px.scatter(result_df, x='y_test', y='prediction')
fig.add_trace(go.Scatter(x=result_df['y_test'], y=result_df['y_test'], mode="lines"))
fig.show()