In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import  MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import TimeDistributed, Conv1D, MaxPooling1D, Flatten, LSTM, Dense, Dropout
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

In [None]:
from tensorflow.keras.layers import LSTM, Conv2D, MaxPooling2D, Flatten, Dense


In [None]:
# Load the data
data = pd.read_csv('/content/merged_file_data.csv', parse_dates=[0], dayfirst=True)

In [None]:
# Normalize the features to the range [0, 1]
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Assuming the first column is the date

In [None]:
# Convert to DataFrame
scaled_data = pd.DataFrame(scaled_data, columns=data.columns[1:])

In [None]:
# Add the date back
scaled_data['Date'] = data.iloc[:, 0]

In [None]:
# Set the date as index
scaled_data.set_index('Date', inplace=True)

In [None]:
# Create sequences of data
def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i+seq_length, :-1].values)
        y.append(data.iloc[i+seq_length, -1])  # Assuming the last column is temperature
    return np.array(X), np.array(y)


In [None]:
seq_length = 10  # For example, using 10 days of data to predict the next day's temperature
X, y = create_sequences(scaled_data, seq_length)


In [None]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Reshape input data to be compatible with TimeDistributed layer
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))


In [None]:
# Define the model
model_cnn_lstm = Sequential()

In [None]:
# TimeDistributed Conv1D layer
model_cnn_lstm.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(None, X_train.shape[2], X_train.shape[3])))
model_cnn_lstm.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model_cnn_lstm.add(TimeDistributed(Flatten()))

In [None]:
# LSTM layer
model_cnn_lstm.add(LSTM(50, activation='relu'))

In [None]:
# Fully connected layers
model_cnn_lstm.add(Dense(100, activation='relu'))  # First fully connected layer
model_cnn_lstm.add(Dropout(0.2))  # Dropout layer for regularization
model_cnn_lstm.add(Dense(50, activation='relu'))   # Second fully connected layer


In [None]:
# Output layer
model_cnn_lstm.add(Dense(1))

In [None]:
# Compile the model
model_cnn_lstm.compile(loss='mse', optimizer=Adam())

In [None]:
# Summary of the model
model_cnn_lstm.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDist  (None, None, 9, 64)       128       
 ributed)                                                        
                                                                 
 time_distributed_1 (TimeDi  (None, None, 4, 64)       0         
 stributed)                                                      
                                                                 
 time_distributed_2 (TimeDi  (None, None, 256)         0         
 stributed)                                                      
                                                                 
 lstm (LSTM)                 (None, 50)                61400     
                                                                 
 dense (Dense)               (None, 100)               5100      
                                                        

In [None]:
# Train the model
history = model_cnn_lstm.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Evaluate the model
loss = model_cnn_lstm.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss (MSE): {loss}')

Test Loss (MSE): 0.00970414187759161


In [None]:
# Make predictions
y_pred = model_cnn_lstm.predict(X_test)



In [None]:
# Inverse transform the predictions if needed
y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred), axis=1))[:, -1]
y_test = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], scaled_data.shape[1] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]


In [None]:
# Function to calculate RMSE
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

In [None]:
# prompt: print rmse value

print(f'RMSE: {rmse(y_test, y_pred)}')


RMSE: 3.2187160931547667


In [None]:
# Function to calculate Correlation Coefficient (CC)
def correlation_coefficient(y_true, y_pred):
    return np.corrcoef(y_true, y_pred)[0, 1]

In [None]:
# Define the number of folds
n_folds = 5

# Initialize the KFold object
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

# Initialize lists to store the results
acc_per_fold = []
loss_per_fold = []

# Iterate through each fold
for train_index, test_index in kf.split(X_train):
    # Get the training and testing data for the current fold
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    # Reshape the training and testing data
    X_train_fold = X_train_fold.reshape((X_train_fold.shape[0], X_train_fold.shape[1], X_train_fold.shape[2], 1))
    X_test_fold = X_test_fold.reshape((X_test_fold.shape[0], X_test_fold.shape[1], X_test_fold.shape[2], 1))

    # Train the model on the current fold
    model_cnn_lstm.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, verbose=1)
    # Evaluate the model on the current fold
    loss = model_cnn_lstm.evaluate(X_test_fold, y_test_fold, verbose=0)
    loss_per_fold.append(loss)

    # Calculate the accuracy on the current fold
    y_pred = model_cnn_lstm.predict(X_test_fold)
    y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred), axis=1))[:, -1]
    y_test_fold = scaler.inverse_transform(np.concatenate((np.zeros((y_test_fold.shape[0], scaled_data.shape[1] - 1)), y_test_fold.reshape(-1, 1)), axis=1))[:, -1]
    accuracy = correlation_coefficient(y_test_fold, y_pred)
    acc_per_fold.append(accuracy)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Calculate the average accuracy across all folds
average_acc = np.mean(acc_per_fold)

# Print the average accuracy
print("Average accuracy across folds:", average_acc)

# Calculate the average loss across all folds
average_loss = np.mean(loss_per_fold)

# Print the average loss
print("Average loss across folds:", average_loss)

# Calculate RMSE
rmse_values = rmse(y_test_fold, y_pred)
print(f'RMSE: {rmse_values}')

Average accuracy across folds: 0.935861157141316
Average loss across folds: 0.003856461029499769
RMSE: 1.928430226099921


In [None]:
y_test.shape

(3067,)

In [None]:
# Calculate Correlation Coefficient (CC)
cc_value = correlation_coefficient(y_test_fold, y_pred)
print(f'Correlation Coefficient: {cc_value}')

Correlation Coefficient: 0.9458887292307925


In [None]:
y_train.shape

(12264,)

In [None]:
# Compare predictions with actual values
print(f'Predictions: {y_pred[:10]}')
print(f'Actual values: {y_test[:10]}')

Predictions: [33.92348544 33.11139657 37.43157912 29.79710939 30.10754958 33.76706372
 20.78847024 33.443801   32.20172865 34.09902022]
Actual values: [32.82686615 39.18675613 37.745876   34.26612473 33.67470932 32.7926445
 42.33344269 34.84613419 27.58231354 21.32916641]


In [None]:
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and preprocess data
data = pd.read_csv('/content/merged_file_data.csv', parse_dates=[0], dayfirst=True)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.iloc[:, 1:])
scaled_data = pd.DataFrame(scaled_data, columns=data.columns[1:])
scaled_data['Date'] = data.iloc[:, 0]
scaled_data.set_index('Date', inplace=True)

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i+seq_length, :-1].values)
        y.append(data.iloc[i+seq_length, -1])
    return np.array(X), np.array(y)

seq_length = 10
X, y = create_sequences(scaled_data, seq_length)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define LSTM model
model_lstm = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1)
])
model_lstm.compile(optimizer='adam', loss='mse')
model_lstm.summary()

# Train the LSTM model
model_lstm.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate and predict with the LSTM model
loss = model_lstm.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss (MSE): {loss}')
y_pred = model_lstm.predict(X_test)
y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred), axis=1))[:, -1]
y_test = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], scaled_data.shape[1] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

print(f'RMSE: {rmse(y_test, y_pred)}')


Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 64)                18944     
                                                                 
 dense_15 (Dense)            (None, 1)                 65        
                                                                 
Total params: 19009 (74.25 KB)
Trainable params: 19009 (74.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss (MSE): 0.00408964604139328
RMSE: 2.0895221220687654


In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and preprocess data
data = pd.read_csv('/content/merged_file_data.csv', parse_dates=[0], dayfirst=True)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.iloc[:, 1:])
scaled_data = pd.DataFrame(scaled_data, columns=data.columns[1:])
scaled_data['Date'] = data.iloc[:, 0]
scaled_data.set_index('Date', inplace=True)

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i+seq_length, :-1].values)
        y.append(data.iloc[i+seq_length, -1])
    return np.array(X), np.array(y)

seq_length = 10
X, y = create_sequences(scaled_data, seq_length)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape input data for CNN
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))

# Define CNN model
model_cnn = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1)
])
model_cnn.compile(optimizer='adam', loss='mse')
model_cnn.summary()

# Train the CNN model
model_cnn.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate and predict with the CNN model
loss = model_cnn.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss (MSE): {loss}')
y_pred = model_cnn.predict(X_test)
y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred), axis=1))[:, -1]
y_test = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], scaled_data.shape[1] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

print(f'RMSE: {rmse(y_test, y_pred)}')


Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 8, 7, 32)          320       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 4, 3, 32)          0         
 g2D)                                                            
                                                                 
 flatten_8 (Flatten)         (None, 384)               0         
                                                                 
 dense_20 (Dense)            (None, 64)                24640     
                                                                 
 dense_21 (Dense)            (None, 1)                 65        
                                                                 
Total params: 25025 (97.75 KB)
Trainable params: 25025 (97.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________

In [None]:
from scipy.stats import pearsonr
# Calculate coefficient of correlation
corr, _ = pearsonr(y_test_orig, y_pred)
print(f'Coefficient of Correlation: {corr}')

Coefficient of Correlation: 0.9221579063898298


In [None]:
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and preprocess data
data = pd.read_csv('/content/merged_file_data.csv', parse_dates=[0], dayfirst=True)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.iloc[:, 1:])
scaled_data = pd.DataFrame(scaled_data, columns=data.columns[1:])
scaled_data['Date'] = data.iloc[:, 0]
scaled_data.set_index('Date', inplace=True)

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i+seq_length, :-1].values)
        y.append(data.iloc[i+seq_length, -1])
    return np.array(X), np.array(y)

seq_length = 10
X, y = create_sequences(scaled_data, seq_length)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define MLP model
model_mlp = Sequential([
    Flatten(input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(64, activation='relu'),
    Dense(1)
])
model_mlp.compile(optimizer='adam', loss='mse')
model_mlp.summary()

# Train the MLP model
model_mlp.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate and predict with the MLP model
loss = model_mlp.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss (MSE): {loss}')
y_pred = model_mlp.predict(X_test)
y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred), axis=1))[:, -1]
y_test = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], scaled_data.shape[1] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

print(f'RMSE: {rmse(y_test, y_pred)}')


Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_9 (Flatten)         (None, 90)                0         
                                                                 
 dense_22 (Dense)            (None, 64)                5824      
                                                                 
 dense_23 (Dense)            (None, 1)                 65        
                                                                 
Total params: 5889 (23.00 KB)
Trainable params: 5889 (23.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss (MSE): 0.004698020871728659
RMSE: 2.2395535985895783


In [None]:
from scipy.stats import pearsonr

# Evaluate and predict with the MLP model
loss = model_mlp.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss (MSE): {loss}')
y_pred = model_mlp.predict(X_test)
y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred), axis=1))[:, -1]
y_test_orig = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], scaled_data.shape[1] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]

# Calculate RMSE
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

rmse_value = rmse(y_test_orig, y_pred)
print(f'RMSE: {rmse_value}')

# Calculate coefficient of correlation
corr, _ = pearsonr(y_test_orig, y_pred)
print(f'Coefficient of Correlation: {corr}')


Test Loss (MSE): 1030.578857421875
RMSE: 1048.9254600569407
Coefficient of Correlation: 0.9221579063898298


In [None]:
# Define LSTM model
model_lstm = Sequential([
    LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1)
])
model_lstm.compile(optimizer='adam', loss='mse')

# Define CNN model
model_cnn = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1)
])
model_cnn.compile(optimizer='adam', loss='mse')

# Define MLP model
model_mlp = Sequential([
    Flatten(input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(64, activation='relu'),
    Dense(1)
])
model_mlp.compile(optimizer='adam', loss='mse')

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv2D, MaxPooling2D, Flatten
from sklearn.preprocessing import MinMaxScaler

# Assuming you have already loaded and preprocessed your data
# X_train, X_test, y_train, y_test should be defined
# If scaling is needed, it should have been done before this step

# Initialize lists to store the results
rmse_per_model = {'LSTM': [], 'CNN': [], 'MLP': []}

# Define a function to calculate RMSE
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

# Scale the data if needed (example with MinMaxScaler)
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Ensure y_train and y_test are reshaped properly if needed
# y_train = y_train.reshape(-1, 1)
# y_test = y_test.reshape(-1, 1)

# Define and compile the LSTM model
model_lstm = Sequential([
    LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1)
])
model_lstm.compile(optimizer='adam', loss='mse')

# Define and compile the CNN model
model_cnn = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1)
])
model_cnn.compile(optimizer='adam', loss='mse')

# Define and compile the MLP model
model_mlp = Sequential([
    Flatten(input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(64, activation='relu'),
    Dense(1)
])
model_mlp.compile(optimizer='adam', loss='mse')

# Dictionary to store models
models = {'LSTM': model_lstm, 'CNN': model_cnn, 'MLP': model_mlp}

# Train and evaluate each model
for model_name, model in models.items():
    # Ensure input shape is correct for each model
    if model_name == 'CNN':
        X_train_input = X_train_scaled.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
        X_test_input = X_test_scaled.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
    else:
        X_train_input = X_train_scaled
        X_test_input = X_test_scaled

    model.fit(X_train_input, y_train, epochs=50, batch_size=32, verbose=1)
    y_pred = model.predict(X_test_input)

    # Inverse transform predictions if needed
    y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], X_test.shape[2] - 1)), y_pred), axis=1))[:, -1]
    y_test_orig = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], X_test.shape[2] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]

    # Calculate and store RMSE
    rmse_value = rmse(y_test_orig, y_pred)
    rmse_per_model[model_name].append(rmse_value)

# Print the RMSE for each model
for model_name, rmse_values in rmse_per_model.items():
    print(f'Model: {model_name}, RMSE: {np.mean(rmse_values)}')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv2D, MaxPooling2D, Flatten
from sklearn.preprocessing import MinMaxScaler

# Assuming you have already loaded and preprocessed your data
# X_train, X_test, y_train, y_test should be defined
# If scaling is needed, it should have been done before this step

# Initialize lists and dictionaries to store the results
rmse_per_model = {'LSTM': [], 'CNN': [], 'MLP': []}
corr_coef_per_model = {'LSTM': [], 'CNN': [], 'MLP': []}

# Define a function to calculate RMSE
def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

# Scale the data if needed (example with MinMaxScaler)
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Ensure y_train and y_test are reshaped properly if needed
# y_train = y_train.reshape(-1, 1)
# y_test = y_test.reshape(-1, 1)

# Define and compile the LSTM model
model_lstm = Sequential([
    LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1)
])
model_lstm.compile(optimizer='adam', loss='mse')

# Define and compile the CNN model
model_cnn = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(X_train.shape[1], X_train.shape[2], 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1)
])
model_cnn.compile(optimizer='adam', loss='mse')

# Define and compile the MLP model
model_mlp = Sequential([
    Flatten(input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(64, activation='relu'),
    Dense(1)
])
model_mlp.compile(optimizer='adam', loss='mse')

# Dictionary to store models
models = {'LSTM': model_lstm, 'CNN': model_cnn, 'MLP': model_mlp}

# Train and evaluate each model
for model_name, model in models.items():
    # Ensure input shape is correct for each model
    if model_name == 'CNN':
        X_train_input = X_train_scaled.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
        X_test_input = X_test_scaled.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
    else:
        X_train_input = X_train_scaled
        X_test_input = X_test_scaled

    model.fit(X_train_input, y_train, epochs=100, batch_size=32, verbose=1)
    y_pred = model.predict(X_test_input)

    # Inverse transform predictions if needed
    y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], X_test.shape[2] - 1)), y_pred), axis=1))[:, -1]
    y_test_orig = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], X_test.shape[2] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]

    # Calculate and store RMSE
    rmse_value = rmse(y_test_orig, y_pred)
    rmse_per_model[model_name].append(rmse_value)

    # Calculate and store coefficient of correlation
    corr_coef = np.corrcoef(y_test_orig, y_pred)[0, 1]
    corr_coef_per_model[model_name].append(corr_coef)

# Print the RMSE and coefficient of correlation for each model
for model_name, rmse_values in rmse_per_model.items():
    print(f'Model: {model_name}, RMSE: {np.mean(rmse_values)}, Correlation Coefficient: {np.mean(corr_coef_per_model[model_name])}')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Initialize lists to store the results
rmse_per_model = {'LSTM': [], 'CNN': [], 'MLP': []}


In [None]:
# prompt: print rmse of per model

# Train and evaluate each model
for model_name, model in {'LSTM': model_lstm, 'CNN': model_cnn, 'MLP': model_mlp}.items():
    model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=1)
    y_pred = model.predict(X_test)
    y_pred = scaler.inverse_transform(np.concatenate((np.zeros((y_pred.shape[0], scaled_data.shape[1] - 1)), y_pred), axis=1))[:, -1]
    y_test = scaler.inverse_transform(np.concatenate((np.zeros((y_test.shape[0], scaled_data.shape[1] - 1)), y_test.reshape(-1, 1)), axis=1))[:, -1]
    rmse_per_model[model_name].append(rmse(y_test, y_pred))

# Print the RMSE for each model
for model_name, rmse_values in rmse_per_model.items():
    print(f'Model: {model_name}, RMSE: {np.mean(rmse_values)}')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: LSTM, RMSE: 5392554.804033854
Model: CNN, RMSE: 176197347.85489184
Model: MLP, RMSE: 13433223480.895035


In [None]:
# Iterate through each fold
for train_index, test_index in kf.split(X_train):
    # Get the training and testing data for the current fold
    X_train_fold, X_test_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

    # Reshape the data if necessary for each model
    X_train_fold_lstm = X_train_fold.reshape((X_train_fold.shape[0], X_train_fold.shape[1], X_train_fold.shape[2], 1))
    X_test_fold_lstm = X_test_fold.reshape((X_test_fold.shape[0], X_test_fold.shape[1], X_test_fold.shape[2], 1))

    # Train and evaluate LSTM model
    model_lstm.fit(X_train_fold_lstm, y_train_fold, epochs=10, batch_size=32, verbose=0)
    y_pred_lstm = model_lstm.predict(X_test_fold_lstm)
    rmse_lstm = np.sqrt(mean_squared_error(y_test_fold, y_pred_lstm))
    rmse_per_model['LSTM'].append(rmse_lstm)

    # Train and evaluate CNN model
    model_cnn.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, verbose=0)
    y_pred_cnn = model_cnn.predict(X_test_fold)
    rmse_cnn = np.sqrt(mean_squared_error(y_test_fold, y_pred_cnn))
    rmse_per_model['CNN'].append(rmse_cnn)



In [None]:
# Define MLP model
model_mlp = Sequential([
    Flatten(input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(64, activation='relu'),
    Dense(1)
])
model_mlp.compile(optimizer='adam', loss='mse')

In [None]:
# Train and evaluate MLP model
model_mlp.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, verbose=0)
y_pred_mlp = model_mlp.predict(X_test_fold)
rmse_mlp = np.sqrt(mean_squared_error(y_test_fold, y_pred_mlp))
rmse_per_model['MLP'].append(rmse_mlp)




In [None]:
# prompt: calculate the rmse for above lstm,cnn and mlp model

# Calculate the RMSE for each model
for model_name, rmse_values in rmse_per_model.items():
    print(f'Model: {model_name}, RMSE: {np.mean(rmse_values)}')


Model: LSTM, RMSE: 174.87824344036292
Model: CNN, RMSE: 5887.361146342601
Model: MLP, RMSE: 577609.9253042161


In [None]:
# prompt: add code of finding rmse value of all this model(lstm ,cnn,mlp)

# Train and evaluate MLP model
model_mlp.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, verbose=0)
y_pred_mlp = model_mlp.predict(X_test_fold)
rmse_mlp = np.sqrt(mean_squared_error(y_test_fold, y_pred_mlp))
rmse_per_model['MLP'].append(rmse_mlp)

# Calculate average RMSE for each model
average_rmse = {model: sum(rmse_values) / len(rmse_values) for model, rmse_values in rmse_per_model.items()}

# Print average RMSE for each model
for model, rmse in average_rmse.items():
    print(f"Average RMSE for {model}: {rmse}")


Average RMSE for LSTM: 0.06097418912534851
Average RMSE for CNN: 0.059910658735339226
Average RMSE for MLP: 0.06692094587191981
