In [1]:
# Load the libraries
import pandas as pd
import numpy  as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk
import warnings
import platform
import sys
# Silence warnings for the rest of the session. The simplefilter("ignore")
# call installs a blanket filter for every warning category, so it subsumes
# the DeprecationWarning-only filter registered just before it.
warnings.filterwarnings("ignore",category=DeprecationWarning)
warnings.simplefilter("ignore")

# LSTM: TensorFlow / Keras building blocks used by the model cell
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , LSTM , Flatten
import os
# NOTE(review): this environment variable is assigned after tensorflow has
# already been imported; if it is only read at import time the assignment
# has no effect -- confirm, and move it above the import if needed.
os.environ['TF_METAL_DEVICE'] = '1'

# Making the plots standard
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default size (width, height) applied to every figure created afterwards.
plt.rcParams["figure.figsize"] = [16, 5]

In [2]:
# Empty results table with the schema of the per-iteration metrics.
# NOTE: the evaluation cell rebuilds `results_df` from a list of records,
# so this frame only acts as a schema placeholder until then.
_result_columns = ['Iteration', 'RMSE', 'Denormalized_RMSE']
results_df = pd.DataFrame(columns=_result_columns)

In [3]:
# Train and evaluate one LSTM per train/test split, then export the metrics.

# Imports needed by this cell. They are executed once here instead of once
# per loop iteration. (Ideally they live in the notebook's import cell.)
from numpy import array
from sklearn.metrics import mean_squared_error
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.optimizers import SGD

# Experiment configuration
N_ITERATIONS = 10        # number of train/test pickle pairs to evaluate
N_STEPS = 100            # rows per input window fed to the LSTM
MAX_ROWS = 10000         # only the first MAX_ROWS rows of each split are windowed
EPOCHS = 20
LEARNING_RATE = 0.0001
SEED = 42                # fixed seed so repeated runs are comparable
PICKLE_DIR = 'train_test_pickles'


def split_sequence(sequence, n_steps):
    """Cut `sequence` into every overlapping window of `n_steps` consecutive rows.

    Parameters
    ----------
    sequence : array-like
        Rows to window; sliced along its first axis.
    n_steps : int
        Window length, must be >= 1.

    Returns
    -------
    numpy.ndarray
        Stacked windows; window ``i`` is ``sequence[i:i + n_steps]``. There are
        ``len(sequence) - n_steps + 1`` windows, including the last full one.

    Raises
    ------
    ValueError
        If `n_steps` is < 1 or `sequence` has fewer than `n_steps` rows.
    """
    if n_steps < 1:
        raise ValueError(f'n_steps must be >= 1, got {n_steps}')
    n_windows = len(sequence) - n_steps + 1
    if n_windows <= 0:
        raise ValueError(
            f'sequence has {len(sequence)} rows, need at least {n_steps} to build one window'
        )
    return array([sequence[start:start + n_steps] for start in range(n_windows)])


def build_lstm_model(n_steps, n_features):
    """Build and compile the stacked-LSTM regressor (MSE loss, RMSprop).

    The network takes inputs of shape ``(n_steps, n_features)`` and outputs a
    single value per window.
    """
    # NOTE(review): ReLU inside LSTM layers is unbounded and is a common cause
    # of NaN predictions; consider the default tanh if NaNs show up. See
    # https://stackoverflow.com/questions/66080745/keras-model-predicts-nan
    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=(n_steps, n_features), return_sequences=True))
    model.add(LSTM(128, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer=RMSprop(learning_rate=LEARNING_RATE), loss='mse')
    return model


# One record (dict) per iteration; converted to a DataFrame after the loop.
results_list = []

for iteration in range(1, N_ITERATIONS + 1):
    # Load the train and test pickles for the current iteration.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    train_pickle_path = os.path.join(PICKLE_DIR, f'train_df_{iteration}.pickle')
    test_pickle_path = os.path.join(PICKLE_DIR, f'test_df_{iteration}.pickle')
    train_df = pd.read_pickle(train_pickle_path)
    test_df = pd.read_pickle(test_pickle_path)

    # Statistics of the raw training RTT. The std rescales the RMSE below;
    # the mean is not used in this cell.
    trainrtt_mean = train_df['last_rtt'].mean()
    trainrtt_std = train_df['last_rtt'].std()

    # Single input feature as a column vector, plus the 1-D target.
    X_train = train_df['normalizzed_distance'].values.reshape(-1, 1)
    y_train = train_df['normalizzed_rtt'].values
    X_test = test_df['normalizzed_distance'].values.reshape(-1, 1)
    y_test = test_df['normalizzed_rtt'].values

    # Windows of shape (n_windows, N_STEPS, n_features).
    Xtrain_3d = split_sequence(X_train[:MAX_ROWS], N_STEPS)
    Xtest_3d = split_sequence(X_test[:MAX_ROWS], N_STEPS)
    n_features = Xtrain_3d.shape[2]

    # Window i covers rows i .. i + N_STEPS - 1, so it is paired with the
    # target of its LAST row. Pairing it with row i would ask the model to
    # predict a value N_STEPS - 1 rows before the end of its input.
    first_target = N_STEPS - 1
    ytrain_seq = y_train[first_target:first_target + len(Xtrain_3d)]
    ytest_seq = y_test[first_target:first_target + len(Xtest_3d)]

    # Drop the previous iteration's Keras state so memory does not grow over
    # the loop, then re-seed so every model starts from the same RNG state.
    tf.keras.backend.clear_session()
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

    model = build_lstm_model(N_STEPS, n_features)
    model.fit(Xtrain_3d, ytrain_seq, epochs=EPOCHS, verbose=0)

    y_pred = model.predict(Xtest_3d, verbose=0)

    # RMSE on the normalized scale, then rescaled by the training std.
    lstm_rmse = np.sqrt(mean_squared_error(ytest_seq, y_pred.ravel()))
    lstm_drmse = lstm_rmse * trainrtt_std

    # Append the results to the list
    results_list.append({'Iteration': iteration, 'RMSE': lstm_rmse, 'Denormalized_RMSE': lstm_drmse})


# Convert the list of dictionaries to a DataFrame
results_df = pd.DataFrame(results_list)

# Save the results to a CSV file; create the folder first so a missing
# directory does not discard the finished training runs.
results_folder = 'model_dist_results'
os.makedirs(results_folder, exist_ok=True)
results_csv_path = os.path.join(results_folder, 'evaluation_results_lstm.csv')
results_df.to_csv(results_csv_path, index=False)

print('Evaluation results saved to:', results_csv_path)


Evaluation results saved to: model_dist_results/evaluation_results_lstm.csv
