In [1]:
import os 
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
import numpy as np 

from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
# Default-configured XGBoost regressor, passed to GridSearchCV as the
# estimator to tune.
regressor = XGBRegressor()

In [2]:
# Empty placeholder frame carrying the three result columns. It is rebound
# once the evaluation cell builds `results_df` from its list of records.
result_columns = ['Iteration', 'RMSE', 'Denormalized_RMSE']
results_df = pd.DataFrame(columns=result_columns)

In [3]:
# Tune, fit and score one XGBoost regressor per pre-built train/test split,
# collecting one record of metrics per split, then write all records to CSV.
results_list = []

# Hyper-parameter grid, searched independently for every split.
param_grid = {"max_depth":    [4, 5, 6],
              "n_estimators": [50, 60, 70],
              "learning_rate": [0.01, 0.015]}

# Columns excluded from the feature matrix: the target itself, the raw value
# it is derived from, and the 'date' / 'src_names' columns.
cols_dropped = ['date', 'last_rtt', 'normalizzed_rtt', 'src_names']

for iteration in range(30):
    # Locate the train and test pickles for the current split.
    train_pickle_path = os.path.join('train_test_pickles', f'train_df_{iteration}.pickle')
    test_pickle_path = os.path.join('train_test_pickles', f'test_df_{iteration}.pickle')

    # NOTE(review): unpickling executes arbitrary code; only load pickle files
    # that come from a trusted source.
    train_df = pd.read_pickle(train_pickle_path)
    test_df = pd.read_pickle(test_pickle_path)

    # Statistics of the raw target on the training split. Only the std is
    # used below (to rescale the error); the mean is kept for reference.
    trainrtt_mean = train_df['last_rtt'].mean()
    trainrtt_std = train_df['last_rtt'].std()

    X_train = train_df.drop(columns=cols_dropped)
    y_train = train_df['normalizzed_rtt']

    X_test = test_df.drop(columns=cols_dropped)
    y_test = test_df['normalizzed_rtt']

    # Try every combination in `param_grid`, starting from a fresh estimator
    # so that no state leaks from the previous split into this search.
    # No `scoring` is passed, so candidates are ranked by the estimator's
    # default score (R^2 for scikit-learn regressors), not by RMSE.
    base_model = XGBRegressor(objective='reg:squarederror')
    grid_search = GridSearchCV(base_model, param_grid).fit(X_train, y_train)

    # With the default refit=True, GridSearchCV has already retrained the best
    # parameter combination on the whole training split, so a second manual
    # fit is unnecessary.
    regressor = grid_search.best_estimator_

    # Make predictions on the test set.
    y_pred = regressor.predict(X_test)

    # RMSE on the normalized target, and the same error rescaled by the
    # training std.
    # NOTE(review): assumes 'normalizzed_rtt' is 'last_rtt' divided by this
    # same training std (z-score style) - TODO confirm; only then is
    # `drmse_lm` an RMSE in the units of 'last_rtt'.
    rmse_lm = np.sqrt(mean_squared_error(y_test, y_pred))
    drmse_lm = rmse_lm * trainrtt_std

    # Append this split's metrics to the list of records.
    results_list.append({'Iteration': iteration, 'RMSE': rmse_lm, 'Denormalized_RMSE': drmse_lm})


# Convert the list of dictionaries to a DataFrame (one row per split).
results_df = pd.DataFrame(results_list)

# Save the results to a CSV file, creating the output folder if it is missing
# so the write does not fail on a fresh checkout.
results_folder = 'model_results'
os.makedirs(results_folder, exist_ok=True)
results_csv_path = os.path.join(results_folder, 'xgboost_evaluation_results.csv')
results_df.to_csv(results_csv_path, index=False)

print('Evaluation results saved to:', results_csv_path)


Evaluation results saved to: model_results/xgboost_evaluation_results.csv
