# Step 1: Imports, metric helper and model-training functions (train/test data is loaded in the MAIN section)


# In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
import lightgbm as lgb
from sklearn.svm import SVR
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
import keras_tuner as kt

# Metrics calculation
def calculate_metrics(y_true, y_pred):
    """Compute MAE, RMSE and MAPE between observed and predicted values.

    Both inputs are converted to flat float arrays first, so a pandas Series
    is compared element by element (by position) rather than by index label.

    Args:
        y_true: Observed target values (1-D array-like, e.g. a pandas Series).
        y_pred: Predicted values with the same number of elements as
            ``y_true``.

    Returns:
        dict with keys ``"MAE"``, ``"RMSE"`` and ``"MAPE"``. MAPE is expressed
        in percent and averaged over the samples whose true value is non-zero;
        it is ``nan`` when every true value is zero.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {actual.size} and {predicted.size}"
        )
    if actual.size == 0:
        raise ValueError("y_true and y_pred must not be empty")

    errors = actual - predicted
    mae = float(np.mean(np.abs(errors)))
    # RMSE is computed directly: mean_squared_error(..., squared=False) was
    # deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = float(np.sqrt(np.mean(errors ** 2)))

    # A zero target makes the percentage error undefined (division by zero),
    # so those samples are left out of the MAPE average.
    nonzero = actual != 0
    if nonzero.any():
        mape = float(np.mean(np.abs(errors[nonzero] / actual[nonzero])) * 100)
    else:
        mape = float("nan")
    return {"MAE": mae, "RMSE": rmse, "MAPE": mape}

# XGBoost Training
def train_xgboost(X_train, y_train, X_test, y_test, dates_train, dates_test):
    """Fit a 50-tree XGBoost regressor and score it on both splits.

    Args:
        X_train, y_train: Features and target used for fitting.
        X_test, y_test: Features and target used for the held-out score.
        dates_train, dates_test: Not used by this function; present so every
            trainer in this file shares one signature.

    Returns:
        Tuple ``(train_metrics, test_metrics, fitted_model)`` where the metric
        entries are the dicts produced by ``calculate_metrics``.
    """
    regressor = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=50)
    regressor.fit(X_train, y_train)

    # Predict on both splits first (train, then test), then score them.
    fitted_train, fitted_test = (
        regressor.predict(features) for features in (X_train, X_test)
    )
    train_scores = calculate_metrics(y_train, fitted_train)
    test_scores = calculate_metrics(y_test, fitted_test)
    return train_scores, test_scores, regressor

# Random Forest Training
def train_random_forest(X_train, y_train, X_test, y_test, dates_train, dates_test):
    """Fit a 50-tree random forest regressor and score it on both splits.

    Args:
        X_train, y_train: Features and target used for fitting.
        X_test, y_test: Features and target used for the held-out score.
        dates_train, dates_test: Not used by this function; present so every
            trainer in this file shares one signature.

    Returns:
        Tuple ``(train_metrics, test_metrics, fitted_model)`` where the metric
        entries are the dicts produced by ``calculate_metrics``.
    """
    forest = RandomForestRegressor(n_estimators=50)
    forest.fit(X_train, y_train)

    in_sample = forest.predict(X_train)
    out_of_sample = forest.predict(X_test)

    return (
        calculate_metrics(y_train, in_sample),
        calculate_metrics(y_test, out_of_sample),
        forest,
    )

# LightGBM Training
def train_lightgbm(X_train, y_train, X_test, y_test, dates_train, dates_test):
    """Fit a LightGBM regressor with library defaults and score both splits.

    Args:
        X_train, y_train: Features and target used for fitting.
        X_test, y_test: Features and target used for the held-out score.
        dates_train, dates_test: Not used by this function; present so every
            trainer in this file shares one signature.

    Returns:
        Tuple ``(train_metrics, test_metrics, fitted_model)`` where the metric
        entries are the dicts produced by ``calculate_metrics``.
    """
    booster = lgb.LGBMRegressor()
    booster.fit(X_train, y_train)

    predictions = {
        "train": booster.predict(X_train),
        "test": booster.predict(X_test),
    }

    train_scores = calculate_metrics(y_train, predictions["train"])
    test_scores = calculate_metrics(y_test, predictions["test"])
    return train_scores, test_scores, booster

# SVR Training
def train_svr(X_train, y_train, X_test, y_test, dates_train, dates_test):
    """Grid-search an SVR with 3-fold CV and score the best estimator.

    The grid covers ``C``, ``kernel`` and ``gamma``; candidates are ranked by
    negative mean squared error.

    Args:
        X_train, y_train: Features and target used for the grid search.
        X_test, y_test: Features and target used for the held-out score.
        dates_train, dates_test: Not used by this function; present so every
            trainer in this file shares one signature.

    Returns:
        Tuple ``(train_metrics, test_metrics, best_estimator)`` where the
        metric entries are the dicts produced by ``calculate_metrics``.
    """
    search_space = {
        'C': [0.1, 1, 0.01],
        'kernel': ['linear', 'rbf', 'poly'],
        'gamma': ['scale', 'auto'],
    }
    searcher = GridSearchCV(
        estimator=SVR(),
        param_grid=search_space,
        scoring='neg_mean_squared_error',
        cv=3,
    )
    searcher.fit(X_train, y_train)

    tuned_svr = searcher.best_estimator_
    fitted_train = tuned_svr.predict(X_train)
    fitted_test = tuned_svr.predict(X_test)

    train_scores = calculate_metrics(y_train, fitted_train)
    test_scores = calculate_metrics(y_test, fitted_test)
    return train_scores, test_scores, tuned_svr

# Deep Learning Model Builder
def build_deep_learning_model(hp):
    """Build and compile a one-hidden-layer regression network.

    Args:
        hp: Hyperparameter container exposing ``Int`` and ``Float``; in this
            file it is supplied by the KerasTuner tuner that receives this
            function as its model builder.

    Returns:
        A compiled ``Sequential`` model: Dense(relu) -> Dropout -> Dense(1),
        optimised with Adam on mean squared error.
    """
    # Tunable knobs: hidden width in 32..256 (step 32), dropout in 0.1..0.5.
    hidden_units = hp.Int('units', min_value=32, max_value=256, step=32)
    drop_fraction = hp.Float('dropout_rate', min_value=0.1, max_value=0.5, step=0.1)

    network = Sequential()
    for layer in (
        Dense(units=hidden_units, activation='relu'),
        Dropout(rate=drop_fraction),
        Dense(1),
    ):
        network.add(layer)

    network.compile(optimizer='adam', loss='mse')
    return network

# Fine-Tuned Deep Learning Training
def train_fine_tuned_deep_learning(X_train, y_train, X_test, y_test, dates_train, dates_test):
    """Tune a small dense network with Bayesian optimisation, refit and score it.

    Runs up to 20 KerasTuner trials (50 epochs each, 20% of the training data
    held out for validation), takes the best model, continues training it for
    up to 150 epochs with early stopping on ``val_loss``, and scores it on
    both splits.

    Args:
        X_train, y_train: Features and target used for tuning and fitting.
        X_test, y_test: Features and target used for the held-out score.
        dates_train, dates_test: Not used by this function; present so every
            trainer in this file shares one signature.

    Returns:
        Tuple ``(train_metrics, test_metrics, best_model)`` where the metric
        entries are the dicts produced by ``calculate_metrics``.
    """
    tuner = kt.BayesianOptimization(
        build_deep_learning_model,
        objective='val_loss',
        max_trials=20,
        directory='kt_dir',
        project_name='fine_tuned_deep_learning',
        # This function is called once per series with the same directory and
        # project name. With the default overwrite=False, KerasTuner reloads
        # the trials stored by an earlier call instead of searching on the
        # current data, so later series would reuse stale hyperparameters and
        # weights. Start every call from a clean project.
        overwrite=True,
    )
    tuner.search(X_train, y_train, epochs=50, validation_split=0.2, verbose=0)

    best_model = tuner.get_best_models(num_models=1)[0]
    # Continue training the best trial's model; early stopping restores the
    # weights of the epoch with the lowest validation loss.
    best_model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2, verbose=0,
                   callbacks=[EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)])

    # Flatten the predictions to 1-D before they are compared with the targets.
    y_pred_train = best_model.predict(X_train).flatten()
    y_pred_test = best_model.predict(X_test).flatten()

    metrics_train = calculate_metrics(y_train, y_pred_train)
    metrics_test = calculate_metrics(y_test, y_pred_test)
    return metrics_train, metrics_test, best_model

# Model Training Function
def train_models(X_train, y_train, X_test, y_test, dates_train, dates_test, i):
    """Train every model family on one series and tabulate its metrics.

    Runs, in order, XGBoost, Random Forest, LightGBM, SVR and the fine-tuned
    deep learning model, printing a progress line before each and the two
    result tables at the end.

    Args:
        X_train, y_train: Training features and target.
        X_test, y_test: Test features and target.
        dates_train, dates_test: Forwarded unchanged to each trainer.
        i: Series number, written to the ``Series_Number`` column.

    Returns:
        Tuple ``(df_results_train, df_results_test)``: one row per model, the
        model name in the ``index`` column, one column per metric, plus
        ``Series_Number``.
    """
    # Display name -> trainer, in the order the models are run and reported.
    trainers = (
        ("XGBoost", train_xgboost),
        ("Random Forest", train_random_forest),
        ("LightGBM", train_lightgbm),
        ("SVR", train_svr),
        ("Fine-Tuned Deep Learning", train_fine_tuned_deep_learning),
    )

    train_scores = {}
    test_scores = {}
    for label, trainer in trainers:
        print(f"Training {label}...")
        # The fitted model (third return value) is not needed here.
        train_scores[label], test_scores[label], _ = trainer(
            X_train, y_train, X_test, y_test, dates_train, dates_test
        )

    # One row per model; reset_index moves the model name into a column.
    train_table = pd.DataFrame.from_dict(train_scores, orient='index').reset_index()
    test_table = pd.DataFrame.from_dict(test_scores, orient='index').reset_index()

    train_table["Series_Number"] = i
    test_table["Series_Number"] = i

    for heading, table in (
        ("Results for training data:", train_table),
        ("Results for testing data:", test_table),
    ):
        print(heading)
        print(table)
    return train_table, test_table

## MAIN

# In [None]:
import pandas as pd

# Inclusive range of series numbers to process.
start_series = 41
end_series = 45

# Per-series metric tables, gathered here and concatenated once after the loop.
all_results_train, all_results_test = [], []

for i in range(start_series, end_series + 1):

    train_path = f"C:\\Users\\Siwar\\Research_project\\First approach\\Datasets\\3_Data_ready_for_modelling\\Train\\serie_{i}train.csv"
    test_path = f"C:\\Users\\Siwar\\Research_project\\First approach\\Datasets\\3_Data_ready_for_modelling\\Test\\serie_{i}test.csv"

    # Both files are read with the "Date" column as the index.
    train_data = pd.read_csv(train_path, index_col="Date")
    test_data = pd.read_csv(test_path, index_col="Date")

    # Column 'y' is the target; every other column is a feature.
    X_train, y_train = train_data.drop(columns=['y']), train_data['y']
    X_test, y_test = test_data.drop(columns=['y']), test_data['y']

    # Fit all models on this series; the Date indexes are passed as the date arguments.
    results_train, results_test = train_models(
        X_train, y_train, X_test, y_test, train_data.index, test_data.index, i
    )

    all_results_train.append(results_train)
    all_results_test.append(results_test)

# Stack the per-series tables into one frame per split.
final_results_train = pd.concat(all_results_train, ignore_index=True)
final_results_test = pd.concat(all_results_test, ignore_index=True)

# Output locations; the file names encode the processed series range.
train_results_path = f"C:\\Users\\Siwar\\Research_project\\First approach\\Datasets\\4_Results_metrics\\all_results_train_series_{start_series}_to_{end_series}.csv"
test_results_path = f"C:\\Users\\Siwar\\Research_project\\First approach\\Datasets\\4_Results_metrics\\all_results_test_series_{start_series}_to_{end_series}.csv"

final_results_train.to_csv(train_results_path, index=False)
final_results_test.to_csv(test_results_path, index=False)

print(f"Results saved for series {start_series} to {end_series}.")
