In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
# Append an empty entry to the module search path; Python resolves an empty
# sys.path entry against the current working directory.
# NOTE(review): presumably this is what makes the project-local packages
# imported in the next cell (models, data, enhanced_experiment) resolvable —
# confirm against the directory the notebook is launched from.
sys.path.append('')

In [None]:
import os

from tqdm import tqdm

from models.arima import Arima
from models.baseline import Baseline
from models.lgbm import LGBM
from models.neuralproph import Neuralprophet
from models.timegpt import TimeGPT

from enhanced_experiment import EnhancedTimeSeriesExperiment
from data.data_retriever import Dataretreiver

In [None]:
# Model registry: maps a label to a freshly constructed forecaster instance.
# All five forecasters are instantiated eagerly, in the order listed.
# NOTE(review): the experiment functions visible in this notebook build their
# own model dicts, so this one may only be used by cells not shown here.
models = dict(
    BASELINE=Baseline(),
    LGBM=LGBM(),
    ARIMA=Arima(order=[1, 0, 1], seasonal_order=[1, 1, 1, 24]),
    NEURALPROPHET=Neuralprophet(),
    TimeGPT=TimeGPT(),
)

In [4]:
# Sweep values: each entry is passed, one run at a time, as
# `reduction_num_cols` when the reduced dataset is loaded.
areas_to_include = [
    1,
    8,
    13,
    18,
    25,
    30,
    39,
]

In [None]:
def kmeans():
    """Run the forecasting experiment on 'pca_kmeans'-reduced data.

    For every value ``i`` in the module-level ``areas_to_include`` list:

    1. load data with ``Dataretreiver(reduce='pca_kmeans', reduction_num_cols=i)``
       and take its ``combined`` attribute as the input frame;
    2. run an ``EnhancedTimeSeriesExperiment`` over the five models with
       ``add_all_columns=True`` and ``add_base_columns=False``;
    3. print the MAE, RMSE and elapsed-time summaries;
    4. save the results to
       ``../results/opt_num_areas/only_arima/pca_kmeans_{i}.csv``.

    The same model instances are reused for every ``i``. The function takes
    no arguments and returns ``None``.
    """
    # Define models
    models_kmeans = {
        'BASELINE': Baseline(),
        'LGBM': LGBM(),
        'ARIMA': Arima(order=[1,0,1], seasonal_order=[1,1,1,24]),
        'NEURALPROPHET': Neuralprophet(),
        'TimeGPT': TimeGPT()
    }

    # NOTE(review): the folder is called "only_arima" although five models are
    # run here — confirm this is the intended location.
    output_dir = "../results/opt_num_areas/only_arima"
    # Create the output folder up front so a missing directory cannot cause
    # the save step to fail after a complete experiment run.
    os.makedirs(output_dir, exist_ok=True)

    print("PCA_KMEANS STARTED")
    # The description labels this progress bar, since the notebook runs
    # several of these loops concurrently in separate threads.
    for i in tqdm(areas_to_include, desc="pca_kmeans"):
        data = Dataretreiver(reduce='pca_kmeans', debug=False, reduction_num_cols=i)
        df = data.combined
        # Initialize the experiment
        experiment = EnhancedTimeSeriesExperiment(
            models=models_kmeans,
            target_column='price',
            forecast_horizon=192,
            n_splits=3,
            step_size=192 # 8 days are skipped between each forecast.
        )
        # Run the experiment using all columns of the reduced frame
        experiment.run_feature_group_experiments(
            df=df,
            add_all_columns=True,
            add_base_columns=False,
        )
        results_df_mae = experiment.summarize_feature_group_results(metric='MAE')
        results_df_rmse = experiment.summarize_feature_group_results(metric='RMSE')
        results_df_time = experiment.summarize_feature_group_results(metric='elapsed_time')
        print('='*50)
        print(f"Iterataion {i} results for pca_kmeans")
        print("MAE:")
        print(results_df_mae)
        print("RMSE")
        print(results_df_rmse)
        print("Elapsed time")
        print(results_df_time)
        print('='*50)
        experiment.save_feature_group_results_to_csv(f"{output_dir}/pca_kmeans_{i}.csv")

In [None]:
def pure():
    """Run the forecasting experiment on 'pca_pure'-reduced data.

    For every value ``i`` in the module-level ``areas_to_include`` list:

    1. load data with ``Dataretreiver(reduce='pca_pure', reduction_num_cols=i)``
       and take its ``combined`` attribute as the input frame;
    2. run an ``EnhancedTimeSeriesExperiment`` over the five models with
       ``add_all_columns=True`` and ``add_base_columns=False``;
    3. print the MAE, RMSE and elapsed-time summaries;
    4. save the results to
       ``../results/opt_num_areas/only_arima/pca_pure_{i}.csv``.

    The same model instances are reused for every ``i``. The function takes
    no arguments and returns ``None``.
    """
    print("PCA_PURE STARTED")
    # Define models
    models_pure = {
        'BASELINE': Baseline(),
        'LGBM': LGBM(),
        'ARIMA': Arima(order=[1,0,1], seasonal_order=[1,1,1,24]),
        'NEURALPROPHET': Neuralprophet(),
        'TimeGPT': TimeGPT()
    }

    # NOTE(review): the folder is called "only_arima" although five models are
    # run here — confirm this is the intended location.
    output_dir = "../results/opt_num_areas/only_arima"
    # Create the output folder up front so a missing directory cannot cause
    # the save step to fail after a complete experiment run.
    os.makedirs(output_dir, exist_ok=True)

    # The description labels this progress bar, since the notebook runs
    # several of these loops concurrently in separate threads.
    for i in tqdm(areas_to_include, desc="pca_pure"):
        data = Dataretreiver(reduce='pca_pure', debug=False, reduction_num_cols=i)
        df = data.combined
        # Initialize the experiment
        experiment = EnhancedTimeSeriesExperiment(
            models=models_pure,
            target_column='price',
            forecast_horizon=192,
            n_splits=3,
            step_size=192 # 8 days are skipped between each forecast.
        )
        # Run the experiment using all columns of the reduced frame
        experiment.run_feature_group_experiments(
            df=df,
            add_all_columns=True,
            add_base_columns=False,
        )
        results_df_mae = experiment.summarize_feature_group_results(metric='MAE')
        results_df_rmse = experiment.summarize_feature_group_results(metric='RMSE')
        results_df_time = experiment.summarize_feature_group_results(metric='elapsed_time')
        print('='*50)
        print(f"Iterataion {i} results for pca_pure")
        print("MAE:")
        print(results_df_mae)
        print("RMSE")
        print(results_df_rmse)
        print("Elapsed time")
        print(results_df_time)
        print('='*50)
        experiment.save_feature_group_results_to_csv(f"{output_dir}/pca_pure_{i}.csv")

In [None]:
def pearson():
    """Run the forecasting experiment on 'pearson'-reduced data.

    For every value ``i`` in the module-level ``areas_to_include`` list:

    1. load data with ``Dataretreiver(reduce='pearson', reduction_num_cols=i)``
       and take its ``combined`` attribute as the input frame;
    2. run an ``EnhancedTimeSeriesExperiment`` over the five models with
       ``add_all_columns=True`` and ``add_base_columns=False``;
    3. print the MAE, RMSE and elapsed-time summaries;
    4. save the results to ``../results/opt_num_areas/pearson_{i}.csv``.

    The same model instances are reused for every ``i``. The function takes
    no arguments and returns ``None``.
    """
    print("PEARSON STARTED")
    models_pearson = {
        'BASELINE': Baseline(),
        'LGBM': LGBM(),
        'ARIMA': Arima(order=[1,0,1], seasonal_order=[1,1,1,24]),
        'NEURALPROPHET': Neuralprophet(),
        'TimeGPT': TimeGPT()
    }

    output_dir = "../results/opt_num_areas"
    # Create the output folder up front so a missing directory cannot cause
    # the save step to fail after a complete experiment run.
    os.makedirs(output_dir, exist_ok=True)

    # The description labels this progress bar, since the notebook runs
    # several of these loops concurrently in separate threads.
    for i in tqdm(areas_to_include, desc="pearson"):
        # NOTE(review): debug=True here, whereas the pca_kmeans and pca_pure
        # runs in this notebook pass debug=False — confirm this is intentional.
        data = Dataretreiver(reduce='pearson', debug=True, reduction_num_cols=i)
        df = data.combined
        # Initialize the experiment
        experiment = EnhancedTimeSeriesExperiment(
            models=models_pearson,
            target_column='price',
            forecast_horizon=192,
            n_splits=3,
            step_size=192 # 8 days are skipped between each forecast.
        )
        # Run the experiment using all columns of the reduced frame
        experiment.run_feature_group_experiments(
            df=df,
            add_all_columns=True,
            add_base_columns=False,
        )
        results_df_mae = experiment.summarize_feature_group_results(metric='MAE')
        results_df_rmse = experiment.summarize_feature_group_results(metric='RMSE')
        results_df_time = experiment.summarize_feature_group_results(metric='elapsed_time')
        print('='*50)
        print(f"Iterataion {i} results for pearson")
        print("MAE:")
        print(results_df_mae)
        print("RMSE")
        print(results_df_rmse)
        print("Elapsed time")
        print(results_df_time)
        print('='*50)
        experiment.save_feature_group_results_to_csv(f"{output_dir}/pearson_{i}.csv")

In [None]:
import threading

# Run the three reduction experiments concurrently, one thread per method.
# Naming the threads makes any traceback raised inside one of them
# attributable to the experiment that produced it.
pca_kmeans_thread = threading.Thread(target=kmeans, name="pca_kmeans")
pca_pure_thread = threading.Thread(target=pure, name="pca_pure")
pearson_thread = threading.Thread(target=pearson, name="pearson")

experiment_threads = [pca_kmeans_thread, pca_pure_thread, pearson_thread]

for experiment_thread in experiment_threads:
    experiment_thread.start()

# Wait for every experiment to finish so the cell does not report completion
# while work is still running in the background.
for experiment_thread in experiment_threads:
    experiment_thread.join()