# Library Import

In [1]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Load Dataset

In [2]:
df = pd.read_csv("AirQuality_cleaned.csv", index_col="timestamp", parse_dates=True)

# Preparation

In [3]:
horizons = [1, 6, 12, 24]

# CO Discretization

In [4]:
CO_class_le = LabelEncoder()
CO_class_le.fit(["low", "mid", "high"])

def get_CO_class(CO_val):
    if CO_val < 1.5:
        return "low"
    elif CO_val < 2.5:
        return "mid"
    else:
        return "high"

def get_CO_classes(CO_vals, prefix):
    CO_class = CO_vals.apply(get_CO_class)
    CO_class_l_encoded = CO_class_le.transform(CO_class)
    CO_class_oh_encoded = pd.get_dummies(
        pd.Series(CO_class),
        prefix = prefix,
        dtype = int,
        drop_first = False
    )
    CO_classes = pd.DataFrame({
        prefix + "_class": CO_class,
        prefix + "_class_l_encoded": CO_class_l_encoded
    })

    return pd.concat([CO_classes, CO_class_oh_encoded], axis = 1)

df_temp = df.copy()
CO_classes = get_CO_classes(df_temp["CO(GT)"], "CO")
df_temp = pd.concat([df_temp, CO_classes], axis = 1)

for h in horizons:
    df_temp[f"CO(GT)_fut_{h}h"] = df_temp[f"CO(GT)_fut_{h}h"].fillna(df_temp[f"CO(GT)_fut_{h}h"].mean())
    CO_fut_h_classes = get_CO_classes(df_temp[f"CO(GT)_fut_{h}h"], f"CO_fut_{h}h")
    df_temp = pd.concat([df_temp, CO_fut_h_classes], axis = 1)

pd.set_option('display.max_columns', None)
df_temp.head(5)

Unnamed: 0_level_0,CO(GT),PT08.S1(CO),C6H6(GT),PT08.S2(NMHC),NOx(GT),PT08.S3(NOx),NO2(GT),PT08.S4(NO2),PT08.S5(O3),T,RH,AH,hour,weekday,month,is_weekend,hour_sin,hour_cos,weekday_sin,weekday_cos,CO(GT)_lag_1h,CO(GT)_lag_6h,CO(GT)_lag_12h,CO(GT)_lag_24h,CO(GT)_ma_6h,CO(GT)_ma_24h,CO(GT)_fut_1h,CO(GT)_fut_6h,CO(GT)_fut_12h,CO(GT)_fut_24h,C6H6(GT)_lag_1h,C6H6(GT)_lag_6h,C6H6(GT)_lag_12h,C6H6(GT)_lag_24h,C6H6(GT)_ma_6h,C6H6(GT)_ma_24h,C6H6(GT)_fut_1h,C6H6(GT)_fut_6h,C6H6(GT)_fut_12h,C6H6(GT)_fut_24h,NOx(GT)_lag_1h,NOx(GT)_lag_6h,NOx(GT)_lag_12h,NOx(GT)_lag_24h,NOx(GT)_ma_6h,NOx(GT)_ma_24h,NOx(GT)_fut_1h,NOx(GT)_fut_6h,NOx(GT)_fut_12h,NOx(GT)_fut_24h,NO2(GT)_lag_1h,NO2(GT)_lag_6h,NO2(GT)_lag_12h,NO2(GT)_lag_24h,NO2(GT)_ma_6h,NO2(GT)_ma_24h,NO2(GT)_fut_1h,NO2(GT)_fut_6h,NO2(GT)_fut_12h,NO2(GT)_fut_24h,CO(GT)_scaled,PT08.S1(CO)_scaled,C6H6(GT)_scaled,PT08.S2(NMHC)_scaled,NOx(GT)_scaled,PT08.S3(NOx)_scaled,NO2(GT)_scaled,PT08.S4(NO2)_scaled,PT08.S5(O3)_scaled,T_scaled,RH_scaled,AH_scaled,hour_scaled,weekday_scaled,month_scaled,is_weekend_scaled,hour_sin_scaled,hour_cos_scaled,weekday_sin_scaled,weekday_cos_scaled,CO(GT)_lag_1h_scaled,CO(GT)_lag_6h_scaled,CO(GT)_lag_12h_scaled,CO(GT)_lag_24h_scaled,CO(GT)_ma_6h_scaled,CO(GT)_ma_24h_scaled,CO(GT)_fut_1h_scaled,CO(GT)_fut_6h_scaled,CO(GT)_fut_12h_scaled,CO(GT)_fut_24h_scaled,C6H6(GT)_lag_1h_scaled,C6H6(GT)_lag_6h_scaled,C6H6(GT)_lag_12h_scaled,C6H6(GT)_lag_24h_scaled,C6H6(GT)_ma_6h_scaled,C6H6(GT)_ma_24h_scaled,C6H6(GT)_fut_1h_scaled,C6H6(GT)_fut_6h_scaled,C6H6(GT)_fut_12h_scaled,C6H6(GT)_fut_24h_scaled,NOx(GT)_lag_1h_scaled,NOx(GT)_lag_6h_scaled,NOx(GT)_lag_12h_scaled,NOx(GT)_lag_24h_scaled,NOx(GT)_ma_6h_scaled,NOx(GT)_ma_24h_scaled,NOx(GT)_fut_1h_scaled,NOx(GT)_fut_6h_scaled,NOx(GT)_fut_12h_scaled,NOx(GT)_fut_24h_scaled,NO2(GT)_lag_1h_scaled,NO2(GT)_lag_6h_scaled,NO2(GT)_lag_12h_scaled,NO2(GT)_lag_24h_scaled,NO2(GT)_ma_6h_scaled,NO2(GT)_ma_24h_scaled,NO2(GT)_fut_1h_scaled,NO2(GT)_fut_6h_scaled,NO2(GT)_fut_12h_scaled,NO2(GT)_fut_24h_scaled,CO_class,CO_class_l_encoded,CO_high,CO_low,CO_mid,CO_fut_1h_class,CO_fut_1h_class_l_encoded,CO_fut_1h_high,CO_fut_1h_low,CO_fut_1h_mid,CO_fut_6h_class,CO_fut_6h_class_l_encoded,CO_fut_6h_high,CO_fut_6h_low,CO_fut_6h_mid,CO_fut_12h_class,CO_fut_12h_class_l_encoded,CO_fut_12h_high,CO_fut_12h_low,CO_fut_12h_mid,CO_fut_24h_class,CO_fut_24h_class_l_encoded,CO_fut_24h_high,CO_fut_24h_low,CO_fut_24h_mid
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1
2004-03-11 18:00:00,4.8,1581.0,20.8,1319.0,281.0,799.0,151.0,2083.0,1409.0,10.3,64.2,0.8065,18,3,3,0,-1.0,-1.83697e-16,0.433884,-0.900969,2.9,1.6,0.7,2.6,2.816667,1.795833,6.9,1.7,0.8,3.9,11.2,5.2,1.6,11.9,11.316667,6.616667,27.4,5.9,1.8,19.3,243.0,104.0,34.0,166.0,209.0,125.5,383.0,122.0,56.0,206.0,135.0,95.0,48.0,113.0,130.5,97.291667,172.0,97.0,71.0,149.0,2.048974,2.266827,1.475857,1.456457,0.234883,-0.140352,0.878955,1.846083,0.994045,-0.923746,0.880844,-0.550329,0.939133,-0.004969,-0.962879,-0.634869,-1.414701,-0.000222,0.619518,-1.2703,0.61277,-0.3695,-1.049909,0.38555,0.683829,-0.409797,3.636207,-0.294244,-0.974776,1.366759,0.162855,-0.657437,-1.150048,0.257652,0.220967,-0.850995,2.378468,-0.562044,-1.1234,1.268526,0.040106,-0.671967,-1.030845,-0.35423,-0.15225,-0.787145,0.75767,-0.580375,-0.919138,-0.150974,0.515025,-0.394551,-1.464958,0.015406,0.480969,-0.524045,1.356741,-0.349768,-0.942114,0.832018,high,0,1,0,0,high,0,1,0,0,mid,2,0,0,1,low,1,0,1,0,high,0,1,0,0
2004-03-11 19:00:00,6.9,1776.0,27.4,1488.0,383.0,702.0,172.0,2333.0,1704.0,9.7,69.3,0.8319,19,3,3,0,-0.965926,0.258819,0.433884,-0.900969,4.8,1.9,1.1,2.0,3.65,2.0,6.1,1.9,1.4,3.7,20.8,7.3,3.2,9.4,14.666667,7.366667,24.0,6.4,4.4,18.2,281.0,146.0,98.0,103.0,248.5,137.166667,351.0,133.0,109.0,202.0,151.0,112.0,82.0,92.0,140.5,100.625,165.0,110.0,104.0,145.0,3.636332,3.182749,2.378559,2.102453,0.757744,-0.525585,1.356812,2.582782,1.748976,-0.993048,1.181303,-0.486164,1.083583,-0.004969,-0.962879,-0.634869,-1.366509,0.365777,0.619518,-1.2703,2.048875,-0.142776,-0.747536,-0.06787,1.467306,-0.124337,3.031527,-0.14311,-0.521413,1.215711,1.475811,-0.370295,-0.931238,-0.084129,0.787164,-0.662784,1.913463,-0.493677,-0.76787,1.118176,0.234887,-0.456693,-0.702756,-0.677039,0.077805,-0.703262,0.593643,-0.524,-0.647529,-0.171464,0.879118,-0.007571,-0.690858,-0.462726,0.746068,-0.407865,1.197464,-0.05404,-0.191274,0.741047,high,0,1,0,0,high,0,1,0,0,mid,2,0,0,1,low,1,0,1,0,high,0,1,0,0
2004-03-11 20:00:00,6.1,1640.0,24.0,1404.0,351.0,743.0,165.0,2191.0,1654.0,9.6,67.8,0.8133,20,3,3,0,-0.866025,0.5,0.433884,-0.900969,6.9,2.9,2.0,2.2,4.183333,2.1625,3.9,1.4,4.4,6.6,27.4,11.5,8.0,9.0,16.75,7.991667,12.8,4.1,17.9,32.6,383.0,207.0,174.0,131.0,272.5,146.333333,240.0,82.0,307.0,340.0,172.0,128.0,112.0,114.0,146.666667,102.75,136.0,91.0,141.0,170.0,3.031624,2.543952,1.913531,1.781366,0.593709,-0.362755,1.197526,2.164337,1.621022,-1.004599,1.092933,-0.533151,1.228033,-0.004969,-0.962879,-0.634869,-1.225218,0.706834,0.619518,-1.2703,3.636148,0.61297,-0.067197,0.08327,1.968731,0.102867,1.368657,-0.520946,1.745406,3.405899,2.378468,0.203989,-0.27481,-0.138814,1.139277,-0.505941,0.381681,-0.808168,1.078151,3.086392,0.75772,-0.144033,-0.313149,-0.533568,0.217585,-0.637353,0.024674,-0.785372,0.367159,0.535428,1.356991,0.356646,-0.007828,0.038174,0.909546,-0.3338,0.537601,-0.486258,0.650577,1.309617,high,0,1,0,0,high,0,1,0,0,low,1,0,1,0,high,0,1,0,0,high,0,1,0,0
2004-03-11 21:00:00,3.9,1313.0,12.8,1076.0,240.0,957.0,136.0,1707.0,1285.0,9.1,64.0,0.7419,21,3,3,0,-0.707107,0.7071068,0.433884,-0.900969,6.1,2.2,2.2,2.2,4.466667,2.233333,1.5,0.8,1.8,4.4,24.0,8.8,9.5,9.2,17.416667,8.141667,4.7,1.9,22.1,20.1,351.0,184.0,129.0,172.0,281.833333,149.166667,94.0,180.0,180.0,274.0,165.0,126.0,101.0,122.0,148.333333,103.333333,85.0,109.0,109.0,149.0,1.368678,1.00802,0.381672,0.527599,0.024713,0.487142,0.537628,0.738089,0.676718,-1.06235,0.869062,-0.713519,1.372483,-0.004969,-0.962879,-0.634869,-1.000457,0.999705,0.619518,-1.2703,3.031472,0.083948,0.08399,0.08327,2.235113,0.201904,-0.445383,-0.97435,-0.21917,1.744377,1.913463,-0.165194,-0.069676,-0.111472,1.251953,-0.468298,-0.726126,-1.108986,1.652468,1.377871,0.593694,-0.261921,-0.543837,-0.323486,0.271944,-0.616981,-0.723699,-0.283128,-0.283676,0.19735,1.1977,0.311119,-0.258272,0.220319,0.95373,-0.313468,-0.622848,-0.076789,-0.07751,0.832018,high,0,1,0,0,mid,2,0,0,1,low,1,0,1,0,mid,2,0,0,1,high,0,1,0,0
2004-03-11 22:00:00,1.5,965.0,4.7,749.0,94.0,1325.0,85.0,1333.0,821.0,8.2,63.4,0.6905,22,3,3,0,-0.5,0.8660254,0.433884,-0.900969,3.9,2.2,1.7,1.6,4.35,2.229167,1.0,1.8,3.1,3.5,12.8,8.3,6.3,6.5,16.816667,8.066667,2.6,1.1,14.0,14.3,240.0,193.0,112.0,131.0,265.333333,147.625,47.0,21.0,187.0,253.0,136.0,131.0,98.0,116.0,140.666667,102.041667,53.0,32.0,122.0,139.0,-0.445444,-0.626549,-0.726189,-0.722345,-0.723695,1.948646,-0.622883,-0.364012,-0.510699,-1.166303,0.833714,-0.843364,1.516933,-0.004969,-0.962879,-0.634869,-0.707542,1.224434,0.619518,-1.2703,1.368615,0.083948,-0.293976,-0.37015,2.125426,0.196078,-0.823308,-0.218677,0.763118,1.064664,0.381681,-0.233561,-0.507295,-0.480596,1.150544,-0.487119,-1.013335,-1.218375,0.544856,0.585117,0.024728,-0.215791,-0.630986,-0.533568,0.175845,-0.628066,-0.964614,-1.097994,-0.247804,0.089779,0.537781,0.424937,-0.326575,0.08371,0.750487,-0.358488,-1.350972,-1.828409,0.218275,0.604591,mid,2,0,0,1,low,1,0,1,0,mid,2,0,0,1,high,0,1,0,0,high,0,1,0,0


# Data Splitting

In [5]:
train = df_temp[df_temp.index.year == 2004]
test = df_temp[df_temp.index.year == 2005]

predictors = [
    "PT08.S1(CO)",
    "PT08.S2(NMHC)",
    "PT08.S3(NOx)",
    "PT08.S4(NO2)",
    "PT08.S5(O3)",
    "T",
    "RH",
    "AH",
    "hour",
    "weekday",
    "month",
    "CO(GT)_lag_1h",
    "CO(GT)_lag_6h",
    "CO(GT)_lag_12h",
    "CO(GT)_lag_24h",
    # "CO_lag_1",
    # "CO_lag_6",
    # "CO_lag_12",
    # "CO_lag_24",
    "CO(GT)_ma_1h",
    "CO(GT)_ma_6h",
    "CO(GT)_ma_12h",
    "CO(GT)_ma_24h",
    # "CO_roll_mean_1",
    # "CO_roll_mean_6",
    # "CO_roll_mean_12",
    # "CO_roll_mean_24"
]

scaled_predictors = []

for predictor in predictors:
    scaled_predictors.append(predictor + "_scaled")

# Classification

In [6]:
random_state = 23

def predict_CO_class_using_decision_tree(params, train_df, test_df, h):
    classifier = DecisionTreeClassifier(
        max_depth = params["max_depth"],
        min_samples_leaf = params["min_samples_leaf"],
        max_features = params["max_features"],
        random_state = random_state
    )
    classifier.fit(train_df[scaled_predictors], train_df[f"CO_fut_{h}h_class_l_encoded"])
    return classifier.predict(test_df[scaled_predictors])

def predict_CO_class_using_support_vector_machine(params, train_df, test_df, h):
    classifier = SVC(
        kernel = "rbf",
        C = params["C"],
        gamma = params["gamma"],
        random_state = random_state
    )
    classifier.fit(train_df[scaled_predictors], train_df[f"CO_fut_{h}h_class_l_encoded"])
    return classifier.predict(test_df[scaled_predictors])

def predict_CO_class_using_logistic_regression(params, train_df, test_df, h):
    low_classifier = LogisticRegression(
        penalty = "l2",
        C = params["C"],
        solver = "lbfgs",
        max_iter = 1000,
        random_state = random_state
    )
    low_classifier.fit(train_df[scaled_predictors], train_df[f"CO_fut_{h}h_low"])

    mid_classifier = LogisticRegression(
        penalty = "l2",
        C = params["C"],
        solver = "lbfgs",
        max_iter = 1000,
        random_state = random_state
    )
    mid_classifier.fit(train_df[scaled_predictors], train_df[f"CO_fut_{h}h_mid"])

    high_classifier = LogisticRegression(
        penalty = "l2",
        C = params["C"],
        solver = "lbfgs",
        max_iter = 1000,
        random_state = random_state
    )
    high_classifier.fit(train_df[scaled_predictors], train_df[f"CO_fut_{h}h_high"])

    prob_matrix = np.zeros((len(test_df), 3))
    CO_class_l_encoded = CO_class_le.transform(["low", "mid", "high"])
    prob_matrix[:, CO_class_l_encoded[0]] = low_classifier.predict_proba(test_df[scaled_predictors])[:, 1]
    prob_matrix[:, CO_class_l_encoded[1]] = mid_classifier.predict_proba(test_df[scaled_predictors])[:, 1]
    prob_matrix[:, CO_class_l_encoded[2]] = high_classifier.predict_proba(test_df[scaled_predictors])[:, 1]

    return np.argmax(prob_matrix, axis = 1)

def predict_CO_class_using_naive_baseline(params, train_df, test_df, h):
    return test_df["CO_class_l_encoded"]

predictor_by_clf_name = {
    "Decision Tree": predict_CO_class_using_decision_tree,
    "Support Vector Machine": predict_CO_class_using_support_vector_machine,
    "Logistic Regression": predict_CO_class_using_logistic_regression,
    "Naive Baseline": predict_CO_class_using_naive_baseline
}

params_grids_by_clf_name = {
    "Decision Tree": {
        "max_depth": [1, 3, 5, 7, 9, 11, 13, 15, None],
        "min_samples_leaf": [1, 5, 10, 15, 20, 25],
        "max_features": [None, "sqrt", "log2"],
    },
    "Support Vector Machine": {
        "C": [0.1, 1, 10, 100, 1000],
        "gamma": ["scale", 0.1, 0.01, 0.001, 0.0001],
    },
    "Logistic Regression": {
        "C": [0.01, 0.1, 1, 10, 100, 1000]
    },
    "Naive Baseline": {}
    
}

results = pd.DataFrame({
    "Horizon": horizons,
    "Decision Tree accuracy": [0] * len(horizons),
    "Support Vector Machine accuracy": [0] * len(horizons),
    "Logistic Regression accuracy": [0] * len(horizons),
    "Naive Baseline accuracy": [0] * len(horizons)
})

for h in horizons:
    tuning_train_ratio = 0.8
    tuning_train_test_split = int(len(train) * tuning_train_ratio)
    train_train = train.iloc[:tuning_train_test_split]
    train_test = train.iloc[tuning_train_test_split:]
    y_train_test = train_test[f"CO_fut_{h}h_class_l_encoded"]

    eff_test = test.iloc[:(-1 * h)]
    y_test = eff_test[f"CO_fut_{h}h_class_l_encoded"]

    for clf_name, predict_CO_class in predictor_by_clf_name.items():
        # Hyperparameters tuning
        params_grids = params_grids_by_clf_name[clf_name]
        param_keys = list(params_grids.keys())
        if len(param_keys) > 0:
            params_list = [params_grids[param_keys[0]]]
        else:
            param_list = []
        for i in range(1, len(param_keys)):
            result = []
            next_list = params_grids[param_keys[i]]
            for i in range(0, len(params_list)):
                for j in range(0, len(next_list)):
                    if type(params_list[i]) != list:         
                        params_list[i] = [params_list[i]]
                    temp = [num for num in params_list[i]]
                    temp.append(next_list[j])
                    result.append(temp)
            params_list = result
        params_list = [dict(zip(param_keys, row_list)) for row_list in params_list]
        
        best_params = None
        best_tuning_acc = -np.inf

        print(f"Tuning hyperparameters of {clf_name} for horizon {h}...")
        for curr_params in params_list:
            print("curr_params:", curr_params)
            y_pred_train_test = predict_CO_class(curr_params, train_train, train_test, h)
            curr_tuning_acc = accuracy_score(y_train_test,y_pred_train_test)
            print(clf_name, "with params:", curr_params, f"=> accuracy = {curr_tuning_acc:.4f}")

            if curr_tuning_acc > best_tuning_acc:
                best_tuning_acc = curr_tuning_acc
                best_params = curr_params

        print(f"For horizon: {h}, best params on validation:", best_params, f"with accuracy: {best_tuning_acc:.4f}")
        
        # Train and test the model with the tuned hyperparameters
        print(f"{clf_name} model training for horizon: {h}...")
        y_pred_test = predict_CO_class(best_params, train, eff_test, h)
        test_acc = accuracy_score(y_test, y_pred_test)
        
        print(f"Finished using {clf_name} model for horizon: {h}! Accuracy: {test_acc:.4f}\n\n")

        results.loc[results["Horizon"] == h, clf_name + " accuracy"] = test_acc
        
results

Tuning hyperparameters of Decision Tree for horizon 1...
curr_params: {'max_depth': 1, 'min_samples_leaf': 3, 'max_features': 5}


KeyError: "['CO(GT)_ma_1h_scaled', 'CO(GT)_ma_12h_scaled'] not in index"

# Accuracy Plot

In [None]:
ax = results.plot(kind = "line", x = "Horizon", figsize=(8, 5))
ax.set_xlabel('Horizon (t + x hours)')
ax.set_ylabel('Accuracy')
ax.set_title('Classification Accuracy by Horizon')
plt.savefig("classification-accuracy.png")
plt.show()
plt.close()

# Export Report

In [None]:
print("\n=== Exporting Data ===")

# Export to CSV
try:
    csv_filename = "co_class_results.csv"
    results.to_csv(csv_filename, index = False)
    print(f"Successfully saved results to '{csv_filename}'")
except Exception as e:
    print(f"Error saving CSV: {e}")

# Export to Excel
try:
    xlsx_filename = "co_class_results.xlsx"
    results.to_excel(xlsx_filename, index = False)
    print(f"Successfully saved results to '{xlsx_filename}'")
except ImportError:
    print(f"Could not save to Excel ({xlsx_filename}). Missing dependency.\nPlease run: pip install openpyxl")
except Exception as e:
    print(f"Error saving Excel: {e}")