In [None]:
# !pip install --pre pycaret[full]
# !pip install packaging==21.3

In [None]:
# !pip install --upgrade packaging
# !pip install --upgrade pycaret

In [None]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from pycaret.time_series import *
from sktime.forecasting.model_selection import SlidingWindowSplitter

# Suppress every warning category for the rest of the session so the model
# training cells below produce readable output. Note that this also hides
# deprecation warnings raised by the libraries used in this notebook.
warnings.filterwarnings(action="ignore", category=Warning)

In [None]:
# Cleaned per-station CSV files, listed in the order the rest of the notebook
# expects them to appear in df_list.
csv_paths = [
    "./after_clean/cleaned_Songkhla.csv",
    "./after_clean/cleaned_Mueang_Yala.csv",
    "./after_clean/cleaned_Betong_Yala.csv",
    "./after_clean/cleaned_Nakhon_Si_Thammarat.csv",
    "./after_clean/cleaned_Narathiwat.csv",
    "./after_clean/cleaned_Phuket.csv",
    "./after_clean/cleaned_Satun.csv",
    "./after_clean/cleaned_Surat_Thani.csv",
    "./after_clean/cleaned_Trang.csv",
]

# One DataFrame per file, read in list order.
df_list = [pd.read_csv(csv_path) for csv_path in csv_paths]

# Individual names are still bound to the same frame objects held in df_list.
df1, df2, df3, df4, df5, df6, df7, df8, df9 = df_list

In [None]:
# Station / province labels used when naming output files.
# NOTE(review): later cells pair these positionally with df_list via zip(),
# so the order here must match the order in which the CSVs were loaded.
province_list = [
    "Songkhla",
    "Mueang_Yala",
    "Betong_Yala",
    "Nakhon_Si_Thammarat",
    "Narathiwat",
    "Phuket",
    "Satun",
    "Surat_Thani",
    "Trang",
]

# Target columns that are forecast for every province.
variable_list = [
    'TEMP',
    'RH',
]

In [None]:
# Build a sortable timestamp column on every province frame, then benchmark the
# candidate forecasters for each (province, variable) pair.
#
# The variable loop is nested inside the province loop on purpose: when it sat
# at the top level it only ever saw the frame left over from the last iteration,
# so only one province was actually compared.
for i, df in enumerate(df_list):
    # DATE and TIME are concatenated as text and parsed into a single datetime.
    df["DATETIMEDATA"] = pd.to_datetime(df["DATE"] + ' ' + df["TIME"], format="%Y-%m-%d %H:%M:%S")
    # Sorted in place deliberately: later cells reuse the frames held in df_list
    # and rely on the DATETIMEDATA column created here.
    df.sort_values("DATETIMEDATA", inplace=True)

    for var in variable_list:
        if 'DATETIMEDATA' not in df.columns or var not in df.columns:
            print(f"Warning: Required columns missing in DataFrame {i+1} for variable {var}")
            continue

        # Label the output so the comparison tables can be told apart.
        print(f"Comparing models for {province_list[i]} - {var}")

        # Use a dedicated name for the two-column series instead of reusing df1,
        # which would overwrite the Songkhla frame loaded earlier.
        target_series = df[['DATETIMEDATA', var]].set_index('DATETIMEDATA')

        exp = TSForecastingExperiment()
        exp.setup(
            data=target_series,
            target=var,
            coverage=0.90,
            # Sliding-window CV: 130-step training window, 24-step horizon,
            # window advanced by 130 steps between folds.
            fold_strategy=SlidingWindowSplitter(fh=np.arange(1, 25), window_length=130, step_length=130),
        )
        exp.compare_models(sort='rmse')


In [None]:
# PyCaret model IDs that the forecasting cell below trains, tunes and finalizes.
best_models = [
    "dt_cds_dt",
    "huber_cds_dt",
]

In [None]:
# Train, tune and finalize every model in best_models for each
# (variable, province) pair, then export a 168-step forecast (7 days x 24 hours)
# as a CSV with DATE / TIME / <variable> columns.

# Hourly timestamps attached to the 168 forecast rows; identical for every
# province, so they are built once instead of inside the innermost loop.
forecast_times = pd.date_range(start='2024-03-13', end='2024-03-19 23:00:00', freq='H')

for variable in variable_list:
    # to_csv does not create missing parent folders, so make sure it exists.
    output_dir = Path(f'./{variable}_predictions')
    output_dir.mkdir(parents=True, exist_ok=True)

    for df, province in zip(df_list, province_list):
        if "DATETIMEDATA" not in df.columns or variable not in df.columns:
            print(f"Required columns not found in {province} DataFrame.")
            continue

        # Build the series ONCE per (variable, province) under its own name.
        # Rebinding `df` inside the model loop turned DATETIMEDATA into the
        # index, so the column check failed for every model after the first
        # and those models were silently skipped.
        series_df = df[['DATETIMEDATA', variable]].set_index('DATETIMEDATA')

        for model_rank, model_id in enumerate(best_models):
            exp = TSForecastingExperiment()
            exp.setup(data=series_df, target=variable, coverage=0.90,
                      fold_strategy=SlidingWindowSplitter(fh=np.arange(1, 25),
                                                          window_length=130,
                                                          step_length=130))

            model = exp.create_model(model_id)
            tuned_model = exp.tune_model(model)
            # Hold-out predictions of the tuned model; not used further below,
            # kept so they can be inspected from the notebook.
            holdout_pred = exp.predict_model(tuned_model)
            final_model = exp.finalize_model(tuned_model)

            # Forecast 168 steps ahead and name the prediction column after
            # the target variable.
            unseen_predictions = exp.predict_model(final_model, fh=168)
            unseen_predictions = unseen_predictions.rename(columns={'y_pred': variable})
            unseen_predictions['DATETIMEDATA'] = forecast_times

            unseen_predictions['DATE'] = unseen_predictions['DATETIMEDATA'].dt.date
            unseen_predictions['TIME'] = unseen_predictions['DATETIMEDATA'].dt.time

            unseen_predictions = unseen_predictions.drop(columns=['DATETIMEDATA'])

            # Move the target column to position 2 (after DATE and TIME when
            # those are the only other columns).
            cols = list(unseen_predictions.columns)
            cols.remove(variable)
            cols.insert(2, variable)
            unseen_predictions = unseen_predictions[cols]

            # The first model keeps the original "<variable>_<province>.csv"
            # name so existing consumers still find it; every further model
            # gets its ID appended so it does not overwrite that file.
            if model_rank == 0:
                file_name = f'{variable}_{province}.csv'
            else:
                file_name = f'{variable}_{province}_{model_id}.csv'
            unseen_predictions.to_csv(output_dir / file_name, index=False)