# Random Search

In [76]:
#Import the libraries
import numpy as np
import pandas as pd

import pickle

import random
import warnings
warnings.filterwarnings("ignore")

import itertools
from tqdm import tqdm

In [77]:
train_data = pickle.load(open('clean_dataset_2022/train_set.bin', 'rb'))

test_data = pickle.load(open('clean_dataset_2022/test_set.bin', 'rb'))

In [106]:
province = ['BKK','CNX','KKC','RAY','SARA','SURAT']
exog_columns = ['Temp', 'WindSpeed', 'WindDir']

In [79]:
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

from matplotlib.pyplot import figure
import matplotlib.pyplot as plt

In [80]:
from importlib import reload

from custom_function import minimalSARIMAX

reload(minimalSARIMAX)

from custom_function.minimalSARIMAX import MinimalSARIMAX

In [81]:
p = d = q = range(0, 3)
pdq = list(itertools.product(p, d, q))

In [82]:
P = Q = D = range(0, 2) ; S = 1461
pdqs = [(x[0], x[1], x[2], S) for x in list(itertools.product(P, D, Q))]

In [83]:
# Define function
def sarimax_randomsearch(y_train, y_test, pdq, PDQs, y_val = None, x_train = None, x_test = None, x_val = None, model_exog=None, verbose=0, n_rand=10):
    '''
    Input: 
        y_train: training data
        y_test: test data
        pdq : ARIMA combinations 
        pdqs : seasonal ARIMA combinations 
        x_train: exogenous training data
        x_test: exogenous test data

    Return:
        Returns dataframe of parameter combinations with the least RMSE
    '''

    ans_df = pd.DataFrame(columns=['pdq', 'pdqs', 'rmse'])

    save_comb = set()
    
    i = 0
    while i!=n_rand:
        comb = random.sample(pdq, 1)[0]
        combs = random.sample(PDQs, 1)[0]

        if (comb+combs) in save_comb:
            continue
        
        save_comb.add(comb+combs)
        
        p, d, q = comb[0], comb[1], comb[2]
        P, D, Q = combs[0], combs[1], combs[2]
        if (d <= 1) and (D <= 1) and (P <= 1) and (Q <= 1):  
            model = MinimalSARIMAX(y_train, comb, combs, exog=x_train)
            model.fit(lr=1e-6, lr_decay=0.999, verbose=0) 

            if (y_val is None):
                y_pred, err = model.predict(y_test, y_exog=x_test, verbose=verbose)
                rmse = model.scoring(y_pred, y_test)

            else:
                Result = model.predict_step(y_val, y_test, val_X_exog=x_val, y_exog=x_test,
                                            model_exog=model_exog, lr=np.array([1e-6, 1e-6, 1e-6, 5e-7]), lr_decay=0.999875,
                                            learn=True, verbose=verbose, verbose_rmse=0)

                _, y_pred_sav, _ = Result
                
                rmse = model.scoring(y_pred_sav.iloc[:,[1]], y_pred_sav.iloc[:,[2]])
            
            print(f"ITER#{i} {comb} {combs} {rmse}") ; i=i+1
            
            ans_df = ans_df.append({'pdq':comb, 'pdqs':combs, 'rmse':rmse}, ignore_index=True)
        else: continue

    # Sort and return a combination with the lowest RMSE
    ans_df = ans_df.sort_values(by=['rmse'],ascending=True)
    
    return ans_df

In [107]:
ratio = 0.7

pm_train_bkk = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_bkk = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_bkk = test_data[province[0]][['PM25']]
exo_train_bkk = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_bkk['WindDirSin'] = pd.DataFrame(np.sin(exo_train_bkk['WindDir'])*10 + 10)
exo_train_bkk['WindDirCos'] = pd.DataFrame(np.cos(exo_train_bkk['WindDir'])*10 + 10)
exo_valid_bkk = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_bkk['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_bkk['WindDir'])*10 + 10)
exo_valid_bkk['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_bkk['WindDir'])*10 + 10)
exo_test_bkk = test_data[province[0]][exog_columns]
exo_test_bkk['WindDir'] = pd.to_numeric(exo_test_bkk['WindDir'], errors='coerce')
exo_test_bkk['WindDirSin'] = pd.DataFrame(np.sin(exo_test_bkk['WindDir'])*10 + 10)
exo_test_bkk['WindDirCos'] = pd.DataFrame(np.cos(exo_test_bkk['WindDir'])*10 + 10)


pm_train_cnx = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_cnx = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_cnx = test_data[province[0]][['PM25']]
exo_train_cnx = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_cnx['WindDirSin'] = pd.DataFrame(np.sin(exo_train_cnx['WindDir'])*10 + 10)
exo_train_cnx['WindDirCos'] = pd.DataFrame(np.cos(exo_train_cnx['WindDir'])*10 + 10)
exo_valid_cnx = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_cnx['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_cnx['WindDir'])*10 + 10)
exo_valid_cnx['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_cnx['WindDir'])*10 + 10)
exo_test_cnx = test_data[province[0]][exog_columns]
exo_test_cnx['WindDir'] = pd.to_numeric(exo_test_cnx['WindDir'], errors='coerce')
exo_test_cnx['WindDirSin'] = pd.DataFrame(np.sin(exo_test_cnx['WindDir'])*10 + 10)
exo_test_cnx['WindDirCos'] = pd.DataFrame(np.cos(exo_test_cnx['WindDir'])*10 + 10)


pm_train_kkc = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_kkc = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_kkc = test_data[province[0]][['PM25']]
exo_train_kkc = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_kkc['WindDirSin'] = pd.DataFrame(np.sin(exo_train_kkc['WindDir'])*10 + 10)
exo_train_kkc['WindDirCos'] = pd.DataFrame(np.cos(exo_train_kkc['WindDir'])*10 + 10)
exo_valid_kkc = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_kkc['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_kkc['WindDir'])*10 + 10)
exo_valid_kkc['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_kkc['WindDir'])*10 + 10)
exo_test_kkc = test_data[province[0]][exog_columns]
exo_test_kkc['WindDir'] = pd.to_numeric(exo_test_kkc['WindDir'], errors='coerce')
exo_test_kkc['WindDirSin'] = pd.DataFrame(np.sin(exo_test_kkc['WindDir'])*10 + 10)
exo_test_kkc['WindDirCos'] = pd.DataFrame(np.cos(exo_test_kkc['WindDir'])*10 + 10)


pm_train_ray = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_ray = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_ray = test_data[province[0]][['PM25']]
exo_train_ray = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_ray['WindDirSin'] = pd.DataFrame(np.sin(exo_train_ray['WindDir'])*10 + 10)
exo_train_ray['WindDirCos'] = pd.DataFrame(np.cos(exo_train_ray['WindDir'])*10 + 10)
exo_valid_ray = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_ray['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_ray['WindDir'])*10 + 10)
exo_valid_ray['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_ray['WindDir'])*10 + 10)
exo_test_ray = test_data[province[0]][exog_columns]
exo_test_ray['WindDir'] = pd.to_numeric(exo_test_ray['WindDir'], errors='coerce')
exo_test_ray['WindDirSin'] = pd.DataFrame(np.sin(exo_test_ray['WindDir'])*10 + 10)
exo_test_ray['WindDirCos'] = pd.DataFrame(np.cos(exo_test_ray['WindDir'])*10 + 10)


pm_train_sara = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_sara = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_sara = test_data[province[0]][['PM25']]
exo_train_sara = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_sara['WindDirSin'] = pd.DataFrame(np.sin(exo_train_sara['WindDir'])*10 + 10)
exo_train_sara['WindDirCos'] = pd.DataFrame(np.cos(exo_train_sara['WindDir'])*10 + 10)
exo_valid_sara = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_sara['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_sara['WindDir'])*10 + 10)
exo_valid_sara['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_sara['WindDir'])*10 + 10)
exo_test_sara = test_data[province[0]][exog_columns]
exo_test_sara['WindDir'] = pd.to_numeric(exo_test_sara['WindDir'], errors='coerce')
exo_test_sara['WindDirSin'] = pd.DataFrame(np.sin(exo_test_sara['WindDir'])*10 + 10)
exo_test_sara['WindDirCos'] = pd.DataFrame(np.cos(exo_test_sara['WindDir'])*10 + 10)


pm_train_surat = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_surat = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_surat = test_data[province[0]][['PM25']]
exo_train_surat = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_surat['WindDirSin'] = pd.DataFrame(np.sin(exo_train_surat['WindDir'])*10 + 10)
exo_train_surat['WindDirCos'] = pd.DataFrame(np.cos(exo_train_surat['WindDir'])*10 + 10)
exo_valid_surat = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_surat['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_surat['WindDir'])*10 + 10)
exo_valid_surat['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_surat['WindDir'])*10 + 10)
exo_test_surat = test_data[province[0]][exog_columns]
exo_test_surat['WindDir'] = pd.to_numeric(exo_test_surat['WindDir'], errors='coerce')
exo_test_surat['WindDirSin'] = pd.DataFrame(np.sin(exo_test_surat['WindDir'])*10 + 10)
exo_test_surat['WindDirCos'] = pd.DataFrame(np.cos(exo_test_surat['WindDir'])*10 + 10)

## Tuning parameters for temperature

In [96]:
temp = exog_columns[0]

temp_train_bkk = pd.DataFrame(exo_train_bkk[temp])
temp_valid_bkk = pd.DataFrame(exo_valid_bkk[temp])
temp_test_bkk = pd.DataFrame(exo_test_bkk[temp])

temp_train_cnx = pd.DataFrame(exo_train_cnx[temp])
temp_valid_cnx = pd.DataFrame(exo_valid_cnx[temp])
temp_test_cnx = pd.DataFrame(exo_test_cnx[temp])

temp_train_kkc = pd.DataFrame(exo_train_kkc[temp])
temp_valid_kkc = pd.DataFrame(exo_valid_kkc[temp])
temp_test_kkc = pd.DataFrame(exo_test_kkc[temp])

temp_train_ray = pd.DataFrame(exo_train_ray[temp])
temp_valid_ray = pd.DataFrame(exo_valid_ray[temp])
temp_test_ray = pd.DataFrame(exo_test_ray[temp])

temp_train_sara = pd.DataFrame(exo_train_sara[temp])
temp_valid_sara = pd.DataFrame(exo_valid_sara[temp])
temp_test_sara = pd.DataFrame(exo_test_sara[temp])

temp_train_surat = pd.DataFrame(exo_train_surat[temp])
temp_valid_surat = pd.DataFrame(exo_valid_surat[temp])
temp_test_surat = pd.DataFrame(exo_test_surat[temp])

In [None]:
gSearch_temp_bkk = sarimax_randomsearch(temp_train_bkk, temp_test_bkk, pdq, pdqs, y_val=temp_valid_bkk, n_rand=5, verbose=1)
display(gSearch_temp_bkk)

In [None]:
gSearch_temp_cnx = sarimax_randomsearch(temp_train_cnx, temp_test_cnx, pdq, pdqs, y_val=temp_valid_cnx, n_rand=5, verbose=1)
display(gSearch_temp_cnx)

In [None]:
gSearch_temp_kkc = sarimax_randomsearch(temp_train_kkc, temp_test_kkc, pdq, pdqs, y_val=temp_valid_kkc, n_rand=5, verbose=1)
display(gSearch_temp_kkc)

In [23]:
gSearch_temp_ray = sarimax_randomsearch(temp_train_ray, temp_test_ray, pdq, pdqs, y_val=temp_valid_ray, n_rand=5, verbose=1)
display(gSearch_temp_ray)

100%|██████████| 16675/16675 [18:46<00:00, 14.80it/s]


ITER#0 (1, 0, 0) (0, 0, 0, 1461) 15.992090812440019


100%|██████████| 16675/16675 [18:15<00:00, 15.22it/s]


ITER#1 (2, 0, 0) (1, 1, 1, 1461) 1.901751181284816


100%|██████████| 16675/16675 [18:12<00:00, 15.26it/s]


ITER#2 (2, 0, 1) (1, 1, 1, 1461) 2.018218655203814


100%|██████████| 16675/16675 [18:32<00:00, 14.98it/s]


ITER#3 (2, 1, 0) (0, 0, 1, 1461) 1.8989755763806049


100%|██████████| 16675/16675 [18:13<00:00, 15.25it/s]

ITER#4 (2, 1, 2) (1, 1, 1, 1461) 2.2089383756091827





Unnamed: 0,pdq,pdqs,rmse
3,"(2, 1, 0)","(0, 0, 1, 1461)",1.898976
1,"(2, 0, 0)","(1, 1, 1, 1461)",1.901751
2,"(2, 0, 1)","(1, 1, 1, 1461)",2.018219
4,"(2, 1, 2)","(1, 1, 1, 1461)",2.208938
0,"(1, 0, 0)","(0, 0, 0, 1461)",15.992091


In [24]:
gSearch_temp_sara = sarimax_randomsearch(temp_train_sara, temp_test_sara, pdq, pdqs, y_val=temp_valid_sara, n_rand=5, verbose=1)
display(gSearch_temp_sara)

100%|██████████| 16675/16675 [19:29<00:00, 14.26it/s]


ITER#0 (2, 1, 2) (1, 0, 1, 1461) 4.372241089480689


100%|██████████| 16675/16675 [20:14<00:00, 13.73it/s]


ITER#1 (2, 0, 1) (0, 0, 0, 1461) 4.610449766166291


100%|██████████| 16675/16675 [20:34<00:00, 13.50it/s]


ITER#2 (0, 0, 2) (0, 0, 0, 1461) 24.94109239910067


100%|██████████| 16675/16675 [20:28<00:00, 13.57it/s]


ITER#3 (2, 0, 2) (0, 0, 1, 1461) 4.938988995499015


100%|██████████| 16675/16675 [20:18<00:00, 13.69it/s]

ITER#4 (2, 1, 1) (0, 0, 0, 1461) 4.644085648835342





Unnamed: 0,pdq,pdqs,rmse
0,"(2, 1, 2)","(1, 0, 1, 1461)",4.372241
1,"(2, 0, 1)","(0, 0, 0, 1461)",4.61045
4,"(2, 1, 1)","(0, 0, 0, 1461)",4.644086
3,"(2, 0, 2)","(0, 0, 1, 1461)",4.938989
2,"(0, 0, 2)","(0, 0, 0, 1461)",24.941092


In [25]:
gSearch_temp_surat = sarimax_randomsearch(temp_train_surat, temp_test_surat, pdq, pdqs, y_val=temp_valid_surat, n_rand=5, verbose=1)
display(gSearch_temp_surat)

100%|██████████| 13384/13384 [15:16<00:00, 14.60it/s]


ITER#0 (2, 0, 2) (0, 0, 0, 1461) 6.5026777090149706


100%|██████████| 13384/13384 [14:22<00:00, 15.52it/s]


ITER#1 (1, 0, 1) (1, 0, 1, 1461) 16.33677280548023


100%|██████████| 13384/13384 [15:59<00:00, 13.95it/s]


ITER#2 (0, 1, 1) (1, 0, 0, 1461) 22.183957373613918


100%|██████████| 13384/13384 [14:26<00:00, 15.45it/s]


ITER#3 (0, 0, 0) (1, 1, 1, 1461) 8.441664080871561


100%|██████████| 13384/13384 [15:03<00:00, 14.82it/s]

ITER#4 (0, 0, 1) (1, 1, 0, 1461) 9.388591396304008





Unnamed: 0,pdq,pdqs,rmse
0,"(2, 0, 2)","(0, 0, 0, 1461)",6.502678
3,"(0, 0, 0)","(1, 1, 1, 1461)",8.441664
4,"(0, 0, 1)","(1, 1, 0, 1461)",9.388591
1,"(1, 0, 1)","(1, 0, 1, 1461)",16.336773
2,"(0, 1, 1)","(1, 0, 0, 1461)",22.183957


In [None]:
# with open('answer_dataset/gSearch_temp_bkk_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_bkk, f)

# with open('answer_dataset/gSearch_temp_cnx_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_cnx, f)
    
# with open('answer_dataset/gSearch_temp_ray_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_bkk, f)

# with open('answer_dataset/gSearch_temp_sara_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_sara, f)

# with open('answer_dataset/gSearch_temp_surat_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_surat, f)

In [None]:
# temp_order = {province[0]: gSearch_temp_bkk.pdq, province[1]: gSearch_temp_cnx.pdq, province[2]: gSearch_temp_kkc.pdq, province[3]: gSearch_temp_ray.pdq, province[4]: gSearch_temp_sara.pdq, province[5]: gSearch_temp_surat.pdq}
# temp_seasonal_order = {province[0]: gSearch_temp_bkk.pdqs, province[1]: gSearch_temp_cnx.pdqs, province[2]: gSearch_temp_kkc.pdqs, province[3]: gSearch_temp_ray.pdqs, province[4]: gSearch_temp_sara.pdqs, province[5]: gSearch_temp_surat.pdqs}
# print(temp_order)
# print(temp_seasonal_order)

## Tuning parameters for windspeed

In [97]:
windSpeed = exog_columns[1]
windSpeed_train_bkk = pd.DataFrame(exo_train_bkk[windSpeed])
windSpeed_valid_bkk = pd.DataFrame(exo_valid_bkk[windSpeed])
windSpeed_test_bkk = pd.DataFrame(exo_test_bkk[windSpeed])

windSpeed_train_cnx = pd.DataFrame(exo_train_cnx[windSpeed])
windSpeed_valid_cnx = pd.DataFrame(exo_valid_cnx[windSpeed])
windSpeed_test_cnx = pd.DataFrame(exo_test_cnx[windSpeed])

windSpeed_train_kkc = pd.DataFrame(exo_train_kkc[windSpeed])
windSpeed_valid_kkc = pd.DataFrame(exo_valid_kkc[windSpeed])
windSpeed_test_kkc = pd.DataFrame(exo_test_kkc[windSpeed])

windSpeed_train_ray = pd.DataFrame(exo_train_ray[windSpeed])
windSpeed_valid_ray = pd.DataFrame(exo_valid_ray[windSpeed])
windSpeed_test_ray = pd.DataFrame(exo_test_ray[windSpeed])

windSpeed_train_sara = pd.DataFrame(exo_train_sara[windSpeed])
windSpeed_valid_sara = pd.DataFrame(exo_valid_sara[windSpeed])
windSpeed_test_sara = pd.DataFrame(exo_test_sara[windSpeed])

windSpeed_train_surat = pd.DataFrame(exo_train_surat[windSpeed])
windSpeed_valid_surat = pd.DataFrame(exo_valid_surat[windSpeed])
windSpeed_test_surat = pd.DataFrame(exo_test_surat[windSpeed])

In [None]:
gSearch_windSpeed_bkk = sarimax_randomsearch(windSpeed_train_bkk, windSpeed_test_bkk, pdq, pdqs, y_val=windSpeed_valid_bkk, n_rand=3, verbose=1)
display(gSearch_windSpeed_bkk)

In [None]:
gSearch_windSpeed_cnx = sarimax_randomsearch(windSpeed_train_cnx, windSpeed_test_cnx, pdq, pdqs, y_val=windSpeed_valid_cnx, n_rand=3, verbose=1)
display(gSearch_windSpeed_cnx)

In [None]:
gSearch_windSpeed_kkc = sarimax_randomsearch(windSpeed_train_kkc, windSpeed_test_kkc, pdq, pdqs, y_val=windSpeed_valid_kkc, n_rand=3, verbose=1)
display(gSearch_windSpeed_kkc)

In [28]:
gSearch_windSpeed_ray = sarimax_randomsearch(windSpeed_train_ray, windSpeed_test_ray, pdq, pdqs, y_val=windSpeed_valid_ray, n_rand=3, verbose=1)
display(gSearch_windSpeed_ray)

100%|██████████| 16675/16675 [19:46<00:00, 14.06it/s]


ITER#0 (2, 1, 2) (0, 1, 1, 1461) 8.137118239621214


100%|██████████| 16675/16675 [20:01<00:00, 13.88it/s]


ITER#1 (0, 1, 2) (1, 0, 1, 1461) 16.790215289666115


100%|██████████| 16675/16675 [19:16<00:00, 14.42it/s]

ITER#2 (2, 0, 2) (0, 0, 1, 1461) 6.276570302253418





Unnamed: 0,pdq,pdqs,rmse
2,"(2, 0, 2)","(0, 0, 1, 1461)",6.27657
0,"(2, 1, 2)","(0, 1, 1, 1461)",8.137118
1,"(0, 1, 2)","(1, 0, 1, 1461)",16.790215


In [29]:
gSearch_windSpeed_sara = sarimax_randomsearch(windSpeed_train_sara, windSpeed_test_sara, pdq, pdqs, y_val=windSpeed_valid_sara, n_rand=3, verbose=1)
display(gSearch_windSpeed_sara)

100%|██████████| 16675/16675 [19:52<00:00, 13.99it/s]


ITER#0 (1, 0, 1) (1, 1, 1, 1461) 12.681074808280666


100%|██████████| 16675/16675 [19:49<00:00, 14.02it/s]


ITER#1 (0, 1, 1) (1, 0, 1, 1461) 19.872739739631086


100%|██████████| 16675/16675 [20:01<00:00, 13.88it/s]

ITER#2 (1, 1, 1) (1, 0, 0, 1461) 12.362628555045351





Unnamed: 0,pdq,pdqs,rmse
2,"(1, 1, 1)","(1, 0, 0, 1461)",12.362629
0,"(1, 0, 1)","(1, 1, 1, 1461)",12.681075
1,"(0, 1, 1)","(1, 0, 1, 1461)",19.87274


In [30]:
gSearch_windSpeed_surat = sarimax_randomsearch(windSpeed_train_surat, windSpeed_test_surat, pdq, pdqs, y_val=windSpeed_valid_surat, n_rand=3, verbose=1)
display(gSearch_windSpeed_surat)

100%|██████████| 13384/13384 [15:07<00:00, 14.75it/s]


ITER#0 (2, 0, 1) (1, 1, 1, 1461) 12.259352841598002


100%|██████████| 13384/13384 [15:37<00:00, 14.27it/s]


ITER#1 (2, 0, 0) (0, 1, 1, 1461) 13.005182117202928


100%|██████████| 13384/13384 [16:26<00:00, 13.57it/s]

ITER#2 (2, 1, 0) (1, 0, 1, 1461) 8.643798578549305





Unnamed: 0,pdq,pdqs,rmse
2,"(2, 1, 0)","(1, 0, 1, 1461)",8.643799
0,"(2, 0, 1)","(1, 1, 1, 1461)",12.259353
1,"(2, 0, 0)","(0, 1, 1, 1461)",13.005182


In [None]:
windSpeed_order = {province[0]: gSearch_windSpeed_bkk.pdq, province[1]: gSearch_windSpeed_cnx.pdq, province[2]: gSearch_windSpeed_kkc.pdq, province[3]: gSearch_windSpeed_ray.pdq, province[4]: gSearch_windSpeed_sara.pdq, province[5]: gSearch_windSpeed_surat.pdq}
windSpeed_seasonal_order = {province[0]: gSearch_windSpeed_bkk.pdqs, province[1]: gSearch_windSpeed_cnx.pdqs, province[2]: gSearch_windSpeed_kkc.pdqs, province[3]: gSearch_windSpeed_ray.pdqs, province[4]: gSearch_windSpeed_sara.pdqs, province[5]: gSearch_windSpeed_surat.pdqs}
print(windSpeed_order)
print(windSpeed_seasonal_order)

## Tuning parameters for wind direction

In [93]:
windDir = exog_columns[2]

windDirSin_train_bkk = pd.DataFrame(np.sin(exo_train_bkk[windDir])*10 + 10)
windDirSin_valid_bkk = pd.DataFrame(np.sin(exo_valid_bkk[windDir])*10 + 10)
windDirSin_test_bkk = pd.DataFrame(np.sin(exo_test_bkk[windDir])*10 + 10)
windDirCos_train_bkk = pd.DataFrame(np.cos(exo_train_bkk[windDir])*10 + 10)
windDirCos_valid_bkk = pd.DataFrame(np.sin(exo_valid_bkk[windDir])*10 + 10)
windDirCos_test_bkk = pd.DataFrame(np.cos(exo_test_bkk[windDir])*10 + 10)

windDirSin_train_cnx = pd.DataFrame(np.sin(exo_train_cnx[windDir])*10 + 10)
windDirSin_valid_cnx = pd.DataFrame(np.sin(exo_valid_cnx[windDir])*10 + 10)
windDirSin_test_cnx = pd.DataFrame(np.sin(exo_test_cnx[windDir])*10 + 10)
windDirCos_train_cnx = pd.DataFrame(np.cos(exo_train_cnx[windDir])*10 + 10)
windDirCos_valid_cnx = pd.DataFrame(np.sin(exo_valid_cnx[windDir])*10 + 10)
windDirCos_test_cnx = pd.DataFrame(np.cos(exo_test_cnx[windDir])*10 + 10)

windDirSin_train_kkc = pd.DataFrame(np.sin(exo_train_kkc[windDir])*10 + 10)
windDirSin_valid_kkc = pd.DataFrame(np.sin(exo_valid_kkc[windDir])*10 + 10)
windDirSin_test_kkc = pd.DataFrame(np.sin(exo_test_kkc[windDir])*10 + 10)
windDirCos_train_kkc = pd.DataFrame(np.cos(exo_train_kkc[windDir])*10 + 10)
windDirCos_valid_kkc = pd.DataFrame(np.sin(exo_valid_kkc[windDir])*10 + 10)
windDirCos_test_kkc = pd.DataFrame(np.cos(exo_test_kkc[windDir])*10 + 10)

windDirSin_train_ray = pd.DataFrame(np.sin(exo_train_ray[windDir])*10 + 10)
windDirSin_valid_ray = pd.DataFrame(np.sin(exo_valid_ray[windDir])*10 + 10)
windDirSin_test_ray = pd.DataFrame(np.sin(exo_test_ray[windDir])*10 + 10)
windDirCos_train_ray = pd.DataFrame(np.cos(exo_train_ray[windDir])*10 + 10)
windDirCos_valid_ray = pd.DataFrame(np.sin(exo_valid_ray[windDir])*10 + 10)
windDirCos_test_ray = pd.DataFrame(np.cos(exo_test_ray[windDir])*10 + 10)

windDirSin_train_sara = pd.DataFrame(np.sin(exo_train_sara[windDir])*10 + 10)
windDirSin_valid_sara = pd.DataFrame(np.sin(exo_valid_sara[windDir])*10 + 10)
windDirSin_test_sara = pd.DataFrame(np.sin(exo_test_sara[windDir])*10 + 10)
windDirCos_train_sara = pd.DataFrame(np.cos(exo_train_sara[windDir])*10 + 10)
windDirCos_valid_sara = pd.DataFrame(np.sin(exo_valid_sara[windDir])*10 + 10)
windDirCos_test_sara = pd.DataFrame(np.cos(exo_test_sara[windDir])*10 + 10)

windDirSin_train_surat = pd.DataFrame(np.sin(exo_train_surat[windDir])*10 + 10)
windDirSin_valid_surat = pd.DataFrame(np.sin(exo_valid_surat[windDir])*10 + 10)
windDirSin_test_surat = pd.DataFrame(np.sin(exo_test_surat[windDir])*10 + 10)
windDirCos_train_surat = pd.DataFrame(np.cos(exo_train_surat[windDir])*10 + 10)
windDirCos_valid_surat = pd.DataFrame(np.sin(exo_valid_surat[windDir])*10 + 10)
windDirCos_test_surat = pd.DataFrame(np.cos(exo_test_surat[windDir])*10 + 10)

In [None]:
gSearch_windDirSin_bkk = sarimax_randomsearch(windDirSin_train_bkk, windDirSin_test_bkk, pdq, pdqs, y_val=windDirSin_valid_bkk, n_rand=3, verbose=1)
display(gSearch_windDirSin_bkk)

In [None]:
gSearch_windDirSin_cnx = sarimax_randomsearch(windDirSin_train_cnx, windDirSin_test_cnx, pdq, pdqs, y_val=windDirSin_valid_cnx, n_rand=5, verbose=1)
display(gSearch_windDirSin_cnx)

In [None]:
gSearch_windDirSin_kkc = sarimax_randomsearch(windDirSin_train_kkc, windDirSin_test_kkc, pdq, pdqs, y_val=windDirSin_valid_kkc, n_rand=5, verbose=1)
display(gSearch_windDirSin_kkc)

In [11]:
gSearch_windDirSin_ray = sarimax_randomsearch(windDirSin_train_ray, windDirSin_test_ray, pdq, pdqs, y_val=windDirSin_valid_ray, n_rand=3, verbose=1)
display(gSearch_windDirSin_ray)

100%|██████████| 16675/16675 [20:52<00:00, 13.31it/s]


ITER#0 (0, 1, 2) (1, 0, 0, 1461) 12.126218929802084


100%|██████████| 16675/16675 [19:32<00:00, 14.22it/s]


ITER#1 (2, 1, 0) (0, 1, 1, 1461) 10.122559327616138


100%|██████████| 16675/16675 [19:34<00:00, 14.20it/s]

ITER#2 (0, 0, 2) (0, 1, 0, 1461) 11.014303477260475





Unnamed: 0,pdq,pdqs,rmse
1,"(2, 1, 0)","(0, 1, 1, 1461)",10.122559
2,"(0, 0, 2)","(0, 1, 0, 1461)",11.014303
0,"(0, 1, 2)","(1, 0, 0, 1461)",12.126219


In [12]:
gSearch_windDirSin_sara = sarimax_randomsearch(windDirSin_train_sara, windDirSin_test_sara, pdq, pdqs, y_val=windDirSin_valid_sara, n_rand=3, verbose=1)
display(gSearch_windDirSin_sara)

100%|██████████| 16675/16675 [19:43<00:00, 14.08it/s]


ITER#0 (2, 1, 0) (1, 0, 0, 1461) 9.913282746834783


100%|██████████| 16675/16675 [19:54<00:00, 13.96it/s]


ITER#1 (1, 1, 1) (1, 1, 0, 1461) 9.161331923997333


100%|██████████| 16675/16675 [19:20<00:00, 14.36it/s]

ITER#2 (2, 1, 2) (0, 0, 0, 1461) 11.222181208989157





Unnamed: 0,pdq,pdqs,rmse
1,"(1, 1, 1)","(1, 1, 0, 1461)",9.161332
0,"(2, 1, 0)","(1, 0, 0, 1461)",9.913283
2,"(2, 1, 2)","(0, 0, 0, 1461)",11.222181


In [13]:
gSearch_windDirSin_surat = sarimax_randomsearch(windDirSin_train_surat, windDirSin_test_surat, pdq, pdqs, y_val=windDirSin_valid_surat, n_rand=3, verbose=1)
display(gSearch_windDirSin_surat)

100%|██████████| 13384/13384 [14:20<00:00, 15.55it/s]


ITER#0 (0, 0, 0) (0, 0, 1, 1461) 12.667716009570269


100%|██████████| 13384/13384 [14:46<00:00, 15.11it/s]


ITER#1 (1, 0, 2) (0, 1, 0, 1461) 10.845617259847007


100%|██████████| 13384/13384 [14:46<00:00, 15.10it/s]

ITER#2 (2, 1, 2) (0, 0, 0, 1461) 12.135672870576478





Unnamed: 0,pdq,pdqs,rmse
1,"(1, 0, 2)","(0, 1, 0, 1461)",10.845617
2,"(2, 1, 2)","(0, 0, 0, 1461)",12.135673
0,"(0, 0, 0)","(0, 0, 1, 1461)",12.667716


In [14]:
gSearch_windDirCos_ray = sarimax_randomsearch(windDirCos_train_ray, windDirCos_test_ray, pdq, pdqs, y_val=windDirCos_valid_ray, n_rand=3, verbose=1)
display(gSearch_windDirCos_ray)

100%|██████████| 16675/16675 [19:27<00:00, 14.28it/s]


ITER#0 (2, 1, 2) (0, 0, 1, 1461) 11.961316618412


100%|██████████| 16675/16675 [20:31<00:00, 13.54it/s]


ITER#1 (0, 1, 2) (1, 1, 1, 1461) 10.719768930057587


100%|██████████| 16675/16675 [21:55<00:00, 12.67it/s]

ITER#2 (2, 1, 0) (0, 0, 1, 1461) 12.391357735712376





Unnamed: 0,pdq,pdqs,rmse
1,"(0, 1, 2)","(1, 1, 1, 1461)",10.719769
0,"(2, 1, 2)","(0, 0, 1, 1461)",11.961317
2,"(2, 1, 0)","(0, 0, 1, 1461)",12.391358


In [15]:
gSearch_windDirCos_sara = sarimax_randomsearch(windDirCos_train_sara, windDirCos_test_sara, pdq, pdqs, y_val=windDirCos_valid_sara, n_rand=3, verbose=1)
display(gSearch_windDirCos_sara)

100%|██████████| 16675/16675 [20:12<00:00, 13.75it/s]


ITER#0 (0, 1, 1) (0, 1, 0, 1461) 10.245684563019658


100%|██████████| 16675/16675 [20:26<00:00, 13.60it/s]


ITER#1 (2, 0, 2) (1, 1, 1, 1461) 9.324734055740773


100%|██████████| 16675/16675 [19:01<00:00, 14.60it/s]

ITER#2 (0, 1, 0) (0, 0, 0, 1461) 11.748596516952324





Unnamed: 0,pdq,pdqs,rmse
1,"(2, 0, 2)","(1, 1, 1, 1461)",9.324734
0,"(0, 1, 1)","(0, 1, 0, 1461)",10.245685
2,"(0, 1, 0)","(0, 0, 0, 1461)",11.748597


In [16]:
gSearch_windDirCos_surat = sarimax_randomsearch(windDirCos_train_surat, windDirCos_test_surat, pdq, pdqs, y_val=windDirCos_valid_surat, n_rand=3, verbose=1)
display(gSearch_windDirCos_surat)

100%|██████████| 13384/13384 [16:09<00:00, 13.80it/s]


ITER#0 (2, 0, 2) (0, 1, 0, 1461) 10.179417251700995


100%|██████████| 13384/13384 [15:07<00:00, 14.74it/s]


ITER#1 (1, 1, 1) (1, 1, 0, 1461) 10.394841982514222


100%|██████████| 13384/13384 [14:38<00:00, 15.23it/s]

ITER#2 (1, 0, 0) (0, 1, 1, 1461) 10.579088569821547





Unnamed: 0,pdq,pdqs,rmse
0,"(2, 0, 2)","(0, 1, 0, 1461)",10.179417
1,"(1, 1, 1)","(1, 1, 0, 1461)",10.394842
2,"(1, 0, 0)","(0, 1, 1, 1461)",10.579089


In [None]:
# gSearch_windDir_cnx = sarimax_randomsearch(windDir_train_cnx, windDir_test_cnx, pdq, pdqs, y_val=windDirCos_valid_cnx, n_rand=5, verbose=1)
# gSearch_windDir_kkc = sarimax_randomsearch(windDir_train_kkc, windDir_test_kkc, pdq, pdqs, y_val=windDirCos_valid_kkc, n_rand=5, verbose=1)
# gSearch_windDir_ray = sarimax_randomsearch(windDir_train_ray, windDir_test_ray, pdq, pdqs, y_val=windDirCos_valid_ray, n_rand=5, verbose=1)
# gSearch_windDir_sara = sarimax_randomsearch(windDir_train_sara, windDir_test_sara, pdq, pdqs, y_val=windDirCos_valid_sara, n_rand=5, verbose=1)
# gSearch_windDir_surat = sarimax_randomsearch(windDir_train_surat, windDir_test_surat, pdq, pdqs, y_val=windDirCos_valid_surat, n_rand=5, verbose=1)

In [None]:
# windDir_order = {province[0]: gSearch_windDir_bkk.pdq, province[1]: gSearch_windDir_cnx.pdq, province[2]: gSearch_windDir_kkc.pdq, province[3]: gSearch_windDir_ray.pdq, province[4]: gSearch_windDir_sara.pdq, province[5]: gSearch_windDir_surat.pdq}
# windDir_seasonal_order = {province[0]: gSearch_windDir_bkk.pdqs, province[1]: gSearch_windDir_cnx.pdqs, province[2]: gSearch_windDir_kkc.pdqs, province[3]: gSearch_windDir_ray.pdqs, province[4]: gSearch_windDir_sara.pdqs, province[5]: gSearch_windDir_surat.pdqs}
# print(windDir_order)
# print(windDir_seasonal_order)

## Tuning parameters for PM2.5

In [108]:
exog_columns = ['Temp', 'WindSpeed', 'WindDirSin', 'WindDirCos']

In [None]:
result_bkk = sarimax_randomsearch(pm_train_bkk, pm_test_bkk, pdq, pdqs, y_val=pm_valid_bkk, x_train=exo_train_bkk, x_test=exo_test_bkk, x_val=exo_valid_bkk)
display(result_bkk)

In [None]:
result_cnx = sarimax_randomsearch(pm_train_cnx, pm_test_cnx, pdq, pdqs, y_val=pm_valid_cnx, x_train=exo_train_cnx, x_test=exo_test_cnx, x_val=exo_valid_cnx)
display(result_cnx)

In [None]:
result_kkc = sarimax_randomsearch(pm_train_kkc, pm_test_kkc, pdq, pdqs, y_val=pm_valid_kkc, x_train=exo_train_kkc, x_test=exo_test_kkc, x_val=exo_valid_kkc)
display(result_kkc)

In [109]:
model_exog_ray = {}
exog_order_ray = {}
exog_seasonal_order_ray = {}

model_exog_ray['Temp'] = MinimalSARIMAX(temp_train_ray, (2, 1, 0), (0, 0, 1, 1461))
model_exog_ray['WindSpeed'] = MinimalSARIMAX(windSpeed_train_ray, (2, 0, 2), (0, 0, 1, 1461))
model_exog_ray['WindDirSin'] = MinimalSARIMAX(windDirSin_train_ray, (2, 1, 0), (0, 1, 1, 1461))
model_exog_ray['WindDirCos'] = MinimalSARIMAX(windDirCos_train_ray, (0, 1, 2), (1, 1, 1, 1461))

for exog in exog_columns:
    model_exog_ray[exog].fit(lr=1e-6, lr_decay=0.999, verbose=0)

result_ray = sarimax_randomsearch(pm_train_ray, pm_test_ray, pdq, pdqs, y_val=pm_valid_ray, x_train=exo_train_ray[exog_columns], x_test=exo_test_ray[exog_columns], x_val=exo_valid_ray[exog_columns],
                                  model_exog=model_exog_ray, n_rand=3, verbose=1)

display(result_ray)

100%|██████████| 16675/16675 [2:43:32<00:00,  1.70it/s]  


ITER#0 (0, 1, 2) (1, 0, 0, 1461) 11.789344728849196


100%|██████████| 16675/16675 [2:47:32<00:00,  1.66it/s]  


ITER#1 (0, 1, 0) (0, 1, 1, 1461) 12.156300366231592


100%|██████████| 16675/16675 [2:40:07<00:00,  1.74it/s]  

ITER#2 (2, 0, 1) (0, 1, 1, 1461) 11.380818345770507





Unnamed: 0,pdq,pdqs,rmse
2,"(2, 0, 1)","(0, 1, 1, 1461)",11.380818
0,"(0, 1, 2)","(1, 0, 0, 1461)",11.789345
1,"(0, 1, 0)","(0, 1, 1, 1461)",12.1563


In [None]:
model_exog_sara = {}
exog_order_sara = {}
exog_seasonal_order_sara = {}

model_exog_sara['Temp'] = MinimalSARIMAX(temp_train_sara, (2, 1, 2), (1, 0, 1, 1461))
model_exog_sara['WindSpeed'] = MinimalSARIMAX(windSpeed_train_sara, (1, 1, 1), (1, 0, 0, 1461))
model_exog_sara['WindDirSin'] = MinimalSARIMAX(windDirSin_train_sara, (1, 1, 1), (1, 1, 0, 1461))
model_exog_sara['WindDirCos'] = MinimalSARIMAX(windDirCos_train_sara, (2, 0, 2), (1, 1, 1, 1461))

for exog in exog_columns:
    model_exog_sara[exog].fit(lr=1e-6, lr_decay=0.999, verbose=0)

result_sara = sarimax_randomsearch(pm_train_sara, pm_test_sara, pdq, pdqs, y_val=pm_valid_sara, x_train=exo_train_sara[exog_columns], x_test=exo_test_sara[exog_columns], x_val=exo_valid_sara[exog_columns],
                                  model_exog=model_exog_sara, n_rand=3, verbose=1)

display(result_sara)

In [None]:
model_exog_surat = {}
exog_order_surat = {}
exog_seasonal_order_surat = {}

model_exog_surat['Temp'] = MinimalSARIMAX(temp_train_surat, (2, 0, 2), (0, 0, 0, 1461))
model_exog_surat['WindSpeed'] = MinimalSARIMAX(windSpeed_train_surat, (2, 1, 0), (1, 0, 1, 1461))
model_exog_surat['WindDirSin'] = MinimalSARIMAX(windDirSin_train_surat, (1, 0, 2), (0, 1, 0, 1461))
model_exog_surat['WindDirCos'] = MinimalSARIMAX(windDirCos_train_surat, (2, 0, 2), (0, 1, 0, 1461)	)

for exog in exog_columns:
    model_exog_surat[exog].fit(lr=1e-6, lr_decay=0.999, verbose=0)

result_surat = sarimax_randomsearch(pm_train_surat, pm_test_surat, pdq, pdqs, y_val=pm_valid_surat, x_train=exo_train_surat[exog_columns], x_test=exo_test_surat[exog_columns], x_val=exo_valid_surat[exog_columns],
                                  model_exog=model_exog_surat, n_rand=3, verbose=1)

display(result_surat)

In [None]:
# with open('answer_dataset/gridsearch/result_bkk_03.bin',"wb") as f:
#     pickle.dump(result_bkk, f)

# with open('answer_dataset/gridsearch/result_cnx_03.bin',"wb") as f:
#     pickle.dump(result_cnx, f)
    
# with open('answer_dataset/gridsearch/result_ray_03.bin',"wb") as f:
#     pickle.dump(result_bkk, f)

# with open('answer_dataset/gridsearch/result_sara_03.bin',"wb") as f:
#     pickle.dump(result_sara, f)

# with open('answer_dataset/gridsearch/result_surat_03.bin',"wb") as f:
#     pickle.dump(result_surat, f)

In [None]:
# order = {province[0]: result_bkk.pdq, province[1]: result_cnx.pdq, province[2]: result_kkc.pdq, province[3]: result_ray.pdq, province[4]: result_sara.pdq, province[5]: result_surat.pdq}
# seasonal_order = {province[0]: result_bkk.pdqs, province[1]: result_cnx.pdqs, province[2]: result_kkc.pdqs, province[3]: result_ray.pdqs, province[4]: result_sara.pdqs, province[5]: result_surat.pdqs}
# print(order)
# print(seasonal_order)