# Random Search

In [1]:
#Import the libraries
import numpy as np
import pandas as pd

import pickle

import random
import warnings
warnings.filterwarnings("ignore")

import itertools
from tqdm import tqdm

In [2]:
train_data = pickle.load(open('clean_dataset_2022/train_set.bin', 'rb'))

test_data = pickle.load(open('clean_dataset_2022/test_set.bin', 'rb'))

In [3]:
province = ['BKK','CNX','KKC','RAY','SARA','SURAT']
exog_columns = ['Temp', 'WindSpeed', 'WindDir']

In [4]:
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

from matplotlib.pyplot import figure
import matplotlib.pyplot as plt

In [5]:
from importlib import reload

from custom_function import minimalSARIMAX

reload(minimalSARIMAX)

from custom_function.minimalSARIMAX import MinimalSARIMAX

In [6]:
p = d = q = range(0, 3)
pdq = list(itertools.product(p, d, q))

In [7]:
P = Q = D = range(0, 2) ; S = 1461
pdqs = [(x[0], x[1], x[2], S) for x in list(itertools.product(P, D, Q))]

In [8]:
# Define function
def sarimax_randomsearch(y_train, y_test, pdq, PDQs, y_val = None, x_train = None, x_test = None, x_val = None, model_exog=None, verbose=0, n_rand=10):
    '''
    Input: 
        y_train: training data
        y_test: test data
        pdq : ARIMA combinations 
        pdqs : seasonal ARIMA combinations 
        x_train: exogenous training data
        x_test: exogenous test data

    Return:
        Returns dataframe of parameter combinations with the least RMSE
    '''

    ans_df = pd.DataFrame(columns=['pdq', 'pdqs', 'rmse'])

    save_comb = set()
    
    i = 0
    while i!=n_rand:
        comb = random.sample(pdq, 1)[0]
        combs = random.sample(PDQs, 1)[0]

        if (comb+combs) in save_comb:
            continue
        
        save_comb.add(comb+combs)
        
        p, d, q = comb[0], comb[1], comb[2]
        P, D, Q = combs[0], combs[1], combs[2]
        if (d <= 1) and (D <= 1) and (P <= 1) and (Q <= 1):  
            model = MinimalSARIMAX(y_train, comb, combs, exog=x_train)
            model.fit(lr=1e-6, lr_decay=0.999, verbose=0) 

            if (y_val is None):
                y_pred, err = model.predict(y_test, y_exog=x_test, verbose=verbose)
                rmse = model.scoring(y_pred, y_test)

            else:
                Result = model.predict_step(y_val, y_test, val_X_exog=x_val, y_exog=x_test,
                                            model_exog=model_exog, lr=np.array([1e-6, 1e-6, 1e-6, 1e-6, 1e-6]), lr_decay=0.9995,
                                            learn=True, verbose=verbose, verbose_rmse=0)

                _, y_pred_sav, _ = Result
                
                rmse = model.scoring(y_pred_sav.iloc[:,[1]], y_pred_sav.iloc[:,[2]])
            
            print(f"ITER#{i} {comb} {combs} {rmse}") ; i=i+1
            
            ans_df = ans_df.append({'pdq':comb, 'pdqs':combs, 'rmse':rmse}, ignore_index=True)
        else: continue

    # Sort and return a combination with the lowest RMSE
    ans_df = ans_df.sort_values(by=['rmse'],ascending=True)
    
    return ans_df

In [9]:
ratio = 0.7

pm_train_bkk = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_bkk = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_bkk = test_data[province[0]][['PM25']]
exo_train_bkk = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_bkk['WindDirSin'] = pd.DataFrame(np.sin(exo_train_bkk['WindDir'])*10 + 10)
exo_train_bkk['WindDirCos'] = pd.DataFrame(np.cos(exo_train_bkk['WindDir'])*10 + 10)
exo_valid_bkk = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_bkk['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_bkk['WindDir'])*10 + 10)
exo_valid_bkk['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_bkk['WindDir'])*10 + 10)
exo_test_bkk = test_data[province[0]][exog_columns]
exo_test_bkk['WindDir'] = pd.to_numeric(exo_test_bkk['WindDir'], errors='coerce')
exo_test_bkk['WindDirSin'] = pd.DataFrame(np.sin(exo_test_bkk['WindDir'])*10 + 10)
exo_test_bkk['WindDirCos'] = pd.DataFrame(np.cos(exo_test_bkk['WindDir'])*10 + 10)


pm_train_cnx = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_cnx = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_cnx = test_data[province[0]][['PM25']]
exo_train_cnx = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_cnx['WindDirSin'] = pd.DataFrame(np.sin(exo_train_cnx['WindDir'])*10 + 10)
exo_train_cnx['WindDirCos'] = pd.DataFrame(np.cos(exo_train_cnx['WindDir'])*10 + 10)
exo_valid_cnx = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_cnx['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_cnx['WindDir'])*10 + 10)
exo_valid_cnx['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_cnx['WindDir'])*10 + 10)
exo_test_cnx = test_data[province[0]][exog_columns]
exo_test_cnx['WindDir'] = pd.to_numeric(exo_test_cnx['WindDir'], errors='coerce')
exo_test_cnx['WindDirSin'] = pd.DataFrame(np.sin(exo_test_cnx['WindDir'])*10 + 10)
exo_test_cnx['WindDirCos'] = pd.DataFrame(np.cos(exo_test_cnx['WindDir'])*10 + 10)


pm_train_kkc = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_kkc = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_kkc = test_data[province[0]][['PM25']]
exo_train_kkc = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_kkc['WindDirSin'] = pd.DataFrame(np.sin(exo_train_kkc['WindDir'])*10 + 10)
exo_train_kkc['WindDirCos'] = pd.DataFrame(np.cos(exo_train_kkc['WindDir'])*10 + 10)
exo_valid_kkc = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_kkc['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_kkc['WindDir'])*10 + 10)
exo_valid_kkc['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_kkc['WindDir'])*10 + 10)
exo_test_kkc = test_data[province[0]][exog_columns]
exo_test_kkc['WindDir'] = pd.to_numeric(exo_test_kkc['WindDir'], errors='coerce')
exo_test_kkc['WindDirSin'] = pd.DataFrame(np.sin(exo_test_kkc['WindDir'])*10 + 10)
exo_test_kkc['WindDirCos'] = pd.DataFrame(np.cos(exo_test_kkc['WindDir'])*10 + 10)


pm_train_ray = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_ray = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_ray = test_data[province[0]][['PM25']]
exo_train_ray = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_ray['WindDirSin'] = pd.DataFrame(np.sin(exo_train_ray['WindDir'])*10 + 10)
exo_train_ray['WindDirCos'] = pd.DataFrame(np.cos(exo_train_ray['WindDir'])*10 + 10)
exo_valid_ray = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_ray['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_ray['WindDir'])*10 + 10)
exo_valid_ray['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_ray['WindDir'])*10 + 10)
exo_test_ray = test_data[province[0]][exog_columns]
exo_test_ray['WindDir'] = pd.to_numeric(exo_test_ray['WindDir'], errors='coerce')
exo_test_ray['WindDirSin'] = pd.DataFrame(np.sin(exo_test_ray['WindDir'])*10 + 10)
exo_test_ray['WindDirCos'] = pd.DataFrame(np.cos(exo_test_ray['WindDir'])*10 + 10)


pm_train_sara = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_sara = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_sara = test_data[province[0]][['PM25']]
exo_train_sara = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_sara['WindDirSin'] = pd.DataFrame(np.sin(exo_train_sara['WindDir'])*10 + 10)
exo_train_sara['WindDirCos'] = pd.DataFrame(np.cos(exo_train_sara['WindDir'])*10 + 10)
exo_valid_sara = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_sara['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_sara['WindDir'])*10 + 10)
exo_valid_sara['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_sara['WindDir'])*10 + 10)
exo_test_sara = test_data[province[0]][exog_columns]
exo_test_sara['WindDir'] = pd.to_numeric(exo_test_sara['WindDir'], errors='coerce')
exo_test_sara['WindDirSin'] = pd.DataFrame(np.sin(exo_test_sara['WindDir'])*10 + 10)
exo_test_sara['WindDirCos'] = pd.DataFrame(np.cos(exo_test_sara['WindDir'])*10 + 10)


pm_train_surat = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_surat = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_surat = test_data[province[0]][['PM25']]
exo_train_surat = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_train_surat['WindDirSin'] = pd.DataFrame(np.sin(exo_train_surat['WindDir'])*10 + 10)
exo_train_surat['WindDirCos'] = pd.DataFrame(np.cos(exo_train_surat['WindDir'])*10 + 10)
exo_valid_surat = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_valid_surat['WindDirSin'] = pd.DataFrame(np.sin(exo_valid_surat['WindDir'])*10 + 10)
exo_valid_surat['WindDirCos'] = pd.DataFrame(np.cos(exo_valid_surat['WindDir'])*10 + 10)
exo_test_surat = test_data[province[0]][exog_columns]
exo_test_surat['WindDir'] = pd.to_numeric(exo_test_surat['WindDir'], errors='coerce')
exo_test_surat['WindDirSin'] = pd.DataFrame(np.sin(exo_test_surat['WindDir'])*10 + 10)
exo_test_surat['WindDirCos'] = pd.DataFrame(np.cos(exo_test_surat['WindDir'])*10 + 10)

## Tuning parameters for temperature

In [11]:
temp = exog_columns[0]

temp_train_bkk = pd.DataFrame(exo_train_bkk[temp])
temp_valid_bkk = pd.DataFrame(exo_valid_bkk[temp])
temp_test_bkk = pd.DataFrame(exo_test_bkk[temp])

temp_train_cnx = pd.DataFrame(exo_train_cnx[temp])
temp_valid_cnx = pd.DataFrame(exo_valid_cnx[temp])
temp_test_cnx = pd.DataFrame(exo_test_cnx[temp])

temp_train_kkc = pd.DataFrame(exo_train_kkc[temp])
temp_valid_kkc = pd.DataFrame(exo_valid_kkc[temp])
temp_test_kkc = pd.DataFrame(exo_test_kkc[temp])

temp_train_ray = pd.DataFrame(exo_train_ray[temp])
temp_valid_ray = pd.DataFrame(exo_valid_ray[temp])
temp_test_ray = pd.DataFrame(exo_test_ray[temp])

temp_train_sara = pd.DataFrame(exo_train_sara[temp])
temp_valid_sara = pd.DataFrame(exo_valid_sara[temp])
temp_test_sara = pd.DataFrame(exo_test_sara[temp])

temp_train_surat = pd.DataFrame(exo_train_surat[temp])
temp_valid_surat = pd.DataFrame(exo_valid_surat[temp])
temp_test_surat = pd.DataFrame(exo_test_surat[temp])

In [36]:
gSearch_temp_bkk = sarimax_randomsearch(temp_train_bkk, temp_test_bkk, pdq, pdqs, y_val=temp_valid_bkk, n_rand=5, verbose=1)
display(gSearch_temp_bkk)

100%|██████████| 16675/16675 [14:14<00:00, 19.50it/s]


ITER#0 (1, 0, 0) (0, 1, 1, 1461) 1.6773643226482746


100%|██████████| 16675/16675 [15:17<00:00, 18.18it/s]


ITER#1 (0, 0, 0) (1, 0, 1, 1461) 17.981191633711877


100%|██████████| 16675/16675 [14:01<00:00, 19.82it/s]


ITER#2 (2, 0, 1) (0, 1, 0, 1461) 1.6132899843343023


100%|██████████| 16675/16675 [11:29<00:00, 24.19it/s]


ITER#3 (2, 0, 0) (1, 1, 1, 1461) 1.7878396504883909


100%|██████████| 16675/16675 [10:59<00:00, 25.28it/s]

ITER#4 (0, 0, 0) (0, 0, 1, 1461) 28.744358403990983





Unnamed: 0,pdq,pdqs,rmse
2,"(2, 0, 1)","(0, 1, 0, 1461)",1.61329
0,"(1, 0, 0)","(0, 1, 1, 1461)",1.677364
3,"(2, 0, 0)","(1, 1, 1, 1461)",1.78784
1,"(0, 0, 0)","(1, 0, 1, 1461)",17.981192
4,"(0, 0, 0)","(0, 0, 1, 1461)",28.744358


In [37]:
gSearch_temp_cnx = sarimax_randomsearch(temp_train_cnx, temp_test_cnx, pdq, pdqs, y_val=temp_valid_cnx, n_rand=5, verbose=1)
display(gSearch_temp_cnx)

100%|██████████| 16675/16675 [10:51<00:00, 25.61it/s]


ITER#0 (0, 1, 0) (1, 1, 1, 1461) 5.280477537581058


100%|██████████| 16675/16675 [10:36<00:00, 26.20it/s]


ITER#1 (0, 1, 2) (0, 0, 0, 1461) 23.185572179230082


100%|██████████| 16675/16675 [10:39<00:00, 26.06it/s]


ITER#2 (0, 0, 2) (0, 1, 1, 1461) 8.196283492905108


100%|██████████| 16675/16675 [10:39<00:00, 26.09it/s]


ITER#3 (0, 0, 0) (0, 0, 1, 1461) 24.20336426691612


100%|██████████| 16675/16675 [10:45<00:00, 25.84it/s]

ITER#4 (2, 0, 2) (1, 1, 1, 1461) 3.4864314033706747





Unnamed: 0,pdq,pdqs,rmse
4,"(2, 0, 2)","(1, 1, 1, 1461)",3.486431
0,"(0, 1, 0)","(1, 1, 1, 1461)",5.280478
2,"(0, 0, 2)","(0, 1, 1, 1461)",8.196283
1,"(0, 1, 2)","(0, 0, 0, 1461)",23.185572
3,"(0, 0, 0)","(0, 0, 1, 1461)",24.203364


In [38]:
gSearch_temp_kkc = sarimax_randomsearch(temp_train_kkc, temp_test_kkc, pdq, pdqs, y_val=temp_valid_kkc, n_rand=5, verbose=1)
display(gSearch_temp_kkc)

100%|██████████| 16675/16675 [10:46<00:00, 25.78it/s]


ITER#0 (1, 1, 0) (1, 1, 0, 1461) 3.0317014547679464


100%|██████████| 16675/16675 [10:47<00:00, 25.76it/s]


ITER#1 (2, 0, 0) (0, 1, 0, 1461) 2.6614997807605745


100%|██████████| 16675/16675 [10:51<00:00, 25.60it/s]


ITER#2 (2, 0, 0) (1, 0, 0, 1461) 4.500586421943427


100%|██████████| 16675/16675 [10:59<00:00, 25.28it/s]


ITER#3 (1, 0, 1) (0, 1, 1, 1461) 2.8246134456567664


100%|██████████| 16675/16675 [11:01<00:00, 25.23it/s]

ITER#4 (0, 1, 2) (1, 0, 0, 1461) 17.54396255829231





Unnamed: 0,pdq,pdqs,rmse
1,"(2, 0, 0)","(0, 1, 0, 1461)",2.6615
3,"(1, 0, 1)","(0, 1, 1, 1461)",2.824613
0,"(1, 1, 0)","(1, 1, 0, 1461)",3.031701
2,"(2, 0, 0)","(1, 0, 0, 1461)",4.500586
4,"(0, 1, 2)","(1, 0, 0, 1461)",17.543963


In [None]:
gSearch_temp_ray = sarimax_randomsearch(temp_train_ray, temp_test_ray, pdq, pdqs, y_val=temp_valid_ray, n_rand=5, verbose=1)
display(gSearch_temp_ray)

In [None]:
gSearch_temp_sara = sarimax_randomsearch(temp_train_sara, temp_test_sara, pdq, pdqs, y_val=temp_valid_sara, n_rand=5, verbose=1)
display(gSearch_temp_sara)

In [None]:
gSearch_temp_surat = sarimax_randomsearch(temp_train_surat, temp_test_surat, pdq, pdqs, y_val=temp_valid_surat, n_rand=5, verbose=1)
display(gSearch_temp_surat)

In [None]:
# with open('answer_dataset/gSearch_temp_bkk_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_bkk, f)

# with open('answer_dataset/gSearch_temp_cnx_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_cnx, f)
    
# with open('answer_dataset/gSearch_temp_ray_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_bkk, f)

# with open('answer_dataset/gSearch_temp_sara_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_sara, f)

# with open('answer_dataset/gSearch_temp_surat_03.bin',"wb") as f:
#     pickle.dump(gSearch_temp_surat, f)

In [None]:
# temp_order = {province[0]: gSearch_temp_bkk.pdq, province[1]: gSearch_temp_cnx.pdq, province[2]: gSearch_temp_kkc.pdq, province[3]: gSearch_temp_ray.pdq, province[4]: gSearch_temp_sara.pdq, province[5]: gSearch_temp_surat.pdq}
# temp_seasonal_order = {province[0]: gSearch_temp_bkk.pdqs, province[1]: gSearch_temp_cnx.pdqs, province[2]: gSearch_temp_kkc.pdqs, province[3]: gSearch_temp_ray.pdqs, province[4]: gSearch_temp_sara.pdqs, province[5]: gSearch_temp_surat.pdqs}
# print(temp_order)
# print(temp_seasonal_order)

## Tuning parameters for windspeed

In [10]:
windSpeed = exog_columns[1]
windSpeed_train_bkk = pd.DataFrame(exo_train_bkk[windSpeed])
windSpeed_valid_bkk = pd.DataFrame(exo_valid_bkk[windSpeed])
windSpeed_test_bkk = pd.DataFrame(exo_test_bkk[windSpeed])

windSpeed_train_cnx = pd.DataFrame(exo_train_cnx[windSpeed])
windSpeed_valid_cnx = pd.DataFrame(exo_valid_cnx[windSpeed])
windSpeed_test_cnx = pd.DataFrame(exo_test_cnx[windSpeed])

windSpeed_train_kkc = pd.DataFrame(exo_train_kkc[windSpeed])
windSpeed_valid_kkc = pd.DataFrame(exo_valid_kkc[windSpeed])
windSpeed_test_kkc = pd.DataFrame(exo_test_kkc[windSpeed])

windSpeed_train_ray = pd.DataFrame(exo_train_ray[windSpeed])
windSpeed_valid_ray = pd.DataFrame(exo_valid_ray[windSpeed])
windSpeed_test_ray = pd.DataFrame(exo_test_ray[windSpeed])

windSpeed_train_sara = pd.DataFrame(exo_train_sara[windSpeed])
windSpeed_valid_sara = pd.DataFrame(exo_valid_sara[windSpeed])
windSpeed_test_sara = pd.DataFrame(exo_test_sara[windSpeed])

windSpeed_train_surat = pd.DataFrame(exo_train_surat[windSpeed])
windSpeed_valid_surat = pd.DataFrame(exo_valid_surat[windSpeed])
windSpeed_test_surat = pd.DataFrame(exo_test_surat[windSpeed])

In [40]:
gSearch_windSpeed_bkk = sarimax_randomsearch(windSpeed_train_bkk, windSpeed_test_bkk, pdq, pdqs, y_val=windSpeed_valid_bkk, n_rand=3, verbose=1)
display(gSearch_windSpeed_bkk)

100%|██████████| 16675/16675 [10:59<00:00, 25.29it/s]


ITER#0 (1, 1, 2) (1, 0, 0, 1461) 9.37057780540336


100%|██████████| 16675/16675 [11:04<00:00, 25.10it/s]


ITER#1 (2, 0, 0) (1, 1, 0, 1461) 8.169289878267394


100%|██████████| 16675/16675 [11:02<00:00, 25.15it/s]

ITER#2 (2, 0, 2) (0, 0, 1, 1461) 7.774220028092216





Unnamed: 0,pdq,pdqs,rmse
2,"(2, 0, 2)","(0, 0, 1, 1461)",7.77422
1,"(2, 0, 0)","(1, 1, 0, 1461)",8.16929
0,"(1, 1, 2)","(1, 0, 0, 1461)",9.370578


In [41]:
gSearch_windSpeed_cnx = sarimax_randomsearch(windSpeed_train_cnx, windSpeed_test_cnx, pdq, pdqs, y_val=windSpeed_valid_cnx, n_rand=3, verbose=1)
display(gSearch_windSpeed_cnx)

100%|██████████| 16675/16675 [11:03<00:00, 25.15it/s]


ITER#0 (0, 0, 2) (0, 0, 1, 1461) 11.680462501321065


100%|██████████| 16675/16675 [11:08<00:00, 24.96it/s]


ITER#1 (2, 1, 0) (0, 0, 0, 1461) 8.928893507031422


100%|██████████| 16675/16675 [11:18<00:00, 24.59it/s]

ITER#2 (1, 1, 0) (0, 1, 1, 1461) 6.8742635111989605





Unnamed: 0,pdq,pdqs,rmse
2,"(1, 1, 0)","(0, 1, 1, 1461)",6.874264
1,"(2, 1, 0)","(0, 0, 0, 1461)",8.928894
0,"(0, 0, 2)","(0, 0, 1, 1461)",11.680463


In [42]:
gSearch_windSpeed_kkc = sarimax_randomsearch(windSpeed_train_kkc, windSpeed_test_kkc, pdq, pdqs, y_val=windSpeed_valid_kkc, n_rand=3, verbose=1)
display(gSearch_windSpeed_kkc)

100%|██████████| 16675/16675 [11:17<00:00, 24.61it/s]


ITER#0 (1, 0, 1) (0, 1, 0, 1461) 15.216275417234861


100%|██████████| 16675/16675 [11:20<00:00, 24.51it/s]


ITER#1 (1, 0, 0) (1, 0, 1, 1461) 16.22514470043448


100%|██████████| 16675/16675 [11:25<00:00, 24.32it/s]

ITER#2 (1, 0, 1) (1, 1, 1, 1461) 14.854198479120337





Unnamed: 0,pdq,pdqs,rmse
2,"(1, 0, 1)","(1, 1, 1, 1461)",14.854198
0,"(1, 0, 1)","(0, 1, 0, 1461)",15.216275
1,"(1, 0, 0)","(1, 0, 1, 1461)",16.225145


In [None]:
gSearch_windSpeed_ray = sarimax_randomsearch(windSpeed_train_ray, windSpeed_test_ray, pdq, pdqs, y_val=windSpeed_valid_ray, n_rand=3, verbose=1)
display(gSearch_windSpeed_ray)

In [None]:
gSearch_windSpeed_sara = sarimax_randomsearch(windSpeed_train_sara, windSpeed_test_sara, pdq, pdqs, y_val=windSpeed_valid_sara, n_rand=3, verbose=1)
display(gSearch_windSpeed_sara)

In [None]:
gSearch_windSpeed_surat = sarimax_randomsearch(windSpeed_train_surat, windSpeed_test_surat, pdq, pdqs, y_val=windSpeed_valid_surat, n_rand=3, verbose=1)
display(gSearch_windSpeed_surat)

In [None]:
windSpeed_order = {province[0]: gSearch_windSpeed_bkk.pdq, province[1]: gSearch_windSpeed_cnx.pdq, province[2]: gSearch_windSpeed_kkc.pdq, province[3]: gSearch_windSpeed_ray.pdq, province[4]: gSearch_windSpeed_sara.pdq, province[5]: gSearch_windSpeed_surat.pdq}
windSpeed_seasonal_order = {province[0]: gSearch_windSpeed_bkk.pdqs, province[1]: gSearch_windSpeed_cnx.pdqs, province[2]: gSearch_windSpeed_kkc.pdqs, province[3]: gSearch_windSpeed_ray.pdqs, province[4]: gSearch_windSpeed_sara.pdqs, province[5]: gSearch_windSpeed_surat.pdqs}
print(windSpeed_order)
print(windSpeed_seasonal_order)

## Tuning parameters for wind direction

In [12]:
windDir = exog_columns[2]

windDirSin_train_bkk = pd.DataFrame(np.sin(exo_train_bkk[windDir])*10 + 10)
windDirSin_valid_bkk = pd.DataFrame(np.sin(exo_valid_bkk[windDir])*10 + 10)
windDirSin_test_bkk = pd.DataFrame(np.sin(exo_test_bkk[windDir])*10 + 10)
windDirCos_train_bkk = pd.DataFrame(np.cos(exo_train_bkk[windDir])*10 + 10)
windDirCos_valid_bkk = pd.DataFrame(np.cos(exo_valid_bkk[windDir])*10 + 10)
windDirCos_test_bkk = pd.DataFrame(np.cos(exo_test_bkk[windDir])*10 + 10)

windDirSin_train_cnx = pd.DataFrame(np.sin(exo_train_cnx[windDir])*10 + 10)
windDirSin_valid_cnx = pd.DataFrame(np.sin(exo_valid_cnx[windDir])*10 + 10)
windDirSin_test_cnx = pd.DataFrame(np.sin(exo_test_cnx[windDir])*10 + 10)
windDirCos_train_cnx = pd.DataFrame(np.cos(exo_train_cnx[windDir])*10 + 10)
windDirCos_valid_cnx = pd.DataFrame(np.cos(exo_valid_cnx[windDir])*10 + 10)
windDirCos_test_cnx = pd.DataFrame(np.cos(exo_test_cnx[windDir])*10 + 10)

windDirSin_train_kkc = pd.DataFrame(np.sin(exo_train_kkc[windDir])*10 + 10)
windDirSin_valid_kkc = pd.DataFrame(np.sin(exo_valid_kkc[windDir])*10 + 10)
windDirSin_test_kkc = pd.DataFrame(np.sin(exo_test_kkc[windDir])*10 + 10)
windDirCos_train_kkc = pd.DataFrame(np.cos(exo_train_kkc[windDir])*10 + 10)
windDirCos_valid_kkc = pd.DataFrame(np.cos(exo_valid_kkc[windDir])*10 + 10)
windDirCos_test_kkc = pd.DataFrame(np.cos(exo_test_kkc[windDir])*10 + 10)

windDirSin_train_ray = pd.DataFrame(np.sin(exo_train_ray[windDir])*10 + 10)
windDirSin_valid_ray = pd.DataFrame(np.sin(exo_valid_ray[windDir])*10 + 10)
windDirSin_test_ray = pd.DataFrame(np.sin(exo_test_ray[windDir])*10 + 10)
windDirCos_train_ray = pd.DataFrame(np.cos(exo_train_ray[windDir])*10 + 10)
windDirCos_valid_ray = pd.DataFrame(np.cos(exo_valid_ray[windDir])*10 + 10)
windDirCos_test_ray = pd.DataFrame(np.cos(exo_test_ray[windDir])*10 + 10)

windDirSin_train_sara = pd.DataFrame(np.sin(exo_train_sara[windDir])*10 + 10)
windDirSin_valid_sara = pd.DataFrame(np.sin(exo_valid_sara[windDir])*10 + 10)
windDirSin_test_sara = pd.DataFrame(np.sin(exo_test_sara[windDir])*10 + 10)
windDirCos_train_sara = pd.DataFrame(np.cos(exo_train_sara[windDir])*10 + 10)
windDirCos_valid_sara = pd.DataFrame(np.cos(exo_valid_sara[windDir])*10 + 10)
windDirCos_test_sara = pd.DataFrame(np.cos(exo_test_sara[windDir])*10 + 10)

windDirSin_train_surat = pd.DataFrame(np.sin(exo_train_surat[windDir])*10 + 10)
windDirSin_valid_surat = pd.DataFrame(np.sin(exo_valid_surat[windDir])*10 + 10)
windDirSin_test_surat = pd.DataFrame(np.sin(exo_test_surat[windDir])*10 + 10)
windDirCos_train_surat = pd.DataFrame(np.cos(exo_train_surat[windDir])*10 + 10)
windDirCos_valid_surat = pd.DataFrame(np.cos(exo_valid_surat[windDir])*10 + 10)
windDirCos_test_surat = pd.DataFrame(np.cos(exo_test_surat[windDir])*10 + 10)

In [21]:
gSearch_windDirSin_bkk = sarimax_randomsearch(windDirSin_train_bkk, windDirSin_test_bkk, pdq, pdqs, y_val=windDirSin_valid_bkk, n_rand=5, verbose=1)
display(gSearch_windDirSin_bkk)

100%|██████████| 16675/16675 [12:12<00:00, 22.76it/s]


ITER#0 (0, 0, 1) (0, 0, 1, 1461) 11.983172493212237


100%|██████████| 16675/16675 [11:35<00:00, 23.98it/s]


ITER#1 (0, 1, 0) (1, 0, 1, 1461) 11.418517451592411


100%|██████████| 16675/16675 [10:31<00:00, 26.42it/s]


ITER#2 (2, 0, 0) (0, 0, 1, 1461) 11.39115179246043


100%|██████████| 16675/16675 [10:31<00:00, 26.42it/s]


ITER#3 (0, 0, 0) (0, 0, 1, 1461) 12.085902079515982


100%|██████████| 16675/16675 [10:38<00:00, 26.10it/s]

ITER#4 (0, 1, 1) (1, 0, 1, 1461) 11.316666255332384





Unnamed: 0,pdq,pdqs,rmse
4,"(0, 1, 1)","(1, 0, 1, 1461)",11.316666
2,"(2, 0, 0)","(0, 0, 1, 1461)",11.391152
1,"(0, 1, 0)","(1, 0, 1, 1461)",11.418517
0,"(0, 0, 1)","(0, 0, 1, 1461)",11.983172
3,"(0, 0, 0)","(0, 0, 1, 1461)",12.085902


In [22]:
gSearch_windDirCos_bkk = sarimax_randomsearch(windDirCos_train_bkk, windDirCos_test_bkk, pdq, pdqs, y_val=windDirCos_valid_bkk, n_rand=5, verbose=1)
display(gSearch_windDirCos_bkk)

100%|██████████| 16675/16675 [10:47<00:00, 25.74it/s]


ITER#0 (1, 1, 2) (1, 1, 1, 1461) 10.293077915422408


100%|██████████| 16675/16675 [10:40<00:00, 26.02it/s]


ITER#1 (2, 1, 2) (1, 1, 1, 1461) 10.03097845192623


100%|██████████| 16675/16675 [10:15<00:00, 27.08it/s]


ITER#2 (2, 0, 2) (0, 0, 1, 1461) 11.863991082746193


100%|██████████| 16675/16675 [10:18<00:00, 26.96it/s]


ITER#3 (2, 0, 0) (0, 1, 1, 1461) 10.607188231488163


100%|██████████| 16675/16675 [10:20<00:00, 26.87it/s]

ITER#4 (0, 0, 2) (0, 0, 0, 1461) 12.381180463512553





Unnamed: 0,pdq,pdqs,rmse
1,"(2, 1, 2)","(1, 1, 1, 1461)",10.030978
0,"(1, 1, 2)","(1, 1, 1, 1461)",10.293078
3,"(2, 0, 0)","(0, 1, 1, 1461)",10.607188
2,"(2, 0, 2)","(0, 0, 1, 1461)",11.863991
4,"(0, 0, 2)","(0, 0, 0, 1461)",12.38118


In [23]:
gSearch_windDirSin_cnx = sarimax_randomsearch(windDirSin_train_cnx, windDirSin_test_cnx, pdq, pdqs, y_val=windDirSin_valid_cnx, n_rand=5, verbose=1)
display(gSearch_windDirSin_cnx)

100%|██████████| 16675/16675 [10:24<00:00, 26.72it/s]


ITER#0 (2, 1, 2) (0, 1, 0, 1461) 9.645453192745332


100%|██████████| 16675/16675 [10:27<00:00, 26.57it/s]


ITER#1 (2, 1, 0) (0, 1, 0, 1461) 9.635385298430009


100%|██████████| 16675/16675 [10:28<00:00, 26.53it/s]


ITER#2 (0, 0, 2) (0, 1, 0, 1461) 10.476873222447301


100%|██████████| 16675/16675 [10:32<00:00, 26.35it/s]


ITER#3 (1, 1, 1) (0, 1, 0, 1461) 10.03486294654029


100%|██████████| 16675/16675 [10:39<00:00, 26.06it/s]

ITER#4 (1, 1, 1) (1, 1, 0, 1461) 9.652145110307048





Unnamed: 0,pdq,pdqs,rmse
1,"(2, 1, 0)","(0, 1, 0, 1461)",9.635385
0,"(2, 1, 2)","(0, 1, 0, 1461)",9.645453
4,"(1, 1, 1)","(1, 1, 0, 1461)",9.652145
3,"(1, 1, 1)","(0, 1, 0, 1461)",10.034863
2,"(0, 0, 2)","(0, 1, 0, 1461)",10.476873


In [12]:
gSearch_windDirCos_cnx = sarimax_randomsearch(windDirCos_train_cnx, windDirCos_test_cnx, pdq, pdqs, y_val=windDirCos_valid_cnx, n_rand=5, verbose=1)
display(gSearch_windDirCos_cnx)

100%|██████████| 16675/16675 [10:14<00:00, 27.15it/s]


ITER#0 (2, 1, 2) (0, 1, 0, 1461) 10.689303584782444


100%|██████████| 16675/16675 [10:06<00:00, 27.49it/s]


ITER#1 (1, 0, 1) (0, 1, 1, 1461) 10.723334527158466


100%|██████████| 16675/16675 [11:14<00:00, 24.74it/s]


ITER#2 (0, 1, 1) (0, 0, 0, 1461) 12.205322105234673


100%|██████████| 16675/16675 [10:52<00:00, 25.54it/s]


ITER#3 (1, 1, 0) (0, 1, 0, 1461) 10.990206880198611


100%|██████████| 16675/16675 [11:49<00:00, 23.49it/s]

ITER#4 (0, 0, 0) (1, 1, 0, 1461) 10.626829508167845





Unnamed: 0,pdq,pdqs,rmse
4,"(0, 0, 0)","(1, 1, 0, 1461)",10.62683
0,"(2, 1, 2)","(0, 1, 0, 1461)",10.689304
1,"(1, 0, 1)","(0, 1, 1, 1461)",10.723335
3,"(1, 1, 0)","(0, 1, 0, 1461)",10.990207
2,"(0, 1, 1)","(0, 0, 0, 1461)",12.205322


In [34]:
gSearch_windDirSin_kkc = sarimax_randomsearch(windDirSin_train_kkc, windDirSin_test_kkc, pdq, pdqs, y_val=windDirSin_valid_kkc, n_rand=5, verbose=1)
display(gSearch_windDirSin_kkc)

100%|██████████| 16675/16675 [14:10<00:00, 19.61it/s]


ITER#0 (2, 0, 1) (0, 1, 0, 1461) 10.031996246568228


100%|██████████| 16675/16675 [12:23<00:00, 22.43it/s]


ITER#1 (1, 0, 2) (1, 1, 0, 1461) 10.112734878913649


100%|██████████| 16675/16675 [13:17<00:00, 20.90it/s]


ITER#2 (1, 0, 0) (1, 0, 0, 1461) 11.618734396199311


100%|██████████| 16675/16675 [12:53<00:00, 21.55it/s]


ITER#3 (0, 1, 0) (1, 1, 1, 1461) 10.424593479770289


 46%|████▌     | 7612/16675 [05:46<07:55, 19.06it/s]

In [None]:
gSearch_windDirCos_kkc = sarimax_randomsearch(windDirCos_train_kkc, windDirCos_test_kkc, pdq, pdqs, y_val=windDirCos_valid_kkc, n_rand=5, verbose=1)
display(gSearch_windDirCos_kkc)

In [None]:
gSearch_windDirSin_ray = sarimax_randomsearch(windDirSin_train_ray, windDirSin_test_ray, pdq, pdqs, y_val=windDirSin_valid_ray, n_rand=5, verbose=1)
display(gSearch_windDirSin_ray)

In [None]:
gSearch_windDirCos_ray = sarimax_randomsearch(windDirCos_train_ray, windDirCos_test_ray, pdq, pdqs, y_val=windDirCos_valid_ray, n_rand=5, verbose=1)
display(gSearch_windDirCos_ray)

In [None]:
gSearch_windDirSin_sara = sarimax_randomsearch(windDirSin_train_sara, windDirSin_test_sara, pdq, pdqs, y_val=windDirSin_valid_sara, n_rand=5, verbose=1)
display(gSearch_windDirSin_sara)

In [None]:
gSearch_windDirCos_sara = sarimax_randomsearch(windDirCos_train_sara, windDirCos_test_sara, pdq, pdqs, y_val=windDirCos_valid_sara, n_rand=5, verbose=1)
display(gSearch_windDirCos_sara)

In [None]:
gSearch_windDirSin_surat = sarimax_randomsearch(windDirSin_train_surat, windDirSin_test_surat, pdq, pdqs, y_val=windDirSin_valid_surat, n_rand=5, verbose=1)
display(gSearch_windDirSin_surat)

In [None]:
gSearch_windDirCos_surat = sarimax_randomsearch(windDirCos_train_surat, windDirCos_test_surat, pdq, pdqs, y_val=windDirCos_valid_surat, n_rand=5, verbose=1)
display(gSearch_windDirCos_surat)

In [None]:
# windDir_order = {province[0]: gSearch_windDir_bkk.pdq, province[1]: gSearch_windDir_cnx.pdq, province[2]: gSearch_windDir_kkc.pdq, province[3]: gSearch_windDir_ray.pdq, province[4]: gSearch_windDir_sara.pdq, province[5]: gSearch_windDir_surat.pdq}
# windDir_seasonal_order = {province[0]: gSearch_windDir_bkk.pdqs, province[1]: gSearch_windDir_cnx.pdqs, province[2]: gSearch_windDir_kkc.pdqs, province[3]: gSearch_windDir_ray.pdqs, province[4]: gSearch_windDir_sara.pdqs, province[5]: gSearch_windDir_surat.pdqs}
# print(windDir_order)
# print(windDir_seasonal_order)

## Tuning parameters for PM2.5

In [13]:
exog_columns = ['Temp', 'WindSpeed', 'WindDirSin', 'WindDirCos']

In [14]:
model_exog_bkk = {}
exog_order_bkk = {}
exog_seasonal_order_bkk = {}

model_exog_bkk['Temp'] = MinimalSARIMAX(temp_train_bkk, (2,0,1), (0,1,0,1461))
model_exog_bkk['WindSpeed'] = MinimalSARIMAX(windSpeed_train_bkk, (2,0,2), (0,0,4,1461))
model_exog_bkk['WindDirSin'] = MinimalSARIMAX(windDirSin_train_bkk, (2,1,0), (0,1,0,1461))
model_exog_bkk['WindDirCos'] = MinimalSARIMAX(windDirCos_train_bkk, (2,1,2), (1,1,1,1461))

for exog in exog_columns:
    model_exog_bkk[exog].fit(lr=1e-6, lr_decay=0.999, verbose=0)

In [15]:
result_bkk = sarimax_randomsearch(pm_train_bkk, pm_test_bkk, pdq, pdqs, y_val=pm_valid_bkk, x_train=exo_train_bkk[exog_columns], x_test=exo_test_bkk[exog_columns], x_val=exo_valid_bkk[exog_columns],
                                  model_exog=model_exog_bkk, n_rand=3, verbose=1)

display(result_bkk)

100%|██████████| 16675/16675 [1:26:41<00:00,  3.21it/s]


ITER#0 (2, 1, 1) (1, 1, 0, 1461) 11.84755399262624


100%|██████████| 16675/16675 [1:12:42<00:00,  3.82it/s]


ITER#1 (1, 1, 2) (1, 1, 0, 1461) 12.30284533804268


In [None]:
model_exog_cnx = {}
exog_order_cnx = {}
exog_seasonal_order_cnx = {}

model_exog_cnx['Temp'] = MinimalSARIMAX(temp_train_cnx, (2,0,2), (1,1,1,1461))
model_exog_cnx['WindSpeed'] = MinimalSARIMAX(windSpeed_train_cnx, (1,1,0), (0,1,1,1461))
model_exog_cnx['WindDirSin'] = MinimalSARIMAX(windDirSin_train_cnx, (2,1,0), (0,1,0,1461))
model_exog_cnx['WindDirCos'] = MinimalSARIMAX(windDirCos_train_cnx, (2,1,0), (0,1,0,1461))

for exog in exog_columns:
    model_exog_cnx[exog].fit(lr=1e-6, lr_decay=0.999, verbose=0)

In [None]:
result_cnx = sarimax_randomsearch(pm_train_cnx, pm_test_cnx, pdq, pdqs, y_val=pm_valid_cnx, x_train=exo_train_cnx[exog_columns], x_test=exo_test_cnx[exog_columns], x_val=exo_valid_cnx[exog_columns],
                                  model_exog=model_exog_cnx, n_rand=3, verbose=1)

display(result_cnx)

In [14]:
model_exog_kkc = {}
exog_order_kkc = {}
exog_seasonal_order_kkc = {}

model_exog_kkc['Temp'] = MinimalSARIMAX(temp_train_kkc, (2,0,0), (0,1,0,1461))
model_exog_kkc['WindSpeed'] = MinimalSARIMAX(windSpeed_train_kkc, (1,0,1), (1,1,1,1461))
model_exog_kkc['WindDirSin'] = MinimalSARIMAX(windDirSin_train_kkc, (2,0,1), (0,1,0,1461))
model_exog_kkc['WindDirCos'] = MinimalSARIMAX(windDirCos_train_kkc, (1,1,2), (1,1,1,1461))

for exog in exog_columns:
    model_exog_kkc[exog].fit(lr=1e-6, lr_decay=0.999, verbose=0)

In [16]:
result_kkc = sarimax_randomsearch(pm_train_kkc, pm_test_kkc, pdq, pdqs, y_val=pm_valid_kkc, x_train=exo_train_kkc[exog_columns], x_test=exo_test_kkc[exog_columns], x_val=exo_valid_kkc[exog_columns],
                                  model_exog=model_exog_kkc, n_rand=5, verbose=1)

display(result_kkc)

100%|██████████| 16675/16675 [1:28:38<00:00,  3.14it/s]


ITER#0 (2, 0, 1) (0, 1, 0, 1461) 11.83773323722376


100%|██████████| 16675/16675 [1:28:10<00:00,  3.15it/s]


ITER#1 (0, 1, 0) (1, 1, 1, 1461) 12.540645548098066


100%|██████████| 16675/16675 [1:25:45<00:00,  3.24it/s]


ITER#2 (1, 0, 1) (1, 1, 1, 1461) 12.275094394190313


100%|██████████| 16675/16675 [1:22:31<00:00,  3.37it/s]


ITER#3 (1, 0, 2) (1, 0, 0, 1461) 11.476778383911642


100%|██████████| 16675/16675 [1:23:38<00:00,  3.32it/s]

ITER#4 (0, 1, 1) (1, 1, 0, 1461) 12.385984818719349





Unnamed: 0,pdq,pdqs,rmse
3,"(1, 0, 2)","(1, 0, 0, 1461)",11.476778
0,"(2, 0, 1)","(0, 1, 0, 1461)",11.837733
2,"(1, 0, 1)","(1, 1, 1, 1461)",12.275094
4,"(0, 1, 1)","(1, 1, 0, 1461)",12.385985
1,"(0, 1, 0)","(1, 1, 1, 1461)",12.540646


In [None]:
result_ray = sarimax_randomsearch(pm_train_ray, pm_test_ray, pdq, pdqs, y_val=pm_valid_ray, x_train=exo_train_ray[exog_columns], x_test=exo_test_ray[exog_columns], x_val=exo_valid_ray[exog_columns])
result_sara = sarimax_randomsearch(pm_train_sara, pm_test_sara, pdq, pdqs, y_val=pm_valid_sara, x_train=exo_train_sara[exog_columns], x_test=exo_test_sara[exog_columns], x_val=exo_valid_sara[exog_columns])
result_surat = sarimax_randomsearch(pm_train_surat, pm_test_surat, pdq, pdqs, y_val=pm_valid_surat, x_train=exo_train_surat[exog_columns], x_test=exo_test_surat[exog_columns], x_val=exo_valid_surat[exog_columns])

In [None]:
# with open('answer_dataset/gridsearch/result_bkk_03.bin',"wb") as f:
#     pickle.dump(result_bkk, f)

# with open('answer_dataset/gridsearch/result_cnx_03.bin',"wb") as f:
#     pickle.dump(result_cnx, f)
    
# with open('answer_dataset/gridsearch/result_ray_03.bin',"wb") as f:
#     pickle.dump(result_bkk, f)

# with open('answer_dataset/gridsearch/result_sara_03.bin',"wb") as f:
#     pickle.dump(result_sara, f)

# with open('answer_dataset/gridsearch/result_surat_03.bin',"wb") as f:
#     pickle.dump(result_surat, f)

In [None]:
# order = {province[0]: result_bkk.pdq, province[1]: result_cnx.pdq, province[2]: result_kkc.pdq, province[3]: result_ray.pdq, province[4]: result_sara.pdq, province[5]: result_surat.pdq}
# seasonal_order = {province[0]: result_bkk.pdqs, province[1]: result_cnx.pdqs, province[2]: result_kkc.pdqs, province[3]: result_ray.pdqs, province[4]: result_sara.pdqs, province[5]: result_surat.pdqs}
# print(order)
# print(seasonal_order)