In [1]:
#Import the libraries
import numpy as np
import pandas as pd
import pickle
import threading
import warnings
warnings.filterwarnings("ignore")

In [2]:
train_data = pickle.load(open('clean_dataset_2022/train_set.bin', 'rb'))

test_data = pickle.load(open('clean_dataset_2022/test_set.bin', 'rb'))

## SARIMAX

In [3]:
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX

from matplotlib.pyplot import figure

### Resample

In [3]:
province = ['BKK','CNX','KKC','RAY','SARA','SURAT']
exog_columns = ['Temp', 'WindSpeed', 'WindDir']

In [4]:
for p in province:
    train_data[p] = train_data[p].resample('6H').mean()

    tmp_df = test_data[p].copy()
    tmp_df['PM25'] = test_data[p]['PM25'].asfreq(freq='6H')
    test_data[p] = test_data[p].resample('6H').mean()
    test_data[p]['PM25'] = tmp_df['PM25']

### Split 70% 30%

In [5]:
train_set = {} ; valid_set = {}

ratio = 0.7

for p in province:
    train_size, valid_size = int(ratio*train_data[p].shape[0]), int((1-ratio)*train_data[p].shape[0])
    train_set[p], valid_set[p] = train_data[p].iloc[:train_size], train_data[p].iloc[train_size: ]

### Training 6 provinces with *minimal_SARIMAX*

In [6]:
from importlib import reload
from custom_function import minimalSARIMAX

reload(minimalSARIMAX)

from custom_function.minimalSARIMAX import MinimalSARIMAX

##### Tuning Parameters

In [7]:
order = [(2, 0, 0), (2, 1, 0), (2, 1, 0), (2, 1, 0), (1, 0, 1), (2, 0, 0)]
seasonal_order = [(1, 0, 0, 1461), (0, 1, 1, 1461), (0, 1, 1, 1461), (0, 0, 0, 1461), (0, 0, 0, 1461), (0, 1, 0, 1461)]

exog_order = {}
exog_order['Temp'] = [(1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 0, 0), (1, 1, 0), (1, 1, 0)]
exog_order['WindSpeed'] = [(1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 1, 0), (1, 0, 0), (1, 1, 0)]
exog_order['WindDir'] = [(1, 1, 0), (1, 0, 0), (2, 0, 0), (1, 1, 0), (1, 0, 0), (1, 0, 1)]

exog_seasonal_order = {}
exog_seasonal_order['Temp'] = [(0, 1, 1, 1461), (0, 1, 1, 1461), (0, 1, 1, 1461), (0, 0, 1, 1461), (0, 1, 0, 1461), (0, 1, 1, 1461)]
exog_seasonal_order['WindSpeed'] = [(0, 1, 1, 1461), (0, 1, 1, 1461), (0, 1, 1, 1461), (0, 1, 1, 1461), (0, 0, 0, 1461), (0, 1, 1, 1461)]
exog_seasonal_order['WindDir'] = [(1, 1, 0, 1461), (0, 1, 0, 1461), (1, 0, 0, 1461), (1, 1, 0, 1461), (0, 1, 1, 1461), (0, 1, 1, 1461)]

In [8]:
model = {}
model_exog = {}

for i, p in enumerate(province):
    model[p] = MinimalSARIMAX(train_data[p][['PM25']],
                order[i],
                seasonal_order[i],
                exog=train_data[p][exog_columns])
    
    model_exog[p] = {}    
    for exog in exog_columns:
        model_exog[p][exog] = MinimalSARIMAX(train_data[p][[exog]],
                                            exog_order[exog][i],
                                            exog_seasonal_order[exog][i])

In [9]:
for p in province:
    model[p].fit(lr=1e-5, lr_decay=0.999 ,verbose=0)

In [10]:
for p in province:
    for exog in exog_columns:
        if exog == 'WindDir':
            model_exog[p][exog].fit(lr=5e-7, lr_decay=0.999, verbose=0)
        else:
            model_exog[p][exog].fit(lr=1e-5, lr_decay=0.999, verbose=0)

In [11]:
Result = {}
val_pred_sav = {}
y_pred_sav = {}
Valid_Error = {}
Test_Error = {}

In [401]:
for p in province:
    if p in ['BKK','CNX','KKC','RAY']: continue
    Result[p] = model[p].predict_step(valid_set[p][['PM25']],
                                        test_data[p][['PM25']], 
                                        val_X_exog=valid_set[p][exog_columns],
                                        y_exog=test_data[p][exog_columns],
                                        model_exog=model_exog[p],
                                        lr=1e-8, lr_decay=0.9999,
                                        learn=True)

    val_pred_sav[p], y_pred_sav[p], Valid_Error[p], Test_Error[p] = Result[p]

    val_pred_sav[p].to_csv('answer_dataset/pred_and_actual/val_pred_sav_'+p+'.csv')
    y_pred_sav[p].to_csv('answer_dataset/pred_and_actual/y_pred_sav_'+p+'.csv')

In [403]:
for p in province:
    y_pred_sav[p].iloc[:,[1]].to_csv('answer_dataset/submission_answer/y_pred_'+p+'.csv')

In [273]:
# cnx_data = pd.concat((cnx_train, cnx_valid, cnx_test), axis=0)
# bkk_data = pd.concat((bkk_train, bkk_valid, bkk_test), axis=0)

In [274]:
# model_cnx.plot(cnx_data['PM25'], cnx_y_pred['PM25'], "Chiangmai PM2.5 Prediction")

In [275]:
# model['BKK'].plot(model['BKK']['PM25'], bkk_y_pred['PM25'], "Bangkok PM2.5 Prediction")

# Grid Search

In [12]:
import pickle
import itertools
from tqdm import tqdm

In [13]:
p = d = q = range(0, 3)
pdq = list(itertools.product(p, d, q))

In [14]:
pdqs = [(x[0], x[1], x[2], 1461) for x in list(itertools.product(p, d, q))]

In [15]:
# Define function
def sarimax_gridsearch(y_train, y_test, pdq, PDQs, y_val = None, x_train = None, x_test = None, x_val = None, verbose=0, model_exog=None):
    '''
    Input: 
        y_train: training data
        y_test: test data
        pdq : ARIMA combinations 
        pdqs : seasonal ARIMA combinations 
        x_train: exogenous training data
        x_test: exogenous test data

    Return:
        Returns dataframe of parameter combinations with the least RMSE
    '''
    tqdm_disable = True
    if verbose: tqdm_disable = False

    ans_df = pd.DataFrame(columns=['pdq', 'pdqs', 'rmse'])
    
    for comb in tqdm(pdq, disable=tqdm_disable):
        for combs in PDQs:
            p, d, q = comb[0], comb[1], comb[2]
            P, D, Q = combs[0], combs[1], combs[2]
            if (p+q <= 2) and (d <= 1) and (D <= 1) and (P <= 1) and (Q <= 1):  
                # try:
                    model = MinimalSARIMAX(y_train, comb, combs, exog=x_train)
                    model.fit(lr=1e-5, lr_decay=0.999 ,verbose=0) 

                    if (y_val is None):
                        y_pred, err = model.predict(y_test, y_exog=x_test, verbose=0)
                        # y_pred = y_pred[:-1] # remove last value since it's a list of all the predicted values which makes the scoring fail (inconsistent shape with the y_test)
                        rmse = model.scoring(y_pred, y_test)

                    else:
                        Result = model.predict_step(y_val, y_test, val_X_exog=x_val, y_exog=x_test,
                                                    model_exog=model_exog, lr=1e-8, lr_decay=0.9999, learn=False)

                        _, y_pred_sav, _, _ = Result
                        
                        rmse = model.scoring(y_pred_sav.iloc[:,[1]], y_pred_sav.iloc[:,[2]])
                    

                    ans_df = ans_df.append({'pdq':comb, 'pdqs':combs, 'rmse':rmse}, ignore_index=True)
                    # print(f'SARIMAX {comb} x {combs} : RMSE Calculated ={rmse}')
                # except Exception as e: 
                #     # print(e)
                #     continue

    # Convert into a dataframe
    # ans_df = pd.DataFrame(ans, columns=['pdq', 'pdqs', 'rmse'])

    # Sort and return a combination with the lowest RMSE
    ans_df = ans_df.sort_values(by=['rmse'],ascending=True)
    
    return ans_df

In [16]:
ratio = 0.7

pm_train_bkk = train_data[province[0]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_bkk = train_data[province[0]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_bkk = test_data[province[0]][['PM25']]
exo_train_bkk = train_data[province[0]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_valid_bkk = train_data[province[0]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_test_bkk = test_data[province[0]][exog_columns]


pm_train_cnx = train_data[province[1]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_cnx = train_data[province[1]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_cnx = test_data[province[1]][['PM25']]
exo_train_cnx = train_data[province[1]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_valid_cnx = train_data[province[1]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_test_cnx = test_data[province[1]][exog_columns]


pm_train_kkc = train_data[province[2]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_kkc = train_data[province[2]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_kkc = test_data[province[2]][['PM25']]
exo_train_kkc = train_data[province[2]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_valid_kkc = train_data[province[2]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_test_kkc = test_data[province[2]][exog_columns]


pm_train_ray = train_data[province[3]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_ray = train_data[province[3]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_ray = test_data[province[3]][['PM25']]
exo_train_ray = train_data[province[3]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_valid_ray = train_data[province[3]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_test_ray = test_data[province[3]][exog_columns]


pm_train_sara = train_data[province[4]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_sara = train_data[province[4]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_sara = test_data[province[4]][['PM25']]
exo_train_sara = train_data[province[4]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_valid_sara = train_data[province[4]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_test_sara = test_data[province[4]][exog_columns]


pm_train_surat = train_data[province[5]][['PM25']][:int(ratio*train_data[province[0]].shape[0])]
pm_valid_surat = train_data[province[5]][['PM25']][int(ratio*train_data[province[0]].shape[0]):]
pm_test_surat = test_data[province[5]][['PM25']]
exo_train_surat = train_data[province[5]][exog_columns][:int(ratio*train_data[province[0]].shape[0])]
exo_valid_surat = train_data[province[5]][exog_columns][int(ratio*train_data[province[0]].shape[0]):]
exo_test_surat = test_data[province[5]][exog_columns]

## Tuning parameters for PM2.5

In [None]:
# result_bkk = sarimax_gridsearch(pm_train_bkk, pm_test_bkk, pdq, pdqs, y_val=pm_valid_bkk, x_train=exo_train_bkk, x_test=exo_test_bkk, x_val=exo_valid_bkk, model_exog=model_exog['BKK'])
# result_cnx = sarimax_gridsearch(pm_train_cnx, pm_test_cnx, pdq, pdqs, y_val=pm_valid_cnx, x_train=exo_train_cnx, x_test=exo_test_cnx, x_val=exo_valid_cnx)
# result_kkc = sarimax_gridsearch(pm_train_kkc, pm_test_kkc, pdq, pdqs, y_val=pm_valid_kkc, x_train=exo_train_kkc, x_test=exo_test_kkc, x_val=exo_valid_kkc)
# result_ray = sarimax_gridsearch(pm_train_ray, pm_test_ray, pdq, pdqs, y_val=pm_valid_ray, x_train=exo_train_ray, x_test=exo_test_ray, x_val=exo_valid_ray)
# result_sara = sarimax_gridsearch(pm_train_sara, pm_test_sara, pdq, pdqs, y_val=pm_valid_sara, x_train=exo_train_sara, x_test=exo_test_sara, x_val=exo_valid_sara)
result_surat = sarimax_gridsearch(pm_train_surat, pm_test_surat, pdq, pdqs, y_val=pm_valid_surat, x_train=exo_train_surat, x_test=exo_test_surat, x_val=exo_valid_surat, model_exog=model_exog['SURAT'])

In [17]:
def pm25_BKK_thread(result):
    try:
        print('Starting BKK thread')
        result['BKK'] = sarimax_gridsearch(pm_train_bkk, pm_test_bkk, pdq, pdqs, y_val=pm_valid_bkk, x_train=exo_train_bkk, x_test=exo_test_bkk, x_val=exo_valid_bkk, model_exog=model_exog['BKK'])
        print('Finished BKK thread')
    except Exception as e:
        print(f'Thread BKK failed: {e}')

def pm25_CNX_thread(result):
    try:
        print('Starting CNX thread')
        result['CNX'] = sarimax_gridsearch(pm_train_cnx, pm_test_cnx, pdq, pdqs, y_val=pm_valid_cnx, x_train=exo_train_cnx, x_test=exo_test_cnx, x_val=exo_valid_cnx, model_exog=model_exog['CNX'])
        print('Finished CNX thread')
    except Exception as e:
        print(f'Thread CNX failed: {e}')

def pm25_KKC_thread(result):
    try:
        print('Starting KKC thread')
        result['KKC'] = sarimax_gridsearch(pm_train_kkc, pm_test_kkc, pdq, pdqs, y_val=pm_valid_kkc, x_train=exo_train_kkc, x_test=exo_test_kkc, x_val=exo_valid_kkc, model_exog=model_exog['KKC'])
        print('Finished KKC thread')
    except Exception as e: 
        print(f'Thread KKC failed: {e}')

def pm25_RAY_thread(result):
    try:
        print('Starting RAY thread')
        result['RAY'] = sarimax_gridsearch(pm_train_ray, pm_test_ray, pdq, pdqs, y_val=pm_valid_ray, x_train=exo_train_ray, x_test=exo_test_ray, x_val=exo_valid_ray, model_exog=model_exog['RAY'])
        print('Finished RAY thread')
    except Exception as e:
        print(f'Thread RAY failed: {e}')

def pm25_SARA_thread(result):
    try:
        print('Starting SARA thread')
        result['SARA'] = sarimax_gridsearch(pm_train_sara, pm_test_sara, pdq, pdqs, y_val=pm_valid_sara, x_train=exo_train_sara, x_test=exo_test_sara, x_val=exo_valid_sara, model_exog=model_exog['SARA'])
        print('Finished SARA thread')
    except Exception as e:
        print(f'Thread SARA failed: {e}')

def pm25_SURAT_thread(result):
    try:
        print('Starting SURAT thread')
        result['SURAT'] = sarimax_gridsearch(pm_train_surat, pm_test_surat, pdq, pdqs, y_val=pm_valid_surat, x_train=exo_train_surat, x_test=exo_test_surat, x_val=exo_valid_surat, model_exog=model_exog['SURAT'])
    except Exception as e:
        print(f'Thread SURAT failed: {e}')

In [18]:
threads = {}
pm25_tuning_result = {}

In [19]:
threads['BKK'] = threading.Thread(target=pm25_BKK_thread, args=(pm25_tuning_result,), daemon=True)
threads['CNX'] = threading.Thread(target=pm25_CNX_thread, args=(pm25_tuning_result,), daemon=True)
threads['KKC'] = threading.Thread(target=pm25_KKC_thread, args=(pm25_tuning_result,), daemon=True)
threads['RAY'] = threading.Thread(target=pm25_RAY_thread, args=(pm25_tuning_result,), daemon=True)
threads['SARA'] = threading.Thread(target=pm25_SARA_thread, args=(pm25_tuning_result,), daemon=True)
threads['SURAT'] = threading.Thread(target=pm25_SURAT_thread, args=(pm25_tuning_result,), daemon=True)

In [20]:
threads

{'BKK': <Thread(Thread-7 (pm25_BKK_thread), initial daemon)>,
 'CNX': <Thread(Thread-8 (pm25_CNX_thread), initial daemon)>,
 'KKC': <Thread(Thread-9 (pm25_KKC_thread), initial daemon)>,
 'RAY': <Thread(Thread-10 (pm25_RAY_thread), initial daemon)>,
 'SARA': <Thread(Thread-11 (pm25_SARA_thread), initial daemon)>,
 'SURAT': <Thread(Thread-12 (pm25_SURAT_thread), initial daemon)>}

In [21]:
pm25_tuning_result

{}

In [None]:
if not threads['SURAT'].is_alive():
    threads['SURAT'].start()

In [21]:
# start the threads
for thread in threads:
    th = threads[thread]
    if not th.is_alive():
        th.start()

Starting BKK thread
Starting CNX thread
Starting KKC thread
Starting RAY thread
Starting SARA thread
Starting SURAT thread
Thread SURAT failed: index 0 is out of bounds for axis 0 with size 0


In [17]:
def test(ans):
    for i in range(0, 10):
        ans[i] = i

In [18]:
anss = {}

In [21]:
if( (not 'testTh' in locals()) or (not testTh.is_alive())):
    testTh = threading.Thread(target=test, args=(anss,), daemon=True)
    testTh.start()

In [22]:
anss

{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}

In [24]:

if( (not 'monitor' in locals()) or (not monitor.is_alive())):
    monitor = threading.Thread(target=test, args=(ans, 10), daemon=True)
    monitor.start()

In [None]:
with open('answer_dataset/resulk_bkk_03.bin',"wb") as f:
    pickle.dump(result_bkk, f)

# with open('answer_dataset/resulk_cnx_03.bin',"wb") as f:
#     pickle.dump(result_cnx, f)
    
# with open('answer_dataset/resulk_ray_03.bin',"wb") as f:
#     pickle.dump(result_bkk, f)

# with open('answer_dataset/resulk_sara_03.bin',"wb") as f:
#     pickle.dump(result_sara, f)

# with open('answer_dataset/resulk_surat_03.bin',"wb") as f:
#     pickle.dump(result_surat, f)

In [29]:
order = {province[0]: result_bkk.pdq, province[1]: result_cnx.pdq, province[2]: result_kkc.pdq, province[3]: result_ray.pdq, province[4]: result_sara.pdq, province[5]: result_surat.pdq}
seasonal_order = {province[0]: result_bkk.pdqs, province[1]: result_cnx.pdqs, province[2]: result_kkc.pdqs, province[3]: result_ray.pdqs, province[4]: result_sara.pdqs, province[5]: result_surat.pdqs}
print(order)
print(seasonal_order)

{'BKK': (2, 1, 0), 'CNX': (1, 0, 1), 'KKC': (2, 1, 0), 'RAY': (2, 1, 0), 'SARA': (2, 0, 0), 'SURAT': (2, 1, 0)}
{'BKK': (1, 1, 0, 1461), 'CNX': (0, 1, 1, 1461), 'KKC': (0, 1, 0, 1461), 'RAY': (0, 1, 0, 1461), 'SARA': (0, 0, 0, 1461), 'SURAT': (0, 1, 1, 1461)}


## Tuning parameters for temperature

In [33]:
temp = exog_columns[0]

temp_train_bkk = pd.DataFrame(exo_train_bkk[temp])
temp_valid_bkk = pd.DataFrame(exo_valid_bkk[temp])
temp_test_bkk = pd.DataFrame(exo_test_bkk[temp])

temp_train_cnx = pd.DataFrame(exo_train_cnx[temp])
temp_valid_cnx = pd.DataFrame(exo_valid_cnx[temp])
temp_test_cnx = pd.DataFrame(exo_test_cnx[temp])

temp_train_kkc = pd.DataFrame(exo_train_kkc[temp])
temp_valid_kkc = pd.DataFrame(exo_valid_kkc[temp])
temp_test_kkc = pd.DataFrame(exo_test_kkc[temp])

temp_train_ray = pd.DataFrame(exo_train_ray[temp])
temp_valid_ray = pd.DataFrame(exo_valid_ray[temp])
temp_test_ray = pd.DataFrame(exo_test_ray[temp])

temp_train_sara = pd.DataFrame(exo_train_sara[temp])
temp_valid_sara = pd.DataFrame(exo_valid_sara[temp])
temp_test_sara = pd.DataFrame(exo_test_sara[temp])

temp_train_surat = pd.DataFrame(exo_train_surat[temp])
temp_valid_surat = pd.DataFrame(exo_valid_surat[temp])
temp_test_surat = pd.DataFrame(exo_test_surat[temp])

In [364]:
gSearch_temp_bkk = sarimax_gridsearch(temp_train_bkk, temp_test_bkk, pdq, pdqs, y_val=temp_valid_bkk)
gSearch_temp_cnx = sarimax_gridsearch(temp_train_cnx, temp_test_cnx, pdq, pdqs, y_val=temp_valid_bkk)
gSearch_temp_kkc = sarimax_gridsearch(temp_train_kkc, temp_test_kkc, pdq, pdqs, y_val=temp_valid_bkk)
gSearch_temp_ray = sarimax_gridsearch(temp_train_ray, temp_test_ray, pdq, pdqs, y_val=temp_valid_bkk)
gSearch_temp_sara = sarimax_gridsearch(temp_train_sara, temp_test_sara, pdq, pdqs, y_val=temp_valid_bkk)
gSearch_temp_surat = sarimax_gridsearch(temp_train_surat, temp_test_surat, pdq, pdqs, y_val=temp_valid_bkk)

In [36]:
temp_order = {province[0]: gSearch_temp_bkk.pdq, province[1]: gSearch_temp_cnx.pdq, province[2]: gSearch_temp_kkc.pdq, province[3]: gSearch_temp_ray.pdq, province[4]: gSearch_temp_sara.pdq, province[5]: gSearch_temp_surat.pdq}
temp_seasonal_order = {province[0]: gSearch_temp_bkk.pdqs, province[1]: gSearch_temp_cnx.pdqs, province[2]: gSearch_temp_kkc.pdqs, province[3]: gSearch_temp_ray.pdqs, province[4]: gSearch_temp_sara.pdqs, province[5]: gSearch_temp_surat.pdqs}
print(temp_order)
print(temp_seasonal_order)

{'BKK': (1, 0, 0), 'CNX': (1, 1, 0), 'KKC': (1, 0, 0), 'RAY': (1, 0, 0), 'SARA': (1, 0, 0), 'SURAT': (1, 0, 0)}
{'BKK': (0, 0, 0, 1461), 'CNX': (0, 0, 0, 1461), 'KKC': (0, 0, 0, 1461), 'RAY': (0, 0, 1, 1461), 'SARA': (0, 0, 0, 1461), 'SURAT': (0, 0, 1, 1461)}


In [None]:
with open('answer_dataset/gSearch_temp_bkk_03.bin',"wb") as f:
    pickle.dump(gSearch_temp_bkk, f)

with open('answer_dataset/gSearch_temp_cnx_03.bin',"wb") as f:
    pickle.dump(gSearch_temp_cnx, f)
    
with open('answer_dataset/gSearch_temp_ray_03.bin',"wb") as f:
    pickle.dump(gSearch_temp_bkk, f)

with open('answer_dataset/gSearch_temp_sara_03.bin',"wb") as f:
    pickle.dump(gSearch_temp_sara, f)

with open('answer_dataset/gSearch_temp_surat_03.bin',"wb") as f:
    pickle.dump(gSearch_temp_surat, f)

## Tuning parameters for windspeed

In [372]:
windSpeed = exog_columns[1]
windSpeed_train_bkk = pd.DataFrame(exo_train_bkk[windSpeed])
windSpeed_valid_bkk = pd.DataFrame(exo_valid_bkk[windSpeed])
windSpeed_test_bkk = pd.DataFrame(exo_test_bkk[windSpeed])

windSpeed_train_cnx = pd.DataFrame(exo_train_cnx[windSpeed])
windSpeed_valid_cnx = pd.DataFrame(exo_valid_cnx[windSpeed])
windSpeed_test_cnx = pd.DataFrame(exo_test_cnx[windSpeed])

windSpeed_train_kkc = pd.DataFrame(exo_train_kkc[windSpeed])
windSpeed_valid_kkc = pd.DataFrame(exo_valid_kkc[windSpeed])
windSpeed_test_kkc = pd.DataFrame(exo_test_kkc[windSpeed])

windSpeed_train_ray = pd.DataFrame(exo_train_ray[windSpeed])
windSpeed_valid_ray = pd.DataFrame(exo_valid_ray[windSpeed])
windSpeed_test_ray = pd.DataFrame(exo_test_ray[windSpeed])

windSpeed_train_sara = pd.DataFrame(exo_train_sara[windSpeed])
windSpeed_valid_sara = pd.DataFrame(exo_valid_sara[windSpeed])
windSpeed_test_sara = pd.DataFrame(exo_test_sara[windSpeed])

windSpeed_train_surat = pd.DataFrame(exo_train_surat[windSpeed])
windSpeed_valid_surat = pd.DataFrame(exo_valid_surat[windSpeed])
windSpeed_test_surat = pd.DataFrame(exo_test_surat[windSpeed])

In [None]:
gSearch_windSpeed_bkk = sarimax_gridsearch(windSpeed_train_bkk, windSpeed_test_bkk, pdq, pdqs, y_val=windSpeed_valid_bkk)
gSearch_windSpeed_cnx = sarimax_gridsearch(windSpeed_train_cnx, windSpeed_test_cnx, pdq, pdqs, y_val=windSpeed_valid_cnx)
gSearch_windSpeed_kkc = sarimax_gridsearch(windSpeed_train_kkc, windSpeed_test_kkc, pdq, pdqs, y_val=windSpeed_valid_kkc)
gSearch_windSpeed_ray = sarimax_gridsearch(windSpeed_train_ray, windSpeed_test_ray, pdq, pdqs, y_val=windSpeed_valid_ray)
gSearch_windSpeed_sara = sarimax_gridsearch(windSpeed_train_sara, windSpeed_test_sara, pdq, pdqs, y_val=windSpeed_valid_sara)
gSearch_windSpeed_surat = sarimax_gridsearch(windSpeed_train_surat, windSpeed_test_surat, pdq, pdqs, y_val=windSpeed_valid_surat)

In [40]:
windSpeed_order = {province[0]: gSearch_windSpeed_bkk.pdq, province[1]: gSearch_windSpeed_cnx.pdq, province[2]: gSearch_windSpeed_kkc.pdq, province[3]: gSearch_windSpeed_ray.pdq, province[4]: gSearch_windSpeed_sara.pdq, province[5]: gSearch_windSpeed_surat.pdq}
windSpeed_seasonal_order = {province[0]: gSearch_windSpeed_bkk.pdqs, province[1]: gSearch_windSpeed_cnx.pdqs, province[2]: gSearch_windSpeed_kkc.pdqs, province[3]: gSearch_windSpeed_ray.pdqs, province[4]: gSearch_windSpeed_sara.pdqs, province[5]: gSearch_windSpeed_surat.pdqs}
print(windSpeed_order)
print(windSpeed_seasonal_order)

{'BKK': (1, 1, 0), 'CNX': (1, 1, 0), 'KKC': (1, 1, 0), 'RAY': (1, 1, 0), 'SARA': (1, 0, 0), 'SURAT': (1, 1, 0)}
{'BKK': (0, 1, 1, 1461), 'CNX': (0, 1, 1, 1461), 'KKC': (0, 1, 1, 1461), 'RAY': (0, 1, 1, 1461), 'SARA': (0, 0, 0, 1461), 'SURAT': (0, 1, 1, 1461)}


## Tuning parameters for wind direction

In [44]:
windDir = exog_columns[2]

windDir_train_bkk = pd.DataFrame(exo_train_bkk[windDir])
windDir_valid_bkk = pd.DataFrame(exo_valid_bkk[windDir])
windDir_test_bkk = pd.DataFrame(exo_test_bkk[windDir])

windDir_train_cnx = pd.DataFrame(exo_train_cnx[windDir])
windDir_valid_cnx = pd.DataFrame(exo_valid_cnx[windDir])
windDir_test_cnx = pd.DataFrame(exo_test_cnx[windDir])

windDir_train_kkc = pd.DataFrame(exo_train_kkc[windDir])
windDir_valid_kkc = pd.DataFrame(exo_valid_kkc[windDir])
windDir_test_kkc = pd.DataFrame(exo_test_kkc[windDir])

windDir_train_ray = pd.DataFrame(exo_train_ray[windDir])
windDir_valid_ray = pd.DataFrame(exo_valid_ray[windDir])
windDir_test_ray = pd.DataFrame(exo_test_ray[windDir])

windDir_train_sara = pd.DataFrame(exo_train_sara[windDir])
windDir_valid_sara = pd.DataFrame(exo_valid_sara[windDir])
windDir_test_sara = pd.DataFrame(exo_test_sara[windDir])

windDir_train_surat = pd.DataFrame(exo_train_surat[windDir])
windDir_valid_surat = pd.DataFrame(exo_valid_surat[windDir])
windDir_test_surat = pd.DataFrame(exo_test_surat[windDir])

In [None]:
gSearch_windDir_bkk = sarimax_gridsearch(windDir_train_bkk, windDir_test_bkk, pdq, pdqs, y_val=windDir_valid_bkk)
gSearch_windDir_cnx = sarimax_gridsearch(windDir_train_cnx, windDir_test_cnx, pdq, pdqs, y_val=windDir_valid_bkk)
gSearch_windDir_kkc = sarimax_gridsearch(windDir_train_kkc, windDir_test_kkc, pdq, pdqs, y_val=windDir_valid_bkk)
gSearch_windDir_ray = sarimax_gridsearch(windDir_train_ray, windDir_test_ray, pdq, pdqs, y_val=windDir_valid_bkk)
gSearch_windDir_sara = sarimax_gridsearch(windDir_train_sara, windDir_test_sara, pdq, pdqs, y_val=windDir_valid_bkk)
gSearch_windDir_surat = sarimax_gridsearch(windDir_train_surat, windDir_test_surat, pdq, pdqs, y_val=windDir_valid_bkk)

In [46]:
windDir_order = {province[0]: gSearch_windDir_bkk.pdq, province[1]: gSearch_windDir_cnx.pdq, province[2]: gSearch_windDir_kkc.pdq, province[3]: gSearch_windDir_ray.pdq, province[4]: gSearch_windDir_sara.pdq, province[5]: gSearch_windDir_surat.pdq}
windDir_seasonal_order = {province[0]: gSearch_windDir_bkk.pdqs, province[1]: gSearch_windDir_cnx.pdqs, province[2]: gSearch_windDir_kkc.pdqs, province[3]: gSearch_windDir_ray.pdqs, province[4]: gSearch_windDir_sara.pdqs, province[5]: gSearch_windDir_surat.pdqs}
print(windDir_order)
print(windDir_seasonal_order)

{'BKK': (1, 1, 0), 'CNX': (1, 1, 0), 'KKC': (2, 1, 0), 'RAY': (1, 0, 1), 'SARA': (1, 1, 0), 'SURAT': (1, 1, 1)}
{'BKK': (0, 1, 1, 1461), 'CNX': (1, 1, 0, 1461), 'KKC': (1, 1, 0, 1461), 'RAY': (0, 1, 0, 1461), 'SARA': (0, 0, 1, 1461), 'SURAT': (0, 1, 0, 1461)}


In [None]:
with open('answer_dataset/gSearch_windDir_bkk_03.bin',"wb") as f:
    pickle.dump(gSearch_windDir_bkk, f)

with open('answer_dataset/gSearch_windDir_cnx_03.bin',"wb") as f:
    pickle.dump(gSearch_windDir_cnx, f)
    
with open('answer_dataset/gSearch_windDir_ray_03.bin',"wb") as f:
    pickle.dump(gSearch_windDir_bkk, f)

with open('answer_dataset/gSearch_windDir_sara_03.bin',"wb") as f:
    pickle.dump(gSearch_windDir_sara, f)

with open('answer_dataset/gSearch_windDir_surat_03.bin',"wb") as f:
    pickle.dump(gSearch_windDir_surat, f)