In [1]:
import sys, os
import pandas as pd
sys.path.append("../../../../")

import torch
# Pick the GPU when PyTorch reports a usable CUDA device, otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"{device} is available.")

  from .autonotebook import tqdm as notebook_tqdm


cpu is available.


### Prepare Dataset

In [2]:
### 1. Read data from .CSV
dataFolderName = "data_integrated_result"
# The data folder is resolved against the current working directory, so the
# notebook has to be started from the directory that contains that folder.
dataFolderPath = os.path.join(os.getcwd(), dataFolderName)

dataName_X = "IntegrationTrainX"
dataName_y = "IntegrationTrainy"

fileName_X = os.path.join(dataFolderPath, dataName_X + '.csv')
fileName_y = os.path.join(dataFolderPath, dataName_y + '.csv')

# The 'datetime' column is parsed as timestamps and becomes the index.
# `infer_datetime_format` is deliberately not passed: it is deprecated since
# pandas 2.0, where it has no effect and only emits a warning on every read.
data_X = pd.read_csv(fileName_X, index_col='datetime', parse_dates=['datetime'])
data_y = pd.read_csv(fileName_y, index_col='datetime', parse_dates=['datetime'])

### 2. Preprocessing (e.g., cleaning, scaling, ...)
from Clust.clust.ML.common import ML_pipeline
from Clust.clust.ML.tool import scaler

# Scaling configuration shared by the X and y calls below.
scaler_param = 'scale'
scale_method = 'minmax'
scaler_path = './scaler/'

# X Data Scaling
# Each dataset gets its own scaler directory under scaler_path, named after the dataset.
scalerRootPath_X = os.path.join(scaler_path, dataName_X)
# Pass scaler_param instead of repeating the 'scale' literal, so changing the
# setting above actually takes effect.
dataX_scaled, X_scalerFilePath = scaler.get_data_scaler(scaler_param, scalerRootPath_X, data_X, scale_method)

# y Data Scaling
scalerRootPath_y = os.path.join(scaler_path, dataName_y)
datay_scaled, y_scalerFilePath = scaler.get_data_scaler(scaler_param, scalerRootPath_y, data_y, scale_method)

### 3. Split train/val data
from Clust.clust.ML.common import tool
from Clust.clust.transformation.purpose import machineLearning as ML

# Split settings: ratio handed to the splitter, and the split mode used for X.
split_ratio, split_mode = 0.8, 'windows_split'
# Window size is derived from the scaled X frame by the Clust helper.
day_window_size = tool.get_default_day_window_size(dataX_scaled)

# X is split with the window-based mode and window size.
train_X, val_X = ML.split_data_by_ratio(
    dataX_scaled, split_ratio, split_mode, day_window_size
)
# y is split with the same ratio but without a split mode or window size.
train_y, val_y = ML.split_data_by_ratio(
    datay_scaled, split_ratio, None, None
)

### 4. Transform train/val data
max_nan_limit_ratio = 0.5
# Parameters forwarded to the transformer: the window length in steps and the
# NaN ratio limit.
transformParameter = dict(
    past_step=day_window_size,
    max_nan_limit_ratio=max_nan_limit_ratio,
)

# Apply the same transformation to the training pair, then to the validation pair.
train_X_array, train_y_array = ML_pipeline.transform_data_by_split_mode(
    split_mode, transformParameter, train_X, train_y
)
val_X_array, val_y_array = ML_pipeline.transform_data_by_split_mode(
    split_mode, transformParameter, val_X, val_y
)

New json file is created from data.json file
['RH_1', 'RH_2', 'RH_3', 'RH_4', 'RH_5', 'RH_6', 'RH_7', 'RH_8', 'RH_9', 'T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9', 'Press_mm_hg', 'RH_out', 'T_out', 'Tdewpoint', 'Visibility', 'Windspeed']
Make New scaler File
New json file is created from data.json file
['value']
Make New scaler File
window_size: 144 nan_limit_num: 72
(10944, 24) (76, 144, 24)
(76, 1) (76, 1)
window_size: 144 nan_limit_num: 72
(2736, 24) (19, 144, 24)
(19, 1) (19, 1)


### Set Model and Training Parameters

In [3]:
# Input dimensions come from the training array built in the previous cell:
# axis 1 is used as the sequence length and axis 2 as the number of input features.
seq_len, input_size = train_X_array.shape[1], train_X_array.shape[2]
model_method = 'GRU_rg' # Set model methods i.e., 'LSTM_rg', 'GRU_rg', 'CNN_1D_rg', 'LSTM_FCNs_rg', 'FC_rg'

# RNN models (LSTM, GRU) parameters
if model_method in ('LSTM_rg', 'GRU_rg'):
    modelParameter = {
        # Keep the recurrent cell type in sync with model_method; it used to be
        # hard-coded to 'lstm', so choosing 'GRU_rg' still configured an LSTM.
        # NOTE(review): assumes the Clust model picks its cell from 'rnn_type'
        # and accepts 'gru' -- confirm against the Clust RNN model.
        'rnn_type': 'gru' if model_method == 'GRU_rg' else 'lstm',
        'input_size': input_size,
        'hidden_size': 64,
        'num_layers': 2,
        'output_dim': 1,
        'dropout': 0.1,
        'bidirectional': True
    }
# CNN_1D model parameters
elif model_method == 'CNN_1D_rg':
    modelParameter = {
        'input_size': input_size,
        'seq_len': seq_len,
        'output_channels': 64,
        'kernel_size': 3,
        'stride': 1,
        'padding': 0,
        'dropout': 0.1
    }
# LSTM_FCNs model parameters
elif model_method == 'LSTM_FCNs_rg':
    modelParameter = {
        'input_size': input_size,
        'num_layers': 2,
        'lstm_dropout': 0.4,
        'fc_dropout': 0.1
    }
# FC model parameters
elif model_method == 'FC_rg':
    modelParameter = {
        'input_size': input_size,
        'dropout': 0.1,
        'bias': True
    }
else:
    # Fail here with a clear message instead of a NameError on modelParameter
    # in a later cell.
    raise ValueError(f"Unsupported model_method: {model_method!r}")

# Optimizer / training-loop settings handed to the trainer.
# NOTE(review): 'device' is fixed to 'cpu' even though the first cell detects
# CUDA availability into `device` -- confirm whether that value should be used here.
train_parameter = {
    'lr': 1e-4,
    'weight_decay': 1e-6,
    'device': 'cpu',
    'n_epochs': 10,
    'batch_size': 16
}
model_name = None
model_file_path = None

### Start Training

In [4]:
# Location passed to save_best_model() if the commented-out call below is re-enabled.
model_file_path = "./"

from Clust.clust.ML.regression_YK.train import RegressionTrain as RML

# Configure the trainer in order (training parameters, model definition,
# train/validation arrays) and then run training.
rml = RML()
rml.set_param(train_parameter)
rml.set_model(model_method, modelParameter)
rml.set_data(train_X_array, train_y_array, val_X_array, val_y_array)
rml.train()
# Saving is currently disabled; uncomment to call save_best_model with model_file_path.
#rml.save_best_model(model_file_path)

Start training model
[1/10] Training loss: 0.2012	 Validation loss: 0.1556
[2/10] Training loss: 0.2028	 Validation loss: 0.1438
[3/10] Training loss: 0.1854	 Validation loss: 0.1323
[4/10] Training loss: 0.1698	 Validation loss: 0.1210
[5/10] Training loss: 0.1597	 Validation loss: 0.1100
[6/10] Training loss: 0.1489	 Validation loss: 0.0992
[7/10] Training loss: 0.1277	 Validation loss: 0.0887
[8/10] Training loss: 0.1307	 Validation loss: 0.0782
[9/10] Training loss: 0.1124	 Validation loss: 0.0683
[10/10] Training loss: 0.1008	 Validation loss: 0.0587

Training complete in 0m 12s
