# Train Random Forest

In [None]:
from utils.ML import time_series_dataframe_ML, make_dataframe
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
import numpy as np
import torch
from utils.DL import *

In [None]:
# ---- Raw time series -------------------------------------------------------
# time_series_dataframe_ML() yields three values; only the first is kept.
df_ts_ML, _, _ = time_series_dataframe_ML()
window_size = 120
stride = 40

# ---- Windowed feature table ------------------------------------------------
# make_dataframe() is called with the window length and stride chosen above.
df_ML = make_dataframe(window_size, stride)
y = df_ML['label'].values
X = df_ML.iloc[:, :9].values  # the first nine columns are the features
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

# ---- Min-max scaling -------------------------------------------------------
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics of the test split enter the fit.
scaler_ML = MinMaxScaler()
scaler_ML.fit(X_train)
X_train_scaled = scaler_ML.transform(X_train)
x_test_scaled = scaler_ML.transform(X_test)

In [None]:
# Build the random forest. random_state is fixed so repeated runs of this
# cell produce the same forest for the same training data.
RFC = RandomForestClassifier(
    n_estimators=50,
    max_depth=30,
    random_state=42,
    min_samples_leaf=8,
    min_samples_split=8,
)

# Fit on the scaled training split.
RFC.fit(X_train_scaled, y_train)

# Predict labels for the held-out split (scaled with the training-fitted
# scaler).
RFC_predict = RFC.predict(x_test_scaled)

# Fraction of held-out samples whose predicted label matches the true label.
RFC_ac = accuracy_score(y_test, RFC_predict)

# The message previously read "Precition"; spelling corrected.
print("Random Forest Classifier Prediction Accuracy : ", RFC_ac)

# Second Step

In [None]:
# Hyperparameters
input_window = 240
output_window = 120
epochs = 500
batch_size = 1024

# time_series_dataframe_ML() is unpacked into three values where it is first
# used in this file, so indexing its raw return value with 'TEMP' fails on a
# tuple. Take the first element when a tuple comes back; a bare frame is used
# as-is.
_loaded_ts = time_series_dataframe_ML()
df_DL = _loaded_ts[0] if isinstance(_loaded_ts, tuple) else _loaded_ts

# Chronological train/test split (first 80 % of the rows are training data).
df_DL_temp = df_DL['TEMP'].values
df_DL_label = df_DL['label'].values

scaler_train = MinMaxScaler()
scaler_test = MinMaxScaler()

train_len = int(len(df_DL) * 0.8)

train_data = df_DL_temp[:train_len]
test_data = df_DL_temp[train_len:]
train_label = df_DL_label[:train_len]
test_label = df_DL_label[train_len:]

# MinMaxScaler expects 2-D input, hence the (-1, 1) reshape and the flatten
# afterwards.
# NOTE(review): the test split is scaled with its own scaler fitted on test
# data. Kept as-is because scaler_test is used later to invert predictions;
# confirm whether scaler_train.transform was intended instead.
train_data = scaler_train.fit_transform(train_data.reshape(-1, 1)).reshape(-1)
test_data = scaler_test.fit_transform(test_data.reshape(-1, 1)).reshape(-1)

# Window the series. The results are moved with .to(device) below, so
# new_multistep_time_series presumably returns torch tensors - TODO confirm.
# The training labels it returns are not needed and are discarded.
train_data, _ = new_multistep_time_series(train_data, train_label, input_window, output_window)
test_data, test_label = new_multistep_time_series(test_data, test_label, input_window, output_window)

# Model / optimisation setup
lr = 0.001
# is_available must be *called*: the bare function object is always truthy,
# which selected 'cuda' even on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TransAm().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
# Multiply the learning rate by 0.98 on every scheduler step.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.98)

train_data = train_data.to(device)
test_data = test_data.to(device)

In [None]:
# Explicit imports: this cell previously relied on `math`, `time` being
# re-exported by a star import.
import math
import os
import time

best_val_loss = float("inf")
best_model = None

# Create the checkpoint directory *before* training so a missing folder does
# not make torch.save fail after every epoch has already run.
PATH = "./weights/"
os.makedirs(PATH, exist_ok=True)

for epoch in range(1, epochs + 1):
    start_time = time.time()
    # NOTE(review): the scheduler is handed to train_tmp and also stepped at
    # the end of this loop body - confirm train_tmp does not step it as well.
    train_tmp(model, train_data, batch_size, optimizer, criterion, input_window, output_window, epoch, scheduler)

    # Every 20th epoch use the plotting variant, otherwise the plain
    # evaluation; both provide the validation loss.
    # NOTE(review): plot_and_loss2 receives (input_window, output_window)
    # while evaluate2 receives (output_window, input_window) - verify both
    # orders against the helper signatures.
    if epoch % 20 == 0:
        truth, test_result, result_to_ML, val_loss = plot_and_loss2(model, test_data, criterion, input_window, output_window, scaler_test)
    else:
        val_loss = evaluate2(model, test_data, criterion, output_window, input_window)

    print('-' * 90)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f} |'.format(epoch, (time.time() - start_time),
                                        val_loss, math.exp(val_loss)))
    print('-' * 90)

    scheduler.step()

# Persist both the pickled module and its state dict.
torch.save(model, PATH + 'model_150.pt')
torch.save(model.state_dict(), PATH + '150_model_state_dict.pt')

In [None]:
# Explicit imports: this cell previously relied on `math`, `time` being
# re-exported by a star import.
import math
import os
import time

best_val_loss = float("inf")
best_model = None

# Create the checkpoint directory *before* training so a missing folder does
# not make torch.save fail after every epoch has already run.
PATH = "./weights/"
os.makedirs(PATH, exist_ok=True)

# NOTE(review): nothing in this cell re-creates model, optimizer or scheduler,
# so it continues from whatever state those objects are in when it is run.
for epoch in range(1, epochs + 1):
    start_time = time.time()
    # NOTE(review): the scheduler is handed to train_tmp and also stepped at
    # the end of this loop body - confirm train_tmp does not step it as well.
    train_tmp(model, train_data, batch_size, optimizer, criterion, input_window, output_window, epoch, scheduler)

    # Every 20th epoch use the plotting variant, otherwise the plain
    # evaluation; both provide the validation loss.
    # NOTE(review): plot_and_loss2 receives (input_window, output_window)
    # while evaluate2 receives (output_window, input_window) - verify both
    # orders against the helper signatures.
    if epoch % 20 == 0:
        truth, test_result, result_to_ML, val_loss = plot_and_loss2(model, test_data, criterion, input_window, output_window, scaler_test)
    else:
        val_loss = evaluate2(model, test_data, criterion, output_window, input_window)

    print('-' * 90)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f} |'.format(epoch, (time.time() - start_time),
                                        val_loss, math.exp(val_loss)))
    print('-' * 90)

    scheduler.step()

# Persist both the pickled module and its state dict.
torch.save(model, PATH + '240_model.pt')
torch.save(model.state_dict(), PATH + '240_model_state_dict.pt')

# Inference

In [None]:
import torch
import sklearn
from sklearn.metrics import mean_squared_error, mean_absolute_error
# sklearn.set_config(array_api_dispatch=True)
# Directory holding the final checkpoints.
PATH = './weights/final/'

# Restore the pickled module first, then overwrite its parameters with the
# separately saved state dict.
model = torch.load(PATH + '120_model.pt')
model.load_state_dict(torch.load(PATH + '120_model_state_dict.pt'))

# Column-wise accumulator for the per-window statistics; the key order below
# fixes the column order of the table built from it.
DataFrame = {
    column: []
    for column in (
        'MEAN_TEMP', 'STD', 'MIN', 'MAX', 'SKEW', 'KURT', 'MEDIAN', '25%', '75%', 'label',
    )
}

def DF_to_DataFrame(data):
    """Summarise a 1-D sample as nine descriptive statistics.

    Args:
        data: A pandas Series, or any 1-D array-like (list, NumPy array).
            Array-likes are wrapped in a Series so that the pandas-only
            statistics (median, skew, kurt) are available for them too.

    Returns:
        A tuple ``(mean, min, max, std, median, skew, kurt, q25, q75)``:
        the mean rounded to 3 decimals, minimum, maximum, standard deviation
        as computed by ``np.std`` (default ``ddof=0``), median, skewness and
        kurtosis as computed by ``Series.skew`` / ``Series.kurt``, and the
        25th and 75th percentiles.
    """
    import pandas as pd  # local import keeps the helper self-contained

    if isinstance(data, pd.Series):
        series = data
    else:
        series = pd.Series(np.asarray(data).ravel())

    # Local names deliberately avoid `min` / `max` so the builtins are not
    # shadowed inside the function.
    mean_value = np.round(np.mean(series), 3)
    lowest = np.min(series)
    highest = np.max(series)
    spread = np.std(series)
    median_value = series.median()
    skewness = series.skew()
    kurtosis = series.kurt()
    q25, q75 = np.percentile(series, q=[25, 75])

    return (
        mean_value,
        lowest,
        highest,
        spread,
        median_value,
        skewness,
        kurtosis,
        q25,
        q75,
    )

# Index 0 along axis 1 of test_data is used as the model input and index 1 as
# the target. Each selection gets a singleton middle axis and then has its
# first and last axes swapped, so the sample index ends up on the last axis.
test_input = test_data[:, 0, :].unsqueeze(1).transpose(0, 2)
test_input.shape  # bare expression kept from the notebook; has no effect here

test_target = test_data[:, 1, :].unsqueeze(1).transpose(0, 2)
test_target.shape  # bare expression kept from the notebook; has no effect here

def test_batch(data, index):
    """Select one position along the last axis, keeping that axis.

    Args:
        data: A 3-D tensor.
        index: Position along the last axis of ``data`` to select.

    Returns:
        A tensor with the same first two axes as ``data`` and a trailing
        axis of length 1 holding the selected slice.
    """
    return data[:, :, index].unsqueeze(2)


# Despite the ``test_`` prefix this is a data helper, not a test case; opt out
# of pytest collection in case this module is ever imported by a test run.
test_batch.__test__ = False

# Inputs and targets are sliced in exactly the same way, so both historical
# names share the single implementation above instead of duplicating it.
test_input_batch = test_batch
test_target_batch = test_batch

def r2_score(target, output):
    """Coefficient of determination, ``1 - SS_res / SS_tot``.

    Args:
        target: Ground-truth values (CPU tensor or array-like).
        output: Predicted values with the same number of elements.

    Returns:
        The R^2 score as a float. When ``target`` is constant the
        denominator is zero; in that case 1.0 is returned for an exact
        prediction and 0.0 otherwise (the convention scikit-learn's
        ``r2_score`` uses by default) instead of nan / -inf, which would
        otherwise poison any average taken over several scores.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    y_true = np.asarray(target).astype(np.float64).reshape(-1)
    y_pred = np.asarray(output).astype(np.float64).reshape(-1)
    if y_true.size == 0:
        raise ValueError("r2_score requires at least one sample")
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "target and output must have the same number of elements, "
            f"got {y_true.size} and {y_pred.size}"
        )

    # The ratio of the two mean squared errors equals the ratio of the sums
    # of squares because both share the same sample count.
    ss_res = float(np.sum((y_true - y_pred) ** 2))
    ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
    if ss_tot == 0.0:
        return 1.0 if ss_res == 0.0 else 0.0
    return 1.0 - ss_res / ss_tot

def test_rmse_mae_r2(target, output):
    """Score a prediction against its target with three metrics.

    Args:
        target: Ground-truth values.
        output: Predicted values.

    Returns:
        A tuple ``(rmse, mae, r2)``: the square root of the mean squared
        error, the mean absolute error, and the R^2 score computed by this
        file's own ``r2_score`` function.
    """
    mse = mean_squared_error(target, output)
    return (
        np.sqrt(mse),
        mean_absolute_error(target, output),
        r2_score(target, output),
    )

# Per-sample metric accumulators.
rmse_list = []
mae_list = []
r2_list = []

model.eval()
with torch.no_grad():
    for i in tqdm(range(len(test_data))):
        # `model_input` instead of `input`: the original rebound the builtin
        # `input` (and, below, `min` / `max`) at module scope, breaking any
        # later cell that calls those builtins.
        model_input = test_input_batch(test_input, i)
        target = test_target_batch(test_target, i)
        output = model(model_input)

        # Keep the last 120 steps of prediction and target, on the CPU.
        output = output[-120:].detach().cpu()
        target = target[-120:].detach().cpu()

        rmse, mae, r2 = test_rmse_mae_r2(target.view(-1), output.view(-1))
        rmse_list.append(rmse)
        mae_list.append(mae)
        r2_list.append(r2)

        # Undo the min-max scaling, then summarise the predicted window.
        output = scaler_test.inverse_transform(output.reshape(-1, 1)).reshape(-1)
        predicted = pd.Series(output, name='TEMP')
        (
            mean_val,
            min_val,
            max_val,
            std_val,
            median_val,
            skew_val,
            kurt_val,
            q25,
            q75,
        ) = DF_to_DataFrame(predicted)

        DataFrame['label'].append(test_label[i])
        DataFrame['MEAN_TEMP'].append(mean_val)
        DataFrame['MIN'].append(min_val)
        DataFrame['MAX'].append(max_val)
        DataFrame['STD'].append(std_val)
        DataFrame['SKEW'].append(skew_val)
        DataFrame['KURT'].append(kurt_val)
        DataFrame['MEDIAN'].append(np.round(median_val, 3))
        DataFrame['25%'].append(np.round(q25, 3))
        DataFrame['75%'].append(np.round(q75, 3))

print(f"Average RMSE : {np.mean(rmse_list)}")
print(f"Average MAE : {np.mean(mae_list)}")
print(f"Average R2 : {np.mean(r2_list)}")
DF = pd.DataFrame(DataFrame)
DF