In [1]:
import config
import utils
import torch.nn as nn

import torch
import pickle
import random
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import os
import config
import utils
import main_multi as mt

from tqdm import tqdm

# Fix every random number generator so that repeated runs are reproducible.
random_seed = 42

# Python's and NumPy's global generators.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (CPU, then CUDA).
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# cuDNN: request deterministic algorithms and disable benchmark-based
# algorithm selection.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


In [2]:
SOURCE_DATASET = 'BeijingPM'

data_root_dir = f'./data/{SOURCE_DATASET}/'

# os.listdir returns entries in arbitrary order, so sort the listing to make
# "the first CSV" a deterministic choice when the directory holds several.
file = sorted(name for name in os.listdir(data_root_dir) if name.endswith('.csv'))
if not file:
    # Fail with an explicit message instead of a bare IndexError on file[0].
    raise FileNotFoundError(f'No .csv file found in {data_root_dir}')

data = pd.read_csv(os.path.join(data_root_dir, file[0]))

# Rows with a missing value in ANY column of the raw file are dropped here,
# i.e. before the column selection below (same order as before); the index
# is then renumbered so positional slicing lines up with the labels.
data = data.dropna().reset_index(drop=True)

# Keep the four input features plus the two targets used for multi-task learning.
data = data[['DEWP', 'TEMP', 'PRES', 'Iws', 'pm2.5', 'cbwd']]

def sequence_preprocessing(data_x, data_y_1, data_y_2, timestep, shift_size):
    """Cut row-ordered data into fixed-length sliding windows.

    Each window covers ``timestep`` consecutive rows of ``data_x``; its two
    labels are the rows of ``data_y_1`` / ``data_y_2`` at the window's LAST
    position ("last value as label").

    Args:
        data_x: Row-sliceable 2-D features (DataFrame or ndarray), one row
            per time step.
        data_y_1: First target, row-aligned with ``data_x``. May be a pandas
            object (its ``.values`` is used) or a plain array.
        data_y_2: Second target, same conventions as ``data_y_1``.
        timestep: Window length in rows; must be >= 1.
        shift_size: Stride between consecutive window starts.

    Returns:
        Tuple ``(X, targets_1, targets_2)`` where ``X`` has shape
        ``(n_windows, n_features, timestep)`` and each targets array has one
        entry per window.

    Raises:
        ValueError: If ``timestep`` is < 1 or ``data_x`` has fewer than
            ``timestep`` rows (no window can be formed).
    """
    if timestep < 1:
        raise ValueError(f"timestep must be >= 1, got {timestep}")

    n_rows = data_x.shape[0]
    if n_rows < timestep:
        # Without this guard the empty window list reaches transpose() and
        # fails with an unrelated "axes don't match array" message.
        raise ValueError(
            f"data_x has {n_rows} rows, fewer than timestep={timestep}; no window can be formed"
        )

    # Resolve the underlying arrays once instead of on every iteration;
    # objects without `.values` (e.g. ndarrays) are used as they are.
    y1_values = getattr(data_y_1, "values", data_y_1)
    y2_values = getattr(data_y_2, "values", data_y_2)

    X = []
    targets_1 = []
    targets_2 = []

    # Slicing
    for start_idx in tqdm(range(0, n_rows - timestep + 1, shift_size)):
        end_idx = start_idx + timestep
        X.append(data_x[start_idx:end_idx])

        # Method 1 (Last): the value at the window's final row is the label.
        targets_1.append(y1_values[end_idx - 1])
        targets_2.append(y2_values[end_idx - 1])

    # (instance, timestep, features) -> (instance, features, timestep)
    X = np.array(X).transpose(0, 2, 1)

    return X, np.array(targets_1), np.array(targets_2)

data_target = data.copy()

# Inputs are the four weather columns; target 1 is `cbwd` (label-encoded
# below) and target 2 is `pm2.5`.
data_x = data_target[['DEWP', 'TEMP', 'PRES', 'Iws']]
data_y_1 = data_target[['cbwd']]
data_y_2 = data_target[['pm2.5']]

seq_len = 24
x, y_1, y_2 = sequence_preprocessing(data_x, data_y_1, data_y_2, timestep=seq_len, shift_size=1)

# y_1 comes back as an (N, 1) column because data_y_1 is a one-column frame.
# LabelEncoder expects a 1-D array and emits a DataConversionWarning for a
# column vector, so flatten it explicitly.
label_encoder = LabelEncoder()
y_1 = label_encoder.fit_transform(np.ravel(y_1))

data_type = 'data_multi'

if data_type == 'data_single_1':
    # Single task on target 1 (encoded cbwd).
    x = x.copy()
    y = y_1.copy()

elif data_type == 'data_single_2':
    # Single task on target 2 (pm2.5).
    x = x.copy()
    y = y_2.copy()

elif data_type == 'data_multi':
    # Column 0: encoded cbwd label, column 1: pm2.5 value.
    x = x.copy()
    y = np.concatenate([y_1.reshape(-1, 1), y_2], axis=1)

else:
    # Previously an unknown value left `y` undefined and failed later with a
    # NameError far from the cause.
    raise ValueError(f'Unknown data_type: {data_type!r}')


split_ratio = 0.2
# Chronological hold-out: with shuffle=False the last 20% of windows become
# the test set. random_state is omitted because it is ignored without shuffling.
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=split_ratio, shuffle=False)
# TODO: Add scaler

# NOTE(review): windows are built with shift_size=1, so neighbouring windows
# overlap heavily; a shuffled split places near-duplicates in both train and
# valid. Left as-is to keep results comparable — confirm this is intended.
train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=split_ratio, shuffle=True, random_state=502)

# x is (instance, features, timestep), so axis 1 is the feature count.
input_size = train_x.shape[1]

# Output sizes per task. The single-task values were previously swapped
# relative to the targets chosen above (and to the multi-task branch):
# target 1 (cbwd) is the 4-class task, target 2 (pm2.5) has a single output.
if data_type == 'data_single_1':
    num_classes = 4

elif data_type == 'data_single_2':
    num_classes = 1

else:
    num_classes_1 = 4
    num_classes_2 = 1

100%|██████████| 41734/41734 [00:01<00:00, 26248.06it/s]


In [3]:
model_name = 'LSTM_FCNs_multi'
# NOTE: this is the object stored in config.model_config, not a copy, so the
# assignments below also change what other cells read from config.
model_params = config.model_config[model_name]

model_params['parameter']['input_size'] = input_size
model_params['best_model_path'] = f'./ckpt/{SOURCE_DATASET}/lstm_fcn_pre_cls_fine.pt'

# NOTE: rebinds `data_target` (previously the source DataFrame) to the
# Multilearning wrapper; the name is kept so later cells keep working.
data_target = mt.Multilearning(model_params, 'self')

# Evaluate the checkpoint on the held-out test windows. The five scalars are
# presumably accuracy / MSE / MAPE / MAE / R2 as their names suggest — confirm
# against main_multi.Multilearning.pred_data.
pred, acc, mse, MAPE, MAE, R2 = data_target.pred_data(test_x, test_y, best_model_path=model_params['best_model_path'])

# Create the output directory first so a fresh checkout does not fail in to_csv.
os.makedirs('./save', exist_ok=True)
pred.to_csv('./save/lstm_fcn_pre_cls_fine.csv', index=False)

# One metric per line, in the same order as before.
for metric in (acc, mse, MAPE, MAE, R2):
    print(metric)

0.6258536000958428
8888.908669055698
2.3142763978638214
69.81605159146666
0.00046365049896623933


In [4]:
model_name = 'LSTM_FCNs_multi'
# NOTE: this is the object stored in config.model_config, not a copy, so the
# assignments below also change what other cells read from config.
model_params = config.model_config[model_name]

model_params['parameter']['input_size'] = input_size
model_params['best_model_path'] = f'./ckpt/{SOURCE_DATASET}/lstm_fcn_pre_reg_fine.pt'

# NOTE: rebinds `data_target` to a fresh Multilearning wrapper built from the
# updated parameters; the name is kept so later cells keep working.
data_target = mt.Multilearning(model_params, 'self')

# Evaluate the checkpoint on the held-out test windows. The five scalars are
# presumably accuracy / MSE / MAPE / MAE / R2 as their names suggest — confirm
# against main_multi.Multilearning.pred_data.
pred, acc, mse, MAPE, MAE, R2 = data_target.pred_data(test_x, test_y, best_model_path=model_params['best_model_path'])

# Create the output directory first so a fresh checkout does not fail in to_csv.
os.makedirs('./save', exist_ok=True)
pred.to_csv('./save/lstm_fcn_pre_reg_fine.csv', index=False)

# One metric per line, in the same order as before.
for metric in (acc, mse, MAPE, MAE, R2):
    print(metric)

0.6572421229184138
8959.45117766987
2.21100940084827
68.78223809479553
-0.007468684523252556
