In [2]:
from tqdm import tqdm
from utils import set_logger
from dataset import FinedustDataset
from model import FinedustLSTM
from utils import prepare_data
from sklearn.model_selection import train_test_split
import os
import logging
from interpolate import simple_interpolate
import numpy as np
from preprocess import minmax_scaling
from train import train
from permutation import compute_permutation_importance

import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader

* 서쪽 지방 추가 task
* 가설: 서부 지역은 중국발 미세먼지 영향을 크게 받을 것이다.

In [4]:
# Region names processed one at a time by the per-region training loop below.
# Each name is lower-cased before being handed to prepare_data().
regions = [
    "Seoul",
    "Jeonju",
    "Gwangju",
]

In [7]:
# Hyperparameters shared by every region's run.
# window_size / output_size feed the dataset windowing and the model head;
# batch_size, num_layers, hidden_size and dropout feed the loaders and the LSTM.
# The whole dict is also passed to train().
config = dict(
    learning_rate=1e-4,
    epochs=500,
    batch_size=32,
    num_layers=2,
    hidden_size=128,
    window_size=24,
    output_size=1,
    dropout=0.2,
    patience=10,
)

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [None]:
# Per-region pipeline: load and preprocess the data, build a windowed dataset,
# split it into train/validation parts, train an LSTM, and gather the
# predictions and validation targets as NumPy arrays.
for region in regions:
  # Jeonju drops one extra column ("IX") compared with the other regions.
  # NOTE(review): the reason is not visible here — presumably that column is
  # unusable for Jeonju; confirm against the raw data.
  if region == "Jeonju":
    columns_to_remove = ["CA_TOT", "CA_MID", "STN", "IR", "PA", "IX", "PS", "지점", "위도", "경도"]
  else:
    columns_to_remove = ["CA_TOT", "CA_MID", "STN", "IR", "PA", "PS", "지점", "위도", "경도"]
  # include_china=True presumably adds China-side features, matching the
  # hypothesis stated in the markdown cell above — verify in utils.prepare_data.
  df = prepare_data(region.lower(), columns_to_remove, include_china=True)
  df = simple_interpolate(df, method="linear")
  # `scaler` is kept alongside the scaled frame but is not used in the visible
  # part of this loop (likely needed later to undo the scaling — TODO confirm).
  df, scaler = minmax_scaling(df)

  # The window length and prediction length come from the shared config.
  dataset = FinedustDataset(df,
                            window_size=config["window_size"],
                            prediction_length=config["output_size"],
                            time_window=3)
  # 80/20 split with a fixed seed. train_test_split shuffles by default, so
  # samples are assigned to train/validation at random rather than by time.
  # NOTE(review): if consecutive windows overlap, a random split can leak
  # near-duplicate samples into validation — consider a chronological split.
  train_dataset, val_dataset = train_test_split(dataset, test_size=0.2, random_state=42)
  # NOTE(review): these loaders are not referenced again in the visible part of
  # the loop; train() below receives the datasets themselves.
  train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
  val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)

  # A fresh model per region, sized to the dataset's feature columns and moved
  # to the selected device.
  # NOTE(review): no torch/numpy seed is set in the visible cells, so weight
  # initialisation (and any shuffling inside train()) may differ between runs.
  model = FinedustLSTM(input_size=len(dataset.feature_columns),
                              hidden_size=config['hidden_size'],
                              num_layers=config['num_layers'],
                              output_size=config['output_size'],
                              dropout_prob=config['dropout']).to(device)

  # train() returns a sequence of prediction arrays plus the losses; what
  # exactly each element holds is defined in train.py (not visible here).
  total_preds, losses = train(model,
                              train_dataset,
                              val_dataset,
                              dataset.feature_columns,
                              region, "LSTM", config,
                              device)
  # Join the per-chunk predictions along the first axis.
  pred_y = np.concatenate(total_preds, axis=0)
  # Each validation item unpacks as an (x, y) pair; only the targets are kept.
  val_y = np.concatenate([y for x, y in val_dataset])