In [1]:
from tqdm import tqdm
from utils import set_logger
from dataset import MLPDataset
from model import MLP
from utils import prepare_data, get_model_size
from sklearn.model_selection import train_test_split
import os
import logging
from interpolate import simple_interpolate
import numpy as np
from preprocess import minmax_scaling
from train import train

import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader

In [2]:
# Hyper-parameters consumed by the MLP cells that follow: the model reads
# "dropout_prob", the data loaders read "batch_size", and the whole mapping
# is handed to train().
config = dict(
    learning_rate=1e-4,
    epochs=500,
    batch_size=32,
    output_size=1,
    dropout_prob=0.2,
    patience=10,
)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

# MLP

In [3]:
region = "Andong"

# Columns passed to prepare_data() for removal. "IX" is added to the list
# only when the region is Jeonju; the order matches the original literal lists.
columns_to_remove = ["CA_TOT", "CA_MID", "STN", "IR", "PA"]
if region == "Jeonju":
    columns_to_remove.append("IX")
columns_to_remove.extend(["PS", "지점", "위도", "경도"])

# Load the region's data, interpolate with method="linear", then run it
# through minmax_scaling(), which also returns the fitted scaler.
df, scaler = minmax_scaling(
    simple_interpolate(
        prepare_data(region.lower(), columns_to_remove),
        method="linear",
    )
)

# Wrap the frame in the MLP dataset and display the first sample as a sanity check.
dataset = MLPDataset(df)
dataset[0]

  data = data.interpolate(method='linear')


(tensor([ 0.8889,  0.1064,  0.2118,  0.3671,  0.5900,  0.0848,  0.2484,  0.1375,
         46.0000]),
 tensor([46.]))

In [4]:
regions = ["Andong", "Seoul", "Jeonju", "Daegu", "Gwangju"]

# Per-region results: (predictions, targets) pairs and the loss history
# returned by train().
pred_list = []
loss_list = []

for region in regions:
    # "IX" is removed only for Jeonju; all other columns are removed for every region.
    columns_to_remove = ["CA_TOT", "CA_MID", "STN", "IR", "PA"]
    if region == "Jeonju":
        columns_to_remove.append("IX")
    columns_to_remove.extend(["PS", "지점", "위도", "경도"])

    df = prepare_data(region.lower(), columns_to_remove)
    df = simple_interpolate(df, method="linear")
    df, scaler = minmax_scaling(df)

    dataset = MLPDataset(df)
    # 80/20 split with a fixed random_state so the split is repeatable.
    train_dataset, val_dataset = train_test_split(
        dataset,
        test_size=0.2,
        random_state=42,
    )
    # NOTE(review): these loaders are built but train() below receives the
    # datasets, not the loaders -- confirm whether they are still needed.
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=config["batch_size"])
    val_loader = DataLoader(val_dataset, shuffle=False, batch_size=config["batch_size"])

    model = MLP(
        input_size=len(dataset.feature_columns),
        dropout_prob=config["dropout_prob"],
    )
    model = model.to(device)

    # NOTE(review): the tag "LSTM" is passed although the model is an MLP.
    # If train() uses this tag for checkpoint or log paths, this run would
    # collide with the LSTM artifacts -- confirm against train().
    total_preds, losses = train(
        model,
        train_dataset,
        val_dataset,
        dataset.feature_columns,
        region,
        "LSTM",
        config,
        device,
    )
    pred_y = np.concatenate(total_preds, axis=0)
    val_y = np.concatenate([target for _, target in val_dataset])

    pred_list.append((pred_y, val_y))
    loss_list.append(losses)

  data = data.interpolate(method='linear')
  0%|          | 0/500 [00:01<?, ?it/s]


AttributeError: '_pickle.Pickler' object attribute 'persistent_id' is read-only

* 전주와 광주에서 underfitting 발생

* hidden_size => 256

In [35]:
# Hyper-parameters for the LSTM cells that follow. This rebinds the `config`
# name used by the MLP section above, and uses the key "dropout" where the
# MLP config used "dropout_prob".
# NOTE(review): the markdown note above mentions hidden_size => 256 while this
# cell sets 128; the importance loop further down overrides it per region.
# NOTE(review): this cell's execution count (35) is higher than the next
# cell's (34), so the recorded outputs were not produced in top-to-bottom order.
config = dict(
    learning_rate=1e-4,
    epochs=200,
    batch_size=32,
    num_layers=2,
    hidden_size=128,
    window_size=24,
    output_size=1,
    dropout=0.1,
    patience=10,
)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [34]:
region = "Gwangju"

# "IX" is removed only for Jeonju; all other columns are removed for every region.
columns_to_remove = ["CA_TOT", "CA_MID", "STN", "IR", "PA"]
if region == "Jeonju":
    columns_to_remove.append("IX")
columns_to_remove.extend(["PS", "지점", "위도", "경도"])

df = prepare_data(region.lower(), columns_to_remove)
df = simple_interpolate(df, method="linear")
df, scaler = minmax_scaling(df)

# NOTE(review): FinedustDataset and FinedustLSTM are not imported in any cell
# visible in this notebook -- confirm their source before a fresh-kernel run.
dataset = FinedustDataset(
    df,
    window_size=config["window_size"],
    prediction_length=config["output_size"],
    time_window=1,
)
# 80/20 split with a fixed random_state so the split is repeatable.
train_dataset, val_dataset = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
)
# NOTE(review): these loaders are built but train() below receives the
# datasets, not the loaders -- confirm whether they are still needed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=config["batch_size"])
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=config["batch_size"])

model = FinedustLSTM(
    input_size=len(dataset.feature_columns),
    hidden_size=config["hidden_size"],
    num_layers=config["num_layers"],
    output_size=config["output_size"],
    dropout_prob=config["dropout"],
)
model = model.to(device)

total_preds, losses = train(
    model,
    train_dataset,
    val_dataset,
    dataset.feature_columns,
    region,
    "LSTM",
    config,
    device,
)
# Stitch the per-batch predictions and the validation targets into flat arrays.
pred_y = np.concatenate(total_preds, axis=0)
val_y = np.concatenate([target for _, target in val_dataset])

  data = data.interpolate(method='linear')
  0%|          | 1/200 [00:02<06:40,  2.01s/it]

Epoch 1/200, Train Loss: 30.3775, Val Loss: 24.3033


  1%|          | 2/200 [00:04<06:37,  2.01s/it]

Epoch 2/200, Train Loss: 29.4037, Val Loss: 24.2723


  2%|▏         | 3/200 [00:06<06:35,  2.01s/it]

Epoch 3/200, Train Loss: 29.3252, Val Loss: 24.3024


  2%|▏         | 4/200 [00:08<06:32,  2.00s/it]

Epoch 4/200, Train Loss: 29.3586, Val Loss: 24.4422


  2%|▎         | 5/200 [00:10<06:29,  2.00s/it]

Epoch 5/200, Train Loss: 29.2659, Val Loss: 24.2836


  3%|▎         | 6/200 [00:11<06:26,  1.99s/it]

Epoch 6/200, Train Loss: 29.3774, Val Loss: 24.3041


  4%|▎         | 7/200 [00:14<06:26,  2.00s/it]

Epoch 7/200, Train Loss: 28.9108, Val Loss: 24.2548


  4%|▍         | 8/200 [00:16<06:24,  2.00s/it]

Epoch 8/200, Train Loss: 29.1398, Val Loss: 24.0023


  4%|▍         | 9/200 [00:17<06:21,  2.00s/it]

Epoch 9/200, Train Loss: 27.8519, Val Loss: 22.2044


  5%|▌         | 10/200 [00:19<06:19,  2.00s/it]

Epoch 10/200, Train Loss: 27.5199, Val Loss: 21.8355


  6%|▌         | 11/200 [00:22<06:18,  2.00s/it]

Epoch 11/200, Train Loss: 27.2310, Val Loss: 21.8318


  6%|▌         | 12/200 [00:24<06:18,  2.02s/it]

Epoch 12/200, Train Loss: 27.0363, Val Loss: 21.7011


  6%|▋         | 13/200 [00:26<06:19,  2.03s/it]

Epoch 13/200, Train Loss: 27.0999, Val Loss: 21.6421


  7%|▋         | 14/200 [00:28<06:18,  2.04s/it]

Epoch 14/200, Train Loss: 26.8827, Val Loss: 21.6241


  8%|▊         | 15/200 [00:30<06:17,  2.04s/it]

Epoch 15/200, Train Loss: 26.8700, Val Loss: 22.0341


  8%|▊         | 16/200 [00:32<06:15,  2.04s/it]

Epoch 16/200, Train Loss: 26.7769, Val Loss: 21.2514


  8%|▊         | 17/200 [00:34<06:13,  2.04s/it]

Epoch 17/200, Train Loss: 26.5006, Val Loss: 21.6641


  9%|▉         | 18/200 [00:36<06:12,  2.05s/it]

Epoch 18/200, Train Loss: 26.5181, Val Loss: 21.1155


 10%|▉         | 19/200 [00:38<06:10,  2.05s/it]

Epoch 19/200, Train Loss: 26.2873, Val Loss: 20.8790


 10%|█         | 20/200 [00:40<06:12,  2.07s/it]

Epoch 20/200, Train Loss: 26.0401, Val Loss: 21.4747


 10%|█         | 21/200 [00:42<06:09,  2.06s/it]

Epoch 21/200, Train Loss: 26.0532, Val Loss: 21.1493


 11%|█         | 22/200 [00:44<06:05,  2.05s/it]

Epoch 22/200, Train Loss: 26.0168, Val Loss: 21.2493


 12%|█▏        | 23/200 [00:46<06:04,  2.06s/it]

Epoch 23/200, Train Loss: 26.0997, Val Loss: 20.6171


 12%|█▏        | 24/200 [00:48<06:00,  2.05s/it]

Epoch 24/200, Train Loss: 25.8641, Val Loss: 20.6194


 12%|█▎        | 25/200 [00:50<05:59,  2.06s/it]

Epoch 25/200, Train Loss: 26.0739, Val Loss: 21.3766


 13%|█▎        | 26/200 [00:52<05:57,  2.05s/it]

Epoch 26/200, Train Loss: 25.7618, Val Loss: 22.2660


 14%|█▎        | 27/200 [00:54<05:56,  2.06s/it]

Epoch 27/200, Train Loss: 25.7764, Val Loss: 23.2514


 14%|█▍        | 28/200 [00:56<05:54,  2.06s/it]

Epoch 28/200, Train Loss: 25.6278, Val Loss: 20.3114


 14%|█▍        | 29/200 [00:59<05:53,  2.07s/it]

Epoch 29/200, Train Loss: 25.6266, Val Loss: 20.5773


 15%|█▌        | 30/200 [01:01<05:51,  2.07s/it]

Epoch 30/200, Train Loss: 25.2725, Val Loss: 20.2125


 16%|█▌        | 31/200 [01:03<05:49,  2.07s/it]

Epoch 31/200, Train Loss: 25.4393, Val Loss: 20.5389


 16%|█▌        | 32/200 [01:05<05:48,  2.07s/it]

Epoch 32/200, Train Loss: 25.8532, Val Loss: 20.2217


 16%|█▋        | 33/200 [01:07<05:46,  2.07s/it]

Epoch 33/200, Train Loss: 25.5386, Val Loss: 20.5461


 17%|█▋        | 34/200 [01:09<05:45,  2.08s/it]

Epoch 34/200, Train Loss: 24.7752, Val Loss: 20.5525


 18%|█▊        | 35/200 [01:11<05:40,  2.06s/it]

Epoch 35/200, Train Loss: 25.9238, Val Loss: 20.3416


 18%|█▊        | 36/200 [01:13<05:35,  2.04s/it]

Epoch 36/200, Train Loss: 26.4463, Val Loss: 20.6761


 18%|█▊        | 37/200 [01:15<05:31,  2.04s/it]

Epoch 37/200, Train Loss: 25.2591, Val Loss: 20.2208


 19%|█▉        | 38/200 [01:17<05:30,  2.04s/it]

Epoch 38/200, Train Loss: 25.2861, Val Loss: 20.5120


 20%|█▉        | 39/200 [01:19<05:27,  2.03s/it]

Epoch 39/200, Train Loss: 24.8693, Val Loss: 20.0092


 20%|██        | 40/200 [01:21<05:24,  2.03s/it]

Epoch 40/200, Train Loss: 24.8397, Val Loss: 20.0626


 20%|██        | 41/200 [01:23<05:22,  2.03s/it]

Epoch 41/200, Train Loss: 24.7317, Val Loss: 20.2366


 21%|██        | 42/200 [01:25<05:20,  2.03s/it]

Epoch 42/200, Train Loss: 24.9119, Val Loss: 20.4847


 22%|██▏       | 43/200 [01:27<05:16,  2.02s/it]

Epoch 43/200, Train Loss: 24.1853, Val Loss: 20.0322


 22%|██▏       | 44/200 [01:29<05:14,  2.02s/it]

Epoch 44/200, Train Loss: 24.4714, Val Loss: 21.6948


 22%|██▎       | 45/200 [01:31<05:12,  2.02s/it]

Epoch 45/200, Train Loss: 24.2151, Val Loss: 22.4740


 23%|██▎       | 46/200 [01:33<05:08,  2.01s/it]

Epoch 46/200, Train Loss: 24.5161, Val Loss: 19.9242


 24%|██▎       | 47/200 [01:35<05:08,  2.02s/it]

Epoch 47/200, Train Loss: 24.5564, Val Loss: 19.7680


 24%|██▍       | 48/200 [01:37<05:06,  2.02s/it]

Epoch 48/200, Train Loss: 23.7142, Val Loss: 19.9748


 24%|██▍       | 49/200 [01:39<05:04,  2.02s/it]

Epoch 49/200, Train Loss: 24.1512, Val Loss: 19.9158


 25%|██▌       | 50/200 [01:41<05:02,  2.02s/it]

Epoch 50/200, Train Loss: 24.0537, Val Loss: 19.6592


 26%|██▌       | 51/200 [01:43<05:01,  2.02s/it]

Epoch 51/200, Train Loss: 25.3580, Val Loss: 20.7361


 26%|██▌       | 52/200 [01:45<04:59,  2.02s/it]

Epoch 52/200, Train Loss: 24.6943, Val Loss: 20.0116


 26%|██▋       | 53/200 [01:47<04:57,  2.02s/it]

Epoch 53/200, Train Loss: 23.8922, Val Loss: 20.3265


 27%|██▋       | 54/200 [01:49<04:56,  2.03s/it]

Epoch 54/200, Train Loss: 23.7886, Val Loss: 20.2400


 28%|██▊       | 55/200 [01:51<04:53,  2.03s/it]

Epoch 55/200, Train Loss: 24.4157, Val Loss: 20.6088


 28%|██▊       | 56/200 [01:53<04:51,  2.02s/it]

Epoch 56/200, Train Loss: 24.7995, Val Loss: 19.9049


 28%|██▊       | 57/200 [01:55<04:47,  2.01s/it]

Epoch 57/200, Train Loss: 23.6750, Val Loss: 19.9756


 29%|██▉       | 58/200 [01:57<04:44,  2.01s/it]

Epoch 58/200, Train Loss: 24.0980, Val Loss: 20.6970


 30%|██▉       | 59/200 [01:59<04:42,  2.00s/it]

Epoch 59/200, Train Loss: 23.9938, Val Loss: 19.5168


 30%|███       | 60/200 [02:01<04:39,  2.00s/it]

Epoch 60/200, Train Loss: 23.9725, Val Loss: 20.1608


 30%|███       | 61/200 [02:03<04:37,  2.00s/it]

Epoch 61/200, Train Loss: 23.6038, Val Loss: 23.3401


 31%|███       | 62/200 [02:05<04:35,  2.00s/it]

Epoch 62/200, Train Loss: 24.5552, Val Loss: 20.2552


 32%|███▏      | 63/200 [02:07<04:34,  2.00s/it]

Epoch 63/200, Train Loss: 24.3800, Val Loss: 19.5372


 32%|███▏      | 64/200 [02:09<04:31,  2.00s/it]

Epoch 64/200, Train Loss: 23.3832, Val Loss: 20.5423


 32%|███▎      | 65/200 [02:11<04:29,  1.99s/it]

Epoch 65/200, Train Loss: 23.8855, Val Loss: 20.6251


 33%|███▎      | 66/200 [02:13<04:27,  2.00s/it]

Epoch 66/200, Train Loss: 24.0106, Val Loss: 19.9890


 34%|███▎      | 67/200 [02:15<04:27,  2.01s/it]

Epoch 67/200, Train Loss: 23.9221, Val Loss: 19.2508


 34%|███▍      | 68/200 [02:17<04:26,  2.02s/it]

Epoch 68/200, Train Loss: 23.8614, Val Loss: 19.5137


 34%|███▍      | 69/200 [02:19<04:25,  2.03s/it]

Epoch 69/200, Train Loss: 23.4156, Val Loss: 20.4969


 35%|███▌      | 70/200 [02:22<04:24,  2.04s/it]

Epoch 70/200, Train Loss: 24.4978, Val Loss: 20.0998


 36%|███▌      | 71/200 [02:24<04:22,  2.04s/it]

Epoch 71/200, Train Loss: 22.8663, Val Loss: 20.8340


 36%|███▌      | 72/200 [02:26<04:19,  2.03s/it]

Epoch 72/200, Train Loss: 23.0604, Val Loss: 22.6961


 36%|███▋      | 73/200 [02:28<04:17,  2.03s/it]

Epoch 73/200, Train Loss: 24.1852, Val Loss: 19.2914


 37%|███▋      | 74/200 [02:30<04:14,  2.02s/it]

Epoch 74/200, Train Loss: 23.6965, Val Loss: 22.1401


 38%|███▊      | 75/200 [02:32<04:13,  2.03s/it]

Epoch 75/200, Train Loss: 23.4287, Val Loss: 24.2997


 38%|███▊      | 76/200 [02:34<04:12,  2.03s/it]

Epoch 76/200, Train Loss: 25.2223, Val Loss: 19.6870


 38%|███▊      | 76/200 [02:36<04:14,  2.06s/it]

Epoch 77/200, Train Loss: 23.3147, Val Loss: 19.3380





In [None]:
# NOTE(review): this cell repeats the Gwangju LSTM training cell above
# statement for statement and has no recorded execution -- consider keeping
# only one copy.
region = "Gwangju"

# "IX" is removed only for Jeonju; all other columns are removed for every region.
columns_to_remove = (
    ["CA_TOT", "CA_MID", "STN", "IR", "PA"]
    + (["IX"] if region == "Jeonju" else [])
    + ["PS", "지점", "위도", "경도"]
)

df = prepare_data(region.lower(), columns_to_remove)
df = simple_interpolate(df, method="linear")
df, scaler = minmax_scaling(df)

# NOTE(review): FinedustDataset and FinedustLSTM are not imported in any cell
# visible in this notebook -- confirm their source before a fresh-kernel run.
dataset = FinedustDataset(
    df,
    window_size=config["window_size"],
    prediction_length=config["output_size"],
    time_window=1,
)
train_dataset, val_dataset = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=config["batch_size"])
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=config["batch_size"])

model = FinedustLSTM(
    input_size=len(dataset.feature_columns),
    hidden_size=config["hidden_size"],
    num_layers=config["num_layers"],
    output_size=config["output_size"],
    dropout_prob=config["dropout"],
)
model = model.to(device)

total_preds, losses = train(
    model,
    train_dataset,
    val_dataset,
    dataset.feature_columns,
    region,
    "LSTM",
    config,
    device,
)
# Stitch the per-batch predictions and the validation targets into flat arrays.
pred_y = np.concatenate(total_preds, axis=0)
val_y = np.concatenate([target for _, target in val_dataset])

In [36]:
import numpy as np
from copy import deepcopy
import torch
import torch.nn as nn

def compute_permutation_importance(model, val_loader, feature_columns, device,
                                   loss_fn=nn.L1Loss(), generator=None):
    """Estimate permutation feature importance on a validation set.

    For every feature, the values of that feature are shuffled across samples
    and the resulting loss is compared with the unshuffled baseline loss.
    A larger increase in loss means the model relies more on that feature.

    Args:
        model: Callable ``torch.nn.Module`` mapping a batch ``x`` to predictions.
            It is switched to eval mode.
        val_loader: Iterable yielding ``(x, y)`` tensor batches. Features must
            lie on the LAST axis of ``x``; ``x`` may be 2-D ``(batch, feature)``
            or higher-rank such as ``(batch, seq, feature)``.
        feature_columns: Feature names; ``feature_columns[i]`` labels index
            ``i`` on the last axis of ``x``.
        device: Device the batches are moved to before the forward pass.
        loss_fn: Loss callable ``loss_fn(preds, y)`` returning a scalar tensor.
            The baseline is a batch-size-weighted average, so it is only
            comparable to the permuted loss for mean-reduced losses.
        generator: Optional ``torch.Generator`` (CPU) used to draw the
            permutations, for reproducible results. ``None`` uses the global RNG.

    Returns:
        dict mapping each feature name to ``permuted_loss - baseline_loss``.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()

    # Label the printed baseline with the loss actually in use (the default is
    # L1, so the former hard-coded "MSE" label was misleading).
    loss_name = getattr(loss_fn, "__name__", None) or type(loss_fn).__name__

    # Single pass over the loader: accumulate the baseline loss and keep the
    # batches so the permutation step sees exactly the same samples.
    batches_x, batches_y = [], []
    baseline_loss = 0.0
    n_samples = 0
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            baseline_loss += loss_fn(model(x), y).item() * x.size(0)
            n_samples += x.size(0)
            batches_x.append(x)
            batches_y.append(y)

    if n_samples == 0:
        raise ValueError("val_loader yielded no samples; cannot compute permutation importance")

    # Normalise by the samples actually seen (stays correct with drop_last / samplers).
    baseline_loss /= n_samples
    print(f"Baseline {loss_name}: {baseline_loss:.4f}")

    all_x = torch.cat(batches_x, dim=0)
    all_y = torch.cat(batches_y, dim=0)

    feature_importances = {}
    # Inference only: no autograd graph is needed for the permuted passes.
    with torch.no_grad():
        for i, feature in enumerate(feature_columns):
            # Shuffle feature i across the sample (first) axis; every other
            # feature keeps its original values.
            perm = torch.randperm(all_x.size(0), generator=generator).to(all_x.device)
            x_permuted = all_x.clone()
            x_permuted[..., i] = all_x[..., i][perm]

            loss = loss_fn(model(x_permuted), all_y).item()

            # Importance is the increase in loss caused by the shuffle.
            importance = loss - baseline_loss
            feature_importances[feature] = importance
            print(f"Feature: {feature}, Permutation Importance: {importance:.4f}")

    return feature_importances

## Extract important features

In [38]:
# Display the region list that the importance loop below iterates over.
# NOTE(review): the recorded output lists four regions, whereas the `regions`
# assignment visible earlier in this notebook has five (including "Gwangju").
# The list was presumably rebound in a cell that is not shown -- confirm
# before relying on a fresh-kernel run.
regions

['Andong', 'Seoul', 'Jeonju', 'Daegu']

In [None]:
# Compute and save permutation importances for the trained LSTM of each region.
output_dir = "importance/LSTM"
os.makedirs(output_dir, exist_ok=True)

for region in regions:
    # "IX" is removed only for Jeonju; all other columns are removed for every region.
    if region == "Jeonju":
        columns_to_remove = ["CA_TOT", "CA_MID", "STN", "IR", "PA", "IX", "PS", "지점", "위도", "경도"]
    else:
        columns_to_remove = ["CA_TOT", "CA_MID", "STN", "IR", "PA", "PS", "지점", "위도", "경도"]
    df = prepare_data(region.lower(), columns_to_remove)
    df = simple_interpolate(df, method="linear")
    df, scaler = minmax_scaling(df)

    # NOTE(review): FinedustDataset and FinedustLSTM are not imported in any
    # cell visible in this notebook -- confirm their source before a fresh run.
    dataset = FinedustDataset(df,
                              window_size=config["window_size"],
                              prediction_length=config["output_size"],
                              time_window=1)
    train_dataset, val_dataset = train_test_split(dataset, test_size=0.2, random_state=42)
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=False)

    # The architecture must match the saved checkpoint. Keep the per-region
    # size in a local variable instead of mutating the shared `config`.
    # NOTE(review): the markdown note earlier also mentions Gwangju for
    # hidden_size 256 -- confirm which size its checkpoint was trained with.
    hidden_size = 256 if region == "Jeonju" else 128
    model = FinedustLSTM(input_size=len(dataset.feature_columns),
                         hidden_size=hidden_size,
                         num_layers=config['num_layers'],
                         output_size=config['output_size'],
                         dropout_prob=config['dropout']).to(device)
    save_path = f"models/LSTM/{region}.pth"
    # map_location lets a checkpoint saved from a GPU run load on a CPU-only host.
    model.load_state_dict(torch.load(save_path, map_location=device))

    # Seed the global RNG so the random permutations (and therefore the saved
    # importance files) are reproducible across runs.
    torch.manual_seed(42)
    feature_importances = compute_permutation_importance(model, val_loader, dataset.feature_columns, device)
    output_path = f"{output_dir}/{region}.txt"

    # Write the features sorted from most to least important.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("Feature Importances:\n")
        for feature, importance in sorted(feature_importances.items(), key=lambda x: x[1], reverse=True):
            f.write(f"{feature}: {importance:.4f}\n")
    print(f"Feature importances saved to {output_path}")

  data = data.interpolate(method='linear')


Baseline MSE Loss: 6.4699
Feature: WD, Permutation Importance: 3.6131
Feature: WS, Permutation Importance: 3.5572
Feature: TA, Permutation Importance: 4.3083
Feature: TD, Permutation Importance: 1.9318
Feature: HM, Permutation Importance: 7.6094
Feature: PV, Permutation Importance: 4.6644
Feature: VS, Permutation Importance: 12.2706
Feature: TS, Permutation Importance: 3.0322
Feature importances saved to importance/LSTM/Andong.txt


  data = data.interpolate(method='linear')


Baseline MSE Loss: 13.5851
Feature: WD, Permutation Importance: 6.2550
Feature: WS, Permutation Importance: 3.6059
Feature: TA, Permutation Importance: 1.1206
Feature: TD, Permutation Importance: 1.1049
Feature: HM, Permutation Importance: 5.1800
Feature: PV, Permutation Importance: 5.0815
Feature: VS, Permutation Importance: 10.2841
Feature: TS, Permutation Importance: 1.3686
Feature: TE_005, Permutation Importance: 0.7640
Feature: TE_01, Permutation Importance: 4.8736
Feature: TE_02, Permutation Importance: 5.2185
Feature: TE_03, Permutation Importance: 1.7073
Feature importances saved to importance/LSTM/Seoul.txt


  data = data.interpolate(method='linear')


RuntimeError: Error(s) in loading state_dict for FinedustLSTM:
	size mismatch for lstm.lstm.weight_ih_l0: copying a param with shape torch.Size([1024, 8]) from checkpoint, the shape in current model is torch.Size([512, 8]).
	size mismatch for lstm.lstm.weight_hh_l0: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([512, 128]).
	size mismatch for lstm.lstm.bias_ih_l0: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for lstm.lstm.bias_hh_l0: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for lstm.lstm.weight_ih_l1: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([512, 128]).
	size mismatch for lstm.lstm.weight_hh_l1: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([512, 128]).
	size mismatch for lstm.lstm.bias_ih_l1: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for lstm.lstm.bias_hh_l1: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for lstm.fc1.weight: copying a param with shape torch.Size([768, 256]) from checkpoint, the shape in current model is torch.Size([384, 128]).
	size mismatch for lstm.fc1.bias: copying a param with shape torch.Size([768]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for lstm.fc2.weight: copying a param with shape torch.Size([256, 768]) from checkpoint, the shape in current model is torch.Size([128, 384]).
	size mismatch for lstm.fc2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for fc1.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([128, 128]).
	size mismatch for fc1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for fc2.weight: copying a param with shape torch.Size([1, 256]) from checkpoint, the shape in current model is torch.Size([1, 128]).

# Transformer

In [None]:
import torch
from transformers import TimeSeriesTransformerForPrediction

# Identifier of the pretrained checkpoint handed to from_pretrained().
pretrained_name = "huggingface/time-series-transformer-tourism-monthly"

# Note: this rebinds `model`, replacing whichever model the earlier cells left
# in the notebook namespace.
model = TimeSeriesTransformerForPrediction.from_pretrained(pretrained_name)

In [None]:
import matplotlib.pyplot as plt

# Overlay the first 1000 validation targets and predictions on one axis.
# NOTE(review): val_y, pred_y and losses come from whichever training cell ran
# last, while the title hard-codes "MLP" -- confirm the label matches the model.
# NOTE(review): losses[:, 1] needs `losses` to support 2-D indexing; presumably
# column 1 holds the validation loss -- confirm against train().
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(val_y[:1000], label="val_y", color="blue")
ax.plot(pred_y[:1000], label="pred_y", color="orange")
ax.set_title(f"MLP (loss={np.min(losses[:, 1]): .2f})")
ax.legend()
plt.show()