In [1]:
!pip install lightning torchmetrics --quiet

In [2]:
import os
import sys

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from sklearn.preprocessing import MinMaxScaler, StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.utils as utils
from torch.utils.data import Dataset, TensorDataset, DataLoader, random_split

import lightning as L
from lightning import seed_everything
from lightning.pytorch.callbacks import TQDMProgressBar

import torchmetrics
from torchmetrics.functional import r2_score, mean_absolute_error, mean_squared_error

import transformers
from transformers import get_linear_schedule_with_warmup

import matplotlib as  mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
import google.colab
google.colab.drive.mount('/content/drive')
os.chdir('/content/drive/MyDrive/projects/temporature-hackathon/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
output = pd.read_pickle('output.pkl')
train_df = pd.read_pickle('train_df.pkl')
output = torch.tensor(output.values)
train_data = torch.tensor(train_df.values)
output.shape, train_data.shape

(torch.Size([358]), torch.Size([22653, 10]))

In [5]:
input_tensor = torch.load('input.pt')
output_tensor = torch.load('output.pt')

In [6]:
dataset = TensorDataset(input_tensor.type(torch.float32)[:-1], output_tensor.type(torch.float32)[:-1])
len(dataset)

60

In [7]:
train_set, val_set = torch.utils.data.random_split(dataset, [56, 4])

In [8]:
train_dataloader = DataLoader(train_set, batch_size=56, shuffle=False)
val_dataloader = DataLoader(val_set, batch_size=4, shuffle=False)

In [9]:
class CNN_LSTM_Attention(L.LightningModule):
    def __init__(self, hidden_size=16, num_layers=3, output_len=358):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_len = output_len
        self.encoder = nn.LSTM(16, hidden_size, num_layers, batch_first=True)
        self.decoder = nn.LSTM(1, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, 1),
        )
        self.cnn1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=(3, 7), padding=(1, 0)),
            nn.GELU()
        )

        self.criterion = nn.MSELoss()

        self.training_stats = []

    def forward(self, prev_time_series):
        batch_size = len(prev_time_series)
        prev_time_series = prev_time_series.view(batch_size, 1, -1, 7)
        x = self.cnn1(prev_time_series)
        x = x.squeeze()
        x = x.view(batch_size, -1, 16)
        hidden = torch.zeros([self.num_layers, batch_size, self.hidden_size]).type_as(x)
        cell = torch.zeros([self.num_layers, batch_size, self.hidden_size]).type_as(x)
        encoder_hiddens = []
        for i in range(x.shape[1]):
            encoder_output, (hidden, cell) = self.encoder(x[:, i, :].unsqueeze(1), (hidden, cell))
            encoder_hiddens.append(hidden[-1])
        encoder_hiddens = torch.stack(encoder_hiddens)
        encoder_hiddens = encoder_hiddens.view(batch_size, -1, self.hidden_size)

        decoder_input = torch.stack([torch.arange(0, self.output_len) / self.output_len for i in range(batch_size)])
        decoder_input = decoder_input.unsqueeze(-1)
        decoder_input = decoder_input.type_as(x)

        outputs = []
        for i in range(self.output_len):
            decoder_output, (hidden, cell) = self.decoder(decoder_input[:, i, :].unsqueeze(1), (hidden, cell))
            attention_score = F.softmax(torch.bmm(hidden[-1].unsqueeze(1), encoder_hiddens.transpose(1, 2)), dim=0)
            attention_context = torch.bmm(attention_score, encoder_hiddens)
            attention_vector = torch.cat([hidden[-1], attention_context.squeeze()], dim=1)
            outputs.append(self.fc(attention_vector))

        x = torch.stack(outputs)
        x = x.view(batch_size, self.output_len)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        self.log('train_loss', loss, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y = y.squeeze()
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_r2', r2_score(y_hat, y), prog_bar=True)
        self.log('val_mae', mean_absolute_error(y_hat, y), prog_bar=True)
        self.log('val_mse', mean_squared_error(y_hat, y), prog_bar=True)
        self.training_stats.append(
            {
                'val_loss': loss,
                'val_r2': r2_score(y_hat, y),
                'val_mae': mean_absolute_error(y_hat, y),
                'val_mse': mean_squared_error(y_hat, y),
            }
        )

    def on_test_epoch_end(self):
        pass

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=0.002)
        #scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=500)
        scheduler = CosineAnnealingLR(optimizer, T_max=900)
        scheduler = {
            'scheduler': scheduler,
            'interval': 'step',
            'frequency': 1
        }
        return [optimizer], [scheduler]

In [10]:
class CNN_LSTM(L.LightningModule):
    def __init__(self, hidden_size=16, num_layers=3, output_len=358):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_len = output_len
        self.encoder = nn.LSTM(16, hidden_size, num_layers, batch_first=True)
        self.decoder = nn.LSTM(1, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 1),
        )
        self.cnn1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=(3, 7), padding=(1, 0)),
            nn.GELU()
        )
        self.dropout = nn.Dropout2d(0.2)

        self.criterion = nn.MSELoss()

        self.training_stats = []

    def forward(self, prev_time_series):
        batch_size = len(prev_time_series)
        prev_time_series = prev_time_series.view(batch_size, 1, -1, 7)
        x = self.cnn1(prev_time_series)
        x = self.dropout(x)

        x = x.squeeze()
        x = x.view(batch_size, -1, 16)
        encoder_output, (hidden, cell) = self.encoder(x,
         (torch.zeros([self.num_layers, batch_size, self.hidden_size]).type_as(x),
          torch.zeros([self.num_layers, batch_size, self.hidden_size]).type_as(x)
          )
        )
        decoder_input = torch.stack([torch.arange(0, self.output_len) / self.output_len for i in range(batch_size)])
        decoder_input = decoder_input.unsqueeze(-1)
        decoder_input = decoder_input.type_as(x)
        decoder_output, (hidden, cell) = self.decoder(decoder_input, (hidden, cell))
        x = self.fc(decoder_output)
        x = x.squeeze()
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        self.log('train_loss', loss, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y = y.squeeze()
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_r2', r2_score(y_hat, y), prog_bar=True)
        self.log('val_mae', mean_absolute_error(y_hat, y), prog_bar=True)
        self.log('val_mse', mean_squared_error(y_hat, y), prog_bar=True)
        self.training_stats.append(
            {
                'val_loss': loss,
                'val_r2': r2_score(y_hat, y),
                'val_mae': mean_absolute_error(y_hat, y),
                'val_mse': mean_squared_error(y_hat, y),
            }
        )

    def on_test_epoch_end(self):
        pass

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=0.002)
        #scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=500)
        scheduler = CosineAnnealingLR(optimizer, T_max=900)
        scheduler = {
            'scheduler': scheduler,
            'interval': 'step',
            'frequency': 1
        }
        return [optimizer], [scheduler]

In [11]:
model = CNN_LSTM()

In [12]:
trainer = L.Trainer(
    accelerator='auto',
    max_epochs=900,
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)

trainer.fit(model, train_dataloaders=train_dataloader)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type       | Params
-----------------------------------------
0 | encoder   | LSTM       | 6.5 K 
1 | decoder   | LSTM       | 5.6 K 
2 | fc        | Sequential | 17    
3 | cnn1      | Sequential | 352   
4 | dropout   | Dropout2d  | 0     
5 | criterion | MSELoss    | 0     
-----------------------------------------
12.5 K    Trainable params

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=900` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=900` reached.


In [13]:
trainer = L.Trainer(
    accelerator='auto',
    max_epochs=500,
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)

trainer.fit(model, train_dataloaders=train_dataloader)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type       | Params
-----------------------------------------
0 | encoder   | LSTM       | 6.5 K 
1 | decoder   | LSTM       | 5.6 K 
2 | fc        | Sequential | 17    
3 | cnn1      | Sequential | 352   
4 | dropout   | Dropout2d  | 0     
5 | criterion | MSELoss    | 0     
-----------------------------------------
12.5 K    Trainable params

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=500` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=500` reached.


In [14]:
trainer = L.Trainer(
    accelerator='auto',
    max_epochs=500,
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)

trainer.fit(model, train_dataloaders=train_dataloader)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type       | Params
-----------------------------------------
0 | encoder   | LSTM       | 6.5 K 
1 | decoder   | LSTM       | 5.6 K 
2 | fc        | Sequential | 17    
3 | cnn1      | Sequential | 352   
4 | dropout   | Dropout2d  | 0     
5 | criterion | MSELoss    | 0     
-----------------------------------------
12.5 K    Trainable params

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=500` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=500` reached.


In [15]:
trainer.test(model, dataloaders=val_dataloader)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=1` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'val_loss': 29.279895782470703,
  'val_r2': -1.3148934841156006,
  'val_mae': 4.293636322021484,
  'val_mse': 29.279895782470703}]

In [16]:
class CNN_Transformer(L.LightningModule):
    def __init__(self, hidden_size=32, num_layers=10, output_len=358):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_len = output_len
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            dim_feedforward=hidden_size * 2,
            activation=F.gelu,
            num_encoder_layers=2,
            num_decoder_layers=1,
            nhead=2
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 1),
        )
        self.cnn1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=(3, 7), padding=(1, 0)),
            nn.GELU()
        )
        self.cnn2 = nn.Sequential(
            nn.Conv1d(16, hidden_size, kernel_size=21, padding=10),
            nn.GELU()
        )

        self.criterion = nn.MSELoss()

        self.training_stats = []

    def forward(self, prev_time_series):
        batch_size = len(prev_time_series)
        prev_time_series = prev_time_series.view(batch_size, 1, -1, 7)
        x = self.cnn1(prev_time_series)
        x = x.squeeze()
        x = self.cnn2(x)
        x = x.view(batch_size, -1, self.hidden_size)
        x = self.transformer(x, x)
        x = self.fc(x[:, :self.output_len, :])
        x = x.squeeze()
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        self.log('train_loss', loss, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y = y.squeeze()
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_r2', r2_score(y_hat, y), prog_bar=True)
        self.log('val_mae', mean_absolute_error(y_hat, y), prog_bar=True)
        self.log('val_mse', mean_squared_error(y_hat, y), prog_bar=True)
        self.training_stats.append(
            {
                'val_loss': loss,
                'val_r2': r2_score(y_hat, y),
                'val_mae': mean_absolute_error(y_hat, y),
                'val_mse': mean_squared_error(y_hat, y),
            }
        )

    def on_test_epoch_end(self):
        pass

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=0.002)
        #scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=50, num_training_steps=500)
        scheduler = CosineAnnealingLR(optimizer, T_max=400)
        scheduler = {
            'scheduler': scheduler,
            'interval': 'step',
            'frequency': 1
        }
        return [optimizer], [scheduler]

In [17]:
ct_model = CNN_Transformer()



In [18]:
ct_trainer = L.Trainer(
    accelerator='auto',
    max_epochs=600,
    callbacks=[TQDMProgressBar(refresh_rate=20)],
)

ct_trainer.fit(ct_model, train_dataloaders=train_dataloader)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name        | Type        | Params
--------------------------------------------
0 | transformer | Transformer | 30.0 K
1 | fc          | Sequential  | 33    
2 | cnn1        | Sequential  | 352   
3 | cnn2        | Sequential  | 10.8 K
4 | criterion   | MSELoss     | 0     
--------------------------------------------
41.2 K    Trainable params
0         N

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=600` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=600` reached.


In [19]:
ct_trainer.test(ct_model, dataloaders=val_dataloader)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'val_loss': 126.38018798828125,
  'val_r2': -14.519453048706055,
  'val_mae': 9.520318984985352,
  'val_mse': 126.38018798828125}]

In [20]:
train_df = pd.read_pickle('test-input.pkl')
train_df.head()

Unnamed: 0_level_0,max_temp,temp_diff,rainfall,avg_humidity,avg_wind_speed,sunshine,sunshine_hours,avg_temp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1960-01-01,-1.388027,-0.419454,-0.329749,0.212881,-0.718724,-0.021898,-0.098542,-1.6
1960-01-02,-1.481363,-0.625852,-0.329749,1.546328,-1.140887,-1.0,-0.098542,-1.9
1960-01-03,-0.781341,0.75013,-0.329749,1.106428,0.653306,-1.0,-0.098542,4.0
1960-01-04,-0.585335,0.337336,-0.329749,0.996453,2.130877,-0.620438,-0.098542,7.5
1960-01-05,-1.47203,0.302936,-0.329749,-1.457365,2.869662,0.19708,-0.098542,-4.6


In [21]:
predict_tensor = (torch.tensor(train_df.iloc[-358 * 3:].drop(columns=['avg_temp'])
                                                       .values).type(torch.float32).unsqueeze(0))
predict_tensor.shape

torch.Size([1, 1074, 7])

In [22]:
model.eval()
ct_model.eval()
output = model(predict_tensor.unsqueeze(0)).squeeze()

In [23]:
output = output.detach().numpy()

In [24]:
output.shape

(358,)

In [25]:
test_df = pd.read_csv('sample_submission.csv')

In [26]:
test_df.head()

Unnamed: 0,일시,평균기온
0,2023-01-01,-0.094653
1,2023-01-02,-0.723125
2,2023-01-03,-1.179326
3,2023-01-04,-1.497328
4,2023-01-05,-1.702457


In [27]:
test_df['평균기온'] = output
test_df.head()

Unnamed: 0,일시,평균기온
0,2023-01-01,-2.151984
1,2023-01-02,-2.222678
2,2023-01-03,-2.186467
3,2023-01-04,-2.117572
4,2023-01-05,-2.035434


In [28]:
test_df.to_csv('sample_submission.csv', index=False)