In [27]:
import random
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
import os

In [28]:
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [29]:
print(type(device))

<class 'torch.device'>


EPOCHS: 전체 train dataset에 대하여 얼마나 training 할 것인가 <br>
LR(learning rate): step size라고도 하며 gradient descent 할 때 얼마나 변경할 것인가 <br>
BS(batch size): batch 1개에 dataset의 개수<br>
SEED: random 값을 고정

In [30]:
EPOCHS = 400 
LR = 1e-2
BS = 500
SEED = 41

In [31]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED) # Seed 고정

In [32]:
train_df = pd.read_csv('./data/train.csv')
train_df = train_df.drop(columns=['ID'])
val_df = pd.read_csv('./data/val.csv')
val_df = val_df.drop(columns=['ID'])

In [33]:
class MyDataset(Dataset):
    def __init__(self, df, eval_mode):
        self.df = df
        self.eval_mode = eval_mode
        if self.eval_mode:
            self.labels = self.df['Class'].values
            self.df = self.df.drop(columns=['Class']).values
        else:
            self.df = self.df.values
        
    def __getitem__(self, index):
        if self.eval_mode:
            self.x = self.df[index]
            self.y = self.labels[index]
            return torch.Tensor(self.x), self.y
        else:
            self.x = self.df[index]
            return torch.Tensor(self.x)
        
    def __len__(self):
        return len(self.df)

In [34]:
train_dataset = MyDataset(df=train_df, eval_mode=False)
train_loader = DataLoader(train_dataset, batch_size=BS, shuffle=True, num_workers=2)

val_dataset = MyDataset(df = val_df, eval_mode=True)
val_loader = DataLoader(val_dataset, batch_size=BS, shuffle=False, num_workers=2)

## Model architecture
원래 Autoencoder는 차원 축소 후 복원이지만 이번 task는 input dimension이 30 밖에 안되기 때문에 encoder로 차원을 확대한 후 decoder로 차원을 줄여 복원하는 방식으로 구현

# Chage hyperparameters in Neural Network
1. LeakyReLU -> GELU </br>
    최고 macro f1 score까지 더 빨리 왔지만 더이상 올라가지 않음 <br>
    suppose: 아마 오답에 매우 가까운 정답이 있거나 정답에 매우 가까운 오답이 있음, 전자일 확률이 높음<br>
    A: 정답에 매우 가까운 오답들이 너무 많음....(feat. EDA) 
    
2. Linear layer를 더 늘려보자 =>  macro f1-score가 올라가다가 다시 내려감 결국 50으로...
3. threshold를 더 늘려보자. 95% -> 98% => 85% 정도에서 고정..
4. 원래 방식대로 차원을 줄이고 나서 늘려볼까? => 성능 더 안 좋음..
5. diff를 cosinesimilarity -> ts-ss => 더 안 좋음.. <br>
    Linear layer를 더 늘려보니 더 안 좋음..

In [35]:
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        self.Encoder = nn.Sequential(
            nn.Linear(30,64),
            nn.BatchNorm1d(64),
            nn.GELU(),
            nn.Linear(64,128),
            nn.BatchNorm1d(128),
            nn.GELU(),
        )
        self.Decoder = nn.Sequential(
            nn.Linear(128,64),
            nn.BatchNorm1d(64),
            nn.GELU(),
            nn.Linear(64,30),
        )
        
    def forward(self, x):
        x = self.Encoder(x)
        x = self.Decoder(x)
        return x

In [36]:
from utils.vector_sim import TS_SS

In [37]:
class Trainer():
    def __init__(self, model, optimizer, train_loader, val_loader, scheduler, device):
        self.model = model
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.scheduler = scheduler
        self.device = device
        # Loss Function
        self.criterion = nn.L1Loss().to(self.device)
        
    def fit(self, ):
        self.model.to(self.device)
        best_score = 0
        for epoch in range(EPOCHS):
            self.model.train()
            train_loss = []
            for x in iter(self.train_loader):
                x = x.float().to(self.device)
                self.optimizer.zero_grad()

                _x = self.model(x)
                loss = self.criterion(x, _x)

                loss.backward()
                self.optimizer.step()

                train_loss.append(loss.item())

            score = self.validation(self.model, 0.5)
            print(f'Epoch : [{epoch}] Train loss : [{np.mean(train_loss)}] Val Score : [{score}])')

            if self.scheduler is not None:
                self.scheduler.step(score)

            if best_score < score:
                best_score = score
                torch.save(model.module.state_dict(), './checkpoint/best_model.pth', _use_new_zipfile_serialization=False)
    
    def validation(self, eval_model, thr):
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        eval_model.eval()
        pred = []
        true = []
        with torch.no_grad():
            for x, y in iter(self.val_loader):
                # x: (batch size, 30)
                x = x.float().to(self.device)

                _x = self.model(x)
                
                similarity = TS_SS(device=device)
                diff = similarity(x, _x).cpu().tolist()
                print(f"Min: {min(diff)} / Max: {max(diff)}")
                batch_pred = np.where(np.array(diff)<thr, 0,1).tolist()
                pred += batch_pred
                true += y.tolist()

        return f1_score(true, pred, average='macro')

In [38]:
model = nn.DataParallel(AutoEncoder())
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = LR)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10, threshold_mode='abs', min_lr=1e-8, verbose=True)

trainer = Trainer(model, optimizer, train_loader, val_loader, scheduler, device)
trainer.fit()

Min: 0.04066041111946106 / Max: 13.058443069458008
Min: 0.4521833658218384 / Max: 711.9341430664062
Min: 0.30006861686706543 / Max: 1520.49365234375
Min: 0.06453196704387665 / Max: 418.74267578125
Min: 0.03941349312663078 / Max: 64.83128356933594
Min: 0.02541986107826233 / Max: 12.927700996398926
Min: 0.10074016451835632 / Max: 321.26837158203125
Min: 0.03484658896923065 / Max: 40.298213958740234
Min: 0.1392998844385147 / Max: 1302.53515625
Min: 0.058998603373765945 / Max: 380.80548095703125
Min: 0.09318613260984421 / Max: 1403.1656494140625
Min: 0.052320633083581924 / Max: 332.38153076171875
Min: 0.046349093317985535 / Max: 224.4523468017578
Min: 0.046389494091272354 / Max: 89.66425323486328
Min: 0.0459352470934391 / Max: 48.308311462402344
Min: 0.07738015800714493 / Max: 332.77117919921875
Min: 0.023810576647520065 / Max: 25.834081649780273
Min: 0.03307259827852249 / Max: 37.37318801879883
Min: 0.03616560250520706 / Max: 17.179140090942383
Min: 0.044841356575489044 / Max: 134.0590820

In [39]:
model = AutoEncoder()
model.load_state_dict(torch.load('./checkpoint/best_model.pth'))
model = nn.DataParallel(model)
model.eval()

DataParallel(
  (module): AutoEncoder(
    (Encoder): Sequential(
      (0): Linear(in_features=30, out_features=64, bias=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate=none)
      (3): Linear(in_features=64, out_features=128, bias=True)
      (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): GELU(approximate=none)
    )
    (Decoder): Sequential(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU(approximate=none)
      (3): Linear(in_features=64, out_features=30, bias=True)
    )
  )
)