## Import

In [76]:
import random
import pandas as pd
import numpy as np
import os
import glob

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [77]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

## Hyperparameter Setting

In [79]:
CFG = {
    'EPOCHS':1000,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':64,
    'SEED':41
}

## Fixed RandomSeed

In [80]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing

In [83]:
all_input_list = sorted(glob.glob('./data/train_input/*.csv'))
all_target_list = sorted(glob.glob('./data/train_target/*.csv'))

In [84]:
all_input_list

[]

In [62]:
train_input_list = all_input_list[:25]
train_target_list = all_target_list[:25]

val_input_list = all_input_list[25:]
val_target_list = all_target_list[25:]

## CustomDataset

In [63]:
class CustomDataset(Dataset):
    def __init__(self, input_paths, target_paths, infer_mode):
        self.input_paths = input_paths
        self.target_paths = target_paths
        self.infer_mode = infer_mode
        
        self.data_list = []
        self.label_list = []
        print('Data Pre-processing..')
        for input_path, target_path in tqdm(zip(self.input_paths, self.target_paths)):
            input_df = pd.read_csv(input_path)
            target_df = pd.read_csv(target_path)
            
            input_df = input_df.drop(columns=['obs_time'])
            input_df = input_df.fillna(method='ffill')
            
            input_length = int(len(input_df)/24)
            target_length = int(len(target_df))
            
            for idx in range(target_length):
                time_series = input_df[24*idx:24*(idx+1)].values
                self.data_list.append(torch.Tensor(time_series))
            for label in target_df["predicted_weight_g"]:
                self.label_list.append(label)
        print('Done.')
              
    def __getitem__(self, idx):
        data = self.data_list[idx]
        label = self.label_list[idx]
        if self.infer_mode == False:
            return data, label
        else:
            return data
        
    def __len__(self):
        return len(self.data_list)

In [64]:
train_dataset = CustomDataset(train_input_list, train_target_list, False)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False)

val_dataset = CustomDataset(val_input_list, val_target_list, False)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

Data Pre-processing..


0it [00:00, ?it/s]

Done.
Data Pre-processing..


0it [00:00, ?it/s]

Done.


## Model Define

In [65]:
class Conv1d_LSTM(nn.Module):
    def __init__(self, in_channel=15, out_channel=1):
        super(Conv1d_LSTM, self).__init__()
        self.conv2d_1 = nn.Conv1d(in_channels=in_channel,
                                out_channels=64,
                                kernel_size=1,
                                stride=1,
                                padding=1)
        
        self.maxpool = nn.MaxPool1d(10, stride=1)
        self.lstm = nn.LSTM(input_size=64,
                    hidden_size=32,
                    num_layers=1,
                    bias=True,
                    bidirectional=False,
                    batch_first=True)
        
        self.dropout = nn.Dropout(0.5)
        self.dense1 = nn.Linear(32, 16)
        self.dense2 = nn.Linear(16, out_channel)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        
    def forward(self, x):
        x = x.transpose(1, 2)
        x = self.conv2d_1(x)
        x = self.maxpool(x)
        x = self.relu(x)
        
        x = x.transpose(1, 2)

        self.lstm.flatten_parameters()
        _, (hidden, _) = self.lstm(x)
        
        x = hidden[-1]
        
        x = self.tanh(x)        
        x = self.dropout(x)
        
        x = self.dense1(x)

        x = self.dense2(x)

        return x

## Train

In [66]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.L1Loss().to(device)
    
    best_loss = 9999
    best_model = None
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for X, Y in iter(train_loader):
            X = X.to(device)
            Y = Y.to(device)
            optimizer.zero_grad()
            
            output = model(X)
            loss = criterion(output, Y)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        val_loss = validation(model, val_loader, criterion, device)
        
        print(f'Train Loss : [{np.mean(train_loss):.5f}] Valid Loss : [{val_loss:.5f}]')
        
        if scheduler is not None:
            scheduler.step(val_loss)
            
        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model
    return best_model

In [67]:
def validation(model, val_loader, criterion, device):
    model.eval()
    val_loss = []
    with torch.no_grad():
        for X, Y in iter(val_loader):
            X = X.float().to(device)
            Y = Y.float().to(device)
            
            model_pred = model(X)
            loss = criterion(model_pred, Y)
            
            val_loss.append(loss.item())
            
    return np.mean(val_loss)

## Run!!

In [68]:
model = Conv1d_LSTM()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, threshold_mode='abs',min_lr=1e-8, verbose=True)

best_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Epoch 00003: reducing learning rate of group 0 to 5.0000e-04.
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Epoch 00006: reducing learning rate of group 0 to 2.5000e-04.
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Epoch 00009: reducing learning rate of group 0 to 1.2500e-04.
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Epoch 00012: reducing learning rate of group 0 to 6.2500e-05.
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Epoch 00015: reducing learning rate of group 0 to 3.1250e-05.
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Train Loss : [nan] Valid Loss : [nan]
Epoch 

## Inference

In [69]:
test_input_list = sorted(glob.glob('./data/test_input/*.csv'))
test_target_list = sorted(glob.glob('./data/test_target/*.csv'))

In [70]:
def inference_per_case(model, test_loader, test_path, device):
    model.to(device)
    model.eval()
    pred_list = []
    with torch.no_grad():
        for X in iter(test_loader):
            X = X.float().to(device)
            
            model_pred = model(X)
            
            model_pred = model_pred.cpu().numpy().reshape(-1).tolist()
            
            pred_list += model_pred
    
    submit_df = pd.read_csv(test_path)
    submit_df['predicted_weight_g'] = pred_list
    submit_df.to_csv(test_path, index=False)

In [71]:
for test_input_path, test_target_path in zip(test_input_list, test_target_list):
    test_dataset = CustomDataset([test_input_path], [test_target_path], True)
    test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)
    inference_per_case(best_model, test_loader, test_target_path, device)

## Submission

In [72]:
import zipfile
os.chdir("./test_target/")
submission = zipfile.ZipFile("../submission.zip", 'w')
for path in test_target_list:
    path = path.split('/')[-1]
    submission.write(path)
submission.close()

FileNotFoundError: [WinError 2] 지정된 파일을 찾을 수 없습니다: './test_target/'