- CNN architecture experiment

In [None]:
import pandas as pd
import os
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import platform
plt.style.use('seaborn')
from datetime import datetime
import json

from metric import E1_loss, E2_loss, total_loss
from models import classifier, cnn_model, conv_block, cnn_parallel
from utils import train_model, eval_model, dfDataset, weights_init

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F

from sklearn.model_selection import KFold, train_test_split
from torchsummary import summary

In [None]:
SEED = 34


def fix_seed(SEED):
    """Seed the NumPy and PyTorch random generators and pin cuDNN behaviour.

    Args:
        SEED: integer seed handed to both ``np.random.seed`` and
            ``torch.manual_seed``.
    """
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    # Ask cuDNN for deterministic kernels and switch off its auto-tuner.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


fix_seed(SEED)

### class, function...

In [None]:
class Noise:
    """Callable transform that adds Gaussian noise to a sample.

    Attributes are stored as given:
        mu: mean of the normal distribution.
        sd: standard deviation of the normal distribution.
        shape: shape of the noise array drawn on every call.
    """

    def __init__(self, mu, sd, shape):
        self.mu, self.sd, self.shape = mu, sd, shape

    def __call__(self, x):
        """Return ``x`` plus a freshly drawn noise array cast to float32."""
        sampled = np.random.normal(loc=self.mu, scale=self.sd, size=self.shape)
        return x + sampled.astype(np.float32)

class dfDataset(Dataset):
    """Dataset pairing an indexable feature container with its targets.

    Args:
        x: features; must expose ``.shape`` and support indexing.
        y: targets, indexed with the same index as ``x``.
        transform: optional callable applied to the feature item only.
    """

    def __init__(self, x, y, transform = None):
        self.data, self.target, self.transform = x, y, transform

    def __len__(self):
        # Number of samples is the size of the first axis of the features.
        return self.data.shape[0]

    def __getitem__(self, index):
        sample = self.data[index]
        label = self.target[index]
        if not self.transform:
            return sample, label
        return self.transform(sample), label
    
def weights_init(m, initializer = nn.init.kaiming_uniform_):
    """Apply ``initializer`` to the weight of Linear and Conv2d modules.

    Intended for use with ``model.apply``; every other module type is left
    untouched, and biases are not modified.
    """
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    initializer(m.weight)
        
def train_model(model, train_data, weight, optimizer, scheduler, ep, loss_func):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: ``nn.Module`` to optimise; it is switched to training mode.
        train_data: iterable of ``(x, y)`` tensor batches supporting ``len()``.
        weight: unused; kept so existing call sites keep working.
        optimizer: optimizer whose ``zero_grad``/``step`` run once per batch.
        scheduler: unused here (per-batch stepping is disabled below).
        ep: current epoch number; unused while per-batch stepping is disabled.
        loss_func: callable ``loss_func(pred, y)`` returning a scalar tensor.

    Returns:
        Sum of the batch losses divided by ``len(train_data)``.
    """
    model.train()

    # Send batches to wherever the model lives instead of hard-coding CUDA, so
    # the loop also works on CPU-only hosts. A parameter-less model keeps the
    # previous behaviour (CUDA).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    loss_sum = 0
    for x, y in train_data:
        optimizer.zero_grad()
        x = x.to(device)
        y = y.to(device)

        pred = model(x)
        loss = loss_func(pred, y)
        loss.backward()
        optimizer.step()
        #scheduler.step(ep+ i/len(train_data)) for cosine annealing
        loss_sum += loss.item()

    return loss_sum / len(train_data)

def eval_model(model, val_data, loss_func):
    """Evaluate ``model`` on ``val_data`` and return the mean per-batch loss.

    Args:
        model: ``nn.Module`` to evaluate; it is switched to eval mode.
        val_data: iterable of ``(x, y)`` tensor batches supporting ``len()``.
        loss_func: callable ``loss_func(pred, y)`` returning a scalar tensor.

    Returns:
        Sum of the batch losses divided by ``len(val_data)``. Each batch has
        the same weight regardless of its size.
    """
    model.eval()

    # Send batches to wherever the model lives instead of hard-coding CUDA, so
    # evaluation also works on CPU-only hosts. A parameter-less model keeps
    # the previous behaviour (CUDA).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    loss = 0
    with torch.no_grad():
        for x, y in val_data:
            x = x.to(device)
            y = y.to(device)

            pred = model(x)
            loss += loss_func(pred, y).item()
    return loss / len(val_data)

def E1_loss(y_pred, y_true):
    """Mean squared error, averaged over axis 1 then over the batch, scaled by 1 / 2e+04."""
    squared_error = (y_true - y_pred) ** 2
    per_sample = torch.mean(squared_error, dim = 1)
    return torch.mean(per_sample) / 2e+04

def E2_loss(y_pred, y_true):
    """Squared error relative to the target, averaged over axis 1 then the batch.

    ``1e-06`` is added to the target in the denominator.
    """
    relative_error = (y_true - y_pred) ** 2 / (y_true + 1e-06)
    per_sample = torch.mean(relative_error, dim = 1)
    return torch.mean(per_sample)

In [None]:
class custom_cnn(nn.Module):
    """Six-stage CNN feature extractor followed by a five-layer MLP head.

    Each stage is Conv2d -> ELU -> BatchNorm2d -> MaxPool2d. Kernels are
    (3, 1) for convolution and (2, 1) for pooling, so only the height axis of
    the input is reduced and the width axis is left as is. The first
    convolution expects 2 input channels.

    Args:
        n_feature: input width; the head expects ``512 * 3 * n_feature``
            flattened values, i.e. a height that reduces to 3 after the six
            stages (for example an input height of 375).
        out_len: number of outputs of the final linear layer.
    """

    def __init__(self, n_feature, out_len):
        super().__init__()
        self.conv_kernel = (3, 1)
        self.pool_kernel = (2, 1)

        # Feature extractor: channel count doubles at every stage.
        channels = (2, 16, 32, 64, 128, 256, 512)
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size = self.conv_kernel, stride = 1),
                nn.ELU(),
                nn.BatchNorm2d(c_out),
                nn.MaxPool2d(kernel_size = self.pool_kernel),
            ])
        self.fe = nn.Sequential(*stages)

        # Regression head: Linear + ELU pairs, closed by a plain Linear.
        widths = (512*3*n_feature, 128, 64, 32, 16)
        head = []
        for w_in, w_out in zip(widths[:-1], widths[1:]):
            head.extend([nn.Linear(w_in, w_out), nn.ELU()])
        head.append(nn.Linear(widths[-1], out_len))
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        features = self.fe(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

### Configuration

In [None]:
# Optimisation settings.
EPOCH = 150
base_lr = 0.001
batch_size = 256
nfold = 10

# Switches: `initialize` gates the custom weight initialisation in training.
initialize = True
print_summary = True

# Every run writes into its own timestamped directory under ./model.
now = datetime.now().strftime('%Y%m%d-%H%M%S')
save_path = './model/' + now

### load dataset

In [None]:
def normalize(x, axis = 2):
    """Standardise ``x`` to zero mean and unit variance along ``axis``.

    Args:
        x: numpy array to normalise.
        axis: axis over which mean and standard deviation are computed.
            Previously the statistics were always taken over axis 2 and
            ``axis`` only controlled where the reduced dimension was
            re-inserted, so any value other than 2 was inconsistent.

    Returns:
        Array with the same shape as ``x``. Slices that are constant along
        ``axis`` map to 0 instead of NaN/inf.
    """
    mu = x.mean(axis = axis, keepdims = True)
    sd = x.std(axis = axis, keepdims = True)

    # A constant slice has sd == 0; divide by 1 there so the result is 0
    # rather than the NaN produced by 0 / 0.
    sd[sd == 0] = 1

    return (x - mu) / sd

In [None]:
# Dataset location depends on the operating system.
root_dir = (
    'D:/datasets/KAERI_dataset/'
    if platform.system() == 'Windows'
    else '/home/bskim/project/kaeri/KAERI_dataset/'
)

train_f = pd.read_csv(os.path.join(root_dir, 'train_features.csv'))
train_t = pd.read_csv(os.path.join(root_dir, 'train_target.csv'))
test_f = pd.read_csv(os.path.join(root_dir, 'test_features.csv'))

# Keep the time and sensor columns and fold the rows into
# (sample, channel = 1, 375 rows, 5 columns) arrays.
train_f = train_f[['Time','S1','S2','S3','S4']].values.reshape((-1, 1, 375, 5))
test_f = test_f[['Time','S1','S2','S3','S4']].values.reshape((-1, 1, 375, 5))

# Stack the normalised copy as a second channel next to the raw data.
train_norm = normalize(train_f)
test_norm = normalize(test_f)
train_f = np.concatenate((train_f, train_norm), axis = 1)
test_f = np.concatenate((test_f, test_norm), axis = 1)

# Only the test features are turned into a tensor here.
test_f = torch.FloatTensor(test_f)

### Train

In [None]:
def kfold_train(name, feature, target):
    """Train one ``custom_cnn`` per cross-validation fold for the given targets.

    Reads the module-level settings ``save_path``, ``nfold``, ``batch_size``,
    ``initialize``, ``base_lr`` and ``EPOCH``. For every fold the weights with
    the lowest validation loss are saved to
    ``<save_path>/model_<name>_fold<k>.pt``.

    Args:
        name: string whose characters are the target column names
            (``'XY'`` trains with ``E1_loss``, anything else with ``E2_loss``).
        feature: numpy array of inputs indexed by sample on axis 0; the size
            of its last axis is passed to ``custom_cnn`` as ``n_feature``.
        target: table indexable by a list of column names and exposing
            ``.values`` (a pandas DataFrame at the call sites).

    Returns:
        List with the best validation loss reached in each fold.
    """
    print('{} train...'.format(name))
    n_features = feature.shape[-1]
    os.makedirs(save_path, exist_ok = True)
    # make dataset
    train_target = target[list(name)].values

    fold = KFold(nfold, shuffle = True, random_state= 25)
    loss_per_cv = []
    noise_add = Noise(0, 0.001, feature.shape[1:])
    criterion = E1_loss if name == 'XY' else E2_loss

    for i, (train_idx, val_idx) in enumerate(fold.split(feature, y = train_target)):
        print('fold {}'.format(i+1))
        trainx = feature[train_idx]
        valx = feature[val_idx]
        trainy = train_target[train_idx]
        valy = train_target[val_idx]

        train_dataset = dfDataset(trainx.astype(np.float32), trainy, transform = noise_add)
        train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
        val_dataset = dfDataset(valx.astype(np.float32), valy)
        # No shuffling for validation: eval_model averages per-batch losses,
        # so a reshuffled final partial batch would make the validation loss
        # fluctuate between epochs for identical weights.
        val_loader = DataLoader(val_dataset, batch_size = batch_size, shuffle = False)

        model = custom_cnn(n_features, len(name))
        model = model.cuda()
        if initialize:
            model.apply(weights_init)

        curr_loss = 1e+7
        # parameters() takes only the `recurse` flag; the loss sentinel that
        # used to be passed here was accepted only because it is truthy.
        optimizer = torch.optim.Adam(model.parameters(), lr = base_lr)
        #scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.5, patience = 10, threshold = 1e-6, verbose = True)
        checkpoint = os.path.join(save_path, 'model_{}_fold{}.pt'.format(name, i+1))
        #train
        for ep in range(1, EPOCH + 1):
            # The third positional argument (`weight`) is ignored by train_model.
            loss = train_model(model, train_loader, criterion, optimizer, scheduler, ep, criterion)
            val_loss = eval_model(model, val_loader, criterion)
            if curr_loss > val_loss:
                print('[{}] : train loss {:.4f}, val loss drop {:.4f} to {:.4f}'.format(ep, loss, curr_loss, val_loss))
                curr_loss = val_loss
                # Keep only the best weights seen so far for this fold.
                torch.save(model.state_dict(), checkpoint)
            scheduler.step(val_loss)
        loss_per_cv.append(curr_loss)
    return loss_per_cv

In [None]:
# train XY
# The targets are trained in sequence; each later model also receives the true
# values of the earlier targets as extra feature columns, repeated over every
# channel and time step of the input.

# train XY
loss_xy = kfold_train('XY',train_f, train_t)

# Sizes are taken from train_f rather than hard-coded, so the concatenation
# below stays valid for any number of samples, channels or time steps.
n_channels, n_steps = train_f.shape[1], train_f.shape[2]

add_feature = train_t[['X','Y']].values.reshape((-1, 1, 1, 2))
add_feature = np.repeat(add_feature, n_steps, axis = 2)
add_feature = np.repeat(add_feature, n_channels, axis = 1)
trainX = np.concatenate((train_f, add_feature), axis = -1)

# train V using XY
loss_v = kfold_train('V',trainX, train_t)

add_feature = train_t[['V']].values.reshape((-1, 1, 1, 1))
add_feature = np.repeat(add_feature, n_steps, axis = 2)
add_feature = np.repeat(add_feature, n_channels, axis = 1)
trainX = np.concatenate((trainX, add_feature), axis = -1)

# train M using XY and V
loss_m = kfold_train('M',trainX, train_t)

In [None]:
# Reduce each per-fold loss list to its mean and store the summary as JSON
# next to the saved models.
loss_per_model = {'xy': loss_xy, 'v': loss_v, 'm': loss_m}
with open(os.path.join(save_path, 'loss_info.json'), 'w') as f:
    loss_per_model = {key: np.mean(folds) for key, folds in loss_per_model.items()}
    json.dump(loss_per_model, f)

In [None]:
# Display the mean cross-validation loss per target.
# NOTE(review): the trailing "leaky relu" note does not match custom_cnn as
# defined in this notebook, which uses nn.ELU; confirm which run it refers to.
loss_per_model # leaky relu

## test

In [None]:
def predict_fold(model,nfold, save_path, name, test_data):
    """Average the predictions of all fold checkpoints on ``test_data``.

    Args:
        model: network whose architecture matches the saved state dicts; its
            weights are overwritten and it is left in eval mode.
        nfold: number of checkpoints; folds are numbered 1..nfold.
        save_path: directory containing ``model_<name>_fold<k>.pt`` files.
        name: target name used in the checkpoint file names.
        test_data: input tensor fed to the model in a single forward pass.

    Returns:
        numpy array with the element-wise mean of the per-fold predictions.
    """
    # Use CUDA when present (as before), otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    # Inference must run in eval mode: in training mode BatchNorm normalises
    # with the statistics of the test batch (and updates its running
    # averages) and Dropout stays active, which distorts the predictions.
    model.eval()
    test_data = test_data.to(device)

    pred_array = []
    for i in range(1, nfold+1):
        checkpoint = os.path.join(save_path, 'model_{}_fold{}.pt'.format(name, i))
        # map_location lets checkpoints saved on a GPU load on any device.
        model.load_state_dict(torch.load(checkpoint, map_location = device))

        with torch.no_grad():
            predict = model(test_data)
        pred_array.append(predict.cpu().numpy())
    result = np.mean(pred_array, axis = 0)
    return result

In [None]:
# predict XY
# predict XY
name = 'XY'
submission = pd.read_csv(os.path.join(root_dir, 'sample_submission.csv'))

# define model: width of the input is the last axis of the test features
n_features = test_f.shape[-1]
model = custom_cnn(n_features, len(name))

# Fold-averaged prediction goes into the X and Y columns.
result = predict_fold(model, nfold, save_path, name, test_f)
submission[list(name)] = result

In [None]:
# Copy the values in `result` over 2 channels and 375 time steps, then append
# them to the test features as extra columns on the last axis.
n_samples = test_f.shape[0]
per_sample = result.reshape((n_samples, 1, 1, len(name)))
add_feature_t = np.tile(per_sample, (1, 2, 375, 1))
add_feature_t = torch.FloatTensor(add_feature_t)

test_f_add = torch.cat([test_f, add_feature_t], dim = -1)

In [None]:
# predict V
# predict V
name = 'V'

# define model: input width now includes the appended columns
n_features = test_f_add.shape[-1]
model = custom_cnn(n_features, len(name))

# Fold-averaged prediction goes into the V column.
result = predict_fold(model, nfold, save_path, name, test_f_add)
submission[list(name)] = result

In [None]:
# Repeat the values in `result` over 375 time steps and 2 channels, then
# append them to the already extended test features on the last axis.
n_samples = test_f_add.shape[0]
add_feature_t = (
    result.reshape((n_samples, 1, 1, len(name)))
    .repeat(375, axis = 2)
    .repeat(2, axis = 1)
)
add_feature_t = torch.FloatTensor(add_feature_t)

test_f_add = torch.cat([test_f_add, add_feature_t], dim = -1)

In [None]:
# predict M
# predict M
name = 'M'

# define model: input width now includes all appended columns
n_features = test_f_add.shape[-1]
model = custom_cnn(n_features, len(name))

# Fold-averaged prediction goes into the M column.
result = predict_fold(model, nfold, save_path, name, test_f_add)
submission[list(name)] = result

In [None]:
# Preview the first rows of the submission table filled in above.
submission.head()

In [None]:
# Write the submission into the run directory, named after the last path
# component of save_path.
run_name = save_path.rsplit('/', 1)[-1]
submission_file = os.path.join(save_path, '{}.csv'.format(run_name))
submission.to_csv(submission_file, index = False)