- 6x1, 7x1, 8x1 세 개의 filter size마다 cv 모델 생성후 ensemble

In [30]:
import pandas as pd
import os
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import platform
plt.style.use('seaborn')
from datetime import datetime
import json

from metric import E1_loss, E2_loss, total_loss
from models import classifier, cnn_model, conv_block, cnn_parallel
from utils import train_model, eval_model, dfDataset, weights_init

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F

from sklearn.model_selection import KFold, train_test_split
from torchsummary import summary

### class, function...

In [31]:
class Noise(object):
    def __init__(self, mu, sd, shape):
        self.mu = mu
        self.sd = sd
        self.shape = shape
    
    def __call__(self, x):
        noise = np.random.normal(self.mu, self.sd, self.shape)
        #noise = torch.FloatTensor(noise)
        return x + noise.astype(np.float32)

class dfDataset(Dataset):
    def __init__(self, x, y, transform = None):
        self.data = x
        self.target = y
        self.transform = transform
    
    def __len__(self):
        return self.data.shape[0]
    
    def __getitem__(self, index):
        batchX, batchY = self.data[index], self.target[index]
        if self.transform:
            batchX = self.transform(batchX)
        return batchX, batchY
    
def weights_init(m, initializer = nn.init.kaiming_uniform_):
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        initializer(m.weight)
        
def train_model(model, train_data, weight, optimizer, loss_func):
    model.train()
    loss_sum = 0
    for i, (x, y) in enumerate(train_data):
        optimizer.zero_grad()
        x = x.cuda()
        y = y.cuda()
        pred = model(x)
        loss = loss_func(pred, y)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
    
    return loss_sum / len(train_data)

def eval_model(model, val_data, loss_func):
    model.eval()
    with torch.no_grad():
        loss = 0
        for i, (x, y) in enumerate(val_data):
            x = x.cuda()
            y = y.cuda()
            pred = model(x)
            loss += loss_func(pred, y).item()
    return loss / len(val_data)

class conv_bn(nn.Module):
    def __init__(self, i_f, o_f, fs):
        super(conv_bn, self).__init__()
        self.conv = nn.Conv2d(i_f, o_f, fs)
        self.act = nn.ELU()
        self.bn = nn.BatchNorm2d(o_f)
        self.pool = nn.MaxPool2d(kernel_size=(2, 1), stride= (2, 1))
    def forward(self, x):
        x = self.bn(self.act(self.conv(x)))
        return self.pool(x)
        #return x
    
class conv_block(nn.Module):
    def __init__(self, h_list, input_shape, fs):
        '''
        input_shape : not include batch_size
        '''
        
        super(conv_block, self).__init__()
        self.input_shape = input_shape
        self.fs = fs
        convs = []
        for i in range(len(h_list)):
            if i == 0:
                convs.append(conv_bn(self.input_shape[0], h_list[i], fs))
            else:
                convs.append(conv_bn(h_list[i-1], h_list[i], fs))
        self.convs = nn.Sequential(*convs)
    
    def forward(self, x):
        return self.convs(x)
    
class classifier(nn.Module):
    def __init__(self, h_list, input_size, output_size):
        super(classifier, self).__init__()
        layers = []
        for i in range(len(h_list)):
            if i == 0:
                layers.append(nn.Linear(input_size, h_list[0]))
            else:
                layers.append(nn.Linear(h_list[i-1], h_list[i]))
            layers.append(nn.ELU())
            
        layers.append(nn.Linear(h_list[i], output_size))
        self.layers = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.layers(x)
    
class cnn_model(nn.Module):
    def __init__(self, cnn_block, fc_block):
        super(cnn_model, self).__init__()
        self.cnn = cnn_block
        self.fc = fc_block
    def forward(self, x):
        x = self.cnn(x)
        x = x.flatten(start_dim = 1)
        return self.fc(x)

def E1_loss(y_pred, y_true):
    _t, _p = y_true, y_pred
    
    return torch.mean(torch.mean((_t - _p) ** 2, axis = 1)) / 2e+04

def E2_loss(y_pred, y_true):
    _t, _p = y_true, y_pred
    
    return torch.mean(torch.mean((_t - _p) ** 2 / (_t + 1e-06), axis = 1))

- augmentation(noise add)
- channel concat

### Configuration

In [32]:
EPOCH = 100
base_lr = 0.001
now = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
save_path = './model/{}'.format(now)
initialize = True
print_summary = True
batch_size = 256
nfold = 10

### load dataset

- 6x1 : 256* 6 * 4
- 7x1 : 256* 5 * 4
- 8x1 : 256* 4 * 4

In [33]:
if platform.system() == 'Windows':
    root_dir = 'D:/datasets/KAERI_dataset/'
else:
    root_dir = '/home/bskim/project/kaeri/KAERI_dataset/'

train_f = pd.read_csv(os.path.join(root_dir, 'train_features.csv'))
train_t = pd.read_csv(os.path.join(root_dir, 'train_target.csv'))
test_f = pd.read_csv(os.path.join(root_dir, 'test_features.csv'))

train_f = train_f[['Time','S1','S2','S3','S4']].values
train_f = train_f.reshape((-1, 1, 375, 5))#.astype(np.float32)

test_f = test_f[['Time','S1','S2','S3','S4']].values
test_f = test_f.reshape((-1, 1, 375, 5))#.astype(np.float32)
test_f = torch.FloatTensor(test_f)

### Train

In [34]:
def kfold_train(name, feature, target):
    print('{} train...'.format(name))
    n_features = feature.shape[-1]
    os.makedirs(save_path) if not os.path.exists(save_path) else None
    # make dataset
    train_target = target[list(name)].values

    fold = KFold(nfold, shuffle = True, random_state= 25)
    loss_per_cv = []
    noise_add = Noise(0, 0.001, feature.shape[1:])
    for i, (train_idx, val_idx) in enumerate(fold.split(feature, y = train_target)):
        print('fold {}'.format(i+1))
        trainx = feature[train_idx]
        valx = feature[val_idx]
        trainy = train_target[train_idx]
        valy = train_target[val_idx]

        train_dataset = dfDataset(trainx.astype(np.float32), trainy, transform = noise_add)
        train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
        val_dataset = dfDataset(valx.astype(np.float32), valy)
        val_loader = DataLoader(val_dataset, batch_size = batch_size, shuffle = True)

        conv = conv_block([32, 64, 128, 256, 512], [1, 375, n_features], (6, 1))
        fc = classifier([128, 64, 32, 16], input_size = 512*6*n_features, output_size = len(name))
        # define model
        model = cnn_model(conv, fc)
        #model = get_model()
        optimizer = torch.optim.Adam(model.parameters(), lr = base_lr)

        if name == 'XY':
            criterion = E1_loss
        else:
            criterion = E2_loss

        model = model.cuda()
        if initialize:
            model.apply(weights_init)

        curr_loss = 1e+7
        #train
        for ep in range(1, EPOCH + 1):
            loss = train_model(model, train_loader, criterion, optimizer, criterion)
            val_loss =eval_model(model, val_loader, criterion)
            if curr_loss > val_loss:
                print('[{}] : train loss {:4f}, val loss drop {:.4f} to {:.4f}'.format(ep, np.mean(loss), curr_loss, val_loss))
                curr_loss = val_loss
                torch.save(model.state_dict(), os.path.join(save_path, 'model_{}_fold{}.pt'.format(name, i+1)))
        loss_per_cv.append(curr_loss)
    return loss_per_cv           

In [35]:
# train XY
loss_xy = kfold_train('XY',train_f, train_t)

add_feature = train_t[['X','Y']].values.reshape((2800, 1, 1, 2))
add_feature = np.repeat(add_feature, 375, axis = 2)
trainX = np.concatenate((train_f, add_feature), axis = -1)

# train V using XY
loss_v = kfold_train('V',trainX, train_t)

add_feature = train_t[['V']].values.reshape((2800, 1, 1, 1))
add_feature = np.repeat(add_feature, 375, axis = 2)
trainX = np.concatenate((trainX, add_feature), axis = -1)

# train V using XY
loss_m = kfold_train('M',trainX, train_t)

XY train...
fold 1
[1] : train loss 2.066870, val loss drop 10000000.0000 to 0.5910
[3] : train loss 0.084366, val loss drop 0.5910 to 0.1397
[4] : train loss 0.030894, val loss drop 0.1397 to 0.0450
[5] : train loss 0.013724, val loss drop 0.0450 to 0.0168
[6] : train loss 0.006933, val loss drop 0.0168 to 0.0098
[7] : train loss 0.004332, val loss drop 0.0098 to 0.0055
[9] : train loss 0.002308, val loss drop 0.0055 to 0.0048
[10] : train loss 0.001696, val loss drop 0.0048 to 0.0023
[16] : train loss 0.001262, val loss drop 0.0023 to 0.0015
[20] : train loss 0.001133, val loss drop 0.0015 to 0.0013
[21] : train loss 0.000844, val loss drop 0.0013 to 0.0011
[23] : train loss 0.001119, val loss drop 0.0011 to 0.0009
[34] : train loss 0.001034, val loss drop 0.0009 to 0.0008
[37] : train loss 0.000647, val loss drop 0.0008 to 0.0007
[46] : train loss 0.000657, val loss drop 0.0007 to 0.0007
[47] : train loss 0.000428, val loss drop 0.0007 to 0.0006
[75] : train loss 0.000632, val loss 

In [36]:
loss_per_model = {'xy':loss_xy, 'v':loss_v, 'm':loss_m}

In [37]:
with open(os.path.join(save_path, 'loss_info.json'), 'w') as f:
    for k in loss_per_model:
        loss_per_model[k] = np.mean(loss_per_model[k])
    f.write(json.dumps(loss_per_model))

## test

In [38]:
def predict_fold(model,nfold, save_path, name, test_data):
    pred_array = []
    for i in range(1, nfold+1):
        model.load_state_dict(torch.load(os.path.join(save_path, 'model_{}_fold{}.pt'.format(name, i))))
        model = model.cuda()
        
        with torch.no_grad():
            predict = model(test_data.cuda())
        pred_array.append(predict.detach().cpu().numpy())
    result = np.mean(pred_array, axis = 0)
    return result

In [39]:
# predict XY
submission = pd.read_csv(os.path.join(root_dir, 'sample_submission.csv'))
name = 'XY'
n_features = test_f.size()[-1]
# define model
conv = conv_block([32, 64, 128, 256, 512], [1, 375, n_features], (6, 1))
fc = classifier([128, 64, 32, 16], input_size = 512*6*n_features, output_size = len(name))
model = cnn_model(conv, fc)

result = predict_fold(model, nfold, save_path ,name, test_f)
submission[list(name)] = result

In [40]:
n_samples = test_f.shape[0]
add_feature_t = result.reshape((n_samples, 1, 1, len(name)))
add_feature_t = np.repeat(add_feature_t, 375, axis = 2)

add_feature_t = torch.FloatTensor(add_feature_t)

test_f_add = torch.cat([test_f, add_feature_t], dim = -1)

In [41]:
# predict V
name = 'V'
n_features = test_f_add.size()[-1]
# define model
conv = conv_block([32, 64, 128, 256, 512], [1, 375, n_features], (6, 1))
fc = classifier([128, 64, 32, 16], input_size = 512*6*n_features, output_size = len(name))
model = cnn_model(conv, fc)

result = predict_fold(model, nfold, save_path,name, test_f_add)
submission[list(name)] = result

In [42]:
n_samples = test_f_add.shape[0]
add_feature_t = result.reshape((n_samples, 1, 1, len(name)))
add_feature_t = np.repeat(add_feature_t, 375, axis = 2)

add_feature_t = torch.FloatTensor(add_feature_t)

test_f_add = torch.cat([test_f_add, add_feature_t], dim = -1)

In [43]:
# predict M
name = 'M'
n_features = test_f_add.size()[-1]
# define model
conv = conv_block([32, 64, 128, 256, 512], [1, 375, n_features], (6, 1))
fc = classifier([128, 64, 32, 16], input_size = 512*6*n_features, output_size = len(name))
model = cnn_model(conv, fc)

result = predict_fold(model, nfold, save_path,name, test_f_add)
submission[list(name)] = result

In [44]:
submission.head()

Unnamed: 0,id,X,Y,M,V
0,2800,-262.549469,-43.139381,112.939957,0.493597
1,2801,305.662506,-280.862122,85.90097,0.483432
2,2802,-231.270584,138.257217,30.53426,0.396861
3,2803,153.628372,282.001404,27.178936,0.399658
4,2804,-161.359268,184.475433,130.145233,0.46603


In [45]:
submission.to_csv(os.path.join(save_path, 'submit.csv'), index = False)