In [1]:
import gc
import time
import argparse
from fastai.layers import swish

import numpy as np
import pandas as pd

from sklearn.model_selection import KFold

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

from fastai.callback.schedule import Learner
from fastai.data.core import DataLoaders
from fastai.losses import L1LossFlat
from fastai.callback.core import Callback
from fastai.callback.tracker import ReduceLROnPlateau, SaveModelCallback

from HW_torch import dataLoads_build, net_parameter_count, hw_layer
from HW_base import evaluate_build, focus_build

In [2]:
fname               = '(F5-E128-F80)_RES(LSTM-FC-HW)x8-(FC-SELU-FC)'
evaluate_num        = 128
focus_min           = 0.8
net_block_num       = 8
hw_active           = True
epoch_num_first     = 100
batch_size_first    = 100
epoch_num_second    = 200
batch_size_second   = 1000

In [3]:
class Net_block(torch.nn.Module):
    def __init__(self, input_dims, internal_dims, output_dims, bias=True, bidirectional=True, batch_first=True, **kwargs):
        super(Net_block, self).__init__()
        self.lstm = nn.LSTM(input_dims, internal_dims, num_layers=1, bias=True, bidirectional=True, batch_first=True)
        if bidirectional:
            self.fc =nn.Linear(internal_dims*2, output_dims)
        else:
            self.fc =nn.Linear(internal_dims, output_dims)
            
    def forward(self, x, hw=None):
        y, _ = self.lstm(x)
        y = self.fc(y) + x
        if hw is not None:
            y = y * hw
        return y
        
class Net_test(torch.nn.Module):
    def __init__(self, evaluate_dic_list, net_block_num=net_block_num, hw_active=hw_active, **kwargs):
        super(Net_test, self).__init__()
        self.hw_layer = hw_layer(evaluate_dic_list)
        self.hw_dims = self.hw_layer.channels
        
        self.net_block_list = nn.ModuleList()
        for _ in range(net_block_num):
            self.net_block_list.append(Net_block(self.hw_dims, self.hw_dims, self.hw_dims))
        
        self.fc = nn.Sequential(nn.Linear(self.hw_dims, 32, bias=False),
                                nn.SELU(),
                                nn.Linear(32, 1, bias=True))
        
    def forward(self, x):
        hw = self.hw_layer(x)
        x = hw
        
        for net_block in self.net_block_list:
            x = net_block(x, hw)
        
        x = self.fc(x)
        return x

In [4]:
data_train_df = pd.read_csv('./Database/train.csv')
data_test_df = pd.read_csv('./Database/test.csv')

In [5]:
drop_columns = ['pressure','id', 'breath_id','one','count','breath_id_lag','breath_id_lag2','breath_id_lagsame','breath_id_lag2same','u_out_lag2']
x_columns = [col for col in data_train_df.columns if col not in drop_columns]
y_columns = ['pressure']

In [6]:
data_train = data_train_df[x_columns].values.astype(np.float32)
data_train = data_train.reshape(-1, 80, data_train.shape[-1])

target_train = data_train_df[y_columns].values.astype(np.float32)
target_train = target_train.reshape(-1, 80, target_train.shape[-1])

data_test = data_test_df[x_columns].values.astype(np.float32)
data_test = data_test.reshape(-1, 80, data_test.shape[-1])

In [7]:
print(data_train.shape)

(75450, 80, 5)


In [8]:
np.random.seed(121212)
data_idx = np.arange(len(data_train))
np.random.shuffle(data_idx)

train_index = data_idx[:int(len(data_idx)*0.9)]
valid_index = data_idx[int(len(data_idx)*0.1):]

In [9]:
x_train, y_train = data_train[train_index], target_train[train_index]
x_valid, y_valid = data_train[valid_index], target_train[valid_index]
x_test = data_test

In [10]:
del data_train_df
del data_test_df
gc.collect()

22

In [11]:
evaluate_list = [evaluate_build(x_test[..., i], evaluate_num) for i in range(x_test.shape[-1])]
evaluate_focus_list = []
for evaluate in evaluate_list:
    focus = 1 - (len(evaluate) - 1)/10
    if focus < focus_min:
        focus = focus_min
    evaluate_focus = focus_build(evaluate, focus)
    evaluate_focus_list.append(evaluate_focus)

evaluate_num:   3,focus:0.8000: 3it [00:00, 3005.23it/s]
evaluate_num:   3,focus:0.8000: 3it [00:00, ?it/s]
evaluate_num: 127,focus:0.6000: 127it [00:00, 317.56it/s]
evaluate_num:  99,focus:0.6000: 99it [00:00, 294.55it/s]
evaluate_num:   2,focus:0.9000: 2it [00:00, ?it/s]


In [12]:
model = Net_test(evaluate_focus_list)
print(model)

train_parameter_num, freeze_parameter_num = net_parameter_count(model)
print(train_parameter_num, freeze_parameter_num)

Net_test(
  (hw_layer): hw_layer(
    (evaluate_list): ModuleList(
      (0): Embedding(3, 1)
      (1): Embedding(3, 1)
      (2): Embedding(127, 1)
      (3): Embedding(99, 1)
      (4): Embedding(2, 1)
    )
    (focus_list): ModuleList(
      (0): Embedding(3, 1)
      (1): Embedding(3, 1)
      (2): Embedding(127, 1)
      (3): Embedding(99, 1)
      (4): Embedding(2, 1)
    )
  )
  (net_block_list): ModuleList(
    (0): Net_block(
      (lstm): LSTM(234, 234, batch_first=True, bidirectional=True)
      (fc): Linear(in_features=468, out_features=234, bias=True)
    )
    (1): Net_block(
      (lstm): LSTM(234, 234, batch_first=True, bidirectional=True)
      (fc): Linear(in_features=468, out_features=234, bias=True)
    )
    (2): Net_block(
      (lstm): LSTM(234, 234, batch_first=True, bidirectional=True)
      (fc): Linear(in_features=468, out_features=234, bias=True)
    )
    (3): Net_block(
      (lstm): LSTM(234, 234, batch_first=True, bidirectional=True)
      (fc): Linear

In [None]:
dataLoads = dataLoads_build(x_train, y_train, x_valid, y_valid, batch_size_first)
learn = Learner(dataLoads, model, loss_func=L1LossFlat())
learn.lr_find()
learn.fit_one_cycle(epoch_num_first, lr_max=2e-3, cbs=[ReduceLROnPlateau(monitor='valid_loss', min_delta=0.5, patience=10),
                                                       SaveModelCallback(monitor='valid_loss', fname=f'{fname}_B{batch_size_first}_best')])

epoch,train_loss,valid_loss,time
0,1.914038,1.684997,01:31
1,0.923415,0.892227,01:31
2,0.707767,0.714288,01:32
3,0.607676,0.614365,01:31
4,0.539098,0.556849,01:30


Better model found at epoch 0 with valid_loss value: 1.6849974393844604.
Better model found at epoch 1 with valid_loss value: 0.8922269940376282.
Better model found at epoch 2 with valid_loss value: 0.7142876386642456.
Better model found at epoch 3 with valid_loss value: 0.6143646240234375.
Better model found at epoch 4 with valid_loss value: 0.5568487644195557.


In [None]:
dataLoads = dataLoads_build(x_train, y_train, x_valid, y_valid, batch_size_second)
learn = Learner(dataLoads, model, loss_func=L1LossFlat())
learn.lr_find()
learn.fit_one_cycle(epoch_num_second, lr_max=2e-3, cbs=[ReduceLROnPlateau(monitor='valid_loss', min_delta=0.5, patience=10),
                                                        SaveModelCallback(monitor='valid_loss', fname=f'{fname}_B{batch_size_second}_best')])

In [None]:
from HW_torch import torch_valid, torch_predict

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

state_dict = torch.load(fname=f'{fname}_B{batch_size_second}_best.pth')
model.load_state_dict(state_dict)

loss = torch_valid([model.to(device)], L1LossFlat(), (x_train, y_train),  batch_size_second, to_device=device)
valid_loss = torch_valid([model.to(device)], L1LossFlat(), (x_valid, y_valid),  batch_size_second, to_device=device)

print(loss, valid_loss)

In [None]:
predict = torch_predict([model.to(device)], x_test, batch_size_second, to_device=device)
predict = np.reshape(predict, (-1))

In [None]:
df = pd.read_csv('Database/sample_submission.csv', index_col=0)
df['pressure'] = predict

df.to_csv(f'Submission/{fname}.csv')