In [1]:
import pandas as pd
import numpy as np
import time

# Data Preprocessing
### One-hot encode the categorical features R and C (each is encoded separately; the code below does not encode their combination)

In [2]:
from sklearn.preprocessing import RobustScaler, normalize
# Every sequence in the CSVs spans this many consecutive rows (see the reshapes below).
SEQ_LEN = 80


def _one_hot_rc(df):
    """Return a copy of `df` with columns R and C replaced by one-hot indicator columns.

    R and C are cast to str first so the generated column names are
    `R_<value>` / `C_<value>` built from the string form of each value.
    """
    return pd.get_dummies(df.astype({'R': str, 'C': str}), columns=['R', 'C'])


print("Loading data...")
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')
# Boolean mask of the rows where u_out == 0, one row of SEQ_LEN flags per sequence.
masks = np.array(train['u_out'] == 0).reshape(-1, SEQ_LEN)
# Regression targets, one row of SEQ_LEN pressures per sequence.
targets = train[['pressure']].to_numpy().reshape(-1, SEQ_LEN)

print("One hot encoding...")
# Plain assignments instead of eval()/exec() on variable names.
train = _one_hot_rc(train)
test = _one_hot_rc(test)

print("Dropping id and labels...")
train = train.drop(columns=['pressure', 'id', 'breath_id'])
test = test.drop(columns=['id', 'breath_id'])
# The two frames are encoded independently, so force the test columns into the
# same set and order as train; a category absent from test becomes an all-zero column.
test = test.reindex(columns=train.columns, fill_value=0)

print("Normalizing...")
RS = RobustScaler()
train = RS.fit_transform(train)  # scaler statistics come from train only
test = RS.transform(test)

print("Reshaping...")
# (rows, features) -> (sequences, SEQ_LEN, features)
n_features = train.shape[-1]
train = train.reshape(-1, SEQ_LEN, n_features)
test = test.reshape(-1, SEQ_LEN, n_features)

Loading data...
One hot encoding...
Dropping id and labels...
Normalizing...
Reshaping...


In [3]:
# Report the (sequences, steps, features) shape of each preprocessed array.
for label, array in (('train:', train), ('test:', test)):
    print(label, array.shape)

train: (75450, 80, 9)
test: (50300, 80, 9)


# Split dataset into train/val

In [4]:
from sklearn.model_selection import KFold


kf = KFold(n_splits=10,random_state=0,shuffle=True)

# Take the first of the 10 folds, computing the split exactly once.
# features, targets and masks are all indexed by sequence, so one pair of
# index arrays keeps the three aligned without relying on repeated
# kf.split() calls happening to return identical results.
train_idx, val_idx = next(kf.split(train))

train_features = [train[i] for i in train_idx]
val_features = [train[i] for i in val_idx]
train_targets = [targets[i] for i in train_idx]
val_targets = [targets[i] for i in val_idx]
train_masks = [masks[i] for i in train_idx]
val_masks = [masks[i] for i in val_idx]

print(f"{len(train_features):5d} samples to train")
print(f"{len(val_features):5d} samples to validate")

67905 samples to train
 7545 samples to validate


# Create dataloader

In [5]:
import numpy as np
from torch.utils.data import Dataset, DataLoader
import random


# Mini-batch size (number of sequences) passed to both DataLoaders created below.
batch_size=128

class TrainDataset(Dataset):
    """Indexable dataset yielding (features, targets, mask) triples.

    Args:
        features: indexable collection of per-sample feature arrays.
        targets: indexable collection of per-sample target arrays.
        masks: indexable collection of per-sample mask arrays.
        train: accepted but not used by this class.
    """

    def __init__(self, features, targets, masks, train=True):
        super().__init__()
        self.features, self.targets, self.masks = features, targets, masks

    def __len__(self):
        # Length is driven by the feature collection.
        return len(self.features)

    def __getitem__(self, index):
        # Cast at access time: float32 for features/targets, bool for the mask.
        feature = self.features[index].astype('float32')
        target = self.targets[index].astype('float32')
        mask = self.masks[index].astype('bool')
        return feature, target, mask

class TestDataset(Dataset):
    """Indexable dataset yielding feature arrays only (no targets or masks).

    Args:
        features: indexable collection of per-sample feature arrays.
    """

    def __init__(self, features):
        super().__init__()
        self.features = features

    def __len__(self):
        return len(self.features)

    def __getitem__(self, index):
        # Cast to float32 at access time.
        sample = self.features[index]
        return sample.astype('float32')

# Training split: reshuffled every epoch.
train_dataset = TrainDataset(features=train_features,
                             targets=train_targets,
                             masks=train_masks)
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4)
# Drops only the notebook-level name; the dataset keeps its own reference.
del train_features

# Validation split: fixed order.
val_dataset = TrainDataset(features=val_features,
                           targets=val_targets,
                           masks=val_masks)
val_dataloader = DataLoader(dataset=val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=4)
del val_features

In [7]:
import torch
from torch import nn
from torch.nn import functional as F

class PosEncoder(nn.Module):
    """Residual recurrent block used by MyModel as a trainable positional encoder.

    The input is run through a single-layer bidirectional RNN, the concatenated
    forward/backward states are projected back to `embd_dim` by a two-layer MLP,
    and the result is added to the input.

    Args:
        embd_dim: feature width of the input and of the output.
        rnn: name of the recurrent layer class in `torch.nn`, 'GRU' or 'LSTM'.

    Raises:
        AssertionError: if `rnn` is not 'GRU' or 'LSTM'.
    """

    def __init__(self, embd_dim, rnn='GRU'):
        assert rnn in ('GRU','LSTM'), 'rnn must be either GRU or LSTM'
        super(PosEncoder, self).__init__()
        # Resolve the layer class by attribute lookup instead of exec()-ing
        # generated source; the assert above restricts the name to GRU/LSTM.
        rnn_cls = getattr(nn, rnn)
        self.rnn = rnn_cls(embd_dim, embd_dim, num_layers=1, bidirectional=True)
        # The bidirectional RNN emits 2 * embd_dim features per step; a
        # unidirectional RNN would need embd_dim as the first Linear's input width.
        self.mlp = nn.Sequential(nn.Linear(embd_dim*2, 1024),
                                 nn.ReLU(),
                                 nn.Linear(1024, embd_dim))

    def forward(self, x):
        """Return `x` plus the MLP projection of the RNN output sequence."""
        # self.rnn(x) returns (output, hidden state); only the output is used.
        return x+self.mlp(self.rnn(x)[0])
    
class MyModel(nn.Module):
    """Per-time-step regressor: linear embedding -> stacked PosEncoder blocks ->
    dropout + LayerNorm -> transformer encoder layers -> linear head.

    With `use_conv=True` every transformer layer is wrapped between a Conv1d and
    a ConvTranspose1d with the same kernel size, and the result is added back to
    the running representation as a residual.

    Args:
        in_dim: number of input features per time step.
        out_dim: number of outputs per time step.
        max_seq: not used by this implementation; kept so existing callers
            that pass it keep working.
        embd_dim: width of the embedding and of every hidden representation.
        rnn_type: 'GRU' or 'LSTM', forwarded to PosEncoder.
        n_rnn_layers: number of stacked PosEncoder blocks.
        n_transformer_layers: number of transformer encoder layers (and of
            conv/deconv pairs when `use_conv` is set).
        dropout: dropout probability for the transformer layers and for the
            dropout applied after the positional encoders.
        nheads: number of attention heads per transformer layer.
        use_conv: enable the conv -> transformer -> deconv residual variant.
    """

    def __init__(self, in_dim, out_dim, max_seq=100, embd_dim=128, 
                 rnn_type = 'GRU',n_rnn_layers=2,
                 n_transformer_layers=6, dropout=0, nheads=8, 
                 use_conv=False):
        
        super(MyModel, self).__init__()
        self.use_conv = use_conv
        
        # embedding layer
        self.embedding = nn.Linear(in_dim, embd_dim)
        
        # trainable positional encoder
        self.pos_encoder = nn.ModuleList([PosEncoder(embd_dim,rnn=rnn_type) 
                                          for i in range(n_rnn_layers)])
        self.pos_encoder_dropout = nn.Dropout(dropout)
        self.pos_encoder_ln = nn.LayerNorm(embd_dim)
        
        # transformer layers
        transformer_layers = [nn.TransformerEncoderLayer(embd_dim, nhead=nheads, dropout=dropout) 
                              for i in range(n_transformer_layers)]
        self.transformer_layers = nn.ModuleList(transformer_layers)
        # Not used in forward(); kept so the parameter set (and therefore the
        # state_dict keys of saved checkpoints) stays unchanged.
        self.downsample = nn.Linear(embd_dim*2,embd_dim) 
        self.clf = nn.Linear(embd_dim, out_dim)
        
        # optional conv and deconv layers
        if self.use_conv:
            nlayers = n_transformer_layers
            # Kernel sizes run 2*nlayers-1, ..., 3, 1. With stride 1 and no
            # padding a kernel of size k shortens the sequence by k-1 steps and
            # the matching ConvTranspose1d restores the original length.
            self.conv_layers = nn.ModuleList([
                                                nn.Conv1d(embd_dim,embd_dim,
                                                         (nlayers-i)*2-1,stride=1,padding=0) 
                                                for i in range(nlayers)
                                                ])
            
            self.conv_ln = nn.ModuleList([nn.LayerNorm(embd_dim) for i in range(nlayers)])
            
            self.deconv_layers = nn.ModuleList([
                                                nn.ConvTranspose1d(embd_dim,embd_dim,
                                                                   (nlayers-i)*2-1,stride=1,padding=0) 
                                                for i in range(nlayers)
                                                ])
            self.deconv_ln = nn.ModuleList([nn.LayerNorm(embd_dim) for i in range(nlayers)])
        

    def forward(self, x):
        """Map a batch-first input (N, L, in_dim) to per-step predictions.

        Returns a tensor of shape (N, L, out_dim); the last dimension is
        squeezed away when out_dim == 1.
        """
        x=self.embedding(x)
        x = x.permute(1, 0, 2) # (L,N,feature_dim)
        
        for pos_encoder_layer in self.pos_encoder:
            pos_encoder_layer.rnn.flatten_parameters()
            x=pos_encoder_layer(x)

        x = self.pos_encoder_dropout(x)
        x = self.pos_encoder_ln(x)
        
        if not self.use_conv:
            for transformer_layer in self.transformer_layers:
                x = transformer_layer(x)
        else:
            enhanced_transformer_layers = zip(self.conv_layers,self.conv_ln,
                                              self.transformer_layers,
                                              self.deconv_layers,self.deconv_ln)
            for conv, convln, transformer_layer, deconv, deconvln in enhanced_transformer_layers:
                # (L,N,E) -> (N,E,L) for the 1-d convolution, then back to (L,N,E).
                x_ = convln(F.relu(conv(x.permute(1,2,0)).permute(2,0,1)))
                x_ = transformer_layer(x_)
                x_ = deconvln(F.relu(deconv(x_.permute(1,2,0)).permute(2,0,1)))
                # Out-of-place residual add: a view of x was just fed to conv,
                # so mutating x in place (x += x_) may invalidate a tensor that
                # autograd saved for the backward pass.
                x = x + x_
                

        x = x.permute(1, 0, 2)

        output = self.clf(x)

        return output.squeeze(-1)
    

In [8]:
# Hyper-parameters of the network, collected in one place.
model_config = dict(
    in_dim=train.shape[-1],   # feature count taken from the preprocessed array
    out_dim=1,
    embd_dim=128,
    n_transformer_layers=6,
    nheads=8,
    dropout=0,
    n_rnn_layers=2,
    rnn_type='GRU',
    use_conv=True,
)
model = MyModel(**model_config).cuda()

In [9]:
# Bare expression: the notebook shows the module's repr (its layer tree) as the cell output.
model

MyModel(
  (embedding): Linear(in_features=9, out_features=128, bias=True)
  (pos_encoder): ModuleList(
    (0): PosEncoder(
      (rnn): GRU(128, 128, bidirectional=True)
      (mlp): Sequential(
        (0): Linear(in_features=256, out_features=1024, bias=True)
        (1): ReLU()
        (2): Linear(in_features=1024, out_features=128, bias=True)
      )
    )
    (1): PosEncoder(
      (rnn): GRU(128, 128, bidirectional=True)
      (mlp): Sequential(
        (0): Linear(in_features=256, out_features=1024, bias=True)
        (1): ReLU()
        (2): Linear(in_features=1024, out_features=128, bias=True)
      )
    )
  )
  (pos_encoder_dropout): Dropout(p=0, inplace=False)
  (pos_encoder_ln): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (transformer_layers): ModuleList(
    (0): TransformerEncoderLayer(
      (self_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
      )
      (linear1): Linear(in_f

In [10]:
#install ranger optimizer
#! git clone https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer
#! pip install -e Ranger-Deep-Learning-Optimizer
! pip install pytorch_ranger



In [11]:
# Loss criterion. The optimizer itself is built in the training-setup cell
# further down (together with its LR scheduler), so it is not created here:
# an optimizer made in this cell would be overwritten before ever being used.
from pytorch_ranger import Ranger
# reduction='none' keeps one L1 value per element so the training loop can
# select entries with its mask before averaging.
criterion = nn.L1Loss(reduction='none')

# Training loop

In [12]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard writer used by the training loop's add_scalar calls;
# `comment` tags this run with the experiment name.
writer = SummaryWriter(comment='smallfs_modelsmall')

In [13]:
# --- run configuration ---
epochs = 150
cos_epoch = int(epochs*0.75)   # epoch index around which cosine annealing starts

# Placeholders; any real metric is expected to beat 100.
val_metric = 100
best_metric = 100

# Batches per pass over each loader.
steps_per_epoch = len(train_dataloader)
val_steps = len(val_dataloader)

optimizer = Ranger(model.parameters(), lr=8e-5)
# The scheduler is stepped per batch, so its horizon is expressed in optimizer
# steps: the remaining (epochs - cos_epoch) epochs times batches per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    (epochs - cos_epoch) * steps_per_epoch,
)

In [14]:
# Train for `epochs` epochs; after each one evaluate on the validation split and
# checkpoint the weights whenever the masked validation MAE improves.
# Loop-local names (batch_targets, mask_chunks, ...) are deliberately distinct
# from the notebook-level `targets` / `masks` arrays so that running this cell
# does not overwrite the data the earlier split cell depends on.
for epoch in range(epochs):
    # ---------------- training ----------------
    model.train()
    train_loss = 0
    t = time.time()
    for step, (features, batch_targets, mask) in enumerate(train_dataloader):
        features = features.cuda()
        batch_targets = batch_targets.cuda()
        mask = mask.cuda()

        optimizer.zero_grad()
        output = model(features)

        # Element-wise L1, averaged only over the positions selected by the mask.
        loss = criterion(output, batch_targets)
        loss = torch.masked_select(loss, mask).mean()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        print ("Step [{}/{}] Loss: {:.3f} Time: {:.1f}"
                           .format(step+1, steps_per_epoch, train_loss/(step+1), time.time()-t),end='\r',flush=True)
        # The scheduler's T_max covers (epochs - cos_epoch) epochs of steps, so
        # annealing has to start at epoch == cos_epoch (>=, not >) for the
        # cosine curve to complete by the final epoch.
        if epoch >= cos_epoch:
            scheduler.step()
    print('')
    train_loss /= steps_per_epoch

    writer.add_scalar('Loss/train', train_loss, epoch)

    # ---------------- validation ----------------
    model.eval()
    val_loss = 0
    t = time.time()
    preds, truths, mask_chunks = [], [], []
    with torch.no_grad():
        for step, (features, batch_targets, mask) in enumerate(val_dataloader):
            features = features.cuda()
            batch_targets = batch_targets.cuda()
            mask = mask.cuda()

            output = model(features)

            loss = criterion(output, batch_targets)
            loss = torch.masked_select(loss, mask).mean()
            val_loss += loss.item()

            preds.append(output.cpu())
            truths.append(batch_targets.cpu())
            mask_chunks.append(mask.cpu())
            print ("Validation Step [{}/{}] Loss: {:.3f} Time: {:.1f}"
                               .format(step+1, val_steps, val_loss/(step+1), time.time()-t),end='\r',flush=True)

    preds = torch.cat(preds).numpy()
    truths = torch.cat(truths).numpy()
    mask_all = torch.cat(mask_chunks).numpy()
    # Masked MAE over the whole validation set (val_loss below is instead the
    # mean of the per-batch means).
    val_metric = (np.abs(truths-preds)*mask_all).sum()/mask_all.sum()
    print('')
    val_loss /= val_steps
    writer.add_scalar('Loss/Validation', val_loss, epoch)
    # Also log the metric that drives checkpoint selection.
    writer.add_scalar('Metric/Validation', float(val_metric), epoch)

    if val_metric < best_metric:
        best_metric = val_metric
        torch.save(model.state_dict(),'smallfs_modelsmall.pt')

Step [3/531] Loss: 17.873 Time: 0.7

	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448278899/work/torch/csrc/utils/python_arg_parser.cpp:1025.)
  exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)


Step [116/531] Loss: 9.015 Time: 7.7

KeyboardInterrupt: 