# Model building

https://www.kaggle.com/vadbeg/pytorch-nn-with-embeddings-and-catboost/notebook#PyTorch

mostly based off this example

In [1]:
# import load_data function from 
%load_ext autoreload
%autoreload 2

# fix system path
import sys
sys.path.append("/home/jovyan/work")

In [2]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

In [3]:
import random

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministick = True
    torch.backends.cudnn.benchmark = False 
    
set_seed(27)

In [8]:
from src.data.sets import load_sets

X_train, y_train, X_val, y_val, X_test, y_test = load_sets()

In [9]:
X_test.shape

(317320, 5)

In [10]:
X_train.shape

(951959, 5)

In [11]:
# need to convert to tensors
from src.models.pytorch import EmbeddingDataset

In [36]:
train_dataset = EmbeddingDataset(X_train, 
                                      targets=y_train,
                                      cat_cols_idx=[0],
                                      cont_cols_idx=[1,2,3,4])

val_dataset = EmbeddingDataset(X_val, 
                                      targets=y_val,
                                      cat_cols_idx=[0],
                                      cont_cols_idx=[1,2,3,4],
                                      is_train=False)


test_dataset = EmbeddingDataset(X_test,
                                     cat_cols_idx=[0],
                                     cont_cols_idx=[1,2,3,4],
                                     is_train=False)

In [37]:
print(f'First element of train_dataset: {train_dataset[1]}',
      f'First element of test_dataset: {test_dataset[1]}', sep='\n')

First element of train_dataset: {'data': [tensor([4918.]), tensor([-3.2047, -2.1777, -0.3572, -0.4001])], 'target': tensor(13)}
First element of test_dataset: {'data': [tensor([701.]), tensor([ 0.3790,  0.2570,  0.3757, -0.4001])]}


In [14]:
# embedding example
class ClassificationEmbdNN(torch.nn.Module):
    
    def __init__(self, emb_dims, no_of_cont=None):
        super(ClassificationEmbdNN, self).__init__()
        
        self.emb_layers = torch.nn.ModuleList([torch.nn.Embedding(x, y)
                                               for x, y in emb_dims])
        
        no_of_embs = sum([y for x, y in emb_dims])
        self.no_of_embs = no_of_embs
        self.emb_dropout = torch.nn.Dropout(0.2)
        
        self.no_of_cont = 0
        if no_of_cont:
            self.no_of_cont = no_of_cont
            self.bn_cont = torch.nn.BatchNorm1d(no_of_cont)
        
        self.fc1 = torch.nn.Linear(in_features=self.no_of_embs + self.no_of_cont, 
                                   out_features=208)
        self.dropout1 = torch.nn.Dropout(0.2)
        self.bn1 = torch.nn.BatchNorm1d(208)
        self.act1 = torch.nn.ReLU()
        
        self.fc2 = torch.nn.Linear(in_features=208, 
                                   out_features=208)
        self.dropout2 = torch.nn.Dropout(0.2)
        self.bn2 = torch.nn.BatchNorm1d(208)
        self.act2 = torch.nn.ReLU()
        
#         self.fc3 = torch.nn.Linear(in_features=256, 
#                                    out_features=64)
#         self.dropout3 = torch.nn.Dropout(0.2)
#         self.bn3 = torch.nn.BatchNorm1d(64)
#         self.act3 = torch.nn.ReLU()
        
        self.fc3 = torch.nn.Linear(in_features=208, 
                                   out_features=104)
        self.act3 = torch.nn.Softmax()
        
    def forward(self, x_cat, x_cont=None):
        if self.no_of_embs != 0:
            x = [emb_layer(x_cat[:, i])
                 for i, emb_layer in enumerate(self.emb_layers)]
        
            x = torch.cat(x, 1)
            x = self.emb_dropout(x)
            
        if self.no_of_cont != 0:
            x_cont = self.bn_cont(x_cont)
            
            if self.no_of_embs != 0:
                x = torch.cat([x, x_cont], 1)
            else:
                x = x_cont
        
        x = self.fc1(x)
        x = self.dropout1(x)
        x = self.bn1(x)
        x = self.act1(x)
        
        x = self.fc2(x)
        x = self.dropout2(x)
        x = self.bn2(x)
        x = self.act2(x)
        
#         x = self.fc3(x)
#         x = self.dropout3(x)
#         x = self.bn3(x)
#         x = self.act3(x)
        
        x = self.fc3(x)
        x = self.act3(x)
        
        return x

In [65]:
model = ClassificationEmbdNN(emb_dims=[[5742, 252]], 
                             no_of_cont=4)

In [19]:
from src.models.pytorch import get_device

device = get_device()
model.to(device)

ClassificationEmbdNN(
  (emb_layers): ModuleList(
    (0): Embedding(5724, 200)
  )
  (emb_dropout): Dropout(p=0.2, inplace=False)
  (bn_cont): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=204, out_features=208, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (bn1): BatchNorm1d(208, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU()
  (fc2): Linear(in_features=208, out_features=208, bias=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (bn2): BatchNorm1d(208, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act2): ReLU()
  (fc3): Linear(in_features=208, out_features=104, bias=True)
  (act3): Softmax(dim=None)
)

In [53]:
print(model)

ClassificationEmbdNN(
  (emb_layers): ModuleList(
    (0): Embedding(5724, 1000)
  )
  (emb_dropout): Dropout(p=0.2, inplace=False)
  (bn_cont): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=1004, out_features=208, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (bn1): BatchNorm1d(208, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU()
  (fc2): Linear(in_features=208, out_features=208, bias=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (bn2): BatchNorm1d(208, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act2): ReLU()
  (fc3): Linear(in_features=208, out_features=104, bias=True)
  (act3): Softmax(dim=None)
)


In [22]:
criterion = torch.nn.CrossEntropyLoss()

In [23]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [24]:
BATCH_SIZE = 300
N_EPOCHS = 10

In [38]:
valid_loader = DataLoader(train_dataset,batch_size=BATCH_SIZE)

train_loader = DataLoader(val_dataset,batch_size=BATCH_SIZE)

In [39]:
next(iter(train_loader))

{'data': [tensor([[4884.],
          [ 163.],
          [ 217.],
          [ 163.],
          [2262.],
          [5378.],
          [4280.],
          [ 413.],
          [5003.],
          [2461.],
          [2461.],
          [2316.],
          [3841.],
          [5066.],
          [4453.],
          [2461.],
          [3341.],
          [ 151.],
          [4830.],
          [2239.],
          [5014.],
          [3237.],
          [2514.],
          [5378.],
          [5014.],
          [5448.],
          [3189.],
          [5242.],
          [2523.],
          [1605.],
          [2601.],
          [5364.],
          [ 163.],
          [5078.],
          [ 224.],
          [2689.],
          [4132.],
          [ 375.],
          [ 360.],
          [3888.],
          [2239.],
          [5392.],
          [1334.],
          [4598.],
          [1951.],
          [4680.],
          [1306.],
          [5330.],
          [ 154.],
          [3804.],
          [4950.],
          [ 386.],
    

In [26]:
from tqdm import tqdm_notebook as tqdm

In [27]:
from sklearn.metrics import roc_auc_score

In [67]:
def train_network(model, train_loader, valid_loader,
                  loss_func, optimizer, n_epochs=20,
                  saved_model='model.pt'):
    
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    train_losses = list()
    valid_losses = list()
    
    valid_loss_min = np.Inf
    
    for epoch in range(n_epochs):
        train_loss = 0.0
        valid_loss = 0.0
        
        train_auc = 0.0
        valid_auc = 0.0
        
        model.train()
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            
            output = model(batch['data'][0].to(device, 
                                               dtype=torch.long),
                           batch['data'][1].to(device, 
                                               dtype=torch.float))
            
            
            loss = loss_func(output, batch['beer_style'].to(device, 
                                                        dtype=torch.float))
            
            loss.backward()
            optimizer.step()
            
            train_auc += metrics.roc_auc_score(batch['beer_style'].cpu().numpy(),
                                               output.detach().cpu().numpy())

            train_loss += loss.item() * batch['data'][0].size(0)  #!!!
    

        model.eval()
        for batch in tqdm(valid_loader):
            output = model(batch['data'][0].to(device, 
                                               dtype=torch.long),
                           batch['data'][1].to(device, 
                                               dtype=torch.float))
            
            
            loss = loss_func(output, batch['beer_style'].to(device, 
                                                        dtype=torch.float))
            
            valid_auc += metrics.roc_auc_score(batch['beer_style'].cpu().numpy(),
                                               output.detach().cpu().numpy())
            valid_loss += loss.item() * batch['data'][0].size(0)  #!!!
           
        
        train_loss = np.sqrt(train_loss / len(train_loader.sampler.indices))
        valid_loss = np.sqrt(valid_loss / len(valid_loader.sampler.indices))

        train_auc = train_auc / len(train_loader)
        valid_auc = valid_auc / len(valid_loader)
        
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        print('Epoch: {}. Training loss: {:.6f}. Validation loss: {:.6f}'
              .format(epoch, train_loss, valid_loss))
        print('Training AUC: {:.6f}. Validation AUC: {:.6f}'
              .format(train_auc, valid_auc))
        
        if valid_loss < valid_loss_min:  # let's save the best weights to use them in prediction
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model...'
                  .format(valid_loss_min, valid_loss))
            
            torch.save(model.state_dict(), saved_model)
            valid_loss_min = valid_loss
            
    
    return train_losses, valid_losses

In [68]:
train_losses, valid_losses = train_network(model=model, 
                                           train_loader=train_loader, 
                                           valid_loader=valid_loader, 
                                           loss_func=criterion, 
                                           optimizer=optimizer,
                                           n_epochs=N_EPOCHS, 
                                           saved_model='../models/embed_3layers.pt')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=1058.0), HTML(value='')))



KeyError: 'beer_style'

In [64]:
from torch import nn
input_dim = 11
embedding_dim = 2
embedding = nn.Embedding(input_dim, embedding_dim)
err = False
if err:
    #Any input more than input_dim - 1, here input_dim = 10
    #Any input less than zero
    input_to_embed = torch.tensor([10])
else:
    input_to_embed = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
embed = embedding(input_to_embed)
print(embed)

tensor([[-0.6896,  0.3590],
        [ 0.4217, -0.5570],
        [ 0.7313, -0.0464],
        [ 0.3036, -0.0141],
        [ 1.1071, -0.7534],
        [ 1.0179, -0.7225],
        [ 0.9024, -1.2163],
        [ 0.2739, -0.0718],
        [-0.5686,  0.6407],
        [ 2.6649,  0.9455]], grad_fn=<EmbeddingBackward>)
