# **stage2** (tweet-aware stacking) - Demo
This notebook shows how we combined results from different models with stacking in a model that is also aware of the input tweet, using pytorch. It was run in Google colab.


## Sources
- pytorch (https://pytorch.org/)
- Google colab (https://colab.research.google.com/)

## Reproducibility
The files needed to run this notebook have not been made available. It was used to create Submission (**#109820**) on AIcrowd:

| Accuracy | F1 |
|:---:|:---:|
| 89.9% | 90.0% |

### Loading results from each model

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler    
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive so the prediction / embedding files can be read (Colab only).
from google.colab import drive
drive.mount('/content/gdrive')

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training table: column 0 is read as the label below, the remaining columns as features.
df = pd.read_csv('gdrive/My Drive/pred_all_models_train.csv').drop(columns = 'Unnamed: 0')

X = df.iloc[:, 1:].values

# When True, a per-tweet text representation is appended after the prediction columns.
textRep = True
if textRep:
  # File-name prefix of the saved embeddings.
  # NOTE(review): the original comment read "or cbow"; the alternative prefix is unknown.
  embedding = 'cbow'
  X1_0 = np.load('gdrive/My Drive/'+embedding + '_train_neg_full_u.npy')
  X1_1 = np.load('gdrive/My Drive/'+embedding + '_train_pos_full_u.npy')
  # NOTE(review): assumes the rows of df are ordered negatives first, then positives,
  # so that the stacked embeddings line up row-by-row with the predictions -- confirm.
  X1 = np.concatenate((X1_0, X1_1))
  X = np.concatenate((X, X1), axis = 1)

y = df.iloc[:, 0].values

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.05, random_state=0)

# Fit the scaler on the training split ONLY and reuse those statistics everywhere else.
# Calling fit_transform on the validation split would refit the scaler, so validation
# data would be scaled with its own statistics and the test set (scaled later with this
# same `scaler` object) would be scaled with validation statistics instead of training ones.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

Mounted at /content/gdrive


In [None]:
class trainData(Dataset):
    """Dataset that pairs each feature row with the label stored at the same index."""

    def __init__(self, X_data, y_data):
        # Both containers are kept as given; they are indexed in parallel.
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        # The number of samples is taken from the feature container.
        return len(self.X_data)

    def __getitem__(self, index):
        sample = self.X_data[index]
        target = self.y_data[index]
        return sample, target

# Number of columns in df besides the first one (column 0 is the one read as the label).
dim_pred = df.shape[1] - 1
# Total width of one input row (prediction columns plus, if enabled, the text representation).
xdim = X.shape[1]
# The raw table and the unsplit arrays are no longer needed once the split has been made.
del df, X, y
# Wrap the numpy splits as float tensors inside the Dataset defined above.
train_data = trainData(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
val_data = trainData(torch.FloatTensor(X_val), torch.FloatTensor(y_val))
del X_train, X_val, y_train, y_val
BATCH_SIZE = 64
# drop_last=True discards the final incomplete batch, so every batch has exactly BATCH_SIZE rows.
# Only the training loader shuffles.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = DataLoader(dataset=val_data, batch_size=BATCH_SIZE, drop_last = True)

In [None]:
class clf3(nn.Module):
    """One-hidden-layer tanh network over the full ``xdim``-wide input row.

    Dropout (p=0.1) is applied to the raw input and again to the hidden
    activations. The output is a single unnormalised score per row (no
    sigmoid is applied here).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before: creation order decides
        # how the random generator is consumed during weight initialisation.
        self.dense = nn.Linear(in_features=xdim, out_features=xdim)
        self.output_layer = nn.Linear(in_features=xdim, out_features=1)
        self.activation = nn.Tanh()
        self.drop = nn.Dropout(p=0.1)

    def forward(self, inputs):
        """Map a ``(batch, xdim)`` tensor to a ``(batch, 1)`` tensor of scores."""
        hidden = self.activation(self.dense(self.drop(inputs)))
        return self.output_layer(self.drop(hidden))

class clf3_sep(nn.Module):
    """Network that treats the first ``dim_pred`` input columns separately.

    The first ``dim_pred`` columns of each row are passed through untouched,
    while the remaining columns are compressed to 4 values by a linear layer.
    Both parts are concatenated (compressed part first), passed through tanh
    and dropout, and mapped to one unnormalised score per row.

    The split position is captured when the model is constructed, so
    ``forward`` always slices consistently with the layer shapes even if the
    notebook-level ``dim_pred`` is changed afterwards.
    """

    def __init__(self):
        super(clf3_sep, self).__init__()
        # Freeze the split point alongside the layer shapes that depend on it.
        self.n_pred = dim_pred
        self.dense = nn.Linear(xdim-dim_pred, 4)
        self.output_layer = nn.Linear(4 + dim_pred, 1)
        self.activation = nn.Tanh()
        self.drop = nn.Dropout(p=0.1)

    def forward(self, inputs):
        """Map a ``(batch, xdim)`` tensor to a ``(batch, 1)`` tensor of scores."""
        important_features = inputs[:, :self.n_pred]

        # Only the trailing columns go through dropout and the compressing layer.
        x = self.drop(inputs[:, self.n_pred:])
        x = self.dense(x)

        x_all = torch.cat((x, important_features), dim=1)

        x_all = self.activation(x_all)
        x_all = self.drop(x_all)
        return self.output_layer(x_all)


class clf3_sep_2(nn.Module):
    """Variant of ``clf3_sep`` with a linear layer on the leading columns and Xavier init.

    The first ``dim_pred`` columns go through a square linear layer
    (``dense_feat``); the remaining columns go through dropout and are
    compressed to 4 values (``dense``). The two results are concatenated
    (compressed part first), passed through tanh and dropout, and mapped to
    one unnormalised score per row.

    Weights of the two inner layers use Xavier-uniform init with the tanh
    gain; the output layer uses gain 1. The split position is captured at
    construction so ``forward`` stays consistent with the layer shapes.
    """

    def __init__(self):
        super(clf3_sep_2, self).__init__()

        # Freeze the split point alongside the layer shapes that depend on it.
        self.n_pred = dim_pred

        self.activation = nn.Tanh()
        opt_gain = torch.nn.init.calculate_gain('tanh')

        self.drop = nn.Dropout(p=0.1)

        self.dense = nn.Linear(xdim-dim_pred, 4)
        torch.nn.init.xavier_uniform_(self.dense.weight, gain = opt_gain)

        self.dense_feat = nn.Linear(dim_pred, dim_pred)
        torch.nn.init.xavier_uniform_(self.dense_feat.weight, gain = opt_gain)

        self.output_layer = nn.Linear(4 + dim_pred, 1)
        torch.nn.init.xavier_uniform_(self.output_layer.weight, gain = 1)

    def forward(self, inputs):
        """Map a ``(batch, xdim)`` tensor to a ``(batch, 1)`` tensor of scores."""
        important_features = self.dense_feat(inputs[:, :self.n_pred])

        # Only the trailing columns go through dropout before being compressed.
        x = self.drop(inputs[:, self.n_pred:])
        x = self.dense(x)

        x_all = torch.cat((x, important_features), dim=1)

        x_all = self.activation(x_all)
        x_all = self.drop(x_all)
        return self.output_layer(x_all)

class stage2clf(nn.Module):
    """Two-hidden-layer ReLU network (64 -> 64 units) with batch normalisation.

    Each hidden layer is followed by ReLU and its own ``BatchNorm1d``; dropout
    (p=0.1) is applied before the output layer, which yields one unnormalised
    score per row.

    Each hidden layer has a dedicated batch-norm module (``batch_norm1`` /
    ``batch_norm2``, named as in ``stage2clf2``). Sharing one instance between
    the two layers would make them share affine parameters and blend the
    running statistics of two different activation distributions.
    """

    def __init__(self):
        super(stage2clf, self).__init__()
        self.hidden_layer_1 = nn.Linear(xdim, 64)
        self.hidden_layer_2 = nn.Linear(64, 64)
        self.output_layer = nn.Linear(64, 1)

        self.relu = nn.ReLU()
        self.drop = nn.Dropout(p=0.1)
        # One normalisation module per hidden layer.
        self.batch_norm1 = nn.BatchNorm1d(64)
        self.batch_norm2 = nn.BatchNorm1d(64)

    def forward(self, inputs):
        """Map a ``(batch, xdim)`` tensor to a ``(batch, 1)`` tensor of scores."""
        x = inputs
        x = self.relu(self.hidden_layer_1(x))
        x = self.batch_norm1(x)
        x = self.relu(self.hidden_layer_2(x))
        x = self.batch_norm2(x)
        x = self.drop(x)
        x = self.output_layer(x)

        return x

class stage2clf2(nn.Module):
    """Two-hidden-layer ReLU network (128 -> 64 units) with per-layer batch norm.

    Each hidden layer is followed by ReLU and then its own ``BatchNorm1d``.
    Dropout (p=0.1) is applied before the output layer, which produces one
    unnormalised score per row.
    """

    def __init__(self):
        super().__init__()
        # Linear layers first, in the original order, so weight initialisation
        # draws from the random generator in the same sequence.
        self.hidden_layer_1 = nn.Linear(in_features=xdim, out_features=128)
        self.hidden_layer_2 = nn.Linear(in_features=128, out_features=64)
        self.output_layer = nn.Linear(in_features=64, out_features=1)

        self.relu = nn.ReLU()
        self.drop = nn.Dropout(p=0.1)
        self.batch_norm1 = nn.BatchNorm1d(num_features=128)
        self.batch_norm2 = nn.BatchNorm1d(num_features=64)

    def forward(self, inputs):
        """Map a ``(batch, xdim)`` tensor to a ``(batch, 1)`` tensor of scores."""
        first = self.batch_norm1(self.relu(self.hidden_layer_1(inputs)))
        second = self.batch_norm2(self.relu(self.hidden_layer_2(first)))
        return self.output_layer(self.drop(second))

class stage2clf2_init(nn.Module):
    """``stage2clf2`` architecture with explicit Xavier-uniform weight initialisation.

    Hidden layers (128 and 64 units) are initialised with the ReLU gain, the
    output layer with gain 1. Each hidden layer is followed by ReLU and its
    own ``BatchNorm1d``; dropout (p=0.1) precedes the output layer, which
    yields one unnormalised score per row.
    """

    @staticmethod
    def _xavier_linear(n_in, n_out, gain):
        """Create a linear layer and re-initialise its weight with Xavier-uniform."""
        layer = nn.Linear(n_in, n_out)
        torch.nn.init.xavier_uniform_(layer.weight, gain=gain)
        return layer

    def __init__(self):
        super().__init__()
        self.activation = nn.ReLU()
        relu_gain = torch.nn.init.calculate_gain('relu')

        # Each layer is created and initialised before the next one is created,
        # keeping the order in which random numbers are drawn.
        self.hidden_layer_1 = self._xavier_linear(xdim, 128, relu_gain)
        self.hidden_layer_2 = self._xavier_linear(128, 64, relu_gain)
        self.output_layer = self._xavier_linear(64, 1, 1)

        self.drop = nn.Dropout(p=0.1)
        self.batch_norm1 = nn.BatchNorm1d(128)
        self.batch_norm2 = nn.BatchNorm1d(64)

    def forward(self, inputs):
        """Map a ``(batch, xdim)`` tensor to a ``(batch, 1)`` tensor of scores."""
        first = self.batch_norm1(self.activation(self.hidden_layer_1(inputs)))
        second = self.batch_norm2(self.activation(self.hidden_layer_2(first)))
        return self.output_layer(self.drop(second))

In [None]:
# Optimiser step size.
LEARNING_RATE = 0.001

# Build the network and move it to the selected device (Module.to returns the module itself).
model = stage2clf2_init().to(device)

# The network emits raw scores; this loss applies the sigmoid internally.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=LEARNING_RATE)

# Multiply the learning rate by 0.9 on every scheduler step.
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=0.9)

In [None]:
def binary_acc(y_pred, y_test):
    """Return the accuracy, in percent, of raw scores against 0/1 labels.

    Args:
        y_pred: tensor of raw (pre-sigmoid) scores.
        y_test: tensor of 0/1 labels with the same number of elements as
            ``y_pred``; it is reshaped to ``y_pred``'s shape before comparing.

    Returns:
        A 0-d float tensor holding the percentage of correct predictions.
        The value is NOT rounded: rounding every batch to a whole percent
        before averaging over batches throws away precision.

    Raises:
        RuntimeError: if ``y_test`` cannot be reshaped to ``y_pred``'s shape.
    """
    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    # Align shapes explicitly: comparing (N, 1) with (N,) would silently
    # broadcast to an (N, N) matrix and produce a meaningless count.
    y_test = y_test.reshape(y_pred_tag.shape)

    correct_results_sum = (y_pred_tag == y_test).sum().float()
    return 100.0 * correct_results_sum / y_test.numel()

In [None]:
def validation_stats(network, loader):
    """Return the mean per-batch accuracy of ``network`` over ``loader``.

    The network is switched to evaluation mode for the duration of the pass,
    so dropout is disabled and batch-norm layers use (and do not update) their
    running statistics. The previous train/eval mode is restored afterwards,
    even if an exception is raised.

    Args:
        network: the ``nn.Module`` to evaluate.
        loader: iterable yielding ``(features, labels)`` batches.

    Returns:
        A 0-d float tensor with the mean of the per-batch accuracies.
    """
    was_training = network.training
    network.eval()

    acc = []
    try:
        with torch.no_grad():
            for x, y in loader:
                x, y = x.to(device), y.to(device)
                y_pred = network(x)
                # Store plain floats so the list can be turned into a CPU tensor below.
                acc.append(float(binary_acc(y_pred, y.unsqueeze(1))))
    finally:
        # Hand the network back in the mode the caller left it in.
        network.train(was_training)

    return torch.tensor(acc).mean()

In [None]:
EPOCHS = 20

# Put the network in training mode before the first epoch.
model.train()
n_batches = len(train_loader)
for e in range(1, EPOCHS+1):
    # Running sums over the batches of this epoch.
    epoch_loss, epoch_acc = 0, 0

    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        # Labels arrive as (batch,); the network outputs (batch, 1).
        targets = y_batch.unsqueeze(1)

        optimizer.zero_grad()

        y_pred = model(X_batch)
        loss = criterion(y_pred, targets)
        acc = binary_acc(y_pred, targets)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    # Validation accuracy after the epoch; the printed loss is the mean batch loss times 100.
    val_acc = validation_stats(model, val_loader)
    print('Epoch {:d}.\tLoss: {:.5f}\tAccuracy: {:.3f}% (train) / {:.3f}% (val)'.format(e, 100*epoch_loss/n_batches, epoch_acc/n_batches, val_acc))
    # Decay the learning rate once per epoch.
    scheduler.step()


Epoch 1.	Loss: 21.62770	Accuracy: 91.370% (train) / 91.619% (val)
Epoch 2.	Loss: 20.90907	Accuracy: 91.639% (train) / 91.607% (val)
Epoch 3.	Loss: 20.79532	Accuracy: 91.673% (train) / 91.642% (val)
Epoch 4.	Loss: 20.73058	Accuracy: 91.688% (train) / 91.701% (val)
Epoch 5.	Loss: 20.69027	Accuracy: 91.693% (train) / 91.677% (val)
Epoch 6.	Loss: 20.64886	Accuracy: 91.712% (train) / 91.691% (val)
Epoch 7.	Loss: 20.60532	Accuracy: 91.732% (train) / 91.664% (val)
Epoch 8.	Loss: 20.58396	Accuracy: 91.731% (train) / 91.700% (val)
Epoch 9.	Loss: 20.55064	Accuracy: 91.750% (train) / 91.702% (val)
Epoch 10.	Loss: 20.52824	Accuracy: 91.756% (train) / 91.707% (val)
Epoch 11.	Loss: 20.49924	Accuracy: 91.761% (train) / 91.701% (val)
Epoch 12.	Loss: 20.47783	Accuracy: 91.781% (train) / 91.735% (val)
Epoch 13.	Loss: 20.44536	Accuracy: 91.779% (train) / 91.739% (val)
Epoch 14.	Loss: 20.42752	Accuracy: 91.790% (train) / 91.745% (val)
Epoch 15.	Loss: 20.40386	Accuracy: 91.803% (train) / 91.741% (val)
Epoc

In [None]:
# Mount Google Drive again before the test files are read; force_remount=True
# requests a fresh mount even when the path is already mounted.
from google.colab import drive 
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
# Test-set table, converted to a plain ndarray exactly as the training features were.
# The scaler was fitted on an ndarray, so it receives the same input type whether or
# not the text representation is appended below.
X_t = pd.read_csv('gdrive/My Drive/pred_all_models_test.csv').drop(columns = 'Unnamed: 0').values

if textRep:
  # Append the test-set text representation after the prediction columns,
  # mirroring the column layout used for training.
  X1 = np.load('gdrive/My Drive/'+ embedding + '_test.npy')
  X_t = np.concatenate((X_t, X1), axis = 1)

# Reuse the already-fitted scaler; it must not be refitted on test data.
X_t = scaler.transform(X_t)

In [None]:
class testData(Dataset):
    """Label-free dataset: each item is just the feature row at that index."""

    def __init__(self, X_data):
        self.X_data = X_data

    def __len__(self):
        # One item per feature row.
        return len(self.X_data)

    def __getitem__(self, index):
        row = self.X_data[index]
        return row
    

# Wrap the scaled test features as a float tensor.
test_data = testData(torch.FloatTensor(X_t))
# No shuffling and no dropped batch: every test row is kept, in its original order.
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False, drop_last=False)

In [None]:
# Collect the rounded sigmoid outputs (0.0 / 1.0) batch by batch.
y_pred_list = []
model.eval()  # disable dropout and use batch-norm running statistics
with torch.no_grad():
    for X_batch in test_loader:
        X_batch = X_batch.to(device)
        pred = torch.round(torch.sigmoid(model(X_batch)))
        y_pred_list.append(pred.cpu().numpy())

# Flatten every batch with reshape(-1) rather than squeeze(): squeeze() collapses a
# one-sample batch to a 0-d array whose tolist() is a bare float, which cannot be
# concatenated to a list.
y_pred = [a.reshape(-1).tolist() for a in y_pred_list]
predictions = np.array([value for row in y_pred for value in row])

# Map the 0/1 predictions to -1/+1 and write them with a 1-based 'Id' index.
preds = pd.DataFrame((2*predictions-1).astype(int), columns = ['Prediction'], index = np.arange(1, len(predictions)+1))
preds.index.names = ['Id']
preds.to_csv('stage2_nn_init.csv')