# Task B

In [1]:
import os
import pandas as pd
import numpy as np
import torch
import warnings
import random
import math
import time

# setting
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", 20)

# ===================================

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

In [2]:
import torch.nn as nn
import torch.nn.functional as F
from collections import Counter
from torchtext import datasets
from torchtext.vocab import GloVe
from torchtext.vocab import vocab

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [3]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [4]:
# reproducibility (global setting)

def seed_everything(seed=12):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything()

### load data

In [5]:
dataset = pd.read_csv('preprocessedtrain_deep.csv',
                      index_col = 0,
                      converters = {'reviewTexttokenized': eval,
                                    'summarytokenized': eval}
                     )


In [6]:
dataset.head(3)

Unnamed: 0,rating,reviewText,summary,reviewTextCharCount,overlapScore,summaryCharCount,reviewTextUpperCount,summaryUpperCount,reviewTexttokenized,summarytokenized
0,3,plot storyline : 5 starsthis novel accomplishe...,3 1/4 stars,3370,0.0,11,72,1,"[plot, storyline, 5, starsthis, novel, accompl...","[3, 1/4, star]"
1,3,i did not like how el ended this one . i do no...,"it was going great , then just ... ended",363,0.428571,39,8,1,"[not, like, how, al, end, one, not, want, ruin...","[go, great, just, ..., end]"
2,5,i love how old fashioned this family is - they...,loved all 4!,287,0.0,12,4,8,"[love, how, old, fashion, family, see, someone...","[love, all, 4, !]"


In [7]:
counter = Counter()

MaxLengthReview = 0
MaxLengthSummary = 0

for row in dataset['reviewTexttokenized']:
    counter.update(row)
    if len(row) > MaxLengthReview:
        MaxLengthReview = len(row)

for row in dataset['summarytokenized']:
    counter.update(row)
    if len(row) > MaxLengthSummary:
        MaxLengthSummary = len(row)
        
print('MaxLengthReview', MaxLengthReview)
print('MaxLengthSummary',MaxLengthSummary)

MaxLengthReview 1270
MaxLengthSummary 17


In [8]:
GLovevectors = GloVe(name='6B',
                     dim=200,
                     cache='D:/program files/jupyter notebook/usyd/6850/cache')

In [9]:
# all the config in the model and training
class Config():

    #embedding config
       #how many number of words in glove embedding dict
       #if error occurs - change 400001 to 400000
    embed_vocab_num = 400001
    
    embed_dim = 200 # dimension of the embedding
    embed_trainable = False # whether train(fine tune) the weight of embedding
    
    #Bi-LSTM config
    hidden_size = 100
    output_size = 1
    dropout = 0.1
    lstm_layers = 1
    
    # CNN config
    kernel_num = 32 # number of kernels
    kernel_size = [2,3,4] # CNN filter size - similar to n-gram
    
    max_seq_len_review = MaxLengthReview
    max_seq_len_summary = MaxLengthSummary
    
    batch_size = 128
    epoch = 12
    
    learning_rate = 0.05
    
config = Config()

In [10]:
def Token2EmbedIndex(row):
    """
    replace the token with the index in the glove dictionary
    if the word cannot be located in the dictionary, it will be assigned
    with the same index as <unk> - the last key in the dictionary
    """
    transferedList = []
    for token in row:
        try:
            transferedList.append(GLovevectors.stoi[token])
        except KeyError:
            transferedList.append(400000)
    return transferedList

dataset['reviewTexttokenized'] = dataset['reviewTexttokenized'].apply(Token2EmbedIndex)
dataset['summarytokenized'] = dataset['summarytokenized'].apply(Token2EmbedIndex)

def paddingfunc(row, length):
    """padding the list to the same length with 0"""
    if len(row) == length:
        pass
    elif len(row) < length:
        for i in range(length - len(row)):
            row.append(0)
    elif len(row) > length:
        row = row[:length]
    return np.array(row)

dataset['reviewTexttokenized'] = dataset['reviewTexttokenized'].apply(lambda x: paddingfunc(x,length = config.max_seq_len_review))
dataset['summarytokenized'] = dataset['summarytokenized'].apply(lambda x: paddingfunc(x,length = config.max_seq_len_summary))

In [11]:
# unknown embedding
GLovevectors.vectors = torch.cat((GLovevectors.vectors,GLovevectors.vectors.mean(axis=0).unsqueeze(0)),0)

In [12]:
ally = dataset[['rating']].apply(lambda x:x-1)
allX = dataset[['reviewTexttokenized',
                'summarytokenized']]

# splitting the test
## =============
X_tr_va, X_test, y_tr_va, y_test = train_test_split(allX,
                                                    ally,
                                                    test_size=1/6,
                                                    random_state=12,
                                                    stratify=ally)

X_train, X_valid, y_train, y_valid = train_test_split(X_tr_va,
                                                      y_tr_va,
                                                      test_size=1/5,
                                                      random_state=12,
                                                      stratify=y_tr_va)

#X_tr_va.reset_index(drop = True, inplace = True)
#X_test.reset_index(drop = True, inplace = True)
#y_tr_va.reset_index(drop = True, inplace = True)
#y_test.reset_index(drop = True, inplace = True)

## the rest will be cross validation - use stratified kfold
#SKF = StratifiedKFold(n_splits = 5, random_state = 12, shuffle  = True)
#DFlist = []
#for train_index, valid_index in SKF.split(X_tr_va, y_tr_va):
#    X_train, X_valid = X_tr_va.iloc[train_index], X_tr_va.iloc[valid_index]
#    y_train, y_valid = y_tr_va.iloc[train_index], y_tr_va.iloc[valid_index]
#    
#    DFlist.append((X_train, X_valid, y_train, y_valid))

In [13]:
# DFlist[1][1].head(2)

In [14]:
traindata = allX.join(ally)

In [15]:
ratings = torch.tensor(ally.values, dtype=torch.float)

In [16]:
def task_importance_weights(label_array):
    uniq = torch.unique(label_array)
    num_examples = label_array.size(0)

    m = torch.zeros(uniq.shape[0])

    for i, t in enumerate(torch.arange(torch.min(uniq), torch.max(uniq))):
        m_k = torch.max(torch.tensor([label_array[label_array > t].size(0), 
                                      num_examples - label_array[label_array > t].size(0)]))
        m[i] = torch.sqrt(m_k.float())

    imp = m/torch.max(m)
    return imp

imp = task_importance_weights(ratings)
imp = imp[0:4]

In [17]:
# dataset loader
class ratingDataset(Dataset):

    def __init__(self, review, summary, rating):

        df = traindata
        self.rating = rating
        self.review = review
        self.summary = summary

    def __getitem__(self, index):      
        review = torch.Tensor(self.review.iloc[index]).long()
        summary= torch.Tensor(self.summary.iloc[index]).long()
        label = self.rating.iloc[index]
        levels = [1]*label + [0]*(5 - 1 - label) #encoding the target
        levels = torch.tensor(levels, dtype=torch.float32)

        return review, summary, label, levels

    def __len__(self):
        return self.rating.shape[0]

In [18]:
train_dataset = ratingDataset(X_train['reviewTexttokenized'],
                              X_train['summarytokenized'],
                              y_train['rating'])

valid_dataset = ratingDataset(X_valid['reviewTexttokenized'],
                              X_valid['summarytokenized'],
                              y_valid['rating'])

test_dataset = ratingDataset(X_test['reviewTexttokenized'],
                             X_test['summarytokenized'],
                             y_test['rating'])

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=config.batch_size,
                          shuffle=True)

valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=config.batch_size,
                          shuffle=False)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=config.batch_size,
                         shuffle=False)

## Model

In [19]:
class BiLSTM_TextCNN(nn.Module):
    def __init__(self, config):
        super(BiLSTM_TextCNN, self).__init__()
        #embedding layer
        
        self.embedding = nn.Embedding(config.embed_vocab_num, config.embed_dim)
        self.embedding.weight.data.copy_(GLovevectors.vectors)
        self.embedding.weight.data.requires_grad = config.embed_trainable
        
        # Bi-LSTM architecture
        
        self.BiLSTM = nn.LSTM(input_size = config.embed_dim,
                              hidden_size = config.hidden_size,
                              num_layers = config.lstm_layers,
                              bidirectional=True,
                              # first dimension is batch size
                              batch_first=True,
                              dropout = config.dropout                             
                             )
        # output dim (batch, sentense length, hidden size * 2)
        
        # CNN architecture after Bi LSTM
        self.conv_block_2 = nn.Sequential(
            nn.Conv1d(in_channels = config.hidden_size*2,
                      out_channels = config.kernel_num,
                      kernel_size = config.kernel_size[0]),
            nn.ReLU(),#activate
            nn.MaxPool1d(config.max_seq_len_review - config.kernel_size[0] + 1) #(n-2+1)*1.
        )
        
        self.conv_block_3 = nn.Sequential(
            nn.Conv1d(in_channels = config.hidden_size*2,
                      out_channels = config.kernel_num,
                      kernel_size = config.kernel_size[1]),
            nn.ReLU(),#activate
            nn.MaxPool1d(config.max_seq_len_review - config.kernel_size[1] + 1) #(n-3+1)*1.
        )
        
        self.conv_block_4 = nn.Sequential(
            nn.Conv1d(in_channels = config.hidden_size*2,
                      out_channels = config.kernel_num,
                      kernel_size = config.kernel_size[2]),
            nn.ReLU(),#activate
            nn.MaxPool1d(config.max_seq_len_review - config.kernel_size[2] + 1) #(n-4+1)*1.
        )
        
        # CNN architecture for summary
        #  ================================
        
        self.conv_block_2_s = nn.Sequential(
            nn.Conv1d(config.embed_dim, config.kernel_num, config.kernel_size[0]),
            nn.ReLU(),#activate
            nn.MaxPool1d(config.max_seq_len_summary - config.kernel_size[0] + 1) #(n-2+1)*1.
        )
        
        self.conv_block_3_s = nn.Sequential(
            nn.Conv1d(config.embed_dim, config.kernel_num, config.kernel_size[1]),
            nn.ReLU(),#activate
            nn.MaxPool1d(config.max_seq_len_summary - config.kernel_size[1] + 1) #(n-3+1)*1.
        )
        
        self.conv_block_4_s = nn.Sequential(
            nn.Conv1d(config.embed_dim, config.kernel_num, config.kernel_size[2]),
            nn.ReLU(),#activate
            nn.MaxPool1d(config.max_seq_len_summary - config.kernel_size[2] + 1) #(n-4+1)*1.
        )
        # classify layer =============================
        
        self.dropout = nn.Dropout(config.dropout)
        
        # 2 cnn + 2 individual input
        self.fc = nn.Linear(config.kernel_num * len(config.kernel_size)*2, 1) #+2
        self.linear_1_bias = nn.Parameter(torch.zeros(5-1).float())
        
    def forward(self, review, summary):
        # shape:
        # review = batchsize , max_lengthreview
        # summary = batchsize , max_lengthsummary
        # 2 Uppers = batchsize , 1
        
        # (bi lstm + cnn) for reviewtokenized
            # Hidden and cell state definion
        #h0 = torch.zeros((2*config.lstm_layers, config.batch_size, config.hidden_size))
        #c0 = torch.zeros((2*config.lstm_layers, config.batch_size, config.hidden_size))
                # normal distributed init
        #torch.nn.init.xavier_normal_(h0)  
        #torch.nn.init.xavier_normal_(c0)
            # model
        embedded_review = self.embedding(review) # embedded = batch, length, embedd dim
        packed_output, (hidden, cell) = self.BiLSTM(embedded_review)# ,(h0,c0)
        # packed_output = batch , max length , 2* hidden size
        # packed_output = packed_output.unsqueeze(1)
        packed_output = packed_output.transpose(2,1)
        conv_block_2 = self.conv_block_2(packed_output)
            # input = batch , max length , 2* hidden size
            # conv1dout = batch, kernel num, max length
            #conv_block.shape: (batch_size, kernel_num, 1)
        conv_block_3 = self.conv_block_3(packed_output)
        conv_block_4 = self.conv_block_4(packed_output)
        
        out_review = torch.cat((conv_block_2.squeeze(2),
                                conv_block_3.squeeze(2),
                                conv_block_4.squeeze(2)), 1)
        
        # cnn for summarytokenized

        embedded_summary = self.embedding(summary)
        # embedded_summary = embedded_summary.unsqueeze(1)
        embedded_summary = embedded_summary.transpose(2,1)
        
        conv_block_2_s = self.conv_block_2_s(embedded_summary)            
        conv_block_3_s = self.conv_block_3_s(embedded_summary)
        conv_block_4_s = self.conv_block_4_s(embedded_summary)
        
        out_summary = torch.cat((conv_block_2_s.squeeze(2),
                                 conv_block_3_s.squeeze(2),
                                 conv_block_4_s.squeeze(2)), 1)
        
        #print(out_review,'=='*10, out_summary,'=='*10, Upperreview,'=='*10, Uppersummary)
        #print(Upperreview.shape)
        #out_review.flatten()
        #out_summary.flatten()
        
        concatfeature = torch.cat((out_review, out_summary),1) # 256*48 256*48 256*1 256*1
        
        # full connect and softmax
        x = self.dropout(concatfeature)
        logits = self.fc(x)
        logits = logits + self.linear_1_bias
        probas = torch.sigmoid(logits)
        return logits, probas


In [20]:
def cost_fn(logits, levels, imp):
    val = (-torch.sum((F.logsigmoid(logits)*levels
                      + (F.logsigmoid(logits) - logits)*(1-levels))*imp,
           dim=1))
    return torch.mean(val)

In [21]:
model = BiLSTM_TextCNN(config = Config())
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

In [22]:
from sklearn.metrics import f1_score

In [23]:
def compute_F1_score(model, data_loader, device):
    f1score, num_examples = 0, 0
    targetlist = []
    predictedlist = []
    for i, (review,summary, targets, levels) in enumerate(data_loader):

        logits, probas = model(review,summary)
        predict_levels = probas > 0.5
        predicted_labels = torch.sum(predict_levels, dim=1)
        targetlist.extend(targets.tolist())
        predictedlist.extend(predicted_labels.tolist())
    
    f1Score = f1_score(predictedlist, targetlist, average = 'weighted')
    return f1Score

In [24]:
start_time = time.time()
best_f1, best_epoch = 0, -1
for epoch in range(config.epoch): 

    model.train()
    for batch_idx, (review,summary, targets, levels) in enumerate(train_loader):

        # FORWARD AND BACK PROP
        logits, probas = model(review,summary)
        cost = cost_fn(logits, levels, imp)
        optimizer.zero_grad()

        cost.backward()

        # UPDATE MODEL PARAMETERS
        optimizer.step()

        # LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                 % (epoch+1, config.epoch , batch_idx,
                     len(train_dataset)//config.batch_size, cost))

    model.eval()
    with torch.set_grad_enabled(False):
        valid_f1 = compute_F1_score(model, valid_loader,'cpu')
        #valid_mae, valid_mse = compute_mae_and_mse(model, valid_loader, 'cpu')

    if valid_f1 > best_f1:
        best_f1, best_epoch = valid_f1, epoch
        ########## SAVE MODEL #############
        torch.save(model.state_dict(), os.path.join(r"D:/program files/jupyter notebook/usyd\6850/Final Version/", 'best_model.pt'))


    s = 'f1: | Current Valid: %.4f Ep. %d | Best Valid : %.4f Ep. %d' % (
        valid_f1, epoch, best_f1, best_epoch)
    print(s)

    s = 'Time elapsed: %.2f min' % ((time.time() - start_time)/60)
    print(s)

model.eval()

Epoch: 001/012 | Batch 0000/0046 | Cost: 2.6058
f1: | Current Valid: 0.3125 Ep. 0 | Best Valid : 0.3125 Ep. 0
Time elapsed: 8.28 min
Epoch: 002/012 | Batch 0000/0046 | Cost: 2.7897
f1: | Current Valid: 0.4363 Ep. 1 | Best Valid : 0.4363 Ep. 1
Time elapsed: 16.63 min
Epoch: 003/012 | Batch 0000/0046 | Cost: 1.5428
f1: | Current Valid: 0.4337 Ep. 2 | Best Valid : 0.4363 Ep. 1
Time elapsed: 24.94 min
Epoch: 004/012 | Batch 0000/0046 | Cost: 1.3170
f1: | Current Valid: 0.4062 Ep. 3 | Best Valid : 0.4363 Ep. 1
Time elapsed: 33.34 min
Epoch: 005/012 | Batch 0000/0046 | Cost: 1.5066
f1: | Current Valid: 0.4334 Ep. 4 | Best Valid : 0.4363 Ep. 1
Time elapsed: 41.74 min
Epoch: 006/012 | Batch 0000/0046 | Cost: 1.1342
f1: | Current Valid: 0.4482 Ep. 5 | Best Valid : 0.4482 Ep. 5
Time elapsed: 49.12 min
Epoch: 007/012 | Batch 0000/0046 | Cost: 1.6796
f1: | Current Valid: 0.4217 Ep. 6 | Best Valid : 0.4482 Ep. 5
Time elapsed: 56.60 min
Epoch: 008/012 | Batch 0000/0046 | Cost: 1.6688
f1: | Current V

BiLSTM_TextCNN(
  (embedding): Embedding(400001, 200)
  (BiLSTM): LSTM(200, 100, batch_first=True, dropout=0.2, bidirectional=True)
  (conv_block_2): Sequential(
    (0): Conv1d(200, 16, kernel_size=(2,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=1269, stride=1269, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_3): Sequential(
    (0): Conv1d(200, 16, kernel_size=(3,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=1268, stride=1268, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_4): Sequential(
    (0): Conv1d(200, 16, kernel_size=(4,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=1267, stride=1267, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_2_s): Sequential(
    (0): Conv1d(200, 16, kernel_size=(2,), stride=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=16, stride=16, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block_3_s): Sequential(
    (0): Conv1d(200, 16, kernel_size=(3,), stride=(1,)

In [25]:
with torch.set_grad_enabled(False):  # save memory during inference

    train_f1 = compute_F1_score(model, train_loader,'cpu')
    valid_f1 = compute_F1_score(model, valid_loader,'cpu')
    test_f1 = compute_F1_score(model, test_loader,'cpu')

    s = 'f1 score: | Train: %.4f | Valid: %.4f | Test: %.4f' % (
        train_f1, 
        valid_f1, 
        test_f1, )
    print(s)

s = 'Total Training Time: %.2f min' % ((time.time() - start_time)/60)
print(s)

f1 score: | Train: 0.5921 | Valid: 0.3978 | Test: 0.3924
Total Training Time: 97.97 min


In [26]:
# evaluate best model
model.load_state_dict(torch.load(r"D:/program files/jupyter notebook/usyd/6850/Final Version/best_model.pt"))####
model.eval()

with torch.set_grad_enabled(False):
    train_f1 = compute_F1_score(model, train_loader,
                                               device='cpu')
    valid_f1 = compute_F1_score(model, valid_loader,
                                               device='cpu')
    test_f1 = compute_F1_score(model, test_loader,
                                             device='cpu')

    s = 'f1: | Best Train: %.4f | Best Valid: %.4f | Best Test: %.4f' % (
        train_f1,
        valid_f1,
        test_f1)
    print(s)


f1: | Best Train: 0.6291 | Best Valid: 0.4482 | Best Test: 0.4379


In [27]:
# save predictions
all_pred = []
all_probas = []
with torch.set_grad_enabled(False):
    for batch_idx, (review,summary, targets, levels) in enumerate(test_loader):
        

        logits, probas = model(review,summary)
        all_probas.append(probas)
        predict_levels = probas > 0.5
        predicted_labels = torch.sum(predict_levels, dim=1)
        lst = [str(int(i)) for i in predicted_labels]
        all_pred.extend(lst)

torch.save(torch.cat(all_probas).to(torch.device('cpu')),r"D:/program files/jupyter notebook/usyd/6850/Final Version/test_allprobas.tensor")####

In [28]:
test_pred = pd.DataFrame(data = all_pred, columns = ['rating'])
test_pred['rating'] = test_pred['rating'].apply(lambda x: int(x))
print('accuracy:')
print((test_pred['rating'].values == y_test['rating'].values).sum()/1500)

accuracy:
0.42733333333333334


In [34]:
f1_score(test_pred['rating'].values, y_test['rating'].values, average = 'macro')

0.40190407876456397

In [29]:
#submission
subdataset = pd.read_csv('preprocessedtest_deep.csv',
                         index_col = 0,
                         converters = {'reviewTexttokenized': eval,
                                       'summarytokenized': eval})

subdataset['reviewTexttokenized'] = subdataset['reviewTexttokenized'].apply(Token2EmbedIndex)
subdataset['summarytokenized'] = subdataset['summarytokenized'].apply(Token2EmbedIndex)

subdataset['reviewTexttokenized'] = subdataset['reviewTexttokenized'].apply(lambda x: paddingfunc(x,length = config.max_seq_len_review))
subdataset['summarytokenized'] = subdataset['summarytokenized'].apply(lambda x: paddingfunc(x,length = config.max_seq_len_summary))


In [30]:
subdf = subdataset[['reviewTexttokenized','summarytokenized']]

In [31]:
subdf

Unnamed: 0,reviewTexttokenized,summarytokenized
0,"[181, 5990, 36, 1089, 1465, 1465, 1594, 181, 4...","[1257, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"[117, 539, 1250, 413, 2025, 96, 45324, 32173, ...","[1287, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[108, 68, 15644, 455, 14035, 2219, 120, 4403, ...","[2582, 1673, 1465, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[835, 835, 835, 112499, 7730, 523, 805, 530, 6...","[14397, 805, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,"[219, 8558, 13, 411, 353, 523, 7392, 1402, 156...","[41323, 434, 219, 805, 0, 0, 0, 0, 0, 0, 0, 0,..."
...,...,...
2995,"[921, 138, 36, 1689, 156, 248, 179, 645, 1492,...","[6554, 156, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
2996,"[523, 1922, 219, 36, 1819, 10212, 34, 36, 317,...","[1922, 219, 523, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
2997,"[24044, 36, 214, 523, 987, 170, 539, 1465, 103...","[2271, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2998,"[1465, 1916, 4104, 3710, 539, 2432, 58, 899, 5...","[3535, 539, 2082, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."


In [32]:
sub_prob = []
subpredict = []
with torch.set_grad_enabled(False):
    for row in subdf.iterrows():
        logits, probas = model(torch.unsqueeze(torch.from_numpy(row[1]['reviewTexttokenized']), 0),
                               torch.unsqueeze(torch.from_numpy(row[1]['summarytokenized']),0))
        sub_prob.append(probas)
        predict_levels = probas > 0.5
        predicted_labels = torch.sum(predict_levels, dim=1)
        lst = [str(int(i)+1) for i in predicted_labels]
        subpredict.extend(lst)

In [33]:
outputdf = pd.DataFrame(data=subpredict)
outputdf.index.names = (['id'])
outputdf.to_csv('submission_glove_Bilstm_cnn_ordinal_deep.csv', header = ['prediction'])