In [None]:
import pandas as pd
import numpy as np
print("the code is running")
# from Package.Models import *
import torch
import pickle
from torch import nn
from torch.nn.modules.padding import ConstantPad1d,ReflectionPad1d
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from sklearn.model_selection import train_test_split
from numba import jit
from matplotlib import pyplot as plt
from torch.autograd import Variable
from sklearn.utils import shuffle
from time import time
from textblob import TextBlob as tb
from nltk.tokenize import TweetTokenizer
import re
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
tknzr = TweetTokenizer()
print("import statements finished")

DEBUG  = True
torch.manual_seed(2)

def log(*argv):
    if DEBUG == True:
        try:
            print(argv)
        except:
            print("Error in printing")
            
def preprocess(t):    
    t = t.encode("ASCII","ignore").decode("utf-8")
    t = " ".join(tknzr.tokenize(t))
    t = str(t).lower()
    t = re.sub(r'["\'|?"]','',t)
    t = re.sub(r'[\-]',' ',t)
    return t

def get_batches(x,y = 0,chunks = 400):
    l = len(x)
    cnt = int(np.round(int(l)/int(chunks)))
    remain = l%chunks
    rmt = remain/cnt
    x_list = [x[each*cnt:(each*cnt)+cnt] for each in range(chunks+int(rmt)+2)]
    try:
        if y == 0:
            return  x_list

    except:
        y_list = [y[each*cnt:(each*cnt)+cnt] for each in range(chunks+int(rmt)+2)]
        return x_list,y_list        

@jit
def numba_mean(loss_list):
    return np.array(loss_list).mean()

def load_glove_model(gloveFile):
    import numpy as np
    print ("Loading Glove Model")
    f = open(gloveFile,'r',encoding="utf8")
    print("file read")
    model = {}
    count = 0
    for line in f:
        splitLine = line.split()
        word = splitLine[0]
        try:
            embedding = np.array([float(val) for val in splitLine[1:]])
            model[word] = embedding
        except:
            count = count+1
            pass
    print(count)
    print ("Done.",len(model)," words loaded!")
    return model

## loading the glove vectors
# embedding_loc = "../input/embeddings/glove.840B.300d/glove.840B.300d.txt"
embedding_loc = "../input/embeddings/wiki-news-300d-1M/wiki-news-300d-1M.vec"
try:
    glv
except:
    glv = load_glove_model(embedding_loc)
glv[""] = np.zeros((300))
print("trying to load vectors")    
print(len(glv))


In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
dtype = torch.FloatTensor
torch.set_default_tensor_type('torch.FloatTensor')

epoch = 1
lr = 0.0006
checkpoint = 1000
test_checkpoint = 10
threshold = 0.5
batches = 300

## Model definition
class encoder_rnn(nn.Module):
    def __init__(self,input_dim, hidden_dim, layer_dim, output_dim):
        super(encoder_rnn,self).__init__()
        self.rnn = nn.GRU(input_dim, hidden_dim, layer_dim, batch_first=True,bidirectional = True)
        self.layer_dim = layer_dim
        self.hidden_dim = hidden_dim
        self.fc  = nn.Sequential(
                  nn.Linear(hidden_dim*2, hidden_dim*6),
                   nn.BatchNorm1d(num_features = (hidden_dim*6)),
                  nn.ReLU(),
#                 nn.Linear(hidden_dim*6, hidden_dim*4),
#                    nn.BatchNorm1d(num_features = (hidden_dim*4)),
#                   nn.ReLU(),
                  nn.Linear(hidden_dim*6,output_dim),
                  nn.Sigmoid()
                  )
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(num_features = hidden_dim*2)

    def forward(self,x):
        batch_output = torch.zeros(1,self.hidden_dim*2)
        for each in range(len(x)):
            row = x[each]
            for e in range(len(row)):
                X =  (torch.from_numpy(row[e])[None,None,:]).type(dtype)   
                if e == 0:
    #                 h0 = (torch.randn(self.layer_dim*2, 1, self.hidden_dim).type(dtype),torch.randn(self.layer_dim*2, 1, self.hidden_dim).type(dtype))
                    h0 = torch.randn(self.layer_dim*2, 1, self.hidden_dim).type(dtype)
                    output,hidden = self.rnn(X,h0)
                else:
                    output,hidden = self.rnn(X,hidden)
            output = (output).view(-1)
            output = output[None,:]
            batch_output = torch.cat((batch_output,output),0)

        batch_output = (self.bn1(batch_output[1:]))
        final = self.fc(batch_output) 
        return final
    
def init_weights(m):
    if type(m) == nn.Linear:
        print("Xavier applied")
        torch.nn.init.xavier_uniform(m.weight)
        m.bias.data.fill_(0.01)
    
## initiate a model   
model  = encoder_rnn(301,20,1,1)
model.apply(init_weights)

## loading the training data
# q = pd.read_csv("../input/quora-insincere-questions-classification/train.csv",index_col = None,encoding = "iso-8859-1")
q = pd.read_csv("../input/train.csv",index_col = None,encoding = "iso-8859-1")
# q = q[:50000]
print("Considered ....",  str(q.shape))
sin = q[q["target"] == 0]
insin = q[q["target"] == 1]
sin = sin.sample(frac = 0.10,random_state = 42)
q = pd.concat([sin,insin])
q = shuffle(q).reset_index(drop = True)
log("Total rows ",q.shape)

def get_vectors(text,glv,pad = 250,get_pos = False):
    import nltk
    import numpy as np
    glv_text = " ".join([ each for each in text.split(" ") if each in glv]) 
    all_vectors = [glv[each] for each in glv_text.split(" ")]
    all_vectors = [np.append(all_vectors[i],(tb(each).sentiment[0])) for i,each in enumerate(glv_text.split(" "))]
    temp = np.zeros((301))
    if pad != None:
        all_vectors = all_vectors[:pad]
        if len(all_vectors) <pad:
            temp_range = pad-len(all_vectors)
            temp_list = temp.tolist()
            [all_vectors.append(temp_list)for each in range(temp_range)]
    if len(all_vectors)== 0:
        log("The words in this text has no vectors")
        log(text)
        all_vectors.append(temp)
    all_vectors = np.array(all_vectors)
    if get_pos == True:
        pos_vector = get_pos_tag(text,glv)
        if len(all_vectors)!= len(pos_vector):
            print("Pos Error detected")
        all_vectors = [np.concatenate((each,pos_vector[1])) for i,each in enumerate(all_vectors) if len(all_vectors)== len(pos_vector)]
    return all_vectors

log(q.groupby("target")["qid"].count())
log("Considered rows ",q.shape)
q["question_text"] = [ preprocess(each) for each in  q["question_text"]]
log("preprocessing words complete")
q["x"]  = q["question_text"]
q["y"] = q["target"]

## test train split
x,xt,y,yt = train_test_split(q['x'],q['y'],test_size=0.005, random_state=42)
x = x.reset_index(drop = True)
xt = xt.reset_index(drop = True)
y = y.reset_index(drop = True)
yt =yt.reset_index(drop = True)

## readying the vectors for the test data
xt = [get_vectors(each,glv,pad= None) for each in xt]

log("Vectorization complete")

## obtaining batches
x_batches,y_batches = get_batches(x,y,chunks = batches)
x_batches = [each.reset_index(drop = True) for each in x_batches if len(each)!=0]
y_batches = [each.reset_index(drop = True) for each in y_batches if len(each)!=0]

log("splitting the batches is complete")  
log("The shape of each batch is ",x_batches[0].shape)      
log("The shape of test data is ",len(xt))      

def feed(x,y,backprop = False,epoch=1,threshold = 0.5):
    t1 = time()
    loss_list = []
    X = x
    Y = (torch.from_numpy(np.array(y))).type(dtype)
    pred = model.forward(X)
    loss =cross(pred,Y).type(dtype)
    if backprop == True:
        loss.backward()
        optimizer.step()
    loss_list.append(loss.item())
    y_pred = pred.detach().numpy()
    y_pred[y_pred >= threshold] = 1
    y_pred[y_pred < threshold] = 0
    y_pred = y_pred.flatten().tolist()
    y_true = y.tolist()
#     print(precision_recall_fscore_support(y_true = y_true,y_pred = y_pred ,average = "micro" ))
    print(classification_report(y_true = y_true,y_pred = y_pred  ))
    log("T:",str(time()-t1))
    return loss_list


optimizer = torch.optim.Adam(model.parameters(), lr=lr)
optimizer.zero_grad()
cross = nn.BCELoss()

log("Model training starts")

for each in range(epoch):
    epoch_cost = []
    for j,e in enumerate(x_batches):
        x, y = x_batches[j],y_batches[j]
        x = [get_vectors(each,glv,pad=None) for each in x]
        train_cost = numba_mean(feed(x,y,backprop = True,epoch = each,threshold = threshold))  
        epoch_cost.append(train_cost)
        
        log("Epoch num - ",each," and batch num ",j)
        log(train_cost)
        if j%test_checkpoint == 0:
            log("TEST COST")
            test_cost = numba_mean(feed(xt,yt,backprop = False,epoch = each,threshold = threshold))
            log(test_cost)        
        log("================================================")
        if j == checkpoint:
            break
    log("EPOCH ",str(np.mean(epoch_cost)))
#     break
print("completed")


Using device: cpu
Xavier applied
Xavier applied




Considered .... (1306122, 3)
('Total rows ', (203341, 3))
(target
0    122531
1     80810
Name: qid, dtype: int64,)
('Considered rows ', (203341, 3))
('preprocessing words complete',)
('Vectorization complete',)
('splitting the batches is complete',)
('The shape of each batch is ', (674,))
('The shape of test data is ', 1017)
('Model training starts',)


  "Please ensure they have the same size.".format(target.size(), input.size()))


              precision    recall  f1-score   support

           0       0.62      0.69      0.66       413
           1       0.41      0.34      0.38       261

   micro avg       0.56      0.56      0.56       674
   macro avg       0.52      0.52      0.52       674
weighted avg       0.54      0.56      0.55       674

('T:', '6.761310577392578')
('Epoch num - ', 0, ' and batch num ', 0)
(0.7044059038162231,)
('TEST COST',)


  "Please ensure they have the same size.".format(target.size(), input.size()))


              precision    recall  f1-score   support

           0       0.62      0.72      0.67       625
           1       0.39      0.29      0.33       392

   micro avg       0.55      0.55      0.55      1017
   macro avg       0.51      0.51      0.50      1017
weighted avg       0.53      0.55      0.54      1017

('T:', '4.110818862915039')
(0.6914972066879272,)
              precision    recall  f1-score   support

           0       0.58      0.70      0.63       391
           1       0.41      0.29      0.34       283

   micro avg       0.53      0.53      0.53       674
   macro avg       0.50      0.50      0.49       674
weighted avg       0.51      0.53      0.51       674

('T:', '6.926717758178711')
('Epoch num - ', 0, ' and batch num ', 1)
(0.7069763541221619,)
              precision    recall  f1-score   support

           0       0.60      0.74      0.67       411
           1       0.37      0.24      0.29       263

   micro avg       0.55      0.55      0

              precision    recall  f1-score   support

           0       0.68      0.90      0.77       402
           1       0.71      0.38      0.49       272

   micro avg       0.69      0.69      0.69       674
   macro avg       0.70      0.64      0.63       674
weighted avg       0.69      0.69      0.66       674

('T:', '6.756030797958374')
('Epoch num - ', 0, ' and batch num ', 17)
(0.5916371941566467,)
              precision    recall  f1-score   support

           0       0.63      0.91      0.74       368
           1       0.76      0.35      0.48       306

   micro avg       0.65      0.65      0.65       674
   macro avg       0.69      0.63      0.61       674
weighted avg       0.69      0.65      0.62       674

('T:', '8.130068063735962')
('Epoch num - ', 0, ' and batch num ', 18)
(0.6194534301757812,)
              precision    recall  f1-score   support

           0       0.66      0.90      0.76       397
           1       0.69      0.33      0.44       2

              precision    recall  f1-score   support

           0       0.75      0.87      0.81       413
           1       0.73      0.54      0.62       261

   micro avg       0.74      0.74      0.74       674
   macro avg       0.74      0.71      0.71       674
weighted avg       0.74      0.74      0.74       674

('T:', '7.048943758010864')
('Epoch num - ', 0, ' and batch num ', 33)
(0.5138651132583618,)
              precision    recall  f1-score   support

           0       0.73      0.84      0.78       402
           1       0.70      0.54      0.61       272

   micro avg       0.72      0.72      0.72       674
   macro avg       0.72      0.69      0.70       674
weighted avg       0.72      0.72      0.71       674

('T:', '7.621331453323364')
('Epoch num - ', 0, ' and batch num ', 34)
(0.5475116968154907,)
              precision    recall  f1-score   support

           0       0.74      0.88      0.80       391
           1       0.77      0.56      0.65       2

              precision    recall  f1-score   support

           0       0.82      0.80      0.81       398
           1       0.72      0.74      0.73       276

   micro avg       0.78      0.78      0.78       674
   macro avg       0.77      0.77      0.77       674
weighted avg       0.78      0.78      0.78       674

('T:', '6.723927736282349')
('Epoch num - ', 0, ' and batch num ', 50)
(0.48271429538726807,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.83      0.78      0.80       625
           1       0.68      0.74      0.71       392

   micro avg       0.76      0.76      0.76      1017
   macro avg       0.75      0.76      0.76      1017
weighted avg       0.77      0.76      0.77      1017

('T:', '4.056748390197754')
(0.47845399379730225,)
              precision    recall  f1-score   support

           0       0.82      0.77      0.79       413
           1       0.67      0.74      0.70       261

   micro avg       0.7

              precision    recall  f1-score   support

           0       0.86      0.81      0.84       382
           1       0.77      0.83      0.80       292

   micro avg       0.82      0.82      0.82       674
   macro avg       0.82      0.82      0.82       674
weighted avg       0.82      0.82      0.82       674

('T:', '6.981743335723877')
('Epoch num - ', 0, ' and batch num ', 66)
(0.44973400235176086,)
              precision    recall  f1-score   support

           0       0.91      0.74      0.82       429
           1       0.66      0.87      0.75       245

   micro avg       0.79      0.79      0.79       674
   macro avg       0.78      0.80      0.78       674
weighted avg       0.82      0.79      0.79       674

('T:', '6.620385646820068')
('Epoch num - ', 0, ' and batch num ', 67)
(0.42768269777297974,)
              precision    recall  f1-score   support

           0       0.84      0.77      0.81       399
           1       0.71      0.79      0.75      

              precision    recall  f1-score   support

           0       0.85      0.82      0.84       388
           1       0.77      0.80      0.79       286

   micro avg       0.82      0.82      0.82       674
   macro avg       0.81      0.81      0.81       674
weighted avg       0.82      0.82      0.82       674

('T:', '7.149710655212402')
('Epoch num - ', 0, ' and batch num ', 82)
(0.42036715149879456,)
              precision    recall  f1-score   support

           0       0.87      0.79      0.83       418
           1       0.70      0.81      0.75       256

   micro avg       0.80      0.80      0.80       674
   macro avg       0.79      0.80      0.79       674
weighted avg       0.81      0.80      0.80       674

('T:', '6.566035032272339')
('Epoch num - ', 0, ' and batch num ', 83)
(0.4589555263519287,)
              precision    recall  f1-score   support

           0       0.90      0.84      0.87       409
           1       0.77      0.86      0.81       

              precision    recall  f1-score   support

           0       0.82      0.90      0.85       400
           1       0.82      0.71      0.76       274

   micro avg       0.82      0.82      0.82       674
   macro avg       0.82      0.80      0.81       674
weighted avg       0.82      0.82      0.82       674

('T:', '6.64970850944519')
('Epoch num - ', 0, ' and batch num ', 99)
(0.4173722565174103,)
              precision    recall  f1-score   support

           0       0.86      0.88      0.87       406
           1       0.81      0.79      0.80       268

   micro avg       0.84      0.84      0.84       674
   macro avg       0.84      0.83      0.83       674
weighted avg       0.84      0.84      0.84       674

('T:', '6.522793292999268')
('Epoch num - ', 0, ' and batch num ', 100)
(0.4217158555984497,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.84      0.87      0.85       625
           1       0.77      0.73   

              precision    recall  f1-score   support

           0       0.86      0.91      0.88       409
           1       0.85      0.76      0.80       265

   micro avg       0.85      0.85      0.85       674
   macro avg       0.85      0.84      0.84       674
weighted avg       0.85      0.85      0.85       674

('T:', '6.702141761779785')
('Epoch num - ', 0, ' and batch num ', 115)
(0.36076807975769043,)
              precision    recall  f1-score   support

           0       0.83      0.91      0.86       393
           1       0.85      0.73      0.79       281

   micro avg       0.83      0.83      0.83       674
   macro avg       0.84      0.82      0.83       674
weighted avg       0.84      0.83      0.83       674

('T:', '7.157123804092407')
('Epoch num - ', 0, ' and batch num ', 116)
(0.3949546813964844,)
              precision    recall  f1-score   support

           0       0.86      0.89      0.87       413
           1       0.82      0.77      0.79     

              precision    recall  f1-score   support

           0       0.87      0.79      0.82       408
           1       0.71      0.81      0.76       266

   micro avg       0.80      0.80      0.80       674
   macro avg       0.79      0.80      0.79       674
weighted avg       0.81      0.80      0.80       674

('T:', '6.725123167037964')
('Epoch num - ', 0, ' and batch num ', 131)
(0.40223756432533264,)
              precision    recall  f1-score   support

           0       0.89      0.75      0.82       411
           1       0.69      0.86      0.76       263

   micro avg       0.79      0.79      0.79       674
   macro avg       0.79      0.80      0.79       674
weighted avg       0.81      0.79      0.80       674

('T:', '7.067915201187134')
('Epoch num - ', 0, ' and batch num ', 132)
(0.40641242265701294,)
              precision    recall  f1-score   support

           0       0.89      0.83      0.86       382
           1       0.79      0.86      0.83    

              precision    recall  f1-score   support

           0       0.89      0.75      0.81       408
           1       0.69      0.86      0.77       266

   micro avg       0.79      0.79      0.79       674
   macro avg       0.79      0.81      0.79       674
weighted avg       0.81      0.79      0.80       674

('T:', '6.894518136978149')
('Epoch num - ', 0, ' and batch num ', 148)
(0.4250272810459137,)
              precision    recall  f1-score   support

           0       0.87      0.76      0.81       387
           1       0.72      0.84      0.78       287

   micro avg       0.79      0.79      0.79       674
   macro avg       0.79      0.80      0.79       674
weighted avg       0.80      0.79      0.79       674

('T:', '6.946535348892212')
('Epoch num - ', 0, ' and batch num ', 149)
(0.43637293577194214,)
              precision    recall  f1-score   support

           0       0.91      0.78      0.84       407
           1       0.72      0.88      0.79     

              precision    recall  f1-score   support

           0       0.89      0.82      0.85       397
           1       0.77      0.86      0.81       277

   micro avg       0.83      0.83      0.83       674
   macro avg       0.83      0.84      0.83       674
weighted avg       0.84      0.83      0.83       674

('T:', '6.8274312019348145')
('Epoch num - ', 0, ' and batch num ', 164)
(0.38621950149536133,)
              precision    recall  f1-score   support

           0       0.91      0.79      0.84       401
           1       0.74      0.89      0.81       273

   micro avg       0.83      0.83      0.83       674
   macro avg       0.83      0.84      0.83       674
weighted avg       0.84      0.83      0.83       674

('T:', '6.888763666152954')
('Epoch num - ', 0, ' and batch num ', 165)
(0.39377012848854065,)
              precision    recall  f1-score   support

           0       0.91      0.80      0.86       410
           1       0.74      0.88      0.81   

              precision    recall  f1-score   support

           0       0.90      0.84      0.87       625
           1       0.76      0.85      0.80       392

   micro avg       0.84      0.84      0.84      1017
   macro avg       0.83      0.84      0.83      1017
weighted avg       0.85      0.84      0.84      1017

('T:', '4.03213906288147')
(0.36608168482780457,)
              precision    recall  f1-score   support

           0       0.92      0.84      0.88       413
           1       0.78      0.88      0.83       261

   micro avg       0.86      0.86      0.86       674
   macro avg       0.85      0.86      0.85       674
weighted avg       0.86      0.86      0.86       674

('T:', '6.747009754180908')
('Epoch num - ', 0, ' and batch num ', 181)
(0.3505975008010864,)
              precision    recall  f1-score   support

           0       0.87      0.87      0.87       391
           1       0.82      0.82      0.82       283

   micro avg       0.85      0.85     

              precision    recall  f1-score   support

           0       0.87      0.85      0.86       421
           1       0.76      0.78      0.77       253

   micro avg       0.82      0.82      0.82       674
   macro avg       0.81      0.82      0.81       674
weighted avg       0.83      0.82      0.83       674

('T:', '6.7334864139556885')
('Epoch num - ', 0, ' and batch num ', 197)
(0.3929588794708252,)
              precision    recall  f1-score   support

           0       0.86      0.89      0.88       397
           1       0.84      0.79      0.81       277

   micro avg       0.85      0.85      0.85       674
   macro avg       0.85      0.84      0.85       674
weighted avg       0.85      0.85      0.85       674

('T:', '6.636523485183716')
('Epoch num - ', 0, ' and batch num ', 198)
(0.3593098819255829,)
              precision    recall  f1-score   support

           0       0.84      0.90      0.87       398
           1       0.83      0.75      0.79     

              precision    recall  f1-score   support

           0       0.85      0.92      0.88       400
           1       0.87      0.76      0.81       274

   micro avg       0.86      0.86      0.86       674
   macro avg       0.86      0.84      0.85       674
weighted avg       0.86      0.86      0.85       674

('T:', '6.752737045288086')
('Epoch num - ', 0, ' and batch num ', 213)
(0.34523317217826843,)
              precision    recall  f1-score   support

           0       0.83      0.87      0.85       395
           1       0.80      0.74      0.77       279

   micro avg       0.82      0.82      0.82       674
   macro avg       0.81      0.81      0.81       674
weighted avg       0.82      0.82      0.82       674

('T:', '6.9214701652526855')
('Epoch num - ', 0, ' and batch num ', 214)
(0.40495625138282776,)
              precision    recall  f1-score   support

           0       0.85      0.89      0.87       412
           1       0.81      0.74      0.78   

              precision    recall  f1-score   support

           0       0.83      0.88      0.86       408
           1       0.80      0.73      0.77       266

   micro avg       0.82      0.82      0.82       674
   macro avg       0.82      0.81      0.81       674
weighted avg       0.82      0.82      0.82       674

('T:', '6.798839807510376')
('Epoch num - ', 0, ' and batch num ', 230)
(0.426872581243515,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.86      0.88      0.87       625
           1       0.80      0.78      0.79       392

   micro avg       0.84      0.84      0.84      1017
   macro avg       0.83      0.83      0.83      1017
weighted avg       0.84      0.84      0.84      1017

('T:', '4.096958637237549')
(0.3782593011856079,)
              precision    recall  f1-score   support

           0       0.84      0.90      0.87       400
           1       0.83      0.76      0.79       274

   micro avg       0.84 

              precision    recall  f1-score   support

           0       0.90      0.87      0.88       413
           1       0.80      0.84      0.82       261

   micro avg       0.86      0.86      0.86       674
   macro avg       0.85      0.85      0.85       674
weighted avg       0.86      0.86      0.86       674

('T:', '6.703538417816162')
('Epoch num - ', 0, ' and batch num ', 246)
(0.369585782289505,)
              precision    recall  f1-score   support

           0       0.89      0.87      0.88       409
           1       0.81      0.84      0.82       265

   micro avg       0.86      0.86      0.86       674
   macro avg       0.85      0.86      0.85       674
weighted avg       0.86      0.86      0.86       674

('T:', '6.8790295124053955')
('Epoch num - ', 0, ' and batch num ', 247)
(0.34877339005470276,)
              precision    recall  f1-score   support

           0       0.85      0.89      0.87       380
           1       0.85      0.80      0.82     

              precision    recall  f1-score   support

           0       0.92      0.88      0.90       405
           1       0.83      0.88      0.85       269

   micro avg       0.88      0.88      0.88       674
   macro avg       0.87      0.88      0.88       674
weighted avg       0.88      0.88      0.88       674

('T:', '7.075596570968628')
('Epoch num - ', 0, ' and batch num ', 262)
(0.32741501927375793,)
              precision    recall  f1-score   support

           0       0.88      0.85      0.87       392
           1       0.80      0.84      0.82       282

   micro avg       0.85      0.85      0.85       674
   macro avg       0.84      0.85      0.84       674
weighted avg       0.85      0.85      0.85       674

('T:', '6.686232328414917')
('Epoch num - ', 0, ' and batch num ', 263)
(0.3977550268173218,)
              precision    recall  f1-score   support

           0       0.89      0.88      0.89       397
           1       0.83      0.85      0.84     

              precision    recall  f1-score   support

           0       0.91      0.80      0.85       417
           1       0.73      0.87      0.79       257

   micro avg       0.82      0.82      0.82       674
   macro avg       0.82      0.83      0.82       674
weighted avg       0.84      0.82      0.83       674

('T:', '6.977371692657471')
('Epoch num - ', 0, ' and batch num ', 279)
(0.4024140238761902,)
              precision    recall  f1-score   support

           0       0.95      0.87      0.91       423
           1       0.80      0.92      0.86       251

   micro avg       0.89      0.89      0.89       674
   macro avg       0.88      0.89      0.88       674
weighted avg       0.89      0.89      0.89       674

('T:', '6.869934320449829')
('Epoch num - ', 0, ' and batch num ', 280)
(0.3184530735015869,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.92      0.83      0.87       625
           1       0.76      0.89 

              precision    recall  f1-score   support

           0       0.92      0.84      0.88       426
           1       0.76      0.88      0.82       248

   micro avg       0.85      0.85      0.85       674
   macro avg       0.84      0.86      0.85       674
weighted avg       0.86      0.85      0.86       674

('T:', '6.906008005142212')
('Epoch num - ', 0, ' and batch num ', 295)
(0.337178498506546,)
              precision    recall  f1-score   support

           0       0.92      0.86      0.89       414
           1       0.80      0.88      0.84       260

   micro avg       0.87      0.87      0.87       674
   macro avg       0.86      0.87      0.87       674
weighted avg       0.88      0.87      0.87       674

('T:', '6.694069147109985')
('Epoch num - ', 0, ' and batch num ', 296)
(0.32957661151885986,)
              precision    recall  f1-score   support

           0       0.89      0.85      0.87       393
           1       0.80      0.85      0.83      

  "Please ensure they have the same size.".format(target.size(), input.size()))


              precision    recall  f1-score   support

           0       0.87      0.90      0.88        68
           1       0.87      0.84      0.85        56

   micro avg       0.87      0.87      0.87       124
   macro avg       0.87      0.87      0.87       124
weighted avg       0.87      0.87      0.87       124

('T:', '1.4411964416503906')
('Epoch num - ', 0, ' and batch num ', 300)
(0.3487355709075928,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.92      0.84      0.88       625
           1       0.78      0.89      0.83       392

   micro avg       0.86      0.86      0.86      1017
   macro avg       0.85      0.87      0.86      1017
weighted avg       0.87      0.86      0.86      1017

('T:', '4.062988758087158')
(0.3268741965293884,)
('EPOCH ', '0.4180014985938405')
              precision    recall  f1-score   support

           0       0.90      0.86      0.88       413
           1       0.80      0.85      0.82 

              precision    recall  f1-score   support

           0       0.87      0.86      0.86       393
           1       0.81      0.81      0.81       281

   micro avg       0.84      0.84      0.84       674
   macro avg       0.84      0.84      0.84       674
weighted avg       0.84      0.84      0.84       674

('T:', '6.84756875038147')
('Epoch num - ', 1, ' and batch num ', 14)
(0.3728407621383667,)
              precision    recall  f1-score   support

           0       0.90      0.90      0.90       413
           1       0.84      0.84      0.84       261

   micro avg       0.88      0.88      0.88       674
   macro avg       0.87      0.87      0.87       674
weighted avg       0.88      0.88      0.88       674

('T:', '6.729957342147827')
('Epoch num - ', 1, ' and batch num ', 15)
(0.30565983057022095,)
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       394
           1       0.83      0.86      0.85       2

              precision    recall  f1-score   support

           0       0.89      0.88      0.88       625
           1       0.81      0.82      0.82       392

   micro avg       0.86      0.86      0.86      1017
   macro avg       0.85      0.85      0.85      1017
weighted avg       0.86      0.86      0.86      1017

('T:', '4.035719394683838')
(0.32429927587509155,)
              precision    recall  f1-score   support

           0       0.89      0.91      0.90       402
           1       0.86      0.83      0.84       272

   micro avg       0.88      0.88      0.88       674
   macro avg       0.87      0.87      0.87       674
weighted avg       0.87      0.88      0.87       674

('T:', '6.6548051834106445')
('Epoch num - ', 1, ' and batch num ', 31)
(0.3120284676551819,)
              precision    recall  f1-score   support

           0       0.88      0.90      0.89       387
           1       0.86      0.84      0.85       287

   micro avg       0.87      0.87    

              precision    recall  f1-score   support

           0       0.85      0.91      0.88       393
           1       0.87      0.78      0.82       281

   micro avg       0.86      0.86      0.86       674
   macro avg       0.86      0.84      0.85       674
weighted avg       0.86      0.86      0.85       674

('T:', '6.9273903369903564')
('Epoch num - ', 1, ' and batch num ', 47)
(0.36839553713798523,)
              precision    recall  f1-score   support

           0       0.86      0.90      0.88       416
           1       0.83      0.77      0.80       258

   micro avg       0.85      0.85      0.85       674
   macro avg       0.84      0.84      0.84       674
weighted avg       0.85      0.85      0.85       674

('T:', '6.6011528968811035')
('Epoch num - ', 1, ' and batch num ', 48)
(0.33999064564704895,)
              precision    recall  f1-score   support

           0       0.85      0.92      0.88       408
           1       0.86      0.75      0.80    

              precision    recall  f1-score   support

           0       0.86      0.92      0.89       389
           1       0.88      0.79      0.83       285

   micro avg       0.86      0.86      0.86       674
   macro avg       0.87      0.85      0.86       674
weighted avg       0.87      0.86      0.86       674

('T:', '7.075518846511841')
('Epoch num - ', 1, ' and batch num ', 63)
(0.34358590841293335,)
              precision    recall  f1-score   support

           0       0.88      0.90      0.89       411
           1       0.83      0.80      0.82       263

   micro avg       0.86      0.86      0.86       674
   macro avg       0.85      0.85      0.85       674
weighted avg       0.86      0.86      0.86       674

('T:', '7.007785797119141')
('Epoch num - ', 1, ' and batch num ', 64)
(0.3375509977340698,)
              precision    recall  f1-score   support

           0       0.86      0.93      0.90       391
           1       0.90      0.80      0.84       

              precision    recall  f1-score   support

           0       0.88      0.91      0.89       401
           1       0.86      0.81      0.84       273

   micro avg       0.87      0.87      0.87       674
   macro avg       0.87      0.86      0.87       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.913106441497803')
('Epoch num - ', 1, ' and batch num ', 80)
(0.29941385984420776,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.89      0.89      0.89       625
           1       0.83      0.82      0.83       392

   micro avg       0.87      0.87      0.87      1017
   macro avg       0.86      0.86      0.86      1017
weighted avg       0.87      0.87      0.87      1017

('T:', '4.085699796676636')
(0.323589950799942,)
              precision    recall  f1-score   support

           0       0.88      0.90      0.89       415
           1       0.84      0.81      0.83       259

   micro avg       0.87 

('Epoch num - ', 1, ' and batch num ', 95)
(0.33251529932022095,)
              precision    recall  f1-score   support

           0       0.89      0.88      0.89       407
           1       0.82      0.84      0.83       267

   micro avg       0.86      0.86      0.86       674
   macro avg       0.86      0.86      0.86       674
weighted avg       0.86      0.86      0.86       674

('T:', '6.859920978546143')
('Epoch num - ', 1, ' and batch num ', 96)
(0.33365166187286377,)
              precision    recall  f1-score   support

           0       0.88      0.90      0.89       405
           1       0.85      0.81      0.83       269

   micro avg       0.87      0.87      0.87       674
   macro avg       0.86      0.86      0.86       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.916929006576538')
('Epoch num - ', 1, ' and batch num ', 97)
(0.317070335149765,)
              precision    recall  f1-score   support

           0       0.90      0.89      

              precision    recall  f1-score   support

           0       0.90      0.87      0.88       425
           1       0.79      0.83      0.81       249

   micro avg       0.85      0.85      0.85       674
   macro avg       0.84      0.85      0.85       674
weighted avg       0.86      0.85      0.86       674

('T:', '6.586538076400757')
('Epoch num - ', 1, ' and batch num ', 112)
(0.34815502166748047,)
              precision    recall  f1-score   support

           0       0.89      0.89      0.89       398
           1       0.84      0.84      0.84       276

   micro avg       0.87      0.87      0.87       674
   macro avg       0.87      0.86      0.86       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.930258274078369')
('Epoch num - ', 1, ' and batch num ', 113)
(0.29683297872543335,)
              precision    recall  f1-score   support

           0       0.90      0.86      0.88       407
           1       0.80      0.86      0.83    

              precision    recall  f1-score   support

           0       0.91      0.88      0.89       422
           1       0.81      0.86      0.83       252

   micro avg       0.87      0.87      0.87       674
   macro avg       0.86      0.87      0.86       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.685495853424072')
('Epoch num - ', 1, ' and batch num ', 129)
(0.299883097410202,)
              precision    recall  f1-score   support

           0       0.91      0.91      0.91       390
           1       0.87      0.88      0.88       284

   micro avg       0.89      0.89      0.89       674
   macro avg       0.89      0.89      0.89       674
weighted avg       0.89      0.89      0.89       674

('T:', '6.608790397644043')
('Epoch num - ', 1, ' and batch num ', 130)
(0.30578863620758057,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.90      0.85      0.88       625
           1       0.78      0.85 

              precision    recall  f1-score   support

           0       0.93      0.89      0.91       403
           1       0.84      0.90      0.87       271

   micro avg       0.89      0.89      0.89       674
   macro avg       0.89      0.89      0.89       674
weighted avg       0.90      0.89      0.89       674

('T:', '6.768495321273804')
('Epoch num - ', 1, ' and batch num ', 145)
(0.25465500354766846,)
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       398
           1       0.83      0.87      0.85       276

   micro avg       0.87      0.87      0.87       674
   macro avg       0.87      0.87      0.87       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.848817348480225')
('Epoch num - ', 1, ' and batch num ', 146)
(0.3173140585422516,)
              precision    recall  f1-score   support

           0       0.93      0.87      0.90       419
           1       0.80      0.89      0.84     

              precision    recall  f1-score   support

           0       0.91      0.87      0.89       418
           1       0.80      0.86      0.83       256

   micro avg       0.86      0.86      0.86       674
   macro avg       0.85      0.86      0.86       674
weighted avg       0.87      0.86      0.87       674

('T:', '6.441942930221558')
('Epoch num - ', 1, ' and batch num ', 161)
(0.3398624360561371,)
              precision    recall  f1-score   support

           0       0.90      0.87      0.88       386
           1       0.83      0.87      0.85       288

   micro avg       0.87      0.87      0.87       674
   macro avg       0.87      0.87      0.87       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.998349666595459')
('Epoch num - ', 1, ' and batch num ', 162)
(0.3371695280075073,)
              precision    recall  f1-score   support

           0       0.92      0.87      0.89       410
           1       0.82      0.88      0.85      

              precision    recall  f1-score   support

           0       0.91      0.90      0.90       404
           1       0.85      0.87      0.86       270

   micro avg       0.89      0.89      0.89       674
   macro avg       0.88      0.88      0.88       674
weighted avg       0.89      0.89      0.89       674

('T:', '6.944304943084717')
('Epoch num - ', 1, ' and batch num ', 178)
(0.2917918264865875,)
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       388
           1       0.84      0.87      0.86       286

   micro avg       0.88      0.88      0.88       674
   macro avg       0.87      0.87      0.87       674
weighted avg       0.88      0.88      0.88       674

('T:', '6.961806058883667')
('Epoch num - ', 1, ' and batch num ', 179)
(0.30209997296333313,)
              precision    recall  f1-score   support

           0       0.92      0.87      0.89       419
           1       0.80      0.87      0.84     

              precision    recall  f1-score   support

           0       0.93      0.87      0.90       422
           1       0.80      0.89      0.84       252

   micro avg       0.88      0.88      0.88       674
   macro avg       0.86      0.88      0.87       674
weighted avg       0.88      0.88      0.88       674

('T:', '6.567333698272705')
('Epoch num - ', 1, ' and batch num ', 194)
(0.306037575006485,)
              precision    recall  f1-score   support

           0       0.93      0.86      0.89       426
           1       0.79      0.89      0.83       248

   micro avg       0.87      0.87      0.87       674
   macro avg       0.86      0.87      0.86       674
weighted avg       0.88      0.87      0.87       674

('T:', '6.867645740509033')
('Epoch num - ', 1, ' and batch num ', 195)
(0.316712349653244,)
              precision    recall  f1-score   support

           0       0.93      0.88      0.91       416
           1       0.83      0.90      0.86       2

              precision    recall  f1-score   support

           0       0.91      0.88      0.90       625
           1       0.82      0.87      0.84       392

   micro avg       0.88      0.88      0.88      1017
   macro avg       0.87      0.87      0.87      1017
weighted avg       0.88      0.88      0.88      1017

('T:', '4.062181234359741')
(0.2911030948162079,)
              precision    recall  f1-score   support

           0       0.90      0.89      0.90       404
           1       0.84      0.86      0.85       270

   micro avg       0.88      0.88      0.88       674
   macro avg       0.87      0.87      0.87       674
weighted avg       0.88      0.88      0.88       674

('T:', '6.818427324295044')
('Epoch num - ', 1, ' and batch num ', 211)
(0.3167267143726349,)
              precision    recall  f1-score   support

           0       0.90      0.86      0.88       399
           1       0.81      0.87      0.84       275

   micro avg       0.86      0.86     

              precision    recall  f1-score   support

           0       0.88      0.91      0.90       389
           1       0.88      0.84      0.85       285

   micro avg       0.88      0.88      0.88       674
   macro avg       0.88      0.87      0.88       674
weighted avg       0.88      0.88      0.88       674

('T:', '6.841558218002319')
('Epoch num - ', 1, ' and batch num ', 227)
(0.3100900948047638,)
              precision    recall  f1-score   support

           0       0.87      0.89      0.88       400
           1       0.83      0.80      0.82       274

   micro avg       0.85      0.85      0.85       674
   macro avg       0.85      0.85      0.85       674
weighted avg       0.85      0.85      0.85       674

('T:', '6.661730527877808')
('Epoch num - ', 1, ' and batch num ', 228)
(0.35128656029701233,)
              precision    recall  f1-score   support

           0       0.90      0.89      0.89       417
           1       0.83      0.83      0.83     

              precision    recall  f1-score   support

           0       0.89      0.91      0.90       410
           1       0.86      0.83      0.84       264

   micro avg       0.88      0.88      0.88       674
   macro avg       0.87      0.87      0.87       674
weighted avg       0.88      0.88      0.88       674

('T:', '6.932783365249634')
('Epoch num - ', 1, ' and batch num ', 243)
(0.32039862871170044,)
              precision    recall  f1-score   support

           0       0.88      0.91      0.89       410
           1       0.85      0.80      0.83       264

   micro avg       0.87      0.87      0.87       674
   macro avg       0.86      0.86      0.86       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.89107084274292')
('Epoch num - ', 1, ' and batch num ', 244)
(0.31217160820961,)
              precision    recall  f1-score   support

           0       0.85      0.92      0.88       377
           1       0.88      0.79      0.83       2

              precision    recall  f1-score   support

           0       0.88      0.92      0.90       410
           1       0.87      0.80      0.83       264

   micro avg       0.87      0.87      0.87       674
   macro avg       0.87      0.86      0.87       674
weighted avg       0.87      0.87      0.87       674

('T:', '6.818358659744263')
('Epoch num - ', 1, ' and batch num ', 260)
(0.3325806260108948,)
('TEST COST',)
              precision    recall  f1-score   support

           0       0.89      0.90      0.89       625
           1       0.84      0.81      0.83       392

   micro avg       0.87      0.87      0.87      1017
   macro avg       0.86      0.86      0.86      1017
weighted avg       0.87      0.87      0.87      1017

('T:', '4.078894853591919')
(0.30061522126197815,)
              precision    recall  f1-score   support

           0       0.89      0.95      0.92       395
           1       0.92      0.83      0.87       279

   micro avg       0.9

              precision    recall  f1-score   support

           0       0.90      0.92      0.91       410
           1       0.87      0.84      0.85       264

   micro avg       0.89      0.89      0.89       674
   macro avg       0.88      0.88      0.88       674
weighted avg       0.89      0.89      0.89       674

('T:', '6.820111513137817')
('Epoch num - ', 1, ' and batch num ', 276)
(0.30492103099823,)
              precision    recall  f1-score   support

           0       0.91      0.91      0.91       412
           1       0.86      0.86      0.86       262

   micro avg       0.89      0.89      0.89       674
   macro avg       0.89      0.89      0.89       674
weighted avg       0.89      0.89      0.89       674

('T:', '6.7051331996917725')
('Epoch num - ', 1, ' and batch num ', 277)
(0.2914365231990814,)
              precision    recall  f1-score   support

           0       0.88      0.96      0.92       398
           1       0.93      0.80      0.86       

              precision    recall  f1-score   support

           0       0.90      0.91      0.91       405
           1       0.87      0.85      0.86       269

   micro avg       0.89      0.89      0.89       674
   macro avg       0.88      0.88      0.88       674
weighted avg       0.89      0.89      0.89       674

('T:', '6.7887914180755615')
('Epoch num - ', 1, ' and batch num ', 292)
(0.29287460446357727,)
              precision    recall  f1-score   support

           0       0.91      0.89      0.90       410
           1       0.83      0.87      0.85       264

   micro avg       0.88      0.88      0.88       674
   macro avg       0.87      0.88      0.87       674
weighted avg       0.88      0.88      0.88       674

('T:', '6.8743860721588135')
('Epoch num - ', 1, ' and batch num ', 293)
(0.31362712383270264,)
              precision    recall  f1-score   support

           0       0.87      0.91      0.89       382
           1       0.88      0.82      0.85  

In [None]:
# test = pd.read_csv("../input/quora-insincere-questions-classification/test.csv",index_col = None)
test = pd.read_csv("../input/test.csv",index_col = None)
log("Test Shape",test.shape)
test["question_text"] = [preprocess(each) for each in test["question_text"]]
log("preprocessing words complete")

test["x"]  = test["question_text"]

total_result = []
tx_batches = get_batches(x = test["x"],chunks = batches)
tx_batches = [each for each in tx_batches if each.shape[0] != 0]
for i,e in enumerate(tx_batches):
    xv = [get_vectors(each,glv,pad = None) for each in e]
    pred =  model.forward(xv)
    result =pred[:,0].detach().numpy()
    [total_result.append(r) for r in result]
    print(i)
test["result"] = total_result
test["prediction"] = [ 1 if each >= threshold else 0 for each in test.result]
test[["qid","prediction"]].to_csv("submission.csv",index = None)
print("submitted")

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
