# If the data files are already present, start from here

In [1]:
import pandas as pd
import os
import pickle
import numpy as np

# Directory that holds the preprocessed artifacts read by this notebook.
data_dir = './data/pytorch'

# Restore the pickled vocabulary object.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
word_dict_path = os.path.join(data_dir, 'word_dict_amazon.pkl')
with open(word_dict_path, mode="rb") as word_dict_file:
    word_dict = pickle.load(word_dict_file)

In [2]:
import pandas as pd
import os
# The CSV files are read without a header row, so pandas assigns integer column labels.
train_path = os.path.join(data_dir, 'train_amazon.csv')
test_path = os.path.join(data_dir, 'test_amazon.csv')

train = pd.read_csv(train_path, header=None, names=None)
test_sample = pd.read_csv(test_path, header=None, names=None)

# Quick sanity check of (rows, columns) for both frames.
print(train.shape, test_sample.shape)

(83000, 502) (21975, 502)


In [3]:
from sklearn.model_selection import train_test_split
# Split the held-out file 50/50 into a test set and a validation set.
# A fixed random_state makes the split identical on every Restart & Run All,
# so the validation/test metrics reported further down are reproducible.
test, val = train_test_split(test_sample, test_size=0.5, random_state=42)
train.shape, test.shape, val.shape


((83000, 502), (10987, 502), (10988, 502))

In [4]:
# Trim the tail of test/val so each length is an exact multiple of 50, the
# batch_size given to the DataLoaders, which keeps every evaluation batch full.
# The number of rows to remove is computed instead of hard-coded, and the slice
# creates a new frame rather than dropping in place, so re-running this cell
# is a no-op instead of removing more rows each time.
eval_batch = 50
test = test.iloc[:len(test) - len(test) % eval_batch]
val = val.iloc[:len(val) - len(val) % eval_batch]
test.shape, val.shape

((10950, 502), (10950, 502))

In [5]:
import torch
import torch.utils.data

def _frame_to_loader(frame, batch_size=50):
    """Convert one of the headerless data frames into tensors, a dataset and a loader.

    Column 0 becomes the float target (kept 2-D, one column); columns 0 and 1
    are removed from the features, which are cast to int64.
    # assumes column 1 is metadata that the model does not consume -- TODO confirm

    Returns (features, targets, dataset, loader). The loader iterates in the
    stored row order (no shuffling).
    """
    targets = torch.from_numpy(frame[[0]].values).float()
    features = torch.from_numpy(frame.drop([0, 1], axis=1).values).long()
    dataset = torch.utils.data.TensorDataset(features, targets)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    return features, targets, dataset, loader


# Training split
train_X, train_y, train_ds, train_dl = _frame_to_loader(train)

# Validation split
val_X, val_y, val_ds, val_dl = _frame_to_loader(val)

# Test split
test_X, test_y, test_ds, test_dl = _frame_to_loader(test)
print(test_y.shape)

torch.Size([10950, 1])


In [6]:
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import numpy as np

class QRNNLayer(nn.Module):
    """One QRNN layer: a left-padded 1-D convolution producing three gates,
    followed by an element-wise gated combination ("pool").

    Args:
        batch_size: kept for interface compatibility; the actual batch size is
            read from the input tensor, so partial batches are supported.
        input_size: number of input channels (feature size of each time step).
        n_filters: number of output channels of each gate convolution.
        kernel_size: width of the convolution window along the time axis.
        embed_size: stored but not used by this layer.
        device: stored for reference; tensors are created on the input's device.
        dropout: probability handed to the dropout applied to the forget gate.
    """

    def __init__(self,batch_size,input_size,n_filters,kernel_size,embed_size,device,dropout):
        super(QRNNLayer,self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.n_filters = n_filters
        self.kernel_size = kernel_size
        self.embed_size = embed_size
        self.dropout = torch.nn.Dropout(dropout)
        self.device = device
        # One convolution per gate: candidate (Z), forget (F) and output (O).
        self.conv1 = torch.nn.Conv1d(self.input_size,self.n_filters,self.kernel_size)
        self.conv2 = torch.nn.Conv1d(self.input_size,self.n_filters,self.kernel_size)
        self.conv3 = torch.nn.Conv1d(self.input_size,self.n_filters,self.kernel_size)

    def forward(self, masked_input, h, c):
        """Run the layer on `masked_input` of shape (batch, seq_len, input_size).

        `h` is accepted for interface symmetry but not read; `c` is the cell
        tensor to blend with. Returns (next_layer_input, h, c) where
        next_layer_input is the same tensor as h.
        """
        candidate, forget, output = self.masked_conv(masked_input)
        h, c = self.pool(c, candidate, forget, output)
        return h, h, c

    def masked_conv(self, x):
        """Compute the three gates, each shaped (batch, seq_len, n_filters).

        The input is padded on the left with kernel_size - 1 zero steps, so the
        output keeps the input's sequence length and position t only sees
        positions <= t. Batch size, device and dtype come from `x` itself.
        """
        pad = x.new_zeros(x.size(0), self.kernel_size - 1, x.size(2))
        # Conv1d expects (batch, channels, time).
        x = torch.cat([pad, x], 1).permute(0, 2, 1)
        candidate = torch.tanh(self.conv1(x))
        forget = torch.sigmoid(self.conv2(x))
        output = torch.sigmoid(self.conv3(x))
        # Dropout is applied to (1 - F): positions it zeroes end up with F == 1,
        # which makes pool() pass prev_c through unchanged at those positions.
        forget = 1 - self.dropout(1 - forget)
        return candidate.permute(0, 2, 1), forget.permute(0, 2, 1), output.permute(0, 2, 1)

    def pool(self, prev_c, Z, F, O):
        """Blend `prev_c` with the candidate Z using gate F, then gate with O.

        NOTE(review): the blend is element-wise over the whole sequence and
        prev_c is the tensor handed in by the caller, not the previous time
        step -- confirm this is intended rather than a recurrence over time.
        """
        c = torch.mul(F, prev_c) + torch.mul(1 - F, Z)
        h = torch.mul(O, c)
        return h, c


class QRNN(nn.Module):
    """Binary classifier: embedding -> stacked QRNNLayer -> flatten -> linear -> sigmoid.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_size: embedding dimension (input size of the first layer).
        n_filters: channel count of every QRNN layer.
        kernel_size: convolution width of every QRNN layer.
        batch_size: kept for interface compatibility; forward() reads the batch
            size from its input, so the last (smaller) batch works too.
        seq_len: sequence length the final linear layer is sized for; inputs
            must have exactly this many time steps.
        layers: number of stacked QRNN layers.
        device: stored and forwarded to the layers.
        dropout: dropout probability forwarded to the layers.
    """

    def __init__(self,vocab_size,embed_size,n_filters,kernel_size,batch_size,seq_len,layers,device,dropout):
        super(QRNN,self).__init__()
        self.embed_size = embed_size
        self.n_filters = n_filters
        self.kernel_size = kernel_size
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.num_layer = layers
        self.device = device
        self.embedding = torch.nn.Embedding(vocab_size, embed_size)
        self.dense = torch.nn.Linear(self.seq_len*self.n_filters,1)
        # The first layer consumes embeddings; later layers consume n_filters channels.
        self.QRNN_layers = torch.nn.ModuleList([
            QRNNLayer(self.batch_size, embed_size if l == 0 else n_filters,
                      self.n_filters, self.kernel_size, self.embed_size, self.device,
                      dropout)
            for l in range(self.num_layer)
        ])

    def forward(self, x, target):
        """Classify a batch of token-id sequences.

        Args:
            x: integer tensor of shape (batch, seq_len).
            target: labels, reshaped to (batch, 1) for the comparison.

        Returns:
            (prediction, accuracy): sigmoid outputs of shape (batch, 1) and the
            COUNT of correct predictions in the batch (not a ratio).
        """
        x = self.embedding(x)
        n_batch, n_steps = x.size(0), x.size(1)
        # Initial state follows the input's batch size, device and dtype.
        h = x.new_zeros(n_batch, n_steps, self.n_filters)
        c = torch.zeros_like(h)

        layer_input = x
        for layer in self.QRNN_layers:
            layer_input, h, c = layer(layer_input, h, c)

        logits = self.dense(h.reshape(n_batch, -1))
        prediction = torch.sigmoid(logits)
        target = target.view([-1,1])
        correct_pred = torch.eq(torch.round(prediction).type(target.type()),target)
        accuracy = torch.sum(correct_pred)
        return prediction, accuracy

In [7]:
# Path of the CSV file that collects the per-epoch metrics of this run.
filename = "QRNN_amazon.csv"
def write_to_csv(epochs, train_loss, train_acc, val_loss, val_acc, time_train):
    """Write one row per epoch with the given metric sequences to `filename`.

    The epoch index (0 .. epochs-1) and the five sequences are zipped together,
    so the number of rows is the length of the shortest of them. Any existing
    file is overwritten; the DataFrame index is not written.
    """
    column_names = ['Epoch', 'train_loss', 'train_acc', 'val_loss', 'val_acc', 'train_time']
    rows = list(zip(range(epochs), train_loss, train_acc, val_loss, val_acc, time_train))
    metrics = pd.DataFrame(rows, columns=column_names)
    metrics.to_csv(filename, index=False)
    
def append_to_csv(epochs, accuracy):
    """Add a 'Test_Accuracy' column to the metrics CSV at `filename`.

    The same `accuracy` value is repeated `epochs` times, so `epochs` has to
    equal the number of rows already stored in the file. The file is rewritten
    in place without the DataFrame index.
    """
    metrics = pd.read_csv(filename)
    metrics['Test_Accuracy'] = [accuracy] * epochs
    metrics.to_csv(filename, index=False)

In [8]:
import time
# ---- Hyper-parameters -------------------------------------------------------
epochs = 10
n_filters = 64
kernel_size = 2
layers= 2
learning_rate = 0.001
print(len(word_dict))
# NOTE(review): hard-coded rather than derived from len(word_dict); presumably
# the extra ids are reserved tokens -- confirm against the preprocessing step.
vocab_size = 10000
print(vocab_size)
embed_dims = 32
seq_len = 500
dropout = 0.3
batch_size=50
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = QRNN(vocab_size, embed_dims, n_filters, kernel_size, batch_size, seq_len, layers, device, dropout).to(device)
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)


def evaluate(data_loader):
    """Run `model` over `data_loader` without gradients.

    Returns (losses, accuracies): one loss value and one accuracy percentage
    per batch. The model is switched to eval mode and left there; callers that
    continue training must call model.train() afterwards.
    """
    batch_losses = []
    batch_accs = []
    model.eval()
    with torch.no_grad():
        for inputs, labels in data_loader:
            # Follow `device` instead of calling .cuda(), so the CPU fallback works.
            inputs, labels = inputs.to(device), labels.to(device)
            preds, n_correct = model(inputs, labels)
            batch_losses.append(criterion(preds, labels.float()).item())
            # The model returns a count of correct predictions; normalise by the
            # real size of this batch without touching the global batch_size.
            batch_accs.append(n_correct.item()*100/labels.size(0))
    return batch_losses, batch_accs


# ---- Training ---------------------------------------------------------------
counter = 0
QRNN_acc = []
QRNN_valacc = []
train_loss_epoch = []
train_acc_epoch = []
val_loss_epoch = []
val_acc_epoch = []
time_epoch = []
model.train()
for e in range(epochs):
    start_time = time.time()
    train_loss = []
    train_acc = []
    for inputs, labels in train_dl:
        inputs, labels = inputs.to(device), labels.to(device)
        model.zero_grad()
        # `logits` holds the model's sigmoid outputs, which is what BCELoss expects.
        logits, accuracy = model(inputs,labels)
        loss = criterion(logits,labels.float())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        batch_acc = accuracy.item()*100/labels.size(0)
        train_loss.append(loss.item())
        train_acc.append(batch_acc)
        if counter%400==0:
            print("Epoch: {}/{}".format(e,epochs),
                         "\tIteration: {}".format(counter),
                         "\tTrain Loss: {:.3f}".format(loss.item()),
                         "\tTrain Accuracy: {:.2f}".format(batch_acc))
            QRNN_acc.append(batch_acc)
        counter += 1
    train_loss_epoch.append(np.round(np.mean(train_loss), 3))
    train_acc_epoch.append(np.round(np.mean(train_acc), 3))
    print("\tTrain Loss: {:.3f}".format(np.mean(train_loss)), "\tTrain Acc: {:.3f}".format(np.mean(train_acc)))

    # Validation after every epoch, then back to training mode.
    val_loss, val_acc = evaluate(val_dl)
    val_loss_epoch.append(np.round(np.mean(val_loss), 3))
    val_acc_epoch.append(np.round(np.mean(val_acc), 3))
    print("Val Loss: {:.3f}".format(np.mean(val_loss)), "\tVal Acc: {:.3f}".format(np.mean(val_acc)))
    QRNN_valacc.append(np.mean(val_acc))
    model.train()

    # The measured time covers the training pass and the validation pass.
    end_time = time.time()-start_time
    print("Time to train epoch: {0} s".format(end_time))
    time_epoch.append(np.round(end_time, 3))

write_to_csv(epochs, train_loss_epoch, train_acc_epoch, val_loss_epoch, val_acc_epoch, time_epoch)

# ---- Final test-set evaluation ----------------------------------------------
test_loss, test_acc = evaluate(test_dl)
print("Test Loss: {:.3f}".format(np.mean(test_loss)), "\tTest Acc: {:.3f}".format(np.mean(test_acc)))
append_to_csv(epochs, np.round(np.mean(test_acc),3))

9998
10000
Epoch: 0/10 	Iteration: 0 	Train Loss: 0.704 	Train Accuracy: 34.00
Epoch: 0/10 	Iteration: 400 	Train Loss: 0.343 	Train Accuracy: 86.00
Epoch: 0/10 	Iteration: 800 	Train Loss: 0.348 	Train Accuracy: 86.00
Epoch: 0/10 	Iteration: 1200 	Train Loss: 0.275 	Train Accuracy: 90.00
Epoch: 0/10 	Iteration: 1600 	Train Loss: 0.321 	Train Accuracy: 94.00
	Train Loss: 0.380 	Train Acc: 83.383
Val Loss: 0.301 	Val Acc: 87.288
Time to train epoch: 13.529175281524658 s
Epoch: 1/10 	Iteration: 2000 	Train Loss: 0.252 	Train Accuracy: 86.00
Epoch: 1/10 	Iteration: 2400 	Train Loss: 0.195 	Train Accuracy: 98.00
Epoch: 1/10 	Iteration: 2800 	Train Loss: 0.338 	Train Accuracy: 88.00
Epoch: 1/10 	Iteration: 3200 	Train Loss: 0.353 	Train Accuracy: 82.00
	Train Loss: 0.279 	Train Acc: 88.353
Val Loss: 0.283 	Val Acc: 88.064
Time to train epoch: 11.216967821121216 s
Epoch: 2/10 	Iteration: 3600 	Train Loss: 0.154 	Train Accuracy: 94.00
Epoch: 2/10 	Iteration: 4000 	Train Loss: 0.216 	Train Acc