In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pickle
from tqdm import tqdm
from sklearn.metrics import accuracy_score
import warnings

# NOTE(review): this silences *every* warning for the rest of the session, including
# library warnings that can point at real bugs. Consider narrowing it to the specific
# categories/modules that are known to be noisy.
warnings.filterwarnings("ignore")

#### Loading the dataset

In [2]:
# Pre-trained embedding table, stored as a NumPy .npy file; np.load opens the path itself.
embedding_matrix = np.load('../input/train-restaurant/embedding_matrix_laptops_glove300d.npy')

In [3]:
# Rows and columns of the embedding table; these two numbers are later passed to the
# model as vocab_size and embedding_dim.
print(embedding_matrix.shape)

In [4]:
# Training split. Despite the .json suffix the file is read with pickle.
# NOTE(review): pickle can execute arbitrary code on load — only open trusted files.
with open('../input/train-restaurant/train_laptops.json', 'rb') as train_file:
    data = pickle.load(train_file)

In [5]:
# Validation split. Despite the .json suffix the file is read with pickle.
# NOTE(review): pickle can execute arbitrary code on load — only open trusted files.
with open('../input/train-restaurant/valid_laptops.json', 'rb') as valid_file:
    data_valid = pickle.load(valid_file)

In [6]:
# Test split. Despite the .json suffix the file is read with pickle.
# NOTE(review): pickle can execute arbitrary code on load — only open trusted files.
with open('../input/train-restaurant/test_laptops.json', 'rb') as test_file:
    data_test = pickle.load(test_file)

In [7]:
# Turn the pickled training reviews into parallel per-review collections.
# NOTE(review): the record layout is inferred purely from the indexing below —
# confirm it against the preprocessing script that produced the pickle.
x_train, aspect, y_train = [], [], []
original_sent_per_review = []
for review_key in data.keys():
    review_sentences = data[review_key][0]
    sentence_ids = range(len(review_sentences))

    # Field 0 of each sentence record is what the model later feeds to its embedding layer.
    x_train.append(np.array([review_sentences[k][0] for k in sentence_ids]))

    # Fields 1 and 2 index rows of `embedding_matrix`; the aspect vector is their mean.
    aspect.append(np.array([
        (embedding_matrix[review_sentences[k][1]] + embedding_matrix[review_sentences[k][2]]) / 2
        for k in sentence_ids
    ]))

    # Field 3 is collected as the per-sentence label.
    y_train.append(np.array([review_sentences[k][3] for k in sentence_ids]))

    # Second element of the review entry; the training loop uses it to slice the
    # model output and labels (presumably the un-padded sentence count — confirm).
    original_sent_per_review.append(data[review_key][1])

aspect = np.array(aspect)
y_train = np.array(y_train)
x_train = np.array(x_train)

In [8]:
# Turn the pickled validation reviews into parallel per-review collections.
# NOTE(review): the record layout is inferred purely from the indexing below —
# confirm it against the preprocessing script that produced the pickle.
x_train_valid, aspect_valid, y_train_valid = [], [], []
original_sent_per_review_valid = []
for review_key in data_valid.keys():
    review_sentences = data_valid[review_key][0]
    sentence_ids = range(len(review_sentences))

    # Field 0 of each sentence record is what the model later feeds to its embedding layer.
    x_train_valid.append(np.array([review_sentences[k][0] for k in sentence_ids]))

    # Fields 1 and 2 index rows of `embedding_matrix`; the aspect vector is their mean.
    aspect_valid.append(np.array([
        (embedding_matrix[review_sentences[k][1]] + embedding_matrix[review_sentences[k][2]]) / 2
        for k in sentence_ids
    ]))

    # Field 3 is collected as the per-sentence label.
    y_train_valid.append(np.array([review_sentences[k][3] for k in sentence_ids]))

    # Second element of the review entry; the validation loop uses it to slice the
    # predictions and labels (presumably the un-padded sentence count — confirm).
    original_sent_per_review_valid.append(data_valid[review_key][1])

aspect_valid = np.array(aspect_valid)
y_train_valid = np.array(y_train_valid)
x_train_valid = np.array(x_train_valid)

In [9]:
# Turn the pickled test reviews into parallel per-review collections.
# NOTE(review): the record layout is inferred purely from the indexing below —
# confirm it against the preprocessing script that produced the pickle.
x_train_test, aspect_test, y_train_test = [], [], []
original_sent_per_review_test = []
for review_key in data_test.keys():
    review_sentences = data_test[review_key][0]
    sentence_ids = range(len(review_sentences))

    # Field 0 of each sentence record is what the model later feeds to its embedding layer.
    x_train_test.append(np.array([review_sentences[k][0] for k in sentence_ids]))

    # Fields 1 and 2 index rows of `embedding_matrix`; the aspect vector is their mean.
    aspect_test.append(np.array([
        (embedding_matrix[review_sentences[k][1]] + embedding_matrix[review_sentences[k][2]]) / 2
        for k in sentence_ids
    ]))

    # Field 3 is collected as the per-sentence label.
    y_train_test.append(np.array([review_sentences[k][3] for k in sentence_ids]))

    # Second element of the review entry; the test loop uses it to slice the
    # predictions and labels (presumably the un-padded sentence count — confirm).
    original_sent_per_review_test.append(data_test[review_key][1])

aspect_test = np.array(aspect_test)
y_train_test = np.array(y_train_test)
x_train_test = np.array(x_train_test)

In [10]:
# The raw training dict is no longer needed once the arrays above exist; drop the
# name so the object can be garbage-collected.
del data

In [11]:
# Sanity check: shapes of the stacked training inputs, labels and aspect vectors.
for stacked in (x_train, y_train, aspect):
    print(stacked.shape)

In [12]:
# Sanity check: shapes of the stacked validation inputs, labels and aspect vectors.
for stacked in (x_train_valid, y_train_valid, aspect_valid):
    print(stacked.shape)

In [13]:
# Sanity check: shapes of the stacked test inputs, labels and aspect vectors.
for stacked in (x_train_test, y_train_test, aspect_test):
    print(stacked.shape)

#### Architecture

In [14]:
class H_Bi_lSTM(nn.Module):
    """Hierarchical bidirectional LSTM producing one class distribution per sentence.

    A sentence-level BiLSTM encodes the words of every sentence. Its flattened
    outputs, concatenated with that sentence's aspect vector, form one time step
    of a review-level BiLSTM. A linear layer followed by a softmax maps every
    review-level state to probabilities over three classes.

    Args:
        vocab_size: Number of rows of the embedding table.
        embedding_dim: Width of one embedding vector.
        hidden_dim_1: Hidden size (per direction) of the sentence-level LSTM.
        hidden_dim_2: Hidden size (per direction) of the review-level LSTM.
        max_words: Number of tokens in every (padded) sentence.
        max_sentence: Number of sentences in every (padded) review.
        pretrained_embeddings: Optional array/tensor of shape
            ``(vocab_size, embedding_dim)`` used to initialise the frozen
            embedding layer. When omitted, the notebook-level
            ``embedding_matrix`` is used, as before.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim_1, hidden_dim_2, max_words, max_sentence,
                 pretrained_embeddings=None):
        super().__init__()
        if pretrained_embeddings is None:
            # Backward-compatible default: the table loaded at the top of the notebook.
            pretrained_embeddings = embedding_matrix
        pretrained_embeddings = torch.as_tensor(pretrained_embeddings)
        print(tuple(pretrained_embeddings.shape))

        self.hidden_dim_1 = hidden_dim_1
        self.max_sentence = max_sentence
        self.max_words = max_words
        self.embedding_dim = embedding_dim

        # Frozen lookup table initialised from the pre-trained vectors.
        self.embedding_layer = nn.Embedding(vocab_size, embedding_dim)
        self.embedding_layer.weight.data.copy_(pretrained_embeddings)
        self.embedding_layer.weight.requires_grad = False

        self.sentence_level_lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim_1,
                                           num_layers=2, batch_first=True, bidirectional=True)
        # One review-level time step = all word states of a sentence + its aspect vector.
        self.review_level_lstm = nn.LSTM(input_size=2*hidden_dim_1*self.max_words+embedding_dim,
                                         hidden_size=hidden_dim_2, num_layers=2,
                                         batch_first=True, bidirectional=True)
        self.fc_layer1 = nn.Linear(2*hidden_dim_2, 3)
        self.softmax = nn.Softmax(dim=2)

    def forward(self, input_emb_review, aspect_vec):
        """Classify every sentence of a single review.

        Args:
            input_emb_review: Integer tensor of token ids, one row per sentence
                (each row must hold ``max_words`` ids).
            aspect_vec: Tensor with one ``embedding_dim``-wide aspect vector per
                sentence.

        Returns:
            Tensor of shape ``(1, max_sentence, 3)`` holding class
            *probabilities* (softmax already applied). Pair it with
            ``NLLLoss`` on the log of the output rather than with
            ``CrossEntropyLoss``, which expects raw scores.
        """
        # Follow whatever device/dtype the module currently lives on instead of
        # hard-coding CUDA and float64.
        weight = self.embedding_layer.weight
        device, dtype = weight.device, weight.dtype

        token_ids = torch.as_tensor(input_emb_review).to(device)
        num_sentences = token_ids.shape[0]
        aspect_rows = torch.as_tensor(aspect_vec)[:num_sentences].to(device=device, dtype=dtype)

        # Encode all sentences in one batched call; each sentence is still processed
        # independently because the batch dimension indexes sentences.
        word_vectors = self.embedding_layer(token_ids)
        sent_lstm_out, _ = self.sentence_level_lstm(word_vectors)

        # Build the review-level sequence with differentiable ops only. The previous
        # implementation wrapped this buffer in torch.tensor(...), which copies the
        # values and drops the autograd history, so the sentence-level LSTM never
        # received any gradient.
        review_input = torch.cat((sent_lstm_out.reshape(num_sentences, -1), aspect_rows), dim=1)

        # Reviews shorter than max_sentence are padded with rows of ones, as before.
        missing = self.max_sentence - num_sentences
        if missing > 0:
            padding = torch.ones(missing, review_input.shape[1], dtype=dtype, device=device)
            review_input = torch.cat((review_input, padding), dim=0)

        review_lstm_out, _ = self.review_level_lstm(review_input.unsqueeze(0))
        fc_out = self.fc_layer1(review_lstm_out)
        return self.softmax(fc_out)

In [15]:
# Display the shape of the stacked training-label array.
y_train.shape

In [16]:
# Independent copy of the training labels; the training loop reads its targets from `y_new`.
y_new = y_train.copy()

In [19]:
! mkdir ./glove_300

#### Training

In [None]:
# Debugging aid: anomaly detection reports the forward op behind a failing backward
# pass, at the cost of slower training. Switch it off once training is stable.
torch.autograd.set_detect_anomaly(True)

# Use the GPU when one is visible; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

h_bi_lstm = H_Bi_lSTM(
    embedding_matrix.shape[0],  # vocab_size
    embedding_matrix.shape[1],  # embedding_dim
    300,                        # hidden_dim_1 (sentence-level LSTM)
    100,                        # hidden_dim_2 (review-level LSTM)
    x_train.shape[2],           # max_words
    x_train.shape[1],           # max_sentence
).double().to(device)
optimizer = optim.SGD(h_bi_lstm.parameters(), lr=0.01)
# The model's forward pass already ends in a softmax, i.e. it returns probabilities.
# CrossEntropyLoss would apply log-softmax a second time, so use NLLLoss on the
# log-probabilities instead.
loss_function = nn.NLLLoss()
num_epochs = 100
accuracy_train = []
accuracy_valid = []
loss_value = []
prev_valid_acc = 0  # best validation correct-count seen so far
accuracy_score_valid = 0
total_train_sentences = np.array(original_sent_per_review).sum()
total_valid_sentences = np.array(original_sent_per_review_valid).sum()

for epoch in range(num_epochs):
    print("\nEpoch:", epoch+1)
    current_loss = 0
    count = 0
    accuracy = 0              # correctly classified training sentences this epoch
    accuracy_score_valid = 0  # correctly classified validation sentences this epoch
    count_valid = 0

    # ---- training pass: one review per optimisation step ----
    h_bi_lstm.train()
    for i in tqdm(range(x_train.shape[0])):
        n_real = original_sent_per_review[i]  # only the first n_real rows are scored
        review = torch.tensor(x_train[i]).to(device)
        aspect_emb = torch.tensor(aspect[i]).to(device)
        output = h_bi_lstm(review, aspect_emb)
        output = output.reshape((output.shape[1], output.shape[2]))
        y_label = torch.tensor(y_new[i], dtype=torch.long).reshape((y_new[i].shape[0])).to(device)

        # Clamp before the log so a probability that underflows to 0 cannot yield -inf.
        log_probs = torch.log(torch.clamp(output[:n_real], min=1e-12))
        loss = loss_function(log_probs, y_label[:n_real])
        current_loss += loss.item()
        accuracy += (torch.argmax(output[:n_real], dim=1) == y_label[:n_real]).sum().item()

        optimizer.zero_grad()
        loss.backward()
        # Clipping must come after backward(): before it the gradients are still
        # zeroed/absent and there is nothing to clip.
        nn.utils.clip_grad_norm_(h_bi_lstm.parameters(), 5)
        optimizer.step()
        count += 1

    # ---- validation pass: no gradients needed ----
    h_bi_lstm.eval()
    with torch.no_grad():
        for i in tqdm(range(x_train_valid.shape[0])):
            n_real = original_sent_per_review_valid[i]
            review = torch.tensor(x_train_valid[i]).to(device)
            aspect_emb = torch.tensor(aspect_valid[i]).to(device)
            output = h_bi_lstm(review, aspect_emb)
            output = output.reshape((output.shape[1], output.shape[2]))
            y_label = torch.tensor(y_train_valid[i], dtype=torch.long).reshape((y_train_valid[i].shape[0]))
            predictions = torch.argmax(output, dim=1).cpu()[:n_real]
            accuracy_score_valid += (predictions == y_label[:n_real]).sum().item()
            count_valid += 1

    accuracy_score_train = accuracy / total_train_sentences
    valid_score = accuracy_score_valid / total_valid_sentences
    # Both accuracies are fractions in [0, 1], not percentages.
    print("Loss is", current_loss)
    print("Accuracy is", accuracy_score_train)
    print("Validation accuracy is", valid_score)
    accuracy_train.append(accuracy_score_train)
    loss_value.append(current_loss/count)
    accuracy_valid.append(valid_score)

    # Checkpoint only when validation improves on the best epoch so far (the
    # reference used to be overwritten every epoch, i.e. "better than last epoch").
    if prev_valid_acc < accuracy_score_valid:
        torch.save(h_bi_lstm, './glove_300/new-model-' + str(epoch+1) + '.pt')
        prev_valid_acc = accuracy_score_valid

In [None]:
# Persist the model as it stands after the last epoch (whole module, not just the state dict).
final_model_path = './glove_300/finalmodel' + '.pt'
torch.save(h_bi_lstm, final_model_path)

In [None]:
import matplotlib.pyplot as plt

# Training accuracy (fraction of correctly labelled sentences) per epoch.
fig, ax = plt.subplots()
ax.plot(accuracy_train)
ax.set_title("Training Accuracy VS Epoch for glove-300")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy fraction")
fig.savefig('./glove_300/train_acc.jpg')
plt.show()

In [None]:
# Mean per-review training loss for every epoch.
fig, ax = plt.subplots()
ax.plot(loss_value)
ax.set_title("Loss VS Epoch for glove-300")
ax.set_xlabel("Epoch")
ax.set_ylabel("Average Loss")
fig.savefig('./glove_300/loss.jpg')
plt.show()

In [None]:
# Validation accuracy (fraction of correctly labelled sentences) per epoch.
fig, ax = plt.subplots()
ax.plot(accuracy_valid)
ax.set_title("Validation Accuracy VS Epoch for glove-300")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy fraction")
fig.savefig('./glove_300/valid_acc.jpg')
plt.show()

In [None]:
import shutil

# Bundle checkpoints and plots into glove_300.zip in the working directory.
shutil.make_archive(base_name="glove_300", format='zip', root_dir="./glove_300")

In [None]:
import os
from IPython.display import FileLink

# Switch to the Kaggle working directory so the relative link below resolves,
# then render a clickable download link for the archive.
os.chdir(r'/kaggle/working/')
FileLink(r'./glove_300.zip')

#### Testing

In [None]:
# Load the checkpoint written after the final training epoch. The previous path
# ("final-model") is never written anywhere in this notebook.
# NOTE(review): the checkpoint is a fully pickled module. Recent PyTorch releases
# default torch.load to weights_only=True, which rejects such files; pass
# weights_only=False there, and only for checkpoints you trust.
model = torch.load('./glove_300/finalmodel.pt')
model.eval()
# Feed inputs on whatever device the restored parameters live on.
device = next(model.parameters()).device

accuracy_score_test = 0  # number of correctly classified test sentences
with torch.no_grad():
    for i in tqdm(range(x_train_test.shape[0])):
        n_real = original_sent_per_review_test[i]  # only the first n_real rows are scored
        review = torch.tensor(x_train_test[i]).to(device)
        aspect_emb = torch.tensor(aspect_test[i]).to(device)
        output = model(review, aspect_emb)
        output = output.reshape((output.shape[1], output.shape[2]))
        y_label = torch.tensor(y_train_test[i], dtype=torch.long).reshape((y_train_test[i].shape[0]))
        predictions = torch.argmax(output, dim=1).cpu()[:n_real]
        accuracy_score_test += (predictions == y_label[:n_real]).sum().item()

# Normalise by the number of *test* sentences (the validation count was used before).
test_score = accuracy_score_test / np.array(original_sent_per_review_test).sum()
print("Test Accuracy:", test_score)