# D-AT-GRU
## Imports

In [1]:
import itertools
import more_itertools as mit
import math
import time
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data.dataloader as dataloader
from torch.autograd import Variable

import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms

import matplotlib.pyplot as plt

from semeval2014.semeval_base import *

from nltk.tokenize import TweetTokenizer

from torchviz import make_dot

## Hyper Paramètres

## Load Dataset

In [2]:
trainfile="semeval2014/restaurants-trial.xml"
testfile="semeval2014/Restaurants_Test_Data_PhaseA.xml"
corpus = Corpus(ET.parse(trainfile).getroot().findall('sentence'))
unseen = Corpus(ET.parse(testfile).getroot().findall('sentence'))
b1 = BaselineAspectExtractor(corpus)
predicted = b1.tag(unseen.corpus)
corpus.write_out('test.predicted-aspect.xml', predicted, short=False)

## Show 10 sentences and categories example in train

In [3]:
for index, sentence in zip(range(10), corpus.corpus):
    print(sentence.text)
    for categorie in sentence.aspect_categories:
        print(categorie.term, categorie.polarity)
    print("")

All the appetizers and salads were fabulous, the steak was mouth watering and the pasta was delicious!!!
food positive

And really large portions.
food positive

Go inside and you won't want to leave.
anecdotes/miscellaneous positive

Save yourself the time and trouble and skip this one!
anecdotes/miscellaneous negative

The sweet lassi was excellent as was the lamb chettinad and the garlic naan but the rasamalai was forgettable.
food conflict

Service was quick.
service positive

Oh, don't even let me start with how expensive the bills were!
price negative

Service is top notch.
service positive

The best thing I tasted were the lambc hops.
food positive

Overall I would recommend it and go back again.
anecdotes/miscellaneous positive



## Load Pre-trained Embedding Vectors from Glove (little dataset)

In [4]:
embeddings_dict = {}
embedding_dim=50
with open("embeddings/glove.6B."+ str(embedding_dim) +"d.txt", 'r', encoding="utf-8") as f:
    for line in f:
        values = line.split()
        word = values[0]
        vector = np.asarray(values[1:], "float32")
        embeddings_dict[word] = vector

## Exemple d'embedding

In [5]:
print(embeddings_dict['cat'])

[ 0.45281  -0.50108  -0.53714  -0.015697  0.22191   0.54602  -0.67301
 -0.6891    0.63493  -0.19726   0.33685   0.7735    0.90094   0.38488
  0.38367   0.2657   -0.08057   0.61089  -1.2894   -0.22313  -0.61578
  0.21697   0.35614   0.44499   0.60885  -1.1633   -1.1579    0.36118
  0.10466  -0.78325   1.4352    0.18629  -0.26112   0.83275  -0.23123
  0.32481   0.14485  -0.44552   0.33497  -0.95946  -0.097479  0.48138
 -0.43352   0.69455   0.91043  -0.28173   0.41637  -1.2609    0.71278
  0.23782 ]


### Prepare glove embeddings for pytorch

In [6]:
# Get training vocab length
tknzr = TweetTokenizer() # Use tweetTokenizer because of the internet review style
vocab = {}
vocab[''] = 0 # Padding
vocab['oov'] = 1 # out of Vocabulary
for sentence in corpus.corpus:
    for word in tknzr.tokenize(sentence.text):
        if word not in vocab:
            vocab[word] = len(vocab)

# Prepare embeddings vocab matrix
pretrained_embeds = np.zeros((len(vocab), embedding_dim))
oov_embed = np.random.normal(scale=0.5, size=(embedding_dim, ))
for i, word in enumerate(vocab):
    try: 
        pretrained_embeds[i] = embeddings_dict[word]
    except KeyError:
        pretrained_embeds[i] = oov_embed

## Aspects Extractions

In [20]:
# Get training aspects
aspects = {}
for sentence in corpus.corpus:     
    for aspect in sentence.aspect_categories:
        if aspect.term not in aspects:
            aspects[aspect.term] = len(aspects)

## Function to transform polarity to number

In [7]:
def to_polarity(polarity):
    if polarity == 'positive':
        return 0
    elif polarity == 'negative':
        return 1
    elif polarity == 'conflict':
        return 2
    else:
        return 3

## Transform dataset to tensor dataset

In [53]:
def to_tensordataset(corpus, dictionary, aspects, batch_size):
    res = []
    for sentence in corpus.corpus:     
        for aspect in sentence.aspect_categories:
            res.append(([dictionary[word] if word in dictionary else 1 for word in tknzr.tokenize(sentence.text)], aspects[aspect.term], to_polarity(aspect.polarity)))
    random.shuffle(res)
    res = [res[x:x+batch_size] for x in range(0, len(res), batch_size)]
    batched_result = []
    for batch in res:
        a,b,c = list(map(list, zip(*batch)))
        max_len = 0
        for elt in a:
            if len(elt) > max_len:
                max_len = len(elt)
        
        a = torch.tensor([list(mit.padded(elt, 0, max_len)) for elt in a], dtype=torch.long)
        b = torch.tensor(b, dtype=torch.long).flatten()
        c = torch.tensor(c, dtype=torch.int).flatten()
        batched_result.append((a,b,c))
    return batched_result

## D-AT-GRU Model

In [120]:
class D_AT_GRU(nn.Module):
    def __init__(self, pretrained_embeds, aspects_number):
        super(D_AT_GRU, self).__init__() 
        hidden_size=300
        num_layers=1
        bias=True
        batch_first=True
        aspect_size=300
        vocab_size, embed_dim = pretrained_embeds.shape
        
        # Load Glove embeddings
        self.word_embeddings = nn.Embedding(vocab_size, embed_dim)
        self.word_embeddings.load_state_dict({'weight': torch.FloatTensor(pretrained_embeds)})

        # Init GRU 
        self.gru = nn.GRU(input_size= embed_dim, hidden_size= hidden_size,  
                          num_layers= num_layers, bias= bias, 
                          batch_first= batch_first, bidirectional=False)

        # Init random aspects embeddings
        self.aspect_embeddings = nn.Embedding(aspects_number, aspect_size)
        self.aspect_embeddings.load_state_dict({'weight': self.init_random(aspects_number, aspect_size)})

        self.softmax = nn.Softmax(dim=1)

    def forward(self, sentence, aspect):
        word_embedding = self.word_embeddings(sentence)    
        hidden_state, _ = self.gru(word_embedding)
        aspect_embedding = self.aspect_embeddings(aspect)

        # Resize aspect embedding for sentence length in batch
        aspect_embedding = aspect_embedding.unsqueeze_(1)
        aspect_embedding = aspect_embedding.expand(-1, sentence[0].size()[0], -1) 

        hidden_aspect = torch.cat((hidden_state, aspect_embedding), dim=2)
        return self.softmax(hidden_aspect)
    
    def init_random(self, x_size, ysize):
        empty = torch.empty(x_size, ysize, dtype=torch.float32, requires_grad=True)
        return nn.init.normal_(empty, mean=0, std=0.01)

## Training cycle

In [121]:
def train(model, dataset, lr, num_epochs):
    optimizer = torch.optim.Adagrad(model.parameters(), lr=lr)
    model.train()
    losses_average = []    
    for epoch in range(1,num_epochs+1):       
        # Remise à zéro des gradients
        optimizer.zero_grad()
        batch_idx = 0
        epoch_losses = []
        for data, aspect, target in dataset:
            batch_idx += 1
            # Get Samples
            data, aspect, target = Variable(data), Variable(aspect), Variable(target)
            
            # Prédiction
            pred = model(data, aspect)
            print(pred)

            # Calculer la cross_entropy loss
            criterion = nn.CrossEntropyLoss()
            loss = criterion(pred, target)

            # Sauvegarde des losses pour affichage
            epoch_losses.append(loss.data.item())

            # Backpropagation
            loss.backward(retain_graph=True)
            optimizer.step()
                      
            # Affichage
            print('\r Train Epoch: {} [{}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(epoch,batch_idx * len(data),len(dataset)*len(data),100. * batch_idx / len(dataset), np.average(epoch_losses)), end='')
        print()
        losses_average.append(np.average(epoch_losses))
    print("Evolution of average losse for each epoch:")
    plot(losses_average)
    plt.show()
    return model

In [122]:
train(D_AT_GRU(pretrained_embeds, len(aspects)), to_tensordataset(corpus, vocab, aspects, 10), 0.01, 20)

torch.Size([10, 23, 300])
tensor([[[0.0430, 0.0431, 0.0363,  ..., 0.0435, 0.0435, 0.0435],
         [0.0487, 0.0435, 0.0403,  ..., 0.0435, 0.0435, 0.0435],
         [0.0458, 0.0461, 0.0440,  ..., 0.0435, 0.0435, 0.0435],
         ...,
         [0.0423, 0.0474, 0.0328,  ..., 0.0435, 0.0435, 0.0435],
         [0.0424, 0.0475, 0.0328,  ..., 0.0435, 0.0435, 0.0435],
         [0.0424, 0.0476, 0.0328,  ..., 0.0435, 0.0435, 0.0435]],

        [[0.0443, 0.0426, 0.0385,  ..., 0.0435, 0.0435, 0.0435],
         [0.0427, 0.0388, 0.0481,  ..., 0.0435, 0.0435, 0.0435],
         [0.0457, 0.0417, 0.0477,  ..., 0.0435, 0.0435, 0.0435],
         ...,
         [0.0436, 0.0470, 0.0349,  ..., 0.0435, 0.0435, 0.0435],
         [0.0436, 0.0470, 0.0349,  ..., 0.0435, 0.0435, 0.0435],
         [0.0436, 0.0471, 0.0349,  ..., 0.0435, 0.0435, 0.0435]],

        [[0.0453, 0.0426, 0.0395,  ..., 0.0435, 0.0435, 0.0435],
         [0.0453, 0.0448, 0.0373,  ..., 0.0435, 0.0435, 0.0435],
         [0.0416, 0.0427, 0.0467

ValueError: Expected target size (10, 600), got torch.Size([10])

In [21]:
aspects

{'food': 0,
 'anecdotes/miscellaneous': 1,
 'service': 2,
 'price': 3,
 'ambience': 4}