# D-AT-GRU
## Imports

In [1]:
%matplotlib inline
import itertools
import math
import time
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data.dataloader as dataloader
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from semeval2014.semeval_base import *

## Hyperparameters

## Load Dataset

In [3]:
# Locations of the two SemEval-2014 restaurant XML files used by this notebook.
trainfile = "semeval2014/restaurants-trial.xml"
testfile = "semeval2014/Restaurants_Test_Data_PhaseA.xml"

# Wrap the <sentence> children of each XML root in a Corpus object.
train_sentences = ET.parse(trainfile).getroot().findall('sentence')
corpus = Corpus(train_sentences)
test_sentences = ET.parse(testfile).getroot().findall('sentence')
unseen = Corpus(test_sentences)

# Build the baseline extractor from the first corpus, tag the second one,
# and hand the result to write_out together with the output file name.
b1 = BaselineAspectExtractor(corpus)
predicted = b1.tag(unseen.corpus)
corpus.write_out('test.predicted-aspect.xml', predicted, short=False)

## Show the first 10 training sentences with their aspect categories

In [4]:
# Preview at most ten sentences: the sentence text, then one
# "<term> <polarity>" line per aspect category, then an empty line.
for sentence in itertools.islice(corpus.corpus, 10):
    print(sentence.text)
    for category in sentence.aspect_categories:
        print(category.term, category.polarity)
    print()

All the appetizers and salads were fabulous, the steak was mouth watering and the pasta was delicious!!!
food positive

And really large portions.
food positive

Go inside and you won't want to leave.
anecdotes/miscellaneous positive

Save yourself the time and trouble and skip this one!
anecdotes/miscellaneous negative

The sweet lassi was excellent as was the lamb chettinad and the garlic naan but the rasamalai was forgettable.
food conflict

Service was quick.
service positive

Oh, don't even let me start with how expensive the bills were!
price negative

Service is top notch.
service positive

The best thing I tasted were the lambc hops.
food positive

Overall I would recommend it and go back again.
anecdotes/miscellaneous positive



## Load pre-trained GloVe embedding vectors (small 50-dimensional set)

In [7]:
# Map each token of the embeddings file to its float32 vector.
embeddings_dict = {}
with open("embeddings/glove.6B.50d.txt", 'r', encoding="utf-8") as glove_file:
    for row in glove_file:
        fields = row.split()
        # The first whitespace-separated field is used as the key; the
        # remaining fields are converted to a float32 NumPy array.
        embeddings_dict[fields[0]] = np.asarray(fields[1:], "float32")

## Embedding example

In [8]:
# Print the vector stored under the key 'cat' as a quick check of the loaded embeddings.
print(embeddings_dict['cat'])

[ 0.45281  -0.50108  -0.53714  -0.015697  0.22191   0.54602  -0.67301
 -0.6891    0.63493  -0.19726   0.33685   0.7735    0.90094   0.38488
  0.38367   0.2657   -0.08057   0.61089  -1.2894   -0.22313  -0.61578
  0.21697   0.35614   0.44499   0.60885  -1.1633   -1.1579    0.36118
  0.10466  -0.78325   1.4352    0.18629  -0.26112   0.83275  -0.23123
  0.32481   0.14485  -0.44552   0.33497  -0.95946  -0.097479  0.48138
 -0.43352   0.69455   0.91043  -0.28173   0.41637  -1.2609    0.71278
  0.23782 ]


## D-AT-GRU Model

In [9]:
class D_AT_GRU(nn.Module):
    """Word-embedding layer followed by a unidirectional GRU and a softmax.

    The layer sizes used to be hard-coded inside ``__init__``; they are now
    keyword arguments whose defaults reproduce the previous values, so
    ``D_AT_GRU()`` builds exactly the same network as before.

    Args:
        vocab_size: Number of rows in the word-embedding table.
        embed_dim: Size of each word/aspect embedding vector; also the GRU
            input size (e.g. pass 50 to match 50-dimensional pre-trained
            vectors).
        hidden_size: Number of features in the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        aspect_size: Number of rows in the aspect-embedding table.
    """

    def __init__(self, vocab_size=300, embed_dim=300, hidden_size=300,
                 num_layers=1, aspect_size=300):
        super().__init__()
        # Modules are created in the same order as before so that, for a
        # given RNG seed, the initial weights are unchanged.
        self.word_embeddings = nn.Embedding(vocab_size, embed_dim)
        self.gru = nn.GRU(input_size=embed_dim, hidden_size=hidden_size,
                          num_layers=num_layers, bias=True,
                          batch_first=True, bidirectional=False)
        # NOTE(review): this table is created but not used by forward() yet.
        self.aspect_embeddings = nn.Embedding(aspect_size, embed_dim)
        # NOTE(review): softmax runs over dim 1; with batch_first=True and a
        # batched (batch, seq) input that is the sequence axis of the GRU
        # output, not the feature axis -- confirm this is intended.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Embed the token indices, run the GRU and softmax its outputs.

        Args:
            x: Integer tensor of word indices in ``[0, vocab_size)``;
               assumed to be shaped (batch, seq) -- TODO confirm with callers.

        Returns:
            The GRU output for every time step after ``Softmax(dim=1)``;
            for a (batch, seq) input the shape is (batch, seq, hidden_size).
        """
        embeds = self.word_embeddings(x)
        # The final hidden state returned by the GRU is not needed here.
        output, _ = self.gru(embeds)
        return self.softmax(output)