# Detecção de Sentimento em Análise de Produtos de Lojas Online

## Dados Utilizados

In [6]:
import os
import sys
import pandas as pd
import random
# Relative path of the folder that holds the raw CSV files read by the cells below.
raw_data = "../data/raw/"

In [72]:
# Read the complete review table (every column) and preview its first rows.
reviews = pd.read_csv(
    os.path.join(raw_data, "olist_order_reviews_dataset.csv")
)
reviews.head()

Unnamed: 0,review_id,order_id,review_score,review_comment_title,review_comment_message,review_creation_date,review_answer_timestamp
0,7bc2406110b926393aa56f80a40eba40,73fc7af87114b39712e6da79b0a377eb,4,,,2018-01-18 00:00:00,2018-01-18 21:46:59
1,80e641a11e56f04c1ad469d5645fdfde,a548910a1c6147796b98fdf73dbeba33,5,,,2018-03-10 00:00:00,2018-03-11 03:05:13
2,228ce5500dc1d8e020d8d1322874b6f0,f9e4b658b201a9f2ecdecbb34bed034b,5,,,2018-02-17 00:00:00,2018-02-18 14:36:24
3,e64fb393e7b32834bb789ff8bb30750e,658677c97b385a9be170737859d3511b,5,,Recebi bem antes do prazo estipulado.,2017-04-21 00:00:00,2017-04-21 22:02:06
4,f7c4243c7fe1938f181bec41a392bdeb,8e6bfb81e283fa7e4f11123a3fb894f1,5,,Parabéns lojas lannister adorei comprar pela I...,2018-03-01 00:00:00,2018-03-02 10:26:53


In [14]:
# Same table again, this time with every missing value replaced by an empty string.
reviews = pd.read_csv(
    os.path.join(raw_data, "olist_order_reviews_dataset.csv")
)
reviews = reviews.fillna("")
reviews.head()

In [15]:
# Keep only the score and the free-text comment; missing comments become "".
reviews = pd.read_csv(
    os.path.join(raw_data, "olist_order_reviews_dataset.csv"),
    usecols=["review_score", "review_comment_message"],
)
reviews = reviews.fillna("")
reviews.head()

In [16]:
# Load score + comment, blank out missing values, then drop every review
# whose comment is the empty string.
reviews = (
    pd.read_csv(
        os.path.join(raw_data, "olist_order_reviews_dataset.csv"),
        usecols=["review_score", "review_comment_message"],
    )
    .fillna("")
    .loc[lambda frame: frame.review_comment_message != ""]
)
reviews.head()

<b>Separação do Dataset</b>

In [17]:
# Stratified 80/20 split: comments are grouped by review score and 20% of each
# group is held out for testing, so both partitions keep the score distribution.
SPLIT_SEED = 42        # fixed seed so the partition is the same on every run
TEST_FRACTION = 0.2    # share of each score group that goes to the test set
split_rng = random.Random(SPLIT_SEED)  # private RNG; leaves the global `random` state untouched

# Group the distinct comments by score. Building a set of (score, comment)
# tuples first drops exact duplicate rows.
separated = dict()
for score, comment in set(map(tuple, reviews.values)):
    separated.setdefault(score, set()).add(comment)

train_rows, test_rows = [], []
for key in sorted(separated):
    # random.sample() requires a sequence (sets are rejected since Python 3.11).
    # Sorting also pins the order, which set iteration does not guarantee.
    candidates = sorted(separated[key])
    test_examples = split_rng.sample(candidates, k=int(len(candidates) * TEST_FRACTION))
    held_out = set(test_examples)  # O(1) membership instead of scanning a list
    train_examples = [x for x in candidates if x not in held_out]

    test_rows.extend([key, example] for example in test_examples)
    train_rows.extend([key, example] for example in train_examples)

# Shuffle the rows so examples are no longer ordered by score.
split_rng.shuffle(train_rows)
train = pd.DataFrame(train_rows, columns=["score", "message"])
split_rng.shuffle(test_rows)
test = pd.DataFrame(test_rows, columns=["score", "message"])

In [19]:
# Persist both partitions (train first, then test) as CSV files.
for frame, path in (
    (train, "../data/interim/train.csv"),
    (test, "../data/interim/test.csv"),
):
    frame.to_csv(path)

## Modelo de Rede Neural Recorrente (mLSTM)

In [20]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class mLSTM(nn.Module):
    """Multiplicative LSTM cell with an embedding front-end and a linear decoder.

    One call to ``forward`` advances the recurrence by a single step; the
    caller is responsible for looping over the sequence and threading the
    hidden and cell states through.

    Args:
        input_size: number of entries in the embedding table.
        hidden_size: width of the hidden and cell states.
        embed_size: width of each embedding vector.
        output_size: width of the decoder output.
    """

    def __init__(self, input_size, hidden_size, embed_size, output_size):
        super(mLSTM, self).__init__()

        self.hidden_size = hidden_size
        # input embedding
        self.encoder = nn.Embedding(input_size, embed_size)
        # lstm weights applied to the multiplicative state m_t
        self.weight_fm = nn.Linear(hidden_size, hidden_size)
        self.weight_im = nn.Linear(hidden_size, hidden_size)
        self.weight_cm = nn.Linear(hidden_size, hidden_size)
        self.weight_om = nn.Linear(hidden_size, hidden_size)
        # lstm weights applied to the embedded input
        self.weight_fx = nn.Linear(embed_size, hidden_size)
        self.weight_ix = nn.Linear(embed_size, hidden_size)
        self.weight_cx = nn.Linear(embed_size, hidden_size)
        self.weight_ox = nn.Linear(embed_size, hidden_size)
        # multiplicative weights
        self.weight_mh = nn.Linear(hidden_size, hidden_size)
        self.weight_mx = nn.Linear(embed_size, hidden_size)
        # decoder
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, inp, h_0, c_0):
        """Run one recurrence step.

        Args:
            inp: index tensor looked up in the embedding table.
            h_0: previous hidden state.
            c_0: previous cell state.

        Returns:
            Tuple ``(out, hx, cx)``: the decoder output of shape
            ``(1, output_size)``, the new hidden state and the new cell state.
        """
        # encode the input characters
        inp = self.encoder(inp)
        # calculate the multiplicative matrix
        m_t = self.weight_mx(inp) * self.weight_mh(h_0)
        # forget, input and output gates
        f_g = torch.sigmoid(self.weight_fx(inp) + self.weight_fm(m_t))
        i_g = torch.sigmoid(self.weight_ix(inp) + self.weight_im(m_t))
        o_g = torch.sigmoid(self.weight_ox(inp) + self.weight_om(m_t))
        # intermediate cell state
        c_tilda = torch.tanh(self.weight_cx(inp) + self.weight_cm(m_t))
        # current cell state
        cx = f_g * c_0 + i_g * c_tilda
        # hidden state
        hx = o_g * torch.tanh(cx)

        # The hidden state is flattened into a single row before decoding, so
        # it must hold exactly ``hidden_size`` values (one sequence at a time).
        out = self.decoder(hx.view(1, -1))

        return out, hx, cx

    def init_hidden(self):
        """Return zero-filled ``(h_0, c_0)`` states of shape ``(1, hidden_size)``.

        The states are created with the same device and dtype as the module's
        parameters, so the cell works wherever the module has been moved
        instead of always requiring a CUDA device.
        """
        reference = self.decoder.weight
        h_0 = reference.new_zeros(1, self.hidden_size)
        c_0 = reference.new_zeros(1, self.hidden_size)
        return h_0, c_0



In [23]:
# Converte uma string em um tensor de inteiros (um código de caractere por posição)
from unidecode import unidecode
import random

def char_tensor(string):
    """Encode ``string`` as a 1-D ``torch.long`` tensor of character codes.

    The text is first passed through ``unidecode`` and each resulting
    character is then mapped to ``ord(char)``. An empty string yields an
    empty tensor.

    Args:
        string: text to encode.

    Returns:
        Tensor of dtype ``torch.long`` with one entry per character of the
        transliterated text.
    """
    # NOTE(review): unidecode is expected to return plain ASCII, which keeps
    # every code below 128 — confirm against the embedding size in use.
    transliterated = unidecode(string)
    return torch.tensor([ord(char) for char in transliterated], dtype=torch.long)

# Sanity check: the non-ASCII "Ω" is transliterated before encoding, so it
# appears in the printed tensor as an ordinary ASCII code.
print(char_tensor('The omega (Ω) symbol\n'))  

tensor([ 84, 104, 101,  32, 111, 109, 101, 103,  97,  32,  40,  79,  41,  32,
        115, 121, 109,  98, 111, 108,  10])


## Modelo de Rede Neural Artificial

In [29]:
class Net(nn.Module):
    """Score classifier stacked on top of a pre-trained ``mLSTM`` encoder.

    The text is fed to the recurrent encoder one element at a time without
    tracking gradients. The final hidden and cell states are concatenated and
    passed through four fully connected layers that produce five scores.

    Args:
        embed_size: embedding width, also used as the vocabulary and output
            size of the encoder. Defaults to 128.
        hidden_size: width of the encoder's hidden and cell states. Defaults
            to 2048.
        rnn_weights: path of the encoder checkpoint to load, or ``None`` to
            keep the encoder's freshly initialised weights.
    """

    def __init__(self, embed_size=128, hidden_size=2048, rnn_weights="lstm_11.pth"):
        super(Net, self).__init__()
        # The sizes are explicit parameters: the previous version read them
        # from module-level globals, which raised NameError on a fresh kernel.
        self.rnn = mLSTM(embed_size, hidden_size, embed_size, embed_size).cuda()
        if rnn_weights is not None:
            self.rnn.load_state_dict(torch.load(rnn_weights))
        self.fc1 = nn.Linear(2 * hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 4)
        self.fc3 = nn.Linear(hidden_size // 4, hidden_size // 2)
        self.fc4 = nn.Linear(hidden_size // 2, 5)
        self.dropout = nn.Dropout(0.5)

    def forward(self, text_tensor):
        """Return a ``(1, 5)`` tensor of non-negative scores for ``text_tensor``.

        Args:
            text_tensor: sequence of indices; each element is passed to the
                encoder in order.
        """
        # The encoder only extracts features, so no gradients are recorded.
        with torch.no_grad():
            hidden, cell = self.rnn.init_hidden()
            for p in range(len(text_tensor)):
                _, hidden, cell = self.rnn(text_tensor[p], hidden, cell)

        # Final hidden and cell states side by side form the feature vector.
        x = torch.cat((hidden, cell), 0).view(1, -1)
        x = F.relu(self.dropout(self.fc1(x)))
        x = F.relu(self.dropout(self.fc2(x)))
        x = F.relu(self.dropout(self.fc3(x)))
        # The last ReLU clamps every score to >= 0; an all-zero row is possible.
        x = F.relu(self.fc4(x))
        return x

## Analisador Automático de Sentimento

In [54]:
import numpy as np

class SentimentDetector():
    """Wraps the trained ``Net`` and turns its five scores into a sentiment label."""

    def __init__(self):
        """Build the network, load the ``net_9.pth`` checkpoint and switch to eval mode."""
        # The previous version assigned embed_size = 128 and hidden_size = 2048
        # as locals here. Locals are not visible inside Net, so they had no
        # effect and were removed.
        self.net = Net()
        self.net.load_state_dict(torch.load("net_9.pth"))
        self.net.cuda()
        self.net.eval()

    def _getScore(self, prediction):
        """Return ``(score, grade)`` for the strongest entry of ``prediction[0]``.

        ``score`` is the 1-based position of the largest strictly positive
        value and ``grade`` is that value. If no value is greater than zero
        the result is ``(1, 0)``. On ties the earliest position wins.
        """
        hiscore, higrade = 1, 0
        for position, value in enumerate(prediction.detach()[0].tolist()):
            if value > higrade:
                hiscore, higrade = position + 1, value
        return hiscore, higrade

    def detectSentiment(self, text):
        """Classify ``text`` and return a human-readable verdict string.

        Returns ``"Not Conclusive!"`` when every score is zero, a string
        starting with ``"Positive!"`` when the best score is 4 or 5, and a
        string starting with ``"Negative!"`` otherwise. The raw prediction
        tensor is embedded in the positive and negative messages.
        """
        # Put the input on whatever device the network's parameters live on.
        device = next(self.net.parameters()).device
        tensor = char_tensor(text).to(device)
        # Pure inference: skip autograd bookkeeping, so the prediction embedded
        # in the returned string carries no grad_fn.
        with torch.no_grad():
            pred = self.net(tensor)

        score, grade = self._getScore(pred)
        if grade == 0:
            return "Not Conclusive!"

        if score >= 4:
            return "Positive! ({})".format(pred)
        return "Negative! ({})".format(pred)
        

In [55]:
# Build the detector once; construction loads the "net_9.pth" checkpoint and
# moves the network to the GPU.
detector = SentimentDetector()

In [69]:
# Long review that opens with a performance complaint and ends praising the
# game after a graphics-settings change.
detector.detectSentiment("Mano, tava quase rasgando meu c# na cerca e pedindo refound pq o jogo tava a 40 fps no médio e pesando dms no pc. Gráfico já tava meio feio e embaçadão dava nem p ver longe.. Até q resolvi coisa a primeira opção dos gráficos: Nível predefinido de qualidade [Equilibrado] Deixei assim, ele colocou uma config q ficou equilibrado performance e qualidade (AVA SERIO)? Agora o game ta liso lisoo e lindooo, os gráficos estão com mais qualidade e o game rodando de boinha :D Quando vocês mudarem por lá irão ver como vai ficar a configuração.. Tenta ai vocês q estão com o mesmo problema/sentimento que eu estava. Espero ter ajudado! Bjo! OBS: TA CARO P CARALHOU OAIEOIWIOOWOOOW Q ISSO '-' ")

"Positive! (tensor([[0.0000, 0.0000, 0.0000, 0.0000, 1.8127]], device='cuda:0',\n       grad_fn=<ReluBackward0>))"

In [70]:
# Only the opening complaint of the long review from the previous cell.
detector.detectSentiment("Mano, tava quase rasgando meu c# na cerca e pedindo refound pq o jogo tava a 40 fps no médio e pesando dms no pc. Gráfico já tava meio feio e embaçadão dava nem p ver longe..")

"Negative! (tensor([[0.5551, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',\n       grad_fn=<ReluBackward0>))"

In [71]:
# Review criticising a PC port and its conflicting keyboard shortcuts.
detector.detectSentiment("O jogo parece aqueles ports que a Konami faz com o PES pra PC. atalho de teclas que conflitam com atalhos do Steam e da nVidia.")

"Negative! (tensor([[0.6149, 0.0000, 0.0000, 0.0000, 0.0000]], device='cuda:0',\n       grad_fn=<ReluBackward0>))"

In [74]:
# Short review that pairs a positive exclamation with a complaint
# ("Incredible! It broke on the first day").
detector.detectSentiment("Incrível! Estragou no primeiro dia")

"Negative! (tensor([[0.4320, 0.0000, 0.0000, 0.0000, 0.2681]], device='cuda:0',\n       grad_fn=<ReluBackward0>))"