# Tannic_Sentiments

#### Based on https://colab.research.google.com/drive/1OlQpHdZD7zVyZW56r8vI-L8BYylq_Umm#scrollTo=XhAOS8BY0jC2

## A sentiment classifier that has been trained on wine reviews.

In [2]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Softmax, Embedding, GlobalMaxPool1D, GlobalAvgPool1D, Concatenate, Bidirectional, SpatialDropout1D

Using TensorFlow backend.


In [3]:
# Load the ranked wine reviews and discard every row that has a missing value.
reviews_csv = 'wine_data/tipsy_sentiments/ranked_wine.csv'
wine_df = pd.read_csv(reviews_csv).dropna()
# Show seven randomly chosen rows as a quick look at the data.
wine_df.sample(n=7)

Unnamed: 0.1,Unnamed: 0,description,points,sentiment
85887,7931,This ripe and structured wine is poised betwee...,90,1
116649,35363,The Estates Reserve is a barrel selection comb...,92,1
96848,105332,This lightly colored; 100% varietal wine offer...,91,1
29873,102570,Jammy aromas of raspberry and cherry show a sl...,86,0
123081,38423,This is pure Riesling; very mineral; steely an...,93,1
20380,110770,It's not clear what separates this from some o...,85,0
3405,79985,Thin and simple; dry and sharp; this has modes...,83,0


In [4]:
# Disabled experiment (left commented out): would keep only the review text
# and the numeric score columns of wine_df and preview the first rows.
# trainable_df = wine_df[['description', 'points']]
# trainable_df.head()

In [5]:
# Word-level tokenizer with no vocabulary cap (num_words=None is the default),
# so every word seen during fitting receives its own integer index.
tokenizer = Tokenizer(num_words=None)

In [19]:
# Build the word -> integer-index vocabulary from every review description.
tokenizer.fit_on_texts(wine_df.description)
# Print the tokenizer object itself (its repr); the dangling attribute access
# that used to follow `tokenizer` made this cell a SyntaxError.
print(tokenizer)

<keras_preprocessing.text.Tokenizer object at 0x14ae163d0>


In [7]:
# Sanity check: encode a single sentence into vocabulary indices.
sample_sentence = 'I need a test string, and this will do.'
tokenizer.texts_to_sequences([sample_sentence])

[[2340, 606, 3, 6939, 4521, 1, 6, 87, 834]]

In [8]:
# Encode two short sentences, then pad each one to a fixed length of 50
# (the printed result shows the zeros are added at the front).
sample_sequences = tokenizer.texts_to_sequences(["I need a test string", "This will do."])
print(pad_sequences(sample_sequences, maxlen=50))

[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0 2340  606    3 6939 4521]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    6   87  834]]


In [9]:
# Features: every review description encoded as indices and padded/truncated to 50 tokens.
encoded_descriptions = tokenizer.texts_to_sequences(wine_df.description)
x = pad_sequences(encoded_descriptions, maxlen=50)
# Targets: the integer sentiment label column.
y = wine_df['sentiment']

In [10]:
# Make sure both the features and the labels are plain NumPy arrays before training.
x, y = np.asarray(x), np.asarray(y)

In [11]:
# Show the padded index matrix; NumPy abbreviates large arrays with "...".
print(x)

[[   0    0    0 ...  208  154  703]
 [   0    0    0 ...   14    2   19]
 [   0    0    0 ...  106 1312 3426]
 ...
 [ 161   10  970 ...   23   20  358]
 [4375   11  570 ...  341 2203   93]
 [1362 3536    2 ...    3 4047    8]]


In [13]:
# Disabled check: y was converted with np.asarray in an earlier cell, and NumPy
# arrays have no .sample() method, so this would fail if re-enabled as written
# (presumably left over from when y was still a pandas Series).
# y.sample(3)

In [14]:
# Bidirectional-GRU classifier over reviews padded to 50 token indices.
# The tensor is called `inputs` so the builtin input() is not shadowed.
inputs = Input((50,), name='input')

# Keras Tokenizer word indices start at 1 and index 0 is the padding value, so
# the embedding table needs len(word_index) + 1 rows. With only len(word_index)
# rows the highest word index falls outside the table.
vocab_size = len(tokenizer.word_index) + 1
embed = Embedding(vocab_size, 100)(inputs)

# SpatialDropout1D(0.5) regularises the 100-d embeddings before the recurrent layer.
embed_dropout = SpatialDropout1D(0.5)(embed)
rnn = Bidirectional(GRU(50, return_sequences=True, recurrent_dropout=0.2))(embed_dropout)

# Summarise the per-timestep GRU outputs with both max and average pooling,
# then concatenate the two summaries.
maxed = GlobalMaxPool1D()(rnn)
avged = GlobalAvgPool1D()(rnn)
concat = Concatenate()([maxed, avged])

# NOTE(review): three softmax outputs, but the sampled sentiment labels shown
# in this notebook are only 0 and 1 — confirm the intended number of classes.
dense = Dense(3, activation='softmax')(concat)

model = Model(inputs, dense)

In [15]:
# Adam optimiser with a sparse categorical loss/metric, so the labels can stay
# as integer class ids rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [16]:
# Train for 3 epochs in batches of 512, holding out 20% of the samples for validation.
model.fit(
    x,
    y,
    epochs=3,
    batch_size=512,
    validation_split=0.2,
)

Train on 103972 samples, validate on 25994 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x14a1ba9d0>

In [17]:
# Smoke test: run a handful of short phrases through the same
# tokenise -> pad(50) pipeline used for training and inspect the class probabilities.
sample_phrases = ["Hello", 'This is the worst', 'This has depth', 'There is feeling', 'please work']
sample_batch = pad_sequences(tokenizer.texts_to_sequences(sample_phrases), maxlen=50)
test = model.predict(sample_batch)
test

array([[9.7178751e-01, 2.6949944e-02, 1.2624608e-03],
       [9.9002576e-01, 9.0078544e-03, 9.6633990e-04],
       [9.4289577e-01, 5.5846319e-02, 1.2578520e-03],
       [9.8226786e-01, 1.6616993e-02, 1.1151489e-03],
       [9.8141390e-01, 1.7609445e-02, 9.7665901e-04]], dtype=float32)