# Tannic_Sentiments

#### Based on https://colab.research.google.com/drive/1OlQpHdZD7zVyZW56r8vI-L8BYylq_Umm#scrollTo=XhAOS8BY0jC2

## A Sentiment Classifier that has been trained on wine reviews.

In [16]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Softmax, Embedding, GlobalMaxPool1D, GlobalAvgPool1D, Concatenate, Bidirectional, SpatialDropout1D

In [17]:
# Read the ranked wine reviews from disk, then discard rows with missing values.
wine_df = pd.read_csv('wine_data/tipsy_sentiments/ranked_wine.csv')
wine_df = wine_df.dropna()

# Preview seven randomly chosen rows as a sanity check.
wine_df.sample(n=7)

Unnamed: 0.1,Unnamed: 0,description,points,sentiment
96522,20552,Most Pinot Noirs would kill to have this depth...,91,1
16355,66779,Cut grass; crisp green pears; honeydew melon; ...,85,0
1405,79705,Generic; neutral aromas hint at apple and peac...,82,0
58206,65893,Leafy; earthy aromas of raisin; baked plum and...,88,1
40351,89634,A refreshing wine; with nicely ripened fruit a...,87,0
4682,50584,A decent effort; with notes of spice; brambly ...,83,0
40023,96101,Fresh green herbs and granite notes add a lean...,87,0


In [18]:
# trainable_df = wine_df[['description', 'points']]
# trainable_df.head()

In [19]:
# Keras text tokenizer, constructed with its default settings.
tokenizer = Tokenizer()

In [20]:
# Build the tokenizer's vocabulary from every review description.
tokenizer.fit_on_texts(wine_df['description'])


In [21]:
# Smoke test: encode one sentence with the fitted tokenizer and display the
# resulting list of integer word indices.
tokenizer.texts_to_sequences(['I need a test string, and this will do.'])

[[2340, 606, 3, 6939, 4521, 1, 6, 87, 834]]

In [22]:
# Encode two short sentences, then pad both to a fixed length of 50 so the
# shape of a model-ready batch can be inspected.
demo_sequences = tokenizer.texts_to_sequences(["I need a test string", "This will do."])
demo_padded = pad_sequences(demo_sequences, maxlen=50)
print(demo_padded)

[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0 2340  606    3 6939 4521]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    6   87  834]]


In [23]:
# Features: every description encoded as word indices and padded to length 50.
description_sequences = tokenizer.texts_to_sequences(wine_df['description'])
x = pad_sequences(description_sequences, maxlen=50)

# Labels: the sentiment column of the review table.
y = wine_df['sentiment']

In [24]:
# Make sure both the features and the labels are NumPy arrays before training.
x, y = np.asarray(x), np.asarray(y)

In [25]:
# Print the padded feature matrix as a quick visual check.
print(x)

[[   0    0    0 ...  208  154  703]
 [   0    0    0 ...   14    2   19]
 [   0    0    0 ...  106 1312 3426]
 ...
 [ 161   10  970 ...   23   20  358]
 [4375   11  570 ...  341 2203   93]
 [1362 3536    2 ...    3 4047    8]]


In [26]:
# y.sample(3)

In [27]:
# Text classifier: embedding -> spatial dropout -> bidirectional GRU ->
# concatenated max/average pooling over the sequence -> 3-way softmax.

# Called `inputs` so the Python built-in `input()` is not shadowed; the layer
# itself keeps its original name 'input'.
inputs = Input((50,), name='input')

# Keras Tokenizer word indices start at 1 (0 is what pad_sequences fills in),
# so the largest index equals len(word_index). The embedding table therefore
# needs len(word_index) + 1 rows; with one row fewer, the highest word index
# would fall outside the table.
vocab_size = len(tokenizer.word_index) + 1
embed = Embedding(vocab_size, 100)(inputs)
embed_dropout = SpatialDropout1D(0.5)(embed)

# return_sequences=True keeps the per-timestep outputs that the two pooling
# layers below reduce over.
rnn = Bidirectional(GRU(50, return_sequences=True, recurrent_dropout=0.2))(embed_dropout)
maxed = GlobalMaxPool1D()(rnn)
avged = GlobalAvgPool1D()(rnn)
concat = Concatenate()([maxed, avged])

# NOTE(review): 3 output units assumes the sentiment labels lie in {0, 1, 2}
# -- confirm against the data.
dense = Dense(3, activation='softmax')(concat)

model = Model(inputs, dense)

In [28]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and sparse categorical accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

In [31]:
# Train for 7 epochs in batches of 512, holding out 20% of the samples for
# validation.
model.fit(
    x,
    y,
    epochs=7,
    batch_size=512,
    validation_split=0.2,
)

Train on 103972 samples, validate on 25994 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<tensorflow.python.keras.callbacks.History at 0x149479190>

In [30]:
# Score a few ad-hoc phrases with the model; each row of the result holds the
# three class probabilities for the phrase at the same position.
sample_phrases = ["Hello", 'This is the worst', 'This has depth', 'There is feeling', 'please work']
sample_inputs = pad_sequences(tokenizer.texts_to_sequences(sample_phrases), maxlen=50)
test = model.predict(sample_inputs)
test

array([[9.5176369e-01, 4.7651261e-02, 5.8507896e-04],
       [9.8154092e-01, 1.8003672e-02, 4.5535169e-04],
       [8.8928872e-01, 1.1021998e-01, 4.9126853e-04],
       [9.7039366e-01, 2.9084759e-02, 5.2156980e-04],
       [9.6618104e-01, 3.3329200e-02, 4.8977329e-04]], dtype=float32)