# Simile or Metaphor Classifer

### Import Statements

In [44]:
import pandas as pd
import numpy as np
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense , GRU
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam

In [45]:
data = pd.read_csv('data/fos.csv', sep='\s*,\s*', engine='python')
print(data.columns.tolist())

['sentence', 'labels']


In [46]:
np.random.seed(1)

fos_dict = {0.0: 'Metaphor', 1.0: 'Simile'}
x = data['sentence']
y = data['labels']
data.head()
#train_sentences, test_sentences, train_labels, test_labels = train_test_split(x, y, test_size=0.2)

Unnamed: 0,sentence,labels
0,he pleaded for her forgiveness but janetâ€™s h...,0
1,dreams like ghosts must hide away; / â€˜tis th...,1
2,the people who still lived in the town were st...,1
3,the sun is a golden ball.,0
4,what storms then shook the ocean of my sleep.,0


### Data Preprocessing

You want to get a `vocab size` so that you can make a dictionary of words. You do this because Neural Networks can not read text inputs so you convert them to an array on `n` numbers inorder to pipe them through our network. 

In [47]:
vocab_size = 1250
encoded_docs = [one_hot(d, vocab_size) for d in x]
list(encoded_docs)

[[402, 101, 541, 891, 1175, 624, 96, 980, 16, 67, 1073],
 [804, 734, 241, 997, 770, 174, 179, 1076, 476],
 [1076,
  293,
  1242,
  1067,
  1045,
  650,
  1076,
  487,
  267,
  991,
  650,
  1145,
  734,
  445,
  247],
 [1076, 19, 163, 303, 331, 840],
 [300, 361, 362, 452, 1076, 472, 976, 129, 493],
 [462,
  402,
  1159,
  1076,
  208,
  976,
  1076,
  71,
  402,
  1121,
  125,
  1223,
  125,
  303,
  108,
  630],
 [1076, 369, 167, 309, 550, 705, 363],
 [1196, 132, 879, 303, 1162, 541, 1076, 639, 976, 178],
 [976, 329, 163, 1076, 508, 976, 129, 804, 831, 918],
 [1076, 917, 1181, 16, 125, 115, 125, 303, 364],
 [983, 518, 1134, 962, 460, 711, 367, 636, 267, 1095, 125, 852, 214],
 [303, 918, 796, 637, 1027, 837, 967, 1076, 38, 734, 303, 1032, 976, 79],
 [1076, 341, 976, 763, 163, 651, 191, 1043, 343, 1076, 50, 191, 1175],
 [678, 101, 857, 828, 269, 1210, 592, 997],
 [1076, 461, 1065, 1176, 1076, 340, 976, 1076, 257, 997, 1007, 24, 831, 396],
 [230, 881, 16, 734, 1076, 743, 19, 125, 236, 83

Now we pad the `word_tokens` in order to have a uniform `input size` for the Neural Network. I found the max length of the words in a given sentence and chose that as the max token. Padded zeros in the begning but you can chose to padded them in the end as well, it's just prefrence.

In [48]:
max_tokens = 20
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='pre')
list(padded_docs)

[array([   0,    0,    0,    0,    0,    0,    0,    0,    0,  402,  101,
         541,  891, 1175,  624,   96,  980,   16,   67, 1073]),
 array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         804,  734,  241,  997,  770,  174,  179, 1076,  476]),
 array([   0,    0,    0,    0,    0, 1076,  293, 1242, 1067, 1045,  650,
        1076,  487,  267,  991,  650, 1145,  734,  445,  247]),
 array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0, 1076,   19,  163,  303,  331,  840]),
 array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         300,  361,  362,  452, 1076,  472,  976,  129,  493]),
 array([   0,    0,    0,    0,  462,  402, 1159, 1076,  208,  976, 1076,
          71,  402, 1121,  125, 1223,  125,  303,  108,  630]),
 array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0, 1076,  369,  167,  309,  550,  705,  363]),
 array([   0,    0,    0,    0,   

## Creating the Model

`Gated Recurrent Units` GRUs are a gating mechanism in recurrent neural networks, introduced in 2014 by Kyunghyun Cho. Their performance on polyphonic music modeling and speech signal modeling was found to be similar to that of `Long Short-term Memory` LSTM. However, GRUs have been shown to exhibit better performance on smaller datasets. They have fewer parameters than LSTM, as they lack an output gate.

In [49]:
model = Sequential()
embedding_size = 8
model.add(Embedding(input_dim=vocab_size,
                    output_dim=embedding_size,
                    input_length=max_tokens,
                    name='layer_embedding'))
model.add(GRU(units=16, name = "gru_1",return_sequences=True))
model.add(GRU(units=8, name = "gru_2" ,return_sequences=True))
model.add(GRU(units=4, name= "gru_3"))
model.add(Dense(1, activation='sigmoid',name="dense_1"))
optimizer = Adam(lr=1e-4)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer_embedding (Embedding)  (None, 20, 8)             10000     
_________________________________________________________________
gru_1 (GRU)                  (None, 20, 16)            1200      
_________________________________________________________________
gru_2 (GRU)                  (None, 20, 8)             600       
_________________________________________________________________
gru_3 (GRU)                  (None, 4)                 156       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 5         
Total params: 11,961
Trainable params: 11,961
Non-trainable params: 0
_________________________________________________________________


In [52]:
model.fit(padded_docs, y, validation_split=0.8, epochs=100, verbose=1)

Train on 59 samples, validate on 240 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100


Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x2a761f099b0>

## Prediction
Predictions might be a little off, im waiting to scrape more data and to change this to a classification problem with all figure of speeches.

In [57]:
txt = ["you are as bright as a button","love is a battlefield","Tastes Like Awesome Feels","She was fishing for compliments"]
token_txt = [one_hot(d, vocab_size) for d in txt]
padded_txt = pad_sequences(token_txt, maxlen=max_tokens, padding='pre')
print(padded_txt)
pred = model.predict(padded_txt)
print('\n Prediction:',pred[:,0])

[[   0    0    0    0    0    0    0    0    0    0    0    0    0  425
   550  125  488  125  303  760]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0  337  163  303   11]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0 1063  734 1071  516]
 [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0   63   16 1015  541  148]]

 Prediction: [ 0.53878784  0.11134189  0.14806707  0.12144646]
