## Recurrent Neural Networks

~Sentiment analysis

~Character/text generation

In [1]:
# Bag of words

vocab = {}
word_encoding = 1
def bag_of_words(text):
    global word_encoding
    
    words = text.lower().split(" ")
    bag = {}
    for word in words:
        if word in vocab:
            encoding = vocab[word]
        else:
            # print(word)
            vocab[word] = word_encoding
            encoding = word_encoding
            word_encoding +=1 
            
        if encoding in bag:
            bag[encoding] += 1
        else:
            bag[encoding] = 1
            
    return bag

text = "this is a bag please use this bag carefully"

bag = bag_of_words(text)
print(bag)

print(vocab)
            

{1: 2, 2: 1, 3: 1, 4: 2, 5: 1, 6: 1, 7: 1}
{'this': 1, 'is': 2, 'a': 3, 'bag': 4, 'please': 5, 'use': 6, 'carefully': 7}


In [2]:
import tensorflow as tf
from tensorflow import keras
from keras.datasets import imdb
from keras.preprocessing import sequence
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
VOCAB_SIZE = 88584

MAXLEN = 250
BATCH_SIZE = 64

(train_data, train_labels), (test_data,test_labels)  = imdb.load_data(num_words=VOCAB_SIZE)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [6]:
print(len(train_data[0]))
print(len(train_data[1]))

218
189


In [10]:
# left padding
train_data = sequence.pad_sequences(train_data, MAXLEN)
test_data = sequence.pad_sequences(test_data, MAXLEN)

In [8]:
print(len(train_data[0]))
print(len(train_data[1]))

250
250


In [13]:
model = keras.Sequential([
    keras.layers.Embedding(VOCAB_SIZE,32), # creats vectors of 32 dimentions for every word 
    keras.layers.LSTM(32),
    keras.layers.Dense(1,activation="sigmoid")
])

In [14]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, None, 32)          2834688   
                                                                 
 lstm_2 (LSTM)               (None, 32)                8320      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2843041 (10.85 MB)
Trainable params: 2843041 (10.85 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=['accuracy'])

In [17]:
history = model.fit(train_data,train_labels,epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
results = model.evaluate(test_data,test_labels)
print(results)

[0.5384095907211304, 0.848800003528595]


In [20]:
word_index = imdb.get_word_index()

def encode_text(text):
    tokens = keras.preprocessing.text.text_to_word_sequence(text)
    tokens = [word_index[word] if word in word_index else 0 for word in tokens]
    return sequence.pad_sequences([tokens],MAXLEN)[0]

text = "that movie was just amazing, so amazing"
encoded = encode_text(text)

print(encoded)

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0  12  17  13  4

In [24]:
def predict(text):
    encoded_text = encode_text(text)
    pred = np.zeros((1,250))
    pred[0] = encoded_text
    result = model.predict(pred)
    print(result[0])
    
pos_rev = "that movie was awesome!, I really loved it and would watch it again because it was amazingly great"

predict(pos_rev)

neg_rev = "that movie sucked. I hated it and wouldn't watch it again"
predict(neg_rev)

[0.75298333]
[0.4619957]
