# **`Basic Sentiment Analysis using RNN (embedding)`**

In [11]:
# Toy corpus: ten short phrases used to demonstrate tokenisation, padding
# and the embedding layer in the cells that follow.
docs = [
    'go india',
    'india india',
    'hip hip hurray',
    'jeetega bhai jeetega india jeetega',
    'bharat mata ki jai',
    'kohli kohli',
    'sachin sachin',
    'dhoni dhoni',
    'modi ji ki jai',
    'inquilab zindabad',
]

In [12]:
from tensorflow.keras.preprocessing.text import Tokenizer
# Tokenizer created with default arguments; it is fitted on `docs` in the
# next cell and then used to turn each phrase into a list of integer ids.
tokenizer = Tokenizer()

In [13]:
# Learn the vocabulary of the toy corpus; afterwards tokenizer.word_index
# maps every distinct word in `docs` to an integer id.
tokenizer.fit_on_texts(docs)

In [14]:
# Number of distinct words the tokenizer indexed (17 for this corpus).
len(tokenizer.word_index)

17

In [15]:
# Replace every word with its integer id from tokenizer.word_index.
# The result is a list of lists whose lengths differ from phrase to phrase.
sequences = tokenizer.texts_to_sequences(docs)
sequences

[[9, 1],
 [1, 1],
 [3, 3, 10],
 [2, 11, 2, 1, 2],
 [12, 13, 4, 5],
 [6, 6],
 [7, 7],
 [8, 8],
 [14, 15, 4, 5],
 [16, 17]]

In [16]:
from keras.utils import pad_sequences
# padding='post' appends zeros so that every row is as long as the longest
# phrase (5 ids here).
# NOTE: `sequences` is rebound from the ragged list to a 2-D int32 array, so
# every later cell sees only the padded version.
sequences = pad_sequences(sequences, padding='post')
sequences

array([[ 9,  1,  0,  0,  0],
       [ 1,  1,  0,  0,  0],
       [ 3,  3, 10,  0,  0],
       [ 2, 11,  2,  1,  2],
       [12, 13,  4,  5,  0],
       [ 6,  6,  0,  0,  0],
       [ 7,  7,  0,  0,  0],
       [ 8,  8,  0,  0,  0],
       [14, 15,  4,  5,  0],
       [16, 17,  0,  0,  0]], dtype=int32)

Constructing the RNN model

In [17]:
from keras import Sequential
from keras.layers import Dense,SimpleRNN,Embedding,Flatten

In [21]:
# Size the layers from the data instead of hard-coding 18 and 5.
# Word ids start at 1 and id 0 is used for padding, so the embedding table
# needs one more row than there are words in the tokenizer's vocabulary.
vocab_size = len(tokenizer.word_index) + 1
seq_len = sequences.shape[1]  # width of the padded id matrix

model = Sequential()
# `input_length` is deprecated in Keras 3; the input shape is supplied through
# model.build() below instead.
model.add(Embedding(input_dim=vocab_size, output_dim=2))
# Flatten turns the (seq_len, 2) embedding output into one vector so that the
# Dense layer can consume it (a SimpleRNN layer could be used here instead).
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # single probability output
# Build explicitly so that summary() can report output shapes and parameter counts.
model.build(input_shape=(None, seq_len))

model.summary()

In [22]:
# Pass everything by keyword: compile()'s second positional parameter is `loss`,
# so a bare 'accuracy' in that slot is treated as the loss function rather than
# as a metric. Binary cross-entropy matches the single sigmoid output unit.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [23]:
# No fit() call precedes this cell, so these are the outputs of the freshly
# initialised network; the printed values all sit close to 0.5.
pred = model.predict(sequences)
print(pred)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 342ms/step
[[0.4879327 ]
 [0.4949233 ]
 [0.5074343 ]
 [0.5080404 ]
 [0.4850578 ]
 [0.5111327 ]
 [0.51534903]
 [0.5160035 ]
 [0.48814145]
 [0.52347666]]


## Project start: IMDB review sentiment classification with a SimpleRNN

In [25]:
from keras.datasets import imdb
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

In [33]:
# Load the IMDB reviews as sequences of integer word ids.
# num_words=10000 keeps only the 10,000 most frequent words, which is the
# vocabulary size the Embedding layer's input_dim has to cover.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

In [34]:
# Bring every review to exactly 50 ids: shorter reviews get trailing zeros
# (padding='post'); longer reviews are truncated to maxlen (pad_sequences'
# default truncation removes ids from the start of the review).
pad_options = {'padding': 'post', 'maxlen': 50}
X_train, X_test = (
    pad_sequences(split, **pad_options) for split in (X_train, X_test)
)

In [35]:
# Sanity check after padding: one row per training review, 50 columns.
X_train.shape

(25000, 50)

In [37]:
# NOTE(review): CORRECT_VOCAB_SIZE is not read by any visible cell and does not
# match the num_words=10000 used when loading the data; it is kept only in case
# a later cell refers to it.
CORRECT_VOCAB_SIZE = 10881
VOCAB_SIZE = 10000  # must equal num_words passed to imdb.load_data
seq_len = X_train.shape[1]  # review length after padding (50)

model = Sequential()
# `input_length` is deprecated in Keras 3; the input shape is supplied through
# model.build() below instead.
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=2))
# return_sequences=False: only the final hidden state is passed on.
model.add(SimpleRNN(32, return_sequences=False))
model.add(Dense(1, activation='sigmoid'))  # probability of the positive class
# Build explicitly so that summary() can report output shapes and parameter counts.
model.build(input_shape=(None, seq_len))

model.summary()

In [38]:
# Binary classification set-up: cross-entropy loss on the sigmoid output,
# Adam optimiser, accuracy tracked under the name 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)
# The test split doubles as validation data here, so the val_* figures logged
# after each epoch are test-set figures.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=5,
)

Epoch 1/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 17s 16ms/step - acc: 0.5196 - loss: 0.6901 - val_acc: 0.5604 - val_loss: 0.6757
Epoch 2/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 13s 16ms/step - acc: 0.7371 - loss: 0.5194 - val_acc: 0.7903 - val_loss: 0.4540
Epoch 3/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 16s 21ms/step - acc: 0.8435 - loss: 0.3649 - val_acc: 0.8053 - val_loss: 0.4447
Epoch 4/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 20s 25ms/step - acc: 0.8783 - loss: 0.3008 - val_acc: 0.8071 - val_loss: 0.4424
Epoch 5/5
782/782 ━━━━━━━━━━━━━━━━━━━━ 15s 19ms/step - acc: 0.9024 - loss: 0.2552 - val_acc: 0.7938 - val_loss: 0.4684
