In [1]:
from keras.datasets import imdb
import numpy as np
from keras import models
from keras.layers import  Dense
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt


In [2]:
# we are using 10000 words which occurs more frequently
((XT,YT),(Xt,Yt)) = imdb.load_data(num_words=10000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [3]:
len(XT)

25000

In [4]:
len(Xt)

25000

In [5]:
print(XT[1])

[1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5, 647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 2, 4, 1153, 9, 194, 775, 7, 8255, 2, 349, 2637, 148, 605, 2, 8003, 15, 123, 125, 68, 2, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 2, 1157, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 2, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 2, 32, 7464, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]


In [6]:
 word_index = imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [8]:
#print(word_index.items())

In [9]:
# WE create reverse dictionary which maps indexes with words.
index_word = dict([value,key] for [key,value] in word_index.items())

In [10]:
print(index_word)



In [12]:
actual_reveiw = ' '.join([index_word.get(word_index -3,'?') for  word_index in XT[0]])

In [13]:
print(actual_reveiw)

? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you thi

In [14]:
#vectorizing 
#we are mapping the words with 1 and 0
def vectorize_sentences(sentences,dim=10000):
  outputs = np.zeros((len(sentences),dim))
  for i,word_index in enumerate(sentences):
    outputs[i,word_index] = 1
  return outputs



In [15]:
X_train =  vectorize_sentences(XT)
X_test = vectorize_sentences(Xt)

In [16]:
print(X_train.shape)

(25000, 10000)


In [17]:
print(X_test.shape)

(25000, 10000)


In [18]:
print(X_train[1])

[0. 1. 1. ... 0. 0. 0.]


In [19]:
y_train = np.asarray(YT).astype('float32')

In [21]:
y_test = np.asarray(Yt).astype('float32')

In [22]:
model = models.Sequential()
model.add(Dense(units=4096,activation="relu",input_shape = (10000,)))
model.add(Dense(units=4096,activation="relu"))
model.add(Dense(64,activation='relu'))
model.add(Dense(32,activation='relu'))
model.add(Dense(16,activation='relu'))
model.add(Dense(1,activation= 'sigmoid'))

In [23]:
model.compile(optimizer='rmsprop',loss ='binary_crossentropy',metrics=['accuracy'])

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4096)              40964096  
_________________________________________________________________
dense_1 (Dense)              (None, 4096)              16781312  
_________________________________________________________________
dense_2 (Dense)              (None, 64)                262208    
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 17        
Total params: 58,010,241
Trainable params: 58,010,241
Non-trainable params: 0
____________________________________________

In [25]:
#to avoid overfitting we use     early stooping
X_val = X_train[:5000]
X_train_new = X_train[5000:]
y_val = y_train[:5000]
y_train_new = y_train[5000:]

In [26]:
checkpoint = ModelCheckpoint("sentimental_analysis.h1",
                             monitor="val_loss",
                             mode="min",
                             save_best_only = True,
                             verbose=1)

earlystop = EarlyStopping(monitor = 'val_loss', 
                          min_delta = 0, 
                          patience = 3,
                          verbose = 1,
                          restore_best_weights = True)
callbacks = [earlystop, checkpoint]
history = model.fit(X_train_new,y_train_new,epochs=10,batch_size=512,callbacks = callbacks,validation_data=(X_val,y_val))

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.28271, saving model to sentimental_analysis.h1
INFO:tensorflow:Assets written to: sentimental_analysis.h1/assets
Epoch 2/10

Epoch 00002: val_loss improved from 0.28271 to 0.27895, saving model to sentimental_analysis.h1
INFO:tensorflow:Assets written to: sentimental_analysis.h1/assets
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.27895
Epoch 4/10

Epoch 00004: val_loss did not improve from 0.27895
Epoch 5/10
Restoring model weights from the end of the best epoch.

Epoch 00005: val_loss did not improve from 0.27895
Epoch 00005: early stopping


In [27]:
import matplotlib.pyplot as plt
h =history.history

In [28]:
model.evaluate(X_test,y_test)



[0.2969638407230377, 0.8705999851226807]

In [29]:
model.evaluate(X_train,y_train)



[0.14691179990768433, 0.944320023059845]

In [30]:
h

{'accuracy': [0.7330999970436096,
  0.8923500180244446,
  0.9392499923706055,
  0.9702500104904175,
  0.9801999926567078],
 'loss': [0.686073362827301,
  0.2829414904117584,
  0.16899336874485016,
  0.0931553989648819,
  0.10930561274290085],
 'val_accuracy': [0.8845999836921692,
  0.8830000162124634,
  0.8899999856948853,
  0.8880000114440918,
  0.8920000195503235],
 'val_loss': [0.2827051877975464,
  0.2789526581764221,
  0.3509962856769562,
  0.378332257270813,
  0.3698258399963379]}

In [31]:
# output is greater than .7 consider as positive rating
#output is less than .7 consider as negative  rating.
model.predict(X_test)

array([[0.09938464],
       [0.9995492 ],
       [0.99406743],
       ...,
       [0.07264784],
       [0.08007413],
       [0.3054911 ]], dtype=float32)