<a href="https://colab.research.google.com/github/IamAgranshRastogi/Neural_Network/blob/main/Implement_CNN_text_classifctn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import Embedding
from keras.preprocessing import sequence

In [None]:
# Vocabulary cap: the dictionary is limited to the 7000 most frequently
# occurring words; the value is passed to the loader as `num_words`.
top_words = 7000
# Load the IMDB reviews, which come back already split into a training pair
# and a test pair of (samples, labels).
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Peek at one training example: the first review (a list of integer word
# indices) on one line, followed by its label on the next.
print(X_train[0], y_train[0], sep='\n')

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
1


In [None]:
# Header line, then the shapes of the training samples and training labels,
# each on its own line.
print('Shape of training data: ', X_train.shape, y_train.shape, sep='\n')

Shape of training data: 
(25000,)
(25000,)


In [None]:
# Header line, then the shapes of the test samples and test labels,
# each on its own line.
print('Shape of test data: ', X_test.shape, y_test.shape, sep='\n')

Shape of test data: 
(25000,)
(25000,)


In [None]:
# Bring every review to one fixed length so the samples can be stacked into a
# single 2-D array for the network.
import keras

# Review length, in words, that every sample is padded or truncated to.
max_words = 450

# Apply the same padding (pad_sequences defaults) to both splits in one pass.
X_train, X_test = (
    keras.utils.pad_sequences(reviews, maxlen=max_words)
    for reviews in (X_train, X_test)
)

In [None]:
# Building the CNN model.
# Fix the random seeds before any layer is created so that weight
# initialisation and batch shuffling are repeatable from run to run
# (some GPU kernels may still be nondeterministic).
keras.utils.set_random_seed(42)
model = Sequential()      # initializing an empty Sequential (linear stack of layers) model

In [None]:
# Build the whole network inside this cell. Creating a fresh Sequential here
# (instead of calling .add() on a model made in another cell) keeps the cell
# idempotent: re-running it no longer stacks a second copy of every layer on
# top of the existing ones.
model = Sequential([
    # Takes reviews of max_words (450) word indices drawn from the top_words
    # dictionary and outputs a 32-dimensional vector for each word.
    Embedding(top_words, 32, input_length=max_words),
    # 32 filters of width 3; 'same' padding keeps the sequence length at 450.
    Conv1D(32, 3, padding='same', activation='relu'),
    # Default pooling halves the sequence length (450 -> 225).
    MaxPooling1D(),
    # Flatten (225, 32) into a single 7200-element vector for the dense layers.
    Flatten(),
    Dense(250, activation='relu'),
    # Single sigmoid unit: probability-like score for the binary label.
    Dense(1, activation='sigmoid'),
])
# Binary classification setup; 'accuracy' is reported alongside the loss.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 450, 32)           224000    
                                                                 
 conv1d (Conv1D)             (None, 450, 32)           3104      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 225, 32)          0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 7200)              0         
                                                                 
 dense (Dense)               (None, 250)               1800250   
                                                                 
 dense_1 (Dense)             (None, 1)                 251       
                                                        

In [None]:
# Fit the model on the padded training reviews: 2 epochs, mini-batches of 128,
# and verbose=2 so that one summary line is logged per epoch.
# NOTE(review): validation_data is the same (X_test, y_test) split that the
# evaluation cell scores afterwards, so the final "test" accuracy simply repeats
# the last epoch's val_accuracy. Consider holding out part of the training data
# for validation (e.g. validation_split) if these metrics are ever used for
# tuning or early stopping.
# NOTE(review): the returned History object is not kept, so the per-epoch
# metrics cannot be inspected or plotted later; bind it to a name if needed.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=128, verbose=2)

Epoch 1/2
196/196 - 36s - loss: 0.5268 - accuracy: 0.6964 - val_loss: 0.3367 - val_accuracy: 0.8565 - 36s/epoch - 183ms/step
Epoch 2/2
196/196 - 37s - loss: 0.2250 - accuracy: 0.9138 - val_loss: 0.2659 - val_accuracy: 0.8891 - 37s/epoch - 188ms/step


<keras.callbacks.History at 0x7fa573d92650>

In [None]:
# Score the trained model on the test split without progress output.
# The next cell reads index 1 of the result as the accuracy.
scores = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=0,
)

In [None]:
# Report the test-set accuracy (second entry of `scores`) as a percentage
# with two decimal places.
print("Accuracy: %.2f%%" % (100 * scores[1],))

Accuracy: 88.91%
