<a href="https://colab.research.google.com/github/Aparna156/Hybrid-LSTM-and-CNN-model/blob/main/Hybrid_LSTM_and_CNN_model_On_moviereviews.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import warnings

import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.datasets import imdb
from keras.layers import Dense,Conv1D,MaxPooling1D
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import LSTM,Dropout
from keras.models import Sequential
from keras.preprocessing import sequence
from prettytable import PrettyTable
from sklearn.model_selection import train_test_split

# fix random seed for reproducibility
np.random.seed(7)
warnings.filterwarnings('ignore')

In [3]:
#!pip install tensorflow
#import tensorflow as tf



In [2]:
# Load the IMDB reviews as sequences of word indices, keeping only the
# `top_words` most frequent words. Rarer words are not zeroed: Keras replaces
# them with its out-of-vocabulary index (`oov_char`).
top_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

# Hold out 20% of the training reviews as a validation (CV) set.
# random_state pins the split so re-running this cell always yields the same
# partition instead of depending on the global NumPy RNG state; stratify keeps
# the positive/negative ratio identical in the train and CV subsets.
X_train,X_cv,y_train,y_cv = train_test_split(
    X_train,
    y_train,
    test_size = 0.2,
    random_state=7,
    stratify=y_train,
)
# Shapes are 1-D here because each review is still a variable-length list.
print("Shape of train data:", X_train.shape)
print("Shape of Test data:", X_test.shape)
print("Shape of CV data:", X_cv.shape)

# Truncate and pad every review to exactly max_review_length tokens so the
# splits become dense 2-D arrays (pad_sequences pads/truncates at the front
# by default).
max_review_length = 600
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
X_cv = sequence.pad_sequences(X_cv,maxlen=max_review_length)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Shape of train data: (20000,)
Shape of Test data: (25000,)
Shape of CV data: (5000,)


In [3]:
# Baseline model: Embedding -> LSTM -> single sigmoid unit for binary sentiment.
embedding_vecor_length = 32
model = Sequential()
# Learn a 32-dimensional vector for each of the top_words word indices.
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
# dropout regularises the layer inputs, recurrent_dropout the recurrent state.
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()
filepath="weights_best.hdf5"
# With metrics=['accuracy'] the validation metric is logged as 'val_accuracy'.
# Monitoring the non-existent 'val_acc' key makes the callback skip every save
# (and the warning about it is hidden by warnings.filterwarnings('ignore')).
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
    save_weights_only=True,
)
callbacks_list = [checkpoint]
model.fit(X_train, y_train, epochs=5, batch_size=256,verbose = 1,callbacks = callbacks_list,validation_data=(X_cv,y_cv))

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 600, 32)           320000    
                                                                 
 lstm (LSTM)                 (None, 100)               53200     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 373301 (1.42 MB)
Trainable params: 373301 (1.42 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/5



Epoch 2/5



Epoch 3/5



Epoch 4/5



Epoch 5/5





<keras.src.callbacks.History at 0x7f201420bfa0>

In [4]:
# Score the trained model on the held-out test set. `scores` holds the loss
# followed by the compiled metrics, so index 1 is the accuracy.
scores = model.evaluate(
    X_test,
    y_test,
    batch_size=256,
    verbose=1,
)
test_accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % test_accuracy_pct)

Accuracy: 87.28%


In [7]:
# Hybrid model: Embedding -> Conv1D -> MaxPooling1D -> LSTM -> sigmoid.
# The convolution extracts local 3-token features and the pooling layer halves
# the sequence length before it reaches the LSTM.
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
# padding='same' keeps the sequence length unchanged through the convolution.
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()
filepath="weights_best_cnn.hdf5"
# With metrics=['accuracy'] the validation metric is logged as 'val_accuracy'.
# Monitoring the non-existent 'val_acc' key makes the callback skip every save
# (and the warning about it is hidden by warnings.filterwarnings('ignore')).
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
    save_weights_only=True,
)
callbacks_list = [checkpoint]
model.fit(X_train, y_train, epochs=3, batch_size=256,verbose = 1,callbacks = callbacks_list,validation_data=(X_cv,y_cv))

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 600, 32)           320000    
                                                                 
 conv1d_1 (Conv1D)           (None, 600, 32)           3104      
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 300, 32)           0         
 g1D)                                                            
                                                                 
 lstm_2 (LSTM)               (None, 100)               53200     
                                                                 
 dense_2 (Dense)             (None, 1)                 101       
                                                                 
Total params: 376405 (1.44 MB)
Trainable params: 376405 (1.44 MB)
Non-trainable params: 0 (0.00 Byte)
__________________



Epoch 2/3



Epoch 3/3





<keras.src.callbacks.History at 0x7f20136e6110>

In [8]:
# Silent (verbose=0) evaluation on the test set. `scores` holds the loss
# followed by the compiled metrics, so index 1 is the accuracy.
scores = model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
test_accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % test_accuracy_pct)

Accuracy: 88.15%


In [11]:
# Sanity check: report the dimensions of the training inputs and labels.
print(
    f"Shape of X_train: {X_train.shape}",
    f"Shape of y_train: {y_train.shape}",
    sep="\n",
)

Shape of X_train: (20000, 600)
Shape of y_train: (20000,)


In [13]:
# Print the layer-by-layer summary of whichever model is currently bound to
# `model`.
# NOTE(review): the output recorded for this cell shows an
# Embedding -> Conv1D -> MaxPooling1D -> Dense stack with no LSTM or Flatten
# layer, which matches none of the model-building cells visible in this
# notebook — presumably left over from a deleted/edited cell. Confirm with
# Restart Kernel -> Run All.
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 600, 32)           320000    
                                                                 
 conv1d_4 (Conv1D)           (None, 600, 32)           3104      
                                                                 
 max_pooling1d_4 (MaxPoolin  (None, 300, 32)           0         
 g1D)                                                            
                                                                 
 dense_5 (Dense)             (None, 300, 1)            33        
                                                                 
Total params: 323137 (1.23 MB)
Trainable params: 323137 (1.23 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
# CNN-only model: Embedding -> Conv1D -> MaxPooling1D -> Flatten -> Dense -> sigmoid.
# Flatten is imported alongside the other keras.layers names in the notebook's
# import cell so every layer comes from the same Keras package.
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
# padding='same' keeps the sequence length unchanged through the convolution.
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
# Collapse the pooled (timesteps, filters) feature map into one vector so it
# can feed the fully connected layers.
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()
filepath="weights_best_cnn2.hdf5"
# With metrics=['accuracy'] the validation metric is logged as 'val_accuracy'.
# Monitoring the non-existent 'val_acc' key makes the callback skip every save
# (and the warning about it is hidden by warnings.filterwarnings('ignore')).
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
    save_weights_only=True,
)
callbacks_list = [checkpoint]
model.fit(X_train, y_train, epochs=5, batch_size=256,verbose = 1,callbacks = callbacks_list,validation_data=(X_cv,y_cv))

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_7 (Embedding)     (None, 600, 32)           320000    
                                                                 
 conv1d_6 (Conv1D)           (None, 600, 32)           3104      
                                                                 
 max_pooling1d_6 (MaxPoolin  (None, 300, 32)           0         
 g1D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 9600)              0         
                                                                 
 dense_8 (Dense)             (None, 100)               960100    
                                                                 
 dense_9 (Dense)             (None, 1)                 101       
                                                      



Epoch 2/5



Epoch 3/5



Epoch 4/5



Epoch 5/5





<keras.src.callbacks.History at 0x7f201a8a09d0>

In [17]:
# Evaluate on the test set with progress output. `scores` holds the loss
# followed by the compiled metrics, so index 1 is the accuracy.
scores = model.evaluate(
    X_test,
    y_test,
    verbose=1,
)
test_accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % test_accuracy_pct)

Accuracy: 87.07%
