**Loading the Pre-Processed Dataset**

In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Embedding
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences


pre_processed_tweets = pd.read_csv('/content/pre_processed_tweets.csv')

print('Shape of the dataset:', pre_processed_tweets.shape)
pre_processed_tweets.head()

Shape of the dataset: (7613, 2)


Unnamed: 0,tweets,target
0,deeds reason earthquake may allah forgive,1
1,forest fire near ronge sask canada,1
2,residents asked ishelter place notified office...,1
3,people receive wildfires evacuation orders cal...,1
4,got sent photo ruby alaska smoke wildfires pou...,1


**One Hot Encoding for Word Embedding Layers**

In [8]:
vocab_size = 200
encoded_reviews = [one_hot(d, vocab_size) for d in pre_processed_tweets.tweets.astype(str).values]
print(encoded_reviews)

[[73, 60, 107, 68, 18, 194], [170, 152, 197, 111, 137, 40], [194, 31, 78, 79, 189, 79, 170, 130, 79, 2, 73], [161, 134, 181, 170, 2, 107], [171, 3, 173, 7, 199, 18, 181, 98, 93], [58, 102, 107, 183, 146, 178, 48, 143, 60, 152, 55, 181], [114, 156, 89, 51, 142, 46, 101, 180, 43, 25, 189, 78], [155, 2, 116, 152, 1], [160, 154, 170, 22, 52, 139, 154], [171, 100, 84, 184], [172, 161, 9, 184, 138, 194], [155, 69, 94, 156, 185, 30, 101, 113, 31, 69, 94, 131, 131, 148, 101], [126, 101, 116, 35, 94, 85, 45, 182, 119], [114, 45, 29, 77, 45], [120, 93, 66, 175, 71, 9, 121], [4, 155], [96, 168], [45, 78], [71, 111], [185], [20], [48, 180], [96, 75], [38, 89], [159], [60, 92, 119, 54, 175], [35, 116, 168], [96, 104], [13], [144, 156], [16], [147, 154, 138, 44], [42, 132, 178, 89, 177], [57, 121, 73, 148, 82, 36, 44, 104], [167, 171, 36, 44], [99, 140, 77, 152, 116, 184, 44], [38, 161, 37, 45, 66, 49, 172, 130, 65, 115, 28, 36, 43, 44, 151], [80, 173, 160, 36, 44], [183, 121, 72, 70, 27, 36, 44, 10

**Padding**

In [9]:
max_length = 30
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[ 73  60 107 ...   0   0   0]
 [170 152 197 ...   0   0   0]
 [194  31  78 ...   0   0   0]
 ...
 [179 156   0 ...   0   0   0]
 [186  48  70 ...   0   0   0]
 [147  52  27 ...   0   0   0]]


**Train Test Split**

In [10]:
X_train, X_test, y_train, y_test = train_test_split(np.array(padded_reviews), pre_processed_tweets.target.values, test_size=0.33, random_state=42, stratify= pre_processed_tweets.target.values)

print('training dataset size:', X_train.shape)
print('testing dataset size:', X_test.shape)

training dataset size: (5100, 30)
testing dataset size: (2513, 30)


**Modeling**

**Bidirectional LSTM Model**

In [11]:
model = keras.Sequential()
model.add(layers.Embedding(input_dim=vocab_size, output_dim=300, input_length=max_length, name="embedding_layer"))
model.add(layers.Bidirectional(layers.LSTM(128, return_sequences = True, recurrent_dropout=0.2)))
model.add(layers.GlobalMaxPool1D())
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(30, activation = "relu"))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(30, activation = "relu"))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation = 'sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_layer (Embedding)  (None, 30, 300)          60000     
                                                                 
 bidirectional_1 (Bidirectio  (None, 30, 256)          439296    
 nal)                                                            
                                                                 
 global_max_pooling1d_1 (Glo  (None, 256)              0         
 balMaxPooling1D)                                                
                                                                 
 batch_normalization_1 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                      

In [12]:
reduce_lr = ReduceLROnPlateau(
    monitor = 'val_loss', 
    factor = 0.2, 
    verbose = 1, 
    patience = 5,                        
    min_lr = 0.001
)

history = model.fit(
    X_train, 
    y_train, 
    epochs = 10,
    batch_size = 64,
    validation_data = [X_test, y_test],
    verbose = 1,
    callbacks = [reduce_lr]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
