<a href="https://colab.research.google.com/github/Tariquzzaman-faisal/VITD/blob/main/GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mounting to drive

In [46]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Imports

In [47]:
!pip install tensorflow



In [48]:
!pip install fasttext



In [49]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional, GlobalMaxPool1D, Input, Flatten, MaxPooling1D, SpatialDropout1D, Activation

from keras.callbacks import EarlyStopping

from numpy import array
from sklearn.metrics import classification_report

import gensim
from gensim import models
from gensim.models import Word2Vec
import fasttext.util
import pandas as pd
import numpy as np

# Loading Model

In [50]:
fasttext_model = fasttext.load_model("/content/drive/MyDrive/Research/Shared Task/Violence Inciting Text Detection (VITD) Bangla/notebooks/Tariq/fasttext/model_bn_300.bin")



# Load Dataset

In [51]:
train_dataset = pd.read_csv("/content/drive/MyDrive/Research/Shared Task/Violence Inciting Text Detection (VITD) Bangla/dataset/task datasets/original/train.csv")
val_dataset = pd.read_csv("/content/drive/MyDrive/Research/Shared Task/Violence Inciting Text Detection (VITD) Bangla/dataset/task datasets/original/dev.csv")
test_dataset = pd.read_csv("/content/drive/MyDrive/Research/Shared Task/Violence Inciting Text Detection (VITD) Bangla/dataset/task datasets/original/test.csv")

In [52]:
print(f'train: {train_dataset.shape}\nval: {val_dataset.shape}\ntest: {test_dataset.shape}')

train: (2700, 2)
val: (1330, 2)
test: (2016, 2)


# Oversampling

In [53]:
# train_dataset['label'].value_counts()

In [54]:
# import math
# # Find the maximum class frequency
# max_class_frequency = train_dataset['label'].value_counts().max()

# # Group the dataset by labels
# grouped = train_dataset.groupby('label')

# resampled_data = []
# for label, group in grouped:
#     if len(group) < max_class_frequency:
#         oversampled_group = group.sample(max_class_frequency, replace=True, random_state=42)
#         resampled_data.append(oversampled_group)
#     else:
#         resampled_data.append(group)

# # Concatenate the resampled groups to create the balanced dataset
# balanced_dataset = pd.concat(resampled_data)

# # Shuffle the dataset to ensure randomness
# balanced_dataset = balanced_dataset.sample(frac=1, random_state=42).reset_index(drop=True)

In [55]:
# balanced_dataset.shape

In [56]:
# balanced_dataset['label'].value_counts()

In [57]:
# train_dataset = balanced_dataset
# train_dataset['label'].value_counts()

In [58]:
train_x = train_dataset['text']
train_y = train_dataset['label']

val_x = val_dataset['text']
val_y = val_dataset['label']

test_x = test_dataset['text']
test_y = test_dataset['label']

# Embedding Setup

In [59]:
tokenizer=Tokenizer(oov_token = "<OOV>", split=' ') # Splitting text based on whitespace and adding "Out of vocabulary"
tokenizer.fit_on_texts(train_x) # Using the tokenizer on out train dataset to tokenize the train dataset
train_encoded=tokenizer.texts_to_sequences(train_x)
# print(train_encoded)


In [60]:
train_padded= pad_sequences(train_encoded, padding='post', maxlen=256)
# print(train_padded)

In [61]:
train_padded.shape[1]

256

In [62]:
# padding df_test
test_encoded=tokenizer.texts_to_sequences(test_x)
test_padded= pad_sequences(test_encoded, padding='post', maxlen=train_padded.shape[1])

In [63]:
# padding df_validation
val_encoded=tokenizer.texts_to_sequences(val_x)
val_padded= pad_sequences(val_encoded, padding='post', maxlen=train_padded.shape[1])

In [64]:
# function that takes word vector as input and returned an embedding layer
def embedding_creation(EMBEDDING_DIM, word_vectors):
  vocabulary_size=len(tokenizer.word_index)+1
  word_index=tokenizer.word_index
  embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))

  for word, i in word_index.items():
    try:
      embedding_vector=word_vectors[word] # taking the word vector of all the words in the index
      embedding_matrix[i]=embedding_vector # inserting the vector of the word to the embeddings matrix,  index wise
    except KeyError:
      embedding_matrix[i]=np.random.normal(0,np.sqrt(0.25),EMBEDDING_DIM)
      """
      The strategy of generating random vectors for missing words (KeyError)
      in the embedding matrix is useful because it provides a way to
      include out-of-vocabulary words in the representation,
        prevents loss of information, helps with stable training, and
        ensures a complete embedding matrix for neural network models.
      """
  embedding_layer=Embedding(vocabulary_size, EMBEDDING_DIM, weights=[embedding_matrix], trainable=False)

  return embedding_layer

In [65]:
EMBEDDING_DIM = 300
wv = fasttext_model
IFT = embedding_creation(EMBEDDING_DIM, wv)
# gets the embedding layer from the word vectors using EMBEDDING_DIM as dim size

In [66]:
max_length = train_padded.shape[1]
vocabulary_size = len(tokenizer.word_index) + 1
# creating a randomly initialized embedding layer (RE)
RE = Embedding(vocabulary_size, EMBEDDING_DIM,input_length = max_length, trainable=True)

# Early Stopping

In [67]:
earlystop_callback = EarlyStopping(
    monitor ="val_loss",
    min_delta=0,
    patience=3,
    verbose=1,
    mode="min",
    restore_best_weights=True,
)

In [68]:
emb_X_name_collection = [ [IFT, 'IFT']]
"""
IFT = embedding_creation(EMBEDDING_DIM, wv)
# IFT has the embedding layer from the word vectors using EMBEDDING_DIM as dim size
"""

'\nIFT = embedding_creation(EMBEDDING_DIM, wv)\n# IFT has the embedding layer from the word vectors using EMBEDDING_DIM as dim size\n'

# Model Configuration Orignal

In [69]:
!pip install keras



In [70]:
from tensorflow.keras.layers import Layer, Embedding, Bidirectional, LSTM, GlobalMaxPool1D, Dense
from tensorflow.keras.models import Sequential
import tensorflow.keras.backend as K


# Attention without maxpool

In [71]:
# # Define the custom attention mechanism as a subclass of Layer
# class AttentionLayer(Layer):
#     def __init__(self, **kwargs):
#         super(AttentionLayer, self).__init__(**kwargs)

#     def build(self, input_shape):
#         self.W = self.add_weight(name='attention_weight', shape=(input_shape[-1], 1),
#                                  initializer='random_normal', trainable=True)
#         super(AttentionLayer, self).build(input_shape)

#     def call(self, x):
#         e = K.tanh(K.dot(x, self.W))  # Calculate alignment scores
#         alpha = K.softmax(e, axis=1)   # Compute attention weights
#         weighted_sum = x * alpha       # Apply attention to input
#         return K.sum(weighted_sum, axis=1)


# Attention for maxpool

In [72]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import GRU, Dense, GlobalMaxPooling1D
# import tensorflow as tf

# # Define the Attention Layer
# class AttentionLayer(tf.keras.layers.Layer):
#     def __init__(self, **kwargs):
#         super(AttentionLayer, self).__init__(**kwargs)

#     def build(self, input_shape):
#         self.W_q = self.add_weight(name='W_q',
#                                   shape=(input_shape[-1], input_shape[-1]),
#                                   initializer='uniform',
#                                   trainable=True)
#         super(AttentionLayer, self).build(input_shape)

#     def call(self, x):
#         q = tf.tanh(tf.matmul(x, self.W_q))
#         a = tf.nn.softmax(q, axis=-1)
#         x = x * a
#         return x


# With attention

In [73]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import GRU, Dense

# num_classes = 3

# # Loop through each embedding layer and create models with attention
# for emb_X_name in emb_X_name_collection:
#     model = Sequential([
#         emb_X_name[0],
#         GRU(100, dropout=0.4, return_sequences=True),  # Use GRU instead of LSTM
#         AttentionLayer(),  # Use the custom attention layer
#         Dense(64, activation='elu'),
#         Dense(32, activation='relu'),
#         Dense(16, activation='relu'),
#         Dense(num_classes, activation='softmax'),  # Use softmax for multi-class classification
#     ],
#     name="Sentiment_Model")

#     model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Without attention

In [74]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.optimizers import Adam

num_classes = 3
learning_rate = 0.001  # Adjust the learning rate as needed

for emb_X_name in emb_X_name_collection:
    model = Sequential([
        emb_X_name[0],
        GRU(150, dropout=0.3, return_sequences=True),
        GlobalMaxPool1D(),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),  # Use softmax for multi-class classification
    ],
    name="Sentiment_Model")
    optimizer = Adam(learning_rate=learning_rate)  # Set the learning rate for the optimizer
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])  # Use sparse_categorical_crossentropy

# Attention + Maxpool

In [75]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import GRU, Dense
# from tensorflow.keras.optimizers import Adam
# num_classes = 3

# # Loop through each embedding layer and create models with attention
# for emb_X_name in emb_X_name_collection:
#     model = Sequential([
#         emb_X_name[0],
#         GRU(100, dropout=0.4, return_sequences=True),  # Use GRU instead of LSTM
#         AttentionLayer(),  # Use the custom attention layer
#         GlobalMaxPooling1D(),
#         Dense(64, activation='elu'),
#         Dense(32, activation='relu'),
#         Dense(16, activation='relu'),
#         Dense(num_classes, activation='softmax'),  # Use softmax for multi-class classification
#     ],
#     name="Sentiment_Model")
#     optimizer = Adam(learning_rate=2e-5)  # You can adjust the learning rate as needed
#     model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])


In [76]:
model.summary()

Model: "Sentiment_Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, None, 300)         3052200   
                                                                 
 gru_2 (GRU)                 (None, None, 150)         203400    
                                                                 
 global_max_pooling1d_2 (Gl  (None, 150)               0         
 obalMaxPooling1D)                                               
                                                                 
 dense_4 (Dense)             (None, 32)                4832      
                                                                 
 dense_5 (Dense)             (None, 3)                 99        
                                                                 
Total params: 3260531 (12.44 MB)
Trainable params: 208331 (813.79 KB)
Non-trainable params: 3052200 (11.64 MB)
______

# Training

In [77]:
history = model.fit(train_padded, train_y, epochs=100, batch_size=32, validation_data=(val_padded, val_y), callbacks=[earlystop_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 8: early stopping


In [78]:
prediction = model.predict(test_padded)

p = []
for i in range(len(prediction)):
    a = []
    for j in range(3):
        a.append(round(prediction[i][j]))
    p.append(a)




In [79]:
prediction

array([[0.4404862 , 0.4968601 , 0.06265371],
       [0.6095443 , 0.15504445, 0.23541118],
       [0.9849346 , 0.01266591, 0.00239954],
       ...,
       [0.03784822, 0.02404939, 0.9381024 ],
       [0.03460599, 0.06336546, 0.9020286 ],
       [0.93616647, 0.04867815, 0.01515536]], dtype=float32)

In [80]:
# Determine the maximum column index for each row
max_indices = np.argmax(p, axis=1)

# Create a DataFrame with the max_indices
pred_labels = pd.DataFrame({'Value': max_indices})

In [81]:
pred_labels

Unnamed: 0,Value
0,0
1,0
2,0
3,0
4,1
...,...
2011,0
2012,1
2013,2
2014,2


In [82]:
from sklearn.metrics import classification_report

target_names = ['neutral', 'passive', 'active']
r = classification_report(test_y, pred_labels, output_dict=True)

In [83]:
r

{'0': {'precision': 0.7399103139013453,
  'recall': 0.9032846715328468,
  'f1-score': 0.8134757600657354,
  'support': 1096},
 '1': {'precision': 0.8211206896551724,
  'recall': 0.5299026425591099,
  'f1-score': 0.6441251056635672,
  'support': 719},
 '2': {'precision': 0.602803738317757,
  'recall': 0.6417910447761194,
  'f1-score': 0.6216867469879518,
  'support': 201},
 'accuracy': 0.7440476190476191,
 'macro avg': {'precision': 0.7212782472914249,
  'recall': 0.6916594529560253,
  'f1-score': 0.6930958709057515,
  'support': 2016},
 'weighted avg': {'precision': 0.7552038845733198,
  'recall': 0.7440476190476191,
  'f1-score': 0.7339555655499648,
  'support': 2016}}

In [84]:
df = pd.DataFrame(r)

# Transpose the DataFrame
df = df.transpose()

print(df)

              precision    recall  f1-score      support
0              0.739910  0.903285  0.813476  1096.000000
1              0.821121  0.529903  0.644125   719.000000
2              0.602804  0.641791  0.621687   201.000000
accuracy       0.744048  0.744048  0.744048     0.744048
macro avg      0.721278  0.691659  0.693096  2016.000000
weighted avg   0.755204  0.744048  0.733956  2016.000000


In [86]:
# model.save("/content/drive/MyDrive/Research/Shared Task/Violence Inciting Text Detection (VITD) Bangla/notebooks/Tariq/final/GRU.h5")  # Save the model in an h5 format

  saving_api.save_model(
