# Importing Libraries

In [None]:
!pip uninstall numpy gensim

In [None]:
!pip install numpy gensim

In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from gensim.models import Word2Vec, FastText
from tensorflow.keras.callbacks import EarlyStopping
from keras import backend as K
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, SimpleRNN, Bidirectional, LSTM, GRU, Layer


In [None]:
# Read the labelled spreadsheet from the Kaggle input folder and keep only
# the rows that have no missing values, then preview the first rows.
data = pd.read_excel('/kaggle/input/urdu-dataset1/Proposed Dataset.xlsx').dropna()
data.head()

In [None]:
# Tokenization
# Fit a Keras Tokenizer on the 'sents' column and encode every row as a list of integer word ids.
tokenizer = Tokenizer()
# NOTE(review): the line below is disabled, so 'sents' must already exist in the loaded
# spreadsheet — confirm whether it holds token lists or raw strings.
#data['sents'] = data['no stop'].apply(lambda x: str(x).split())
tokenizer.fit_on_texts(data['sents'])
sequences = tokenizer.texts_to_sequences(data['sents'])

In [None]:
# Pad Sequences
# +1 because the tokenizer never assigns id 0; that row/index is left for padding.
vocab_size = len(tokenizer.word_index) + 1
# Take the padded length from the tokenized sequences themselves. Measuring
# data['sents'] directly would count characters instead of tokens whenever
# that column holds plain strings; for token lists both give the same value.
maxlen = max(len(seq) for seq in sequences)
# padding='post' appends the zeros after the real tokens.
padded_sequences = pad_sequences(sequences, maxlen=maxlen, padding='post')

In [None]:
# Train 100-dimensional FastText vectors on the 'sents' column
# (context window of 3, min_count=1 keeps every token, 100 training epochs, 4 worker threads).
# NOTE(review): gensim expects each item to be a list of tokens — confirm 'sents' is tokenized.
# NOTE(review): this runs before the train/test split below, so the embeddings also see rows that end up in X_test.
fastext = FastText(data['sents'], vector_size=100, window=3, min_count=1, workers=4, epochs=100)

In [None]:
# Build the initial weights for the Embedding layer: row i holds the FastText
# vector of the token that the tokenizer mapped to id i.
# The width follows the trained model instead of repeating the literal 100.
embedding_dim = fastext.wv.vector_size
fastext_embedding_matrix = np.zeros((vocab_size, embedding_dim))

for word, i in tokenizer.word_index.items():
    # Rows start out as zeros, so a token without a vector needs no explicit assignment.
    if word in fastext.wv:
        fastext_embedding_matrix[i] = fastext.wv[word]

In [None]:
# Load the second file (train.csv), discard incomplete rows and preview it.
# In the cells shown, this frame is only used as an additional evaluation set.
test_data = pd.read_csv('/kaggle/input/urdu-dataset1/train.csv').dropna()
test_data.head()

In [None]:
stopwords = ['آئی', 'آئے', 'آج', 'آخر', 'آخرکبر', 'آدهی', 'آًب', 'آٹھ', 'آیب', 'اة', 'اخبزت', 'اختتبم',  'ادھر', 'ارد', 'اردگرد', 'ارکبى', 'اش', 'اضتعوبل', 'اضتعوبلات', 'اضطرذ', 'اضکب', 'اضکی',  'اضکے', 'اطراف', 'اغیب', 'افراد', 'الگ', 'اور', 'اوًچب', 'اوًچبئی', 'اوًچی', 'اوًچے', 'اى',  'اً', 'اًذر', 'اًہیں', 'اٹھبًب', 'اپٌب', 'اپٌے', 'اچھب', 'اچھی', 'اچھے', 'اکثر', 'اکٹھب',  'اکٹھی', 'اکٹھے', 'اکیلا', 'اکیلی', 'اکیلے', 'اگرچہ', 'اہن', 'ایطے', 'ایک', 'ب', 'ت', 'تبزٍ',  'تت', 'تر', 'ترتیت', 'تریي', 'تعذاد', 'تن', 'تو', 'توبم', 'توہی', 'توہیں', 'تٌہب', 'تک',  'تھب', 'تھوڑا', 'تھوڑی', 'تھوڑے', 'تھی', 'تھے', 'تیي', 'ثب', 'ثبئیں', 'ثبترتیت', 'ثبری', 'ثبرے',  'ثبعث', 'ثبلا', 'ثبلترتیت', 'ثبہر', 'ثدبئے', 'ثرآں', 'ثراں', 'ثرش', 'ثعذ', 'ثغیر', 'ثلٌذ',  'ثلٌذوثبلا', 'ثلکہ', 'ثي', 'ثٌب', 'ثٌبرہب', 'ثٌبرہی', 'ثٌبرہے', 'ثٌبًب', 'ثٌذ', 'ثٌذکرو',  'ثٌذکرًب', 'ثٌذی', 'ثڑا', 'ثڑوں', 'ثڑی', 'ثڑے', 'ثھر', 'ثھرا', 'ثھراہوا', 'ثھرپور', 'ثھی',  'ثہت', 'ثہتر', 'ثہتری', 'ثہتریي', 'ثیچ', 'ج', 'خب', 'خبرہب', 'خبرہی', 'خبرہے', 'خبهوظ',  'خبًب', 'خبًتب', 'خبًتی', 'خبًتے', 'خبًٌب', 'خت', 'ختن', 'خجکہ', 'خص', 'خططرذ', 'خلذی',  'خو', 'خواى', 'خوًہی', 'خوکہ', 'خٌبة', 'خگہ', 'خگہوں', 'خگہیں', 'خیطب', 'خیطبکہ', 'در',  'درخبت', 'درخہ', 'درخے', 'درزقیقت', 'درضت', 'دش', 'دفعہ', 'دلچطپ', 'دلچطپی', 'دلچطپیبں',  'دو', 'دور', 'دوراى', 'دوضرا', 'دوضروں', 'دوضری', 'دوضرے', 'دوًوں', 'دکھبئیں', 'دکھبتب',  'دکھبتی', 'دکھبتے', 'دکھبو', 'دکھبًب', 'دکھبیب', 'دی', 'دیب', 'دیتب', 'دیتی', 'دیتے', 'دیر',  'دیٌب', 'دیکھو', 'دیکھٌب', 'دیکھی', 'دیکھیں', 'دے', 'ر', 'راضتوں', 'راضتہ', 'راضتے', 'رریعہ',  'رریعے', 'رکي', 'رکھ', 'رکھب', 'رکھتب', 'رکھتبہوں', 'رکھتی', 'رکھتے', 'رکھی', 'رکھے', 'رہب',  'رہی', 'رہے', 'ز', 'زبصل', 'زبضر', 'زبل', 'زبلات', 'زبلیہ', 'زصوں', 'زصہ', 'زصے', 'زقبئق',  'زقیتیں', 'زقیقت', 'زکن', 'زکویہ', 'زیبدٍ', 'صبف', 'صسیر', 'صفر', 'صورت', 'صورتسبل', 'صورتوں',  'صورتیں', 'ض', 'ضبت', 'ضبتھ', 'ضبدٍ', 'ضبرا', 'ضبرے', 'ضبل', 'ضبلوں', 'ضت', 'ضرور', 'ضرورت',  'ضروری', 'ضلطلہ', 'ضوچ', 'ضوچب', 'ضوچتب', 'ضوچتی', 
'ضوچتے', 'ضوچو', 'ضوچٌب', 'ضوچی', 'ضوچیں',  'ضکب', 'ضکتب', 'ضکتی', 'ضکتے', 'ضکٌب', 'ضکی', 'ضکے', 'ضیذھب', 'ضیذھی', 'ضیذھے', 'ضیکٌڈ', 'ضے',  'طرف', 'طریق', 'طریقوں', 'طریقہ', 'طریقے', 'طور', 'طورپر', 'ظبہر', 'ع', 'عذد', 'عظین', 'علاقوں',  'علاقہ', 'علاقے', 'علاوٍ', 'عووهی', 'غبیذ', 'غخص', 'غذ', 'غروع', 'غروعبت', 'غے', 'فرد', 'فی', 'ق',  'قجل', 'قجیلہ', 'قطن', 'لئے', 'لا', 'لازهی', 'لو', 'لوجب', 'لوجی', 'لوجے', 'لوسبت', 'لوسہ',  'لوگ', 'لوگوں', 'لڑکپي', 'لگتب', 'لگتی', 'لگتے', 'لگٌب', 'لگی', 'لگیں', 'لگے', 'لی', 'لیب',  'لیٌب', 'لیں', 'لے', 'ه', 'هتعلق', 'هختلف', 'هسترم', 'هسترهہ', 'هسطوش', 'هسیذ', 'هطئلہ',  'هطئلے', 'هطبئل', 'هطتعول', 'هطلق', 'هعلوم', 'هػتول', 'هلا', 'هوکي', 'هوکٌبت', 'هوکٌہ',  'هٌبضت', 'هڑا', 'هڑًب', 'هڑے', 'هکول', 'هگر', 'هہرثبى', 'هیرا', 'هیری', 'هیرے', 'هیں', 'و',  'وار', 'والے', 'وٍ', 'ًئی', 'ًئے', 'ًب', 'ًبپطٌذ', 'ًبگسیر', 'ًطجت', 'ًقطہ', 'ًو', 'ًوخواى',  'ًکبلٌب', 'ًکتہ', 'ًہ', 'ًہیں', 'ًیب', 'ًے', 'ٓ آش', 'ٹھیک', 'پبئے', 'پبش', 'پبًب', 'پبًچ',  'پر', 'پراًب', 'پطٌذ', 'پل', 'پورا', 'پوچھب', 'پوچھتب', 'پوچھتی', 'پوچھتے', 'پوچھو', 'پوچھوں',  'پوچھٌب', 'پوچھیں', 'پچھلا', 'پھر', 'پہلا', 'پہلی', 'پہلےضی', 'پہلےضے', 'پہلےضےہی', 'پیع',  'چبر', 'چبہب', 'چبہٌب', 'چبہے', 'چلا', 'چلو', 'چلیں', 'چلے', 'چکب', 'چکی', 'چکیں', 'چکے',  'چھوٹب', 'چھوٹوں', 'چھوٹی', 'چھوٹے', 'چھہ', 'چیسیں', 'ڈھوًڈا', 'ڈھوًڈلیب', 'ڈھوًڈو', 'ڈھوًڈًب',  'ڈھوًڈی', 'ڈھوًڈیں', 'ک', 'کئی', 'کئے', 'کب', 'کبفی', 'کبم', 'کت', 'کجھی', 'کرا', 'کرتب',  'کرتبہوں', 'کرتی', 'کرتے', 'کرتےہو', 'کررہب', 'کررہی', 'کررہے', 'کرو', 'کرًب', 'کریں', 'کرے',  'کطی', 'کل', 'کن', 'کوئی', 'کوتر', 'کورا', 'کوروں', 'کورٍ', 'کورے', 'کوطي', 'کوى', 'کوًطب',  'کوًطی', 'کوًطے', 'کھولا', 'کھولو', 'کھولٌب', 'کھولی', 'کھولیں', 'کھولے', 'کہ', 'کہب', 'کہتب',  'کہتی', 'کہتے', 'کہو', 'کہوں', 'کہٌب', 'کہی', 'کہیں', 'کہے', 'کی', 'کیب', 'کیطب', 'کیطرف',  'کیطے', 'کیلئے', 'کیوًکہ', 'کیوں', 'کیے', 'کے', 'کےثعذ', 'کےرریعے', 'گئی', 'گئے', 'گب',  'گرد', 'گروٍ', 'گروپ', 'گروہوں', 'گٌتی', 'گی', 'گیب', 'گے', 'ہر', 'ہن', 'ہو', 
'ہوئی', 'ہوئے',  'ہوا', 'ہوبرا', 'ہوبری', 'ہوبرے', 'ہوتب', 'ہوتی', 'ہوتے', 'ہورہب', 'ہورہی', 'ہورہے', 'ہوضکتب',  'ہوضکتی', 'ہوضکتے', 'ہوًب', 'ہوًی', 'ہوًے', 'ہوچکب', 'ہوچکی', 'ہوچکے', 'ہوگئی', 'ہوگئے',  'ہوگیب', 'ہوں', 'ہی', 'ہیں', 'ہے', 'ی', 'یقیٌی', 'یہ', 'یہبں']

In [None]:
# Split each tweet on whitespace and drop the stopwords.
# Membership is checked against a set so every lookup is O(1) instead of a scan of the whole list.
stopword_set = set(stopwords)
test_data['no stop'] = test_data['tweet'].apply(
    lambda x: [word for word in str(x).split() if word not in stopword_set]
)

In [None]:
# Display the frame to inspect the newly added 'no stop' column.
test_data

In [None]:
# NOTE(review): test_tokenizer is fitted below but never used in the cells shown — confirm whether it can be removed.
test_tokenizer=Tokenizer()
# test_data['sents']=test_data['tweet'].apply(lambda x:str(x).split())

test_tokenizer.fit_on_texts(test_data['no stop'])
# The tweets are encoded with the *training* tokenizer (not test_tokenizer), so the ids
# line up with the rows of the embedding matrix built from tokenizer.word_index.
test_sequences=tokenizer.texts_to_sequences(test_data['no stop'])

In [None]:
# Bring the encoded tweets to the same fixed length (maxlen) that was used for the training sequences.
test_padded_sequences=pad_sequences(test_sequences,maxlen=maxlen,padding='post')

In [None]:
# Train-Test Split
# Hold out 20% of the padded rows (with their 'target' labels) for testing;
# random_state=42 fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, data['target'], test_size=0.2, random_state=42)

For testing with the train dataset (`train.csv`): an additional evaluation set used after each model is trained

In [None]:
# Second evaluation set: the padded tweets from test_data and their 'target' labels.
xx_test=test_padded_sequences
yy_test=test_data['target']

# CNN

In [None]:
class Attention(Layer):
    """Additive attention over axis 1 of a rank-3 input.

    For an input ``x`` the layer computes ``score = tanh(x . W + b)``, turns
    ``score . u`` into weights with a softmax over axis 1, and multiplies
    ``x`` by those weights.

    Args:
        return_sequences: When True, return the re-weighted sequence (same
            shape as the input). When False, sum the re-weighted sequence
            over axis 1 and return one vector per sample.
        **kwargs: Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).

    Note:
        ``get_config`` is implemented so that models containing this layer can
        be saved and rebuilt; when loading, the class still has to be made
        known to Keras (e.g. via ``custom_objects={"Attention": Attention}``).
    """

    def __init__(self, return_sequences=True, **kwargs):
        # Forward **kwargs so Keras can hand name/dtype/etc. back in when
        # the layer is re-created from its saved config.
        super(Attention, self).__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the trainable parameters, sized from the last input dimension."""
        feature_dim = input_shape[-1]
        # Projection applied to every position before the tanh.
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, feature_dim),
            initializer="normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(feature_dim,),
            initializer="zeros",
            trainable=True,
        )
        # Vector that reduces each projected position to a single score.
        self.u = self.add_weight(
            name="att_u",
            shape=(feature_dim, 1),
            initializer="normal",
            trainable=True,
        )
        super(Attention, self).build(input_shape)

    def call(self, x):
        """Apply the attention weights to ``x`` (indexed as a rank-3 tensor)."""
        # Step 1: project every position and squash with tanh.
        score = tf.tanh(tf.tensordot(x, self.W, axes=[2, 0]) + self.b)

        # Step 2: one scalar per position, normalised with a softmax along axis 1.
        attention_weights = tf.nn.softmax(tf.tensordot(score, self.u, axes=[2, 0]), axis=1)

        # Step 3: scale the input by its weights (broadcast over the last axis).
        context_vector = attention_weights * x

        if self.return_sequences:
            return context_vector
        # Collapse axis 1 into a single weighted sum per sample.
        return tf.reduce_sum(context_vector, axis=1)

    def compute_output_shape(self, input_shape):
        """Return the input shape unchanged, or (batch, features) when axis 1 is summed out."""
        if self.return_sequences:
            return input_shape
        return (input_shape[0], input_shape[-1])

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized and rebuilt."""
        config = super(Attention, self).get_config()
        config.update({"return_sequences": self.return_sequences})
        return config


In [None]:
# Define CNN Model
def build_model(embedding_matrix):
    """Assemble and compile the attention + 1-D convolution binary classifier.

    Args:
        embedding_matrix: Matrix of pretrained vectors; its shape supplies the
            Embedding layer's (input_dim, output_dim) and its values are the
            initial, trainable weights.

    Returns:
        The compiled Sequential model. It reads sequences of the module-level
        length ``maxlen`` and ends in a single sigmoid unit.
    """
    matrix_shape = embedding_matrix.shape
    embedding_layer = Embedding(
        input_dim=matrix_shape[0],
        output_dim=matrix_shape[1],
        weights=[embedding_matrix],
        trainable=True,
        input_length=maxlen,
    )
    layer_stack = [
        embedding_layer,
        Attention(return_sequences=True),
        Conv1D(filters=32, kernel_size=8, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layer_stack)
    # Declare the (batch, maxlen) input shape up front.
    network.build((None, maxlen))
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network

In [None]:
# Build and Train Model
# Instantiate the CNN network from the FastText embedding matrix and print its layer summary.
# build_model is redefined in every section of this notebook, so the definition cell directly above must be run first.
urdu_model = build_model(fastext_embedding_matrix)
urdu_model.summary()

In [None]:
# Stop training once val_loss has gone 3 epochs without improving, then
# restore the weights from the epoch with the best val_loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [None]:
# Fit the CNN model on the training split. validation_split=0.2 makes Keras hold out part of
# X_train for validation; epochs=500 is only an upper bound because early_stopping can end the run sooner.
history = urdu_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=32,
    callbacks=[early_stopping]  # shared EarlyStopping instance that monitors val_loss
)

In [None]:
# Evaluate Model
# Score the CNN on the held-out split; index 0 is read as the loss and index 1 as the accuracy.
eval_results = urdu_model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

In [None]:
# Predictions and Metrics
# Threshold the CNN's sigmoid outputs at 0.5 to get hard 0/1 labels, then score them against y_test.
y_pred = (urdu_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Training vs. validation accuracy per epoch for the CNN run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('accuracy', 'Train'), ('val_accuracy', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch for the CNN run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Confusion matrix of the CNN predictions on the held-out split, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Classification report of the CNN predictions, converted to a DataFrame and drawn as a heatmap.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()

plt.figure(figsize=(10, 6))
# [:-3] drops the last three rows (presumably accuracy / macro avg / weighted avg) and the
# 'support' column is removed, so only the per-class scores are plotted.
sns.heatmap(report_df[:-3].drop(columns=['support']), annot=True, cmap='viridis', fmt=".2f")  # Exclude support and averages
plt.title('Classification Report')
plt.show()

In [None]:
# Save the trained CNN to the working directory in the .keras format.
# NOTE(review): the model contains the custom Attention layer, which must be supplied again when loading.
urdu_model.save("CNN_model.keras")

Testing with TRAIN DATASET

In [None]:
# Evaluate Model
# Score the CNN on the second evaluation set (xx_test / yy_test).
eval_results = urdu_model.evaluate(xx_test, yy_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")
# Predictions and Metrics
# NOTE(review): this rebinds y_pred, so re-running the confusion-matrix or report cells after
# this one would compare y_test with predictions made for xx_test.
y_pred = (urdu_model.predict(xx_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(yy_test, y_pred))
print("Precision:", precision_score(yy_test, y_pred))
print("Recall:", recall_score(yy_test, y_pred))
print("F1 Score:", f1_score(yy_test, y_pred))

# RNN

In [None]:
# Define RNN Model (convolution front-end followed by a SimpleRNN)
def build_model(embedding_matrix):
    """Assemble and compile the attention + Conv1D + SimpleRNN binary classifier.

    Args:
        embedding_matrix: Matrix of pretrained vectors; its shape supplies the
            Embedding layer's (input_dim, output_dim) and its values are the
            initial, trainable weights.

    Returns:
        The compiled Sequential model. It reads sequences of the module-level
        length ``maxlen`` and ends in a single sigmoid unit.
    """
    dims = embedding_matrix.shape
    pretrained_embedding = Embedding(
        input_dim=dims[0],
        output_dim=dims[1],
        weights=[embedding_matrix],
        trainable=True,
        input_length=maxlen,
    )
    pipeline = [
        pretrained_embedding,
        Attention(return_sequences=True),
        Conv1D(filters=32, kernel_size=8, activation='relu'),
        MaxPooling1D(pool_size=2),
        SimpleRNN(100),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    classifier = Sequential(pipeline)
    # Declare the (batch, maxlen) input shape up front.
    classifier.build((None, maxlen))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

In [None]:
# Build and Train Model
# Instantiate the RNN network from the FastText embedding matrix and print its layer summary.
# build_model is redefined in every section of this notebook, so the definition cell directly above must be run first.
urdu_model = build_model(fastext_embedding_matrix)
urdu_model.summary()

In [None]:
# Fit the RNN model on the training split. validation_split=0.2 makes Keras hold out part of
# X_train for validation; epochs=500 is only an upper bound because early_stopping can end the run sooner.
history = urdu_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=32,
    callbacks=[early_stopping]  # shared EarlyStopping instance that monitors val_loss
)

In [None]:
# Evaluate Model
# Score the RNN on the held-out split; index 0 is read as the loss and index 1 as the accuracy.
eval_results = urdu_model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

In [None]:
# Predictions and Metrics
# Threshold the RNN's sigmoid outputs at 0.5 to get hard 0/1 labels, then score them against y_test.
y_pred = (urdu_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Training vs. validation accuracy per epoch for the RNN run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('accuracy', 'Train'), ('val_accuracy', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch for the RNN run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Confusion matrix of the RNN predictions on the held-out split, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Classification report of the RNN predictions, converted to a DataFrame and drawn as a heatmap.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()

plt.figure(figsize=(10, 6))
# [:-3] drops the last three rows (presumably accuracy / macro avg / weighted avg) and the
# 'support' column is removed, so only the per-class scores are plotted.
sns.heatmap(report_df[:-3].drop(columns=['support']), annot=True, cmap='viridis', fmt=".2f")  # Exclude support and averages
plt.title('Classification Report')
plt.show()

In [None]:
# Save the trained RNN to the working directory in the .keras format.
# NOTE(review): the model contains the custom Attention layer, which must be supplied again when loading.
urdu_model.save("RNN_model.keras")

Testing with TRAIN DATASET

In [None]:
# Evaluate Model
# Score the RNN on the second evaluation set (xx_test / yy_test).
eval_results = urdu_model.evaluate(xx_test, yy_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")
# Predictions and Metrics
# NOTE(review): this rebinds y_pred, so re-running the confusion-matrix or report cells after
# this one would compare y_test with predictions made for xx_test.
y_pred = (urdu_model.predict(xx_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(yy_test, y_pred))
print("Precision:", precision_score(yy_test, y_pred))
print("Recall:", recall_score(yy_test, y_pred))
print("F1 Score:", f1_score(yy_test, y_pred))

# Bi-RNN

In [None]:
# Define Bi-RNN Model
def build_model(embedding_matrix):
    """Assemble and compile the attention + bidirectional SimpleRNN binary classifier.

    Args:
        embedding_matrix: Matrix of pretrained vectors; its shape supplies the
            Embedding layer's (input_dim, output_dim) and its values are the
            initial, trainable weights.

    Returns:
        The compiled Sequential model. It reads sequences of the module-level
        length ``maxlen`` and ends in a single sigmoid unit.
    """
    matrix_shape = embedding_matrix.shape
    blocks = [
        Embedding(
            input_dim=matrix_shape[0],
            output_dim=matrix_shape[1],
            weights=[embedding_matrix],
            trainable=True,
            input_length=maxlen,
        ),
        Attention(return_sequences=True),
        Bidirectional(SimpleRNN(100)),
        # Three shrinking ReLU layers ahead of the sigmoid output unit.
        *[Dense(width, activation='relu') for width in (64, 32, 16)],
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential(blocks)
    # Declare the (batch, maxlen) input shape up front.
    net.build((None, maxlen))
    net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return net

In [None]:
# Build and Train Model
# Instantiate the Bi-RNN network from the FastText embedding matrix and print its layer summary.
# build_model is redefined in every section of this notebook, so the definition cell directly above must be run first.
urdu_model = build_model(fastext_embedding_matrix)
urdu_model.summary()

In [None]:
# Fit the Bi-RNN model on the training split. validation_split=0.2 makes Keras hold out part of
# X_train for validation; epochs=500 is only an upper bound because early_stopping can end the run sooner.
history = urdu_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=32,
    callbacks=[early_stopping]  # shared EarlyStopping instance that monitors val_loss
)

In [None]:
# Evaluate Model
# Score the Bi-RNN on the held-out split; index 0 is read as the loss and index 1 as the accuracy.
eval_results = urdu_model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

In [None]:
# Predictions and Metrics
# Threshold the Bi-RNN's sigmoid outputs at 0.5 to get hard 0/1 labels, then score them against y_test.
y_pred = (urdu_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Training vs. validation accuracy per epoch for the Bi-RNN run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('accuracy', 'Train'), ('val_accuracy', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch for the Bi-RNN run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Confusion matrix of the Bi-RNN predictions on the held-out split, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Classification report of the Bi-RNN predictions, converted to a DataFrame and drawn as a heatmap.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()

plt.figure(figsize=(10, 6))
# [:-3] drops the last three rows (presumably accuracy / macro avg / weighted avg) and the
# 'support' column is removed, so only the per-class scores are plotted.
sns.heatmap(report_df[:-3].drop(columns=['support']), annot=True, cmap='viridis', fmt=".2f")  # Exclude support and averages
plt.title('Classification Report')
plt.show()

In [None]:
# Save the trained Bi-RNN to the working directory in the .keras format.
# NOTE(review): the model contains the custom Attention layer, which must be supplied again when loading.
urdu_model.save("Bi-RNN_model.keras")

Testing with TRAIN DATASET

In [None]:
# Evaluate Model
# Score the Bi-RNN on the second evaluation set (xx_test / yy_test).
eval_results = urdu_model.evaluate(xx_test, yy_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")
# Predictions and Metrics
# NOTE(review): this rebinds y_pred, so re-running the confusion-matrix or report cells after
# this one would compare y_test with predictions made for xx_test.
y_pred = (urdu_model.predict(xx_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(yy_test, y_pred))
print("Precision:", precision_score(yy_test, y_pred))
print("Recall:", recall_score(yy_test, y_pred))
print("F1 Score:", f1_score(yy_test, y_pred))

# LSTM

In [None]:
# Define LSTM Model
def build_model(embedding_matrix):
    """Assemble and compile the attention + LSTM binary classifier.

    Args:
        embedding_matrix: Matrix of pretrained vectors; its shape supplies the
            Embedding layer's (input_dim, output_dim) and its values are the
            initial, trainable weights.

    Returns:
        The compiled Sequential model. It reads sequences of the module-level
        length ``maxlen`` and ends in a single sigmoid unit.
    """
    dims = embedding_matrix.shape
    stages = [
        Embedding(
            input_dim=dims[0],
            output_dim=dims[1],
            weights=[embedding_matrix],
            trainable=True,
            input_length=maxlen,
        ),
        Attention(return_sequences=True),
        # return_sequences=False: only the final LSTM output is passed to the dense head.
        LSTM(units=64, dropout=0.2, return_sequences=False),
        *[Dense(width, activation='relu') for width in (64, 32, 16)],
        Dense(1, activation='sigmoid'),
    ]
    lstm_net = Sequential(stages)
    # Declare the (batch, maxlen) input shape up front.
    lstm_net.build((None, maxlen))
    lstm_net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return lstm_net

In [None]:
# Build and Train Model
# Instantiate the LSTM network from the FastText embedding matrix and print its layer summary.
# build_model is redefined in every section of this notebook, so the definition cell directly above must be run first.
urdu_model = build_model(fastext_embedding_matrix)
urdu_model.summary()

In [None]:
# Fit the LSTM model on the training split. validation_split=0.2 makes Keras hold out part of
# X_train for validation; epochs=500 is only an upper bound because early_stopping can end the run sooner.
history = urdu_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=32,
    callbacks=[early_stopping]  # shared EarlyStopping instance that monitors val_loss
)

In [None]:
# Evaluate Model
# Score the LSTM on the held-out split; index 0 is read as the loss and index 1 as the accuracy.
eval_results = urdu_model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

In [None]:
# Predictions and Metrics
# Threshold the LSTM's sigmoid outputs at 0.5 to get hard 0/1 labels, then score them against y_test.
y_pred = (urdu_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Training vs. validation accuracy per epoch for the LSTM run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('accuracy', 'Train'), ('val_accuracy', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch for the LSTM run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Confusion matrix of the LSTM predictions on the held-out split, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Classification report of the LSTM predictions, converted to a DataFrame and drawn as a heatmap.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()

plt.figure(figsize=(10, 6))
# [:-3] drops the last three rows (presumably accuracy / macro avg / weighted avg) and the
# 'support' column is removed, so only the per-class scores are plotted.
sns.heatmap(report_df[:-3].drop(columns=['support']), annot=True, cmap='viridis', fmt=".2f")  # Exclude support and averages
plt.title('Classification Report')
plt.show()

In [None]:
# Save the trained LSTM to the working directory in the .keras format.
# NOTE(review): the model contains the custom Attention layer, which must be supplied again when loading.
urdu_model.save("LSTM_model.keras")

Testing with TRAIN DATASET

In [None]:
# Evaluate Model
# Score the LSTM on the second evaluation set (xx_test / yy_test).
eval_results = urdu_model.evaluate(xx_test, yy_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")
# Predictions and Metrics
# NOTE(review): this rebinds y_pred, so re-running the confusion-matrix or report cells after
# this one would compare y_test with predictions made for xx_test.
y_pred = (urdu_model.predict(xx_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(yy_test, y_pred))
print("Precision:", precision_score(yy_test, y_pred))
print("Recall:", recall_score(yy_test, y_pred))
print("F1 Score:", f1_score(yy_test, y_pred))

# Bi-LSTM

In [None]:
# Define Bi-LSTM Model
def build_model(embedding_matrix):
    """Assemble and compile the attention + bidirectional LSTM binary classifier.

    Args:
        embedding_matrix: Matrix of pretrained vectors; its shape supplies the
            Embedding layer's (input_dim, output_dim) and its values are the
            initial, trainable weights.

    Returns:
        The compiled Sequential model. It reads sequences of the module-level
        length ``maxlen`` and ends in a single sigmoid unit.
    """
    matrix_shape = embedding_matrix.shape
    token_embedding = Embedding(
        input_dim=matrix_shape[0],
        output_dim=matrix_shape[1],
        weights=[embedding_matrix],
        trainable=True,
        input_length=maxlen,
    )
    layer_plan = [
        token_embedding,
        Attention(return_sequences=True),
        # The LSTM is wrapped in Bidirectional; only its final output is passed on.
        Bidirectional(LSTM(units=64, dropout=0.2, recurrent_dropout=0.0, return_sequences=False)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    bilstm_net = Sequential(layer_plan)
    # Declare the (batch, maxlen) input shape up front.
    bilstm_net.build((None, maxlen))
    bilstm_net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return bilstm_net

In [None]:
# Build and Train Model
# Instantiate the Bi-LSTM network from the FastText embedding matrix and print its layer summary.
# build_model is redefined in every section of this notebook, so the definition cell directly above must be run first.
urdu_model = build_model(fastext_embedding_matrix)
urdu_model.summary()

In [None]:
# Fit the Bi-LSTM model on the training split. validation_split=0.2 makes Keras hold out part of
# X_train for validation; epochs=500 is only an upper bound because early_stopping can end the run sooner.
history = urdu_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=32,
    callbacks=[early_stopping]  # shared EarlyStopping instance that monitors val_loss
)

In [None]:
# Evaluate Model
# Score the Bi-LSTM on the held-out split; index 0 is read as the loss and index 1 as the accuracy.
eval_results = urdu_model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

In [None]:
# Predictions and Metrics
# Threshold the Bi-LSTM's sigmoid outputs at 0.5 to get hard 0/1 labels, then score them against y_test.
y_pred = (urdu_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Training vs. validation accuracy per epoch for the Bi-LSTM run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('accuracy', 'Train'), ('val_accuracy', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch for the Bi-LSTM run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Confusion matrix of the Bi-LSTM predictions on the held-out split, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Classification report of the Bi-LSTM predictions, converted to a DataFrame and drawn as a heatmap.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()

plt.figure(figsize=(10, 6))
# [:-3] drops the last three rows (presumably accuracy / macro avg / weighted avg) and the
# 'support' column is removed, so only the per-class scores are plotted.
sns.heatmap(report_df[:-3].drop(columns=['support']), annot=True, cmap='viridis', fmt=".2f")  # Exclude support and averages
plt.title('Classification Report')
plt.show()

In [None]:
# Save the trained Bi-LSTM to the working directory in the .keras format.
# NOTE(review): the model contains the custom Attention layer, which must be supplied again when loading.
urdu_model.save("Bi-LSTM_model.keras")

Testing with TRAIN DATASET

In [None]:
# Evaluate Model
# Score the Bi-LSTM on the second evaluation set (xx_test / yy_test).
eval_results = urdu_model.evaluate(xx_test, yy_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")
# Predictions and Metrics
# NOTE(review): this rebinds y_pred, so re-running the confusion-matrix or report cells after
# this one would compare y_test with predictions made for xx_test.
y_pred = (urdu_model.predict(xx_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(yy_test, y_pred))
print("Precision:", precision_score(yy_test, y_pred))
print("Recall:", recall_score(yy_test, y_pred))
print("F1 Score:", f1_score(yy_test, y_pred))

# GRU

In [None]:
# Define GRU Model
def build_model(embedding_matrix):
    """Assemble and compile the attention + GRU binary classifier.

    Args:
        embedding_matrix: Matrix of pretrained vectors; its shape supplies the
            Embedding layer's (input_dim, output_dim) and its values are the
            initial, trainable weights.

    Returns:
        The compiled Sequential model. It reads sequences of the module-level
        length ``maxlen`` and ends in a single sigmoid unit.
    """
    dims = embedding_matrix.shape
    components = [
        Embedding(
            input_dim=dims[0],
            output_dim=dims[1],
            weights=[embedding_matrix],
            trainable=True,
            input_length=maxlen,
        ),
        Attention(return_sequences=True),
        # return_sequences=False: only the final GRU output is passed to the dense head.
        GRU(units=64, dropout=0.2, recurrent_dropout=0.0, return_sequences=False),
        *[Dense(width, activation='relu') for width in (64, 32, 16)],
        Dense(1, activation='sigmoid'),
    ]
    gru_net = Sequential(components)
    # Declare the (batch, maxlen) input shape up front.
    gru_net.build((None, maxlen))
    gru_net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return gru_net

In [None]:
# Build and Train Model
# Instantiate the GRU network from the FastText embedding matrix and print its layer summary.
# build_model is redefined in every section of this notebook, so the definition cell directly above must be run first.
urdu_model = build_model(fastext_embedding_matrix)
urdu_model.summary()

In [None]:
# Fit the GRU model on the training split. validation_split=0.2 makes Keras hold out part of
# X_train for validation; epochs=500 is only an upper bound because early_stopping can end the run sooner.
history = urdu_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=32,
    callbacks=[early_stopping]  # shared EarlyStopping instance that monitors val_loss
)

In [None]:
# Evaluate Model
# Score the GRU on the held-out split; index 0 is read as the loss and index 1 as the accuracy.
eval_results = urdu_model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

In [None]:
# Predictions and Metrics
# Threshold the GRU's sigmoid outputs at 0.5 to get hard 0/1 labels, then score them against y_test.
y_pred = (urdu_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Training vs. validation accuracy per epoch for the GRU run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('accuracy', 'Train'), ('val_accuracy', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch for the GRU run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Confusion matrix of the GRU predictions on the held-out split, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Classification report of the GRU predictions, converted to a DataFrame and drawn as a heatmap.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()

plt.figure(figsize=(10, 6))
# [:-3] drops the last three rows (presumably accuracy / macro avg / weighted avg) and the
# 'support' column is removed, so only the per-class scores are plotted.
sns.heatmap(report_df[:-3].drop(columns=['support']), annot=True, cmap='viridis', fmt=".2f")  # Exclude support and averages
plt.title('Classification Report')
plt.show()

In [None]:
# Save the trained GRU to the working directory in the .keras format.
# NOTE(review): the model contains the custom Attention layer, which must be supplied again when loading.
urdu_model.save("GRU_model.keras")

Testing with TRAIN DATASET

In [None]:
# Evaluate Model
# Score the GRU on the second evaluation set (xx_test / yy_test).
eval_results = urdu_model.evaluate(xx_test, yy_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")
# Predictions and Metrics
# NOTE(review): this rebinds y_pred, so re-running the confusion-matrix or report cells after
# this one would compare y_test with predictions made for xx_test.
y_pred = (urdu_model.predict(xx_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(yy_test, y_pred))
print("Precision:", precision_score(yy_test, y_pred))
print("Recall:", recall_score(yy_test, y_pred))
print("F1 Score:", f1_score(yy_test, y_pred))

# Bi-GRU

In [None]:
# Define Bi-GRU Model
def build_model(embedding_matrix):
    """Assemble and compile the attention + bidirectional GRU binary classifier.

    Args:
        embedding_matrix: Matrix of pretrained vectors; its shape supplies the
            Embedding layer's (input_dim, output_dim) and its values are the
            initial, trainable weights.

    Returns:
        The compiled Sequential model. It reads sequences of the module-level
        length ``maxlen`` and ends in a single sigmoid unit.
    """
    matrix_shape = embedding_matrix.shape
    word_embedding = Embedding(
        input_dim=matrix_shape[0],
        output_dim=matrix_shape[1],
        weights=[embedding_matrix],
        trainable=True,
        input_length=maxlen,
    )
    ordered_layers = [
        word_embedding,
        Attention(return_sequences=True),
        # The GRU is wrapped in Bidirectional; only its final output is passed on.
        Bidirectional(GRU(units=64, dropout=0.2, recurrent_dropout=0.0, return_sequences=False)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    bigru_net = Sequential(ordered_layers)
    # Declare the (batch, maxlen) input shape up front.
    bigru_net.build((None, maxlen))
    bigru_net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return bigru_net

In [None]:
# Build and Train Model
# Instantiate the Bi-GRU network from the FastText embedding matrix and print its layer summary.
# build_model is redefined in every section of this notebook, so the definition cell directly above must be run first.
urdu_model = build_model(fastext_embedding_matrix)
urdu_model.summary()

In [None]:
# Fit the Bi-GRU model on the training split. validation_split=0.2 makes Keras hold out part of
# X_train for validation; epochs=500 is only an upper bound because early_stopping can end the run sooner.
history = urdu_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=500,
    batch_size=32,
    callbacks=[early_stopping]  # shared EarlyStopping instance that monitors val_loss
)

In [None]:
# Evaluate Model
# Score the Bi-GRU on the held-out split; index 0 is read as the loss and index 1 as the accuracy.
eval_results = urdu_model.evaluate(X_test, y_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")

In [None]:
# Predictions and Metrics
# Threshold the Bi-GRU's sigmoid outputs at 0.5 to get hard 0/1 labels, then score them against y_test.
y_pred = (urdu_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

In [None]:
# Training vs. validation accuracy per epoch for the Bi-GRU run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('accuracy', 'Train'), ('val_accuracy', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Model accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch for the Bi-GRU run stored in `history`
plt.figure(figsize=(10, 5))
for history_key, curve_label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Model loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Confusion matrix of the Bi-GRU predictions on the held-out split, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Classification report of the Bi-GRU predictions, converted to a DataFrame and drawn as a heatmap.
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).transpose()

plt.figure(figsize=(10, 6))
# [:-3] drops the last three rows (presumably accuracy / macro avg / weighted avg) and the
# 'support' column is removed, so only the per-class scores are plotted.
sns.heatmap(report_df[:-3].drop(columns=['support']), annot=True, cmap='viridis', fmt=".2f")  # Exclude support and averages
plt.title('Classification Report')
plt.show()

In [None]:
# Save the trained Bi-GRU to the working directory in the .keras format.
# NOTE(review): the model contains the custom Attention layer, which must be supplied again when loading.
urdu_model.save("Bi-GRU_model.keras")

Testing with TRAIN DATASET

In [None]:
# Evaluate Model
# Score the Bi-GRU on the second evaluation set (xx_test / yy_test).
eval_results = urdu_model.evaluate(xx_test, yy_test)
print(f"Test Loss: {eval_results[0]}, Test Accuracy: {eval_results[1]}")
# Predictions and Metrics
# NOTE(review): this rebinds y_pred, so re-running the confusion-matrix or report cells after
# this one would compare y_test with predictions made for xx_test.
y_pred = (urdu_model.predict(xx_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(yy_test, y_pred))
print("Precision:", precision_score(yy_test, y_pred))
print("Recall:", recall_score(yy_test, y_pred))
print("F1 Score:", f1_score(yy_test, y_pred))