In [None]:
import numpy as np
import pandas as pd
from keras import backend as K
from keras.layers import Embedding, LSTM, GRU, Bidirectional, Dense, Dropout, SpatialDropout1D
from keras.layers import SimpleRNN
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Load the dataset
# Two-column TSV read with explicit column names. Because `names` is given
# without `header=0`, any header line in the file is read as an ordinary row;
# the label filter below removes it.
url = "https://github.com/MuhammadYaseenKhan/Urdu-Sentiment-Corpus/raw/master/urdu-sentiment-corpus-v1.tsv"
df = pd.read_csv(url, sep='\t', names=['text', 'label'])

# Preprocessing
# Convert labels to binary (P -> 1, N -> 0).
# Only rows explicitly labelled 'P' or 'N' (and with non-missing text) are kept.
# Previously every value other than 'P' -- a header row, any other class, or a
# missing label -- was silently encoded as the negative class.
label_map = {'P': 1, 'N': 0}
clean_label = df['label'].str.strip()
keep = clean_label.isin(list(label_map)) & df['text'].notna()
df = (
    df.loc[keep]
    .assign(label=clean_label[keep].map(label_map).astype(int))
    .reset_index(drop=True)
)
assert not df.empty, "no rows labelled 'P' or 'N' were found in the corpus"

# Tokenization
# NOTE(review): the tokenizer is fitted on the full corpus (before the
# train/test split), so the vocabulary also contains test-set words.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
X = tokenizer.texts_to_sequences(df['text'])
# Pad/truncate to 100 tokens; the model builders hard-code input_length=100,
# so the two values must be changed together.
X = pad_sequences(X, maxlen=100)  # Assuming max length of 100 for sequences

y = df['label'].values

# Split the dataset into training and testing sets (75% / 25%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Define the models
def build_rnn_model(num_layers, dropout_rate):
    """Build and compile a stacked simple-RNN binary classifier.

    The previous implementation used LSTM cells here, which made it an exact
    duplicate of ``build_lstm_model``; it now uses ``SimpleRNN`` so the "RNN"
    entry is a distinct architecture.

    Architecture: Embedding (100-dim, input length 100) ->
    ``num_layers`` x [SimpleRNN(128, return_sequences=True) -> Dropout] ->
    SimpleRNN(128) -> Dropout -> Dense(1, sigmoid).
    The network therefore contains ``num_layers + 1`` recurrent layers.

    Args:
        num_layers: number of sequence-returning recurrent layers stacked
            before the final recurrent layer.
        dropout_rate: rate passed to every Dropout layer.

    Returns:
        A Sequential model compiled with binary cross-entropy loss, the Adam
        optimizer and the accuracy metric.

    The vocabulary size is read from the module-level ``tokenizer``.
    """
    model = Sequential()
    model.add(Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=100, input_length=100))
    for _ in range(num_layers):
        # Intermediate layers emit the full sequence so the next recurrent
        # layer receives one vector per time step.
        model.add(SimpleRNN(128, return_sequences=True))
        model.add(Dropout(dropout_rate))
    # The last recurrent layer returns only its final output for classification.
    model.add(SimpleRNN(128))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def build_gru_model(num_layers, dropout_rate):
    """Assemble and compile a stacked GRU binary classifier.

    Layer order: Embedding (100-dim, input length 100), then ``num_layers``
    pairs of [GRU(128, return_sequences=True), Dropout], then a final
    GRU(128), Dropout and a single sigmoid unit. In total the network has
    ``num_layers + 1`` GRU layers.

    Args:
        num_layers: how many sequence-returning GRU layers precede the last one.
        dropout_rate: rate used by every Dropout layer.

    Returns:
        A compiled Sequential model (binary cross-entropy, Adam, accuracy).

    The vocabulary size comes from the module-level ``tokenizer``.
    """
    vocab_size = len(tokenizer.word_index) + 1

    stack = [Embedding(input_dim=vocab_size, output_dim=100, input_length=100)]
    for _ in range(num_layers):
        # Full-sequence output feeds the next recurrent layer.
        stack.append(GRU(128, return_sequences=True))
        stack.append(Dropout(dropout_rate))
    # Final GRU collapses the sequence to a single vector.
    stack.extend([
        GRU(128),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid'),
    ])

    net = Sequential(stack)
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

def build_lstm_model(num_layers, dropout_rate):
    """Assemble and compile a stacked LSTM binary classifier.

    Layer order: Embedding (100-dim, input length 100), then ``num_layers``
    pairs of [LSTM(128, return_sequences=True), Dropout], then a final
    LSTM(128), Dropout and a single sigmoid unit. In total the network has
    ``num_layers + 1`` LSTM layers.

    Args:
        num_layers: how many sequence-returning LSTM layers precede the last one.
        dropout_rate: rate used by every Dropout layer.

    Returns:
        A compiled Sequential model (binary cross-entropy, Adam, accuracy).

    The vocabulary size comes from the module-level ``tokenizer``.
    """
    vocab_size = len(tokenizer.word_index) + 1

    layers = [Embedding(input_dim=vocab_size, output_dim=100, input_length=100)]
    for _ in range(num_layers):
        # Intermediate recurrent layers keep the time dimension.
        layers += [LSTM(128, return_sequences=True), Dropout(dropout_rate)]
    # Last LSTM returns only its final output, followed by the classifier head.
    layers += [LSTM(128), Dropout(dropout_rate), Dense(1, activation='sigmoid')]

    classifier = Sequential(layers)
    classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return classifier

def build_bilstm_model(num_layers, dropout_rate):
    """Assemble and compile a stacked bidirectional-LSTM binary classifier.

    Layer order: Embedding (100-dim, input length 100), then ``num_layers``
    pairs of [Bidirectional(LSTM(128, return_sequences=True)), Dropout], then
    a final Bidirectional(LSTM(128)), Dropout and a single sigmoid unit.
    In total the network has ``num_layers + 1`` bidirectional layers.

    Args:
        num_layers: how many sequence-returning bidirectional layers precede
            the last one.
        dropout_rate: rate used by every Dropout layer.

    Returns:
        A compiled Sequential model (binary cross-entropy, Adam, accuracy).

    The vocabulary size comes from the module-level ``tokenizer``.
    """
    vocab_size = len(tokenizer.word_index) + 1

    blocks = [Embedding(input_dim=vocab_size, output_dim=100, input_length=100)]
    for _ in range(num_layers):
        # Sequence-returning bidirectional layer followed by its dropout.
        blocks.append(Bidirectional(LSTM(128, return_sequences=True)))
        blocks.append(Dropout(dropout_rate))
    # Final bidirectional layer returns a single vector per sample.
    blocks.append(Bidirectional(LSTM(128)))
    blocks.append(Dropout(dropout_rate))
    blocks.append(Dense(1, activation='sigmoid'))

    network = Sequential(blocks)
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Train and evaluate models with different hyperparameters
# Every (architecture, num_layers, dropout_rate) combination is trained for
# 5 epochs and scored on the held-out test split.
# NOTE(review): no Keras/TensorFlow random seed is set in this cell, so the
# metrics will differ from run to run.
models = {
    "RNN": build_rnn_model,
    "GRU": build_gru_model,
    "LSTM": build_lstm_model,
    "BiLSTM": build_bilstm_model
}

# Maps (model name, num_layers, dropout_rate) -> dict of test-set metrics.
results = {}

for model_name, model_builder in models.items():
    for num_layers in [2, 3]:
        for dropout_rate in [0.3, 0.7]:
            # Drop the Keras global state left by the previous configuration;
            # without this, building 16 models in one process keeps
            # accumulating graph/layer state and memory.
            K.clear_session()
            model = model_builder(num_layers, dropout_rate)
            history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2, verbose=0)
            # predict() yields one sigmoid probability per sample as an (n, 1)
            # column; threshold at 0.5 and flatten to the 1-D shape of y_test.
            y_pred = (model.predict(X_test, verbose=0) > 0.5).astype("int32").ravel()
            accuracy = accuracy_score(y_test, y_pred)
            # zero_division=0: a model that never predicts the positive class
            # scores 0 explicitly instead of emitting an undefined-metric warning.
            precision = precision_score(y_test, y_pred, zero_division=0)
            recall = recall_score(y_test, y_pred, zero_division=0)
            f1 = f1_score(y_test, y_pred, zero_division=0)
            metrics = {
                "Accuracy": accuracy,
                "Precision": precision,
                "Recall": recall,
                "F1 Score": f1
            }
            results[(model_name, num_layers, dropout_rate)] = metrics
            print(f"{model_name} (Layers: {num_layers}, Dropout: {dropout_rate}):")
            for metric_name, metric_value in metrics.items():
                print(f"{metric_name}:", metric_value)
            print()

# Print results in a table
# One row per configuration, indexed by (Model, Layers, Dropout).
results_df = pd.DataFrame.from_dict(results, orient='index')
results_df.index = pd.MultiIndex.from_tuples(results_df.index, names=['Model', 'Layers', 'Dropout'])
print(results_df)


RNN (Layers: 2, Dropout: 0.3):
Accuracy: 0.5776892430278885
Precision: 0.5563380281690141
Recall: 0.6475409836065574
F1 Score: 0.5984848484848485

RNN (Layers: 2, Dropout: 0.7):
Accuracy: 0.5657370517928287
Precision: 0.5802469135802469
Recall: 0.38524590163934425
F1 Score: 0.4630541871921182

RNN (Layers: 3, Dropout: 0.3):
Accuracy: 0.5617529880478087
Precision: 0.5714285714285714
Recall: 0.39344262295081966
F1 Score: 0.4660194174757281

RNN (Layers: 3, Dropout: 0.7):
Accuracy: 0.5776892430278885
Precision: 0.5444444444444444
Recall: 0.8032786885245902
F1 Score: 0.6490066225165563

GRU (Layers: 2, Dropout: 0.3):
Accuracy: 0.6095617529880478
Precision: 0.5714285714285714
Recall: 0.7868852459016393
F1 Score: 0.6620689655172414

GRU (Layers: 2, Dropout: 0.7):
Accuracy: 0.5657370517928287
Precision: 0.5371428571428571
Recall: 0.7704918032786885
F1 Score: 0.632996632996633

GRU (Layers: 3, Dropout: 0.3):
Accuracy: 0.601593625498008
Precision: 0.5785714285714286
Recall: 0.6639344262295082
F

In [None]:
# Persist the metrics table (including its Model/Layers/Dropout index) as CSV
# in the current working directory.
results_df.to_csv(path_or_buf='results.csv')
