In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, GRU, LSTM, Bidirectional, Dense, Dropout

df = pd.read_csv('urdu-sentiment-corpus-v1.tsv', delimiter='\t')
df.columns = ['Tweet', 'Class']
df['Class'] = df['Class'].map({'P': 1, 'N': 0})
df = df.dropna()
df

Unnamed: 0,Tweet,Class
0,میں نے ایٹم بم بنایا ھے ۔۔۔۔او بھائی ایٹم بمب ...,1.0
1,چندے سے انقلاب اور عمران خان وزیر اعظم نہیں بن...,0.0
3,"سرچ انجن گوگل کے نائب صدر نے فضا میں ، 130,000...",1.0
4,ابھی تک اسکی لہریں کبھی کبھی آ جاتی ہیں یار :أْ,1.0
5,گندی زبان اور گٹر جیسے دماغ والے جاهل جیالے ه...,0.0
...,...,...
995,اُس آدمی نے اِس سالار کو کافی معقول ٹپ دی ہے ۔,1.0
996,چچا غالب کی روح سے معذرت کے ساتھہم نے مانا کہ ...,1.0
997,واہ جناب واہ! اچھی رہی۔ جناب خود کو فرشتہ سمجو...,1.0
998,اسلام آباد :پی اے ٹی کا دھرنا ختم، صفائی کے کا...,1.0


In [2]:
X = df['Tweet'].values
y = df['Class'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

max_sequence_length = max([len(seq) for seq in X_train_seq + X_test_seq])
vocab_size = len(tokenizer.word_index) + 1

X_train_padded = pad_sequences(X_train_seq, maxlen=max_sequence_length, padding='post')
X_test_padded = pad_sequences(X_test_seq, maxlen=max_sequence_length, padding='post')


In [3]:
def create_model(model_type, num_layers, dropout_rate):
    model = Sequential()
    model.add(Embedding(vocab_size, 100, input_length=max_sequence_length))
    
    if model_type == 'RNN':
        for _ in range(num_layers):
            model.add(SimpleRNN(64, return_sequences=True))
        model.add(SimpleRNN(64))
    elif model_type == 'GRU':
        for _ in range(num_layers):
            model.add(GRU(64, return_sequences=True))
        model.add(GRU(64))
    elif model_type == 'LSTM':
        for _ in range(num_layers):
            model.add(LSTM(64, return_sequences=True))
        model.add(LSTM(64))
    elif model_type == 'BiLSTM':
        for _ in range(num_layers):
            model.add(Bidirectional(LSTM(64, return_sequences=True)))
        model.add(Bidirectional(LSTM(64)))
    
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))
    
    return model

In [4]:
results = []

for model_type in ['RNN', 'GRU', 'LSTM', 'BiLSTM']:
    for num_layers in [2, 3]:
        for dropout_rate in [0.3, 0.7]:
            print(f"Training {model_type} with {num_layers} layers and dropout {dropout_rate}...")
            model = create_model(model_type, num_layers, dropout_rate)
            model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
            model.fit(X_train_padded, y_train, epochs=5, batch_size=64, verbose=0)
            y_pred = np.round(model.predict(X_test_padded))

            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)

            results.append({
                'Model': model_type,
                'Num Layers': num_layers,
                'Dropout': dropout_rate,
                'Accuracy': accuracy,
                'Precision': precision,
                'Recall': recall,
                'F1': f1
            })

Training RNN with 2 layers and dropout 0.3...
Training RNN with 2 layers and dropout 0.7...
Training RNN with 3 layers and dropout 0.3...
Training RNN with 3 layers and dropout 0.7...
Training GRU with 2 layers and dropout 0.3...
Training GRU with 2 layers and dropout 0.7...


  _warn_prf(average, modifier, msg_start, len(result))


Training GRU with 3 layers and dropout 0.3...


  _warn_prf(average, modifier, msg_start, len(result))


Training GRU with 3 layers and dropout 0.7...


  _warn_prf(average, modifier, msg_start, len(result))


Training LSTM with 2 layers and dropout 0.3...


  _warn_prf(average, modifier, msg_start, len(result))


Training LSTM with 2 layers and dropout 0.7...
Training LSTM with 3 layers and dropout 0.3...
Training LSTM with 3 layers and dropout 0.7...
Training BiLSTM with 2 layers and dropout 0.3...
Training BiLSTM with 2 layers and dropout 0.7...
Training BiLSTM with 3 layers and dropout 0.3...
Training BiLSTM with 3 layers and dropout 0.7...


In [5]:
results_df = pd.DataFrame(results)
print(results_df)

     Model  Num Layers  Dropout  Accuracy  Precision    Recall        F1
0      RNN           2      0.3  0.506122   0.511628  0.532258  0.521739
1      RNN           2      0.7  0.457143   0.458716  0.403226  0.429185
2      RNN           3      0.3  0.510204   0.518182  0.459677  0.487179
3      RNN           3      0.7  0.473469   0.477477  0.427419  0.451064
4      GRU           2      0.3  0.493878   0.000000  0.000000  0.000000
5      GRU           2      0.7  0.493878   0.000000  0.000000  0.000000
6      GRU           3      0.3  0.493878   0.000000  0.000000  0.000000
7      GRU           3      0.7  0.493878   0.000000  0.000000  0.000000
8     LSTM           2      0.3  0.640816   0.628571  0.709677  0.666667
9     LSTM           2      0.7  0.636735   0.606061  0.806452  0.692042
10    LSTM           3      0.3  0.591837   0.558252  0.927419  0.696970
11    LSTM           3      0.7  0.636735   0.733333  0.443548  0.552764
12  BiLSTM           2      0.3  0.624490   0.61594