In [13]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, GRU, SimpleRNN, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [14]:
dataset = pd.read_csv('../../dataset/Tweets.csv')

In [15]:
def clean_text(text):
    text = re.sub(r"http\S+", "", text)  # Remove URLs
    text = re.sub(r"@\w+", "", text)    # Remove mentions
    text = re.sub(r"[^A-Za-z\s]", "", text)  # Remove special characters
    text = text.lower().strip()  # Convert to lowercase and strip whitespace
    return text

In [16]:
dataset = dataset[['text', 'airline_sentiment']].dropna()
dataset['cleaned_text'] = dataset['text'].apply(clean_text)

In [17]:
label_encoder = LabelEncoder()
dataset['encoded_sentiment'] = label_encoder.fit_transform(dataset['airline_sentiment'])


In [18]:
max_vocab_size = 10000
max_sequence_length = 50
tokenizer = Tokenizer(num_words=max_vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(dataset['cleaned_text'])
sequences = tokenizer.texts_to_sequences(dataset['cleaned_text'])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post', truncating='post')


In [19]:
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, 
    dataset['encoded_sentiment'], 
    test_size=0.2, 
    random_state=42
)

In [20]:
lstm_model = Sequential([
    Embedding(input_dim=max_vocab_size, output_dim=64, input_length=max_sequence_length),
    LSTM(64, return_sequences=True),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')
])



In [21]:
lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [22]:
lstm_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=32, verbose=1)


Epoch 1/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 33ms/step - accuracy: 0.6053 - loss: 0.9319 - val_accuracy: 0.7193 - val_loss: 0.6801
Epoch 2/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 32ms/step - accuracy: 0.7078 - loss: 0.6553 - val_accuracy: 0.7483 - val_loss: 0.6248
Epoch 3/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - accuracy: 0.8062 - loss: 0.4941 - val_accuracy: 0.7821 - val_loss: 0.5553
Epoch 4/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - accuracy: 0.8528 - loss: 0.4221 - val_accuracy: 0.7821 - val_loss: 0.6346
Epoch 5/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 31ms/step - accuracy: 0.8877 - loss: 0.3627 - val_accuracy: 0.7910 - val_loss: 0.6069


<keras.src.callbacks.history.History at 0x2af17fca300>

In [23]:
lstm_loss, lstm_accuracy = lstm_model.evaluate(X_test, y_test, verbose=0)
print(f"LSTM Model Test Loss: {lstm_loss}")
print(f"LSTM Model Test Accuracy: {lstm_accuracy}")

LSTM Model Test Loss: 0.6069058179855347
LSTM Model Test Accuracy: 0.7909836173057556


In [24]:
gru_model = Sequential([
    Embedding(input_dim=max_vocab_size, output_dim=64, input_length=max_sequence_length),
    GRU(64, return_sequences=True),
    Dropout(0.2),
    GRU(32),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')
])

In [25]:
gru_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [26]:
gru_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=32, verbose=1)

Epoch 1/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - accuracy: 0.6181 - loss: 0.9411 - val_accuracy: 0.6452 - val_loss: 0.8969
Epoch 2/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 31ms/step - accuracy: 0.6166 - loss: 0.9176 - val_accuracy: 0.7370 - val_loss: 0.6018
Epoch 3/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - accuracy: 0.7801 - loss: 0.5563 - val_accuracy: 0.7920 - val_loss: 0.5557
Epoch 4/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - accuracy: 0.8561 - loss: 0.4039 - val_accuracy: 0.7893 - val_loss: 0.5742
Epoch 5/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 32ms/step - accuracy: 0.9051 - loss: 0.2871 - val_accuracy: 0.7923 - val_loss: 0.5995


<keras.src.callbacks.history.History at 0x2af0f0c8aa0>

In [27]:
gru_loss, gru_accuracy = gru_model.evaluate(X_test, y_test, verbose=0)
print(f"GRU Model Test Loss: {gru_loss}")
print(f"GRU Model Test Accuracy: {gru_accuracy}")

GRU Model Test Loss: 0.5995177030563354
GRU Model Test Accuracy: 0.7923497557640076


In [28]:
rnn_model = Sequential([
    Embedding(input_dim=max_vocab_size, output_dim=64, input_length=max_sequence_length),
    SimpleRNN(64, return_sequences=True),
    Dropout(0.2),
    SimpleRNN(32),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')
])

In [29]:
rnn_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [30]:
rnn_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=32, verbose=1)

Epoch 1/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - accuracy: 0.6001 - loss: 0.9123 - val_accuracy: 0.6667 - val_loss: 0.7707
Epoch 2/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.6827 - loss: 0.7547 - val_accuracy: 0.6776 - val_loss: 0.7673
Epoch 3/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.7109 - loss: 0.7085 - val_accuracy: 0.6452 - val_loss: 0.8431
Epoch 4/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.6142 - loss: 0.8769 - val_accuracy: 0.6452 - val_loss: 0.8367
Epoch 5/5
[1m366/366[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.6234 - loss: 0.8453 - val_accuracy: 0.6452 - val_loss: 0.8379


<keras.src.callbacks.history.History at 0x2af1c8f0dd0>

In [31]:
rnn_loss, rnn_accuracy = rnn_model.evaluate(X_test, y_test, verbose=0)
print(f"SimpleRNN Model Test Loss: {rnn_loss}")
print(f"SimpleRNN Model Test Accuracy: {rnn_accuracy}")

SimpleRNN Model Test Loss: 0.8378779292106628
SimpleRNN Model Test Accuracy: 0.6451502442359924
