In [12]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load Dataset
columns = ['id', 'country', 'Label', 'Text']
df = pd.read_csv("/kaggle/input/twitter-entity-sentiment-analysis/twitter_training.csv", names=columns)
df.dropna(inplace=True)

# Encode Labels
le = LabelEncoder()
df['Label'] = le.fit_transform(df['Label'])

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Text'])
vocab_size = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(df['Text'])
max_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, df['Label'], test_size=0.2, random_state=42, stratify=df['Label'])

# Model Architecture
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    BatchNormalization(),
    Bidirectional(LSTM(32)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(4, activation='softmax')
])

# Compile Model
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

# Learning Rate Scheduler
callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=1e-5)

# Train Model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=64, callbacks=[callback])

# Evaluate Model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification Report
from sklearn.metrics import classification_report
y_pred = np.argmax(model.predict(X_test), axis=1)
print(classification_report(y_test, y_pred, target_names=le.classes_)) 

Epoch 1/10
[1m925/925[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 43ms/step - accuracy: 0.4578 - loss: 1.1923 - val_accuracy: 0.6689 - val_loss: 0.8931 - learning_rate: 0.0010
Epoch 2/10
[1m925/925[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 42ms/step - accuracy: 0.8228 - loss: 0.5280 - val_accuracy: 0.8501 - val_loss: 0.4302 - learning_rate: 0.0010
Epoch 3/10
[1m925/925[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 42ms/step - accuracy: 0.8996 - loss: 0.2968 - val_accuracy: 0.8711 - val_loss: 0.3729 - learning_rate: 0.0010
Epoch 4/10
[1m925/925[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 42ms/step - accuracy: 0.9230 - loss: 0.2213 - val_accuracy: 0.8749 - val_loss: 0.3880 - learning_rate: 0.0010
Epoch 5/10
[1m925/925[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.9313 - loss: 0.1896
Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m925/925[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m