In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, SpatialDropout1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# ---------------------------------------------------------------------------
# Baseline emotion classifier: Embedding -> 2 x Bidirectional LSTM -> softmax.
#
# Pipeline: load CSVs -> map emotion names to integer ids -> drop incomplete
# rows -> 80/20 train/validation split -> tokenize + pad -> train -> report
# accuracy, per-class metrics and weighted F1 on the held-out 20%.
#
# Names defined here (tokenizer, max_words, max_len, X_*_pad, y_*_encoded)
# are reused by later cells, so they are kept unchanged.
# ---------------------------------------------------------------------------

# Read the data
df = pd.read_csv('/content/SemEval/pd2.csv', delimiter=',')
val_df = pd.read_csv('/content/SemEval/output_val.csv', delimiter=',')
test_df = pd.read_csv('/content/SemEval/output_test.csv', delimiter=',')

# Emotion mapping: emotion name -> integer class id.
emotion_mapping = {'disgust': 0, 'contempt': 1, 'anger': 2, 'neutral': 3, 'joy': 4, 'sadness': 5, 'fear': 6, 'surprise': 7}
num_classes = len(emotion_mapping)
df['label'] = df['emotion'].map(emotion_mapping)
val_df['label'] = val_df['emotion'].map(emotion_mapping)

# Drop rows with a missing value in any column. Emotions that are not keys of
# `emotion_mapping` were mapped to NaN above, so those rows are removed too.
df = df.dropna()

# Split the data into training and validation sets (fixed seed for a
# repeatable split).
X_train, X_val, y_train, y_val = train_test_split(df['utterance'], df['label'], test_size=0.2, random_state=42)

# Tokenize using Keras Tokenizer. The vocabulary is learned from the training
# split only; words outside it are replaced by the "<OOV>" token.
max_words = 10000
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert text to sequences of word indices
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_val_seq = tokenizer.texts_to_sequences(X_val)

# Pad sequences for equal length input to LSTM. The target length is the
# longest training sequence; pad_sequences truncates longer validation
# sequences to that length.
max_len = max(len(seq) for seq in X_train_seq)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post')
X_val_pad = pad_sequences(X_val_seq, maxlen=max_len, padding='post')

# Encode labels as contiguous integers 0..k-1.
# The encoder is fitted on every label in `df` (not only on y_train) so that
# a class which happens to fall entirely into the validation split does not
# make `transform(y_val)` raise "y contains previously unseen labels". When
# the training split contains every class the encoding is unchanged.
label_encoder = LabelEncoder()
label_encoder.fit(df['label'])
y_train_encoded = label_encoder.transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

# Build LSTM model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(SpatialDropout1D(0.2))
# First BiLSTM returns the full sequence so the second BiLSTM can consume it.
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Bidirectional(LSTM(64)))
model.add(Dense(64, activation='relu'))
# One output unit per entry of `emotion_mapping`.
model.add(Dense(num_classes, activation='softmax'))

# Compile the model. Sparse loss because the targets are integer class ids,
# not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting: stop after 3 epochs without
# val_loss improvement and roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
model.fit(X_train_pad, y_train_encoded, epochs=10, batch_size=32, validation_data=(X_val_pad, y_val_encoded), callbacks=[early_stopping])

# Evaluate the model on the validation set.
# `np` and `f1_score` are provided by the import cell at the top of the
# notebook (they were previously used here without being imported).
val_probabilities = model.predict(X_val_pad)
val_predictions = np.argmax(val_probabilities, axis=-1)
val_accuracy = accuracy_score(y_val_encoded, val_predictions)
print("LSTM Accuracy:", val_accuracy)
print("LSTM Classification Report:\n", classification_report(y_val_encoded, val_predictions))
# Calculate and print the weighted F1 score for validation set
val_weighted_f1 = f1_score(y_val_encoded, val_predictions, average='weighted')
print("LSTM Weighted F1:", val_weighted_f1)

Attention Mechanisms

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, MultiHeadAttention, Dropout, LayerNormalization, Concatenate, GlobalAveragePooling1D, Bidirectional, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, classification_report, f1_score

# ---------------------------------------------------------------------------
# Attention model: Embedding -> multi-head self-attention (residual + norm)
# -> BiLSTM -> second multi-head self-attention (residual + norm)
# -> concat -> mean-pool -> dense -> softmax.
#
# Depends on names produced by the LSTM baseline cell: X_train_pad, X_val_pad,
# y_train_encoded, y_val_encoded and max_words. Run that cell first.
# ---------------------------------------------------------------------------

# X_train_pad / X_val_pad are already padded to a common length by the
# baseline cell, so the sequence length is read from the matrix instead of
# re-padding (and overwriting) the arrays here.
max_len = X_train_pad.shape[1]
assert X_val_pad.shape[1] == max_len, "train/validation matrices must share the same sequence length"

# Width of the token embedding. It was referenced but never defined, which
# raised NameError; 128 matches the embedding size of the LSTM baseline and
# is divisible by the number of attention heads.
embedding_dim = 128
num_heads = 8

input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=max_words, output_dim=embedding_dim)(input_layer)

# Self-Attention Mechanism over the token embeddings (query == value), followed
# by dropout and a residual connection with layer normalization.
self_attention = MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim // num_heads)(embedding_layer, value=embedding_layer)
self_attention = Dropout(0.5)(self_attention)
self_attention = LayerNormalization(epsilon=1e-6)(embedding_layer + self_attention)

# Second attention block, applied to the BiLSTM outputs.
# NOTE: despite the variable name this is multi-head dot-product
# self-attention, not additive (Bahdanau-style) attention. The name is kept
# so that any later cell referring to it keeps working.
lstm_output = Bidirectional(LSTM(128, return_sequences=True))(self_attention)
additive_attention = MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim // num_heads)(lstm_output, value=lstm_output)
additive_attention = Dropout(0.5)(additive_attention)
additive_attention = LayerNormalization(epsilon=1e-6)(lstm_output + additive_attention)

# Concatenate both attention outputs along the feature axis, then average
# over the time axis to get one fixed-size vector per utterance.
merged = Concatenate()([self_attention, additive_attention])
pooled = GlobalAveragePooling1D()(merged)

dense_layer = Dense(256, activation='relu')(pooled)
dense_layer = Dropout(0.5)(dense_layer)
output_layer = Dense(8, activation='softmax')(dense_layer)  # 8 emotion classes

model_attention = Model(inputs=input_layer, outputs=output_layer)

# Compile the model with learning rate
learning_rate_attention = 0.0001  # Adjust the learning rate as needed
optimizer_attention = Adam(learning_rate=learning_rate_attention)
model_attention.compile(optimizer=optimizer_attention, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting: stop after 5 epochs without
# val_loss improvement and restore the best weights.
early_stopping_attention = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with attention mechanisms
model_attention.fit(X_train_pad, y_train_encoded, epochs=20, batch_size=64, validation_data=(X_val_pad, y_val_encoded), callbacks=[early_stopping_attention])

# Evaluate the model with attention mechanisms on the validation set.
# `np` comes from the import cell at the top of the notebook.
val_probabilities_attention = model_attention.predict(X_val_pad)
val_predictions_attention = np.argmax(val_probabilities_attention, axis=-1)
val_accuracy_attention = accuracy_score(y_val_encoded, val_predictions_attention)
print("Model with Attention Mechanisms Accuracy:", val_accuracy_attention)
print("Model with Attention Mechanisms Classification Report:\n", classification_report(y_val_encoded, val_predictions_attention))

# Calculate and print the weighted F1 score for validation set
val_weighted_f1_attention = f1_score(y_val_encoded, val_predictions_attention, average='weighted')
print("Model with Attention Mechanisms Weighted F1:", val_weighted_f1_attention)
