In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, GlobalMaxPool1D
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import pandas as pd

In [2]:
from google.colab import files

In [3]:
uploaded = files.upload()

Saving test.csv to test.csv
Saving train.csv to train.csv


In [7]:
# Load data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Parameters for tokenization and padding
MAX_NUM_WORDS = 10000  # Vocabulary size
MAX_SEQUENCE_LENGTH = 100  # Maximum length of sequences (tweets)

# Preprocessing the text data in the train dataset
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(train_df['Tweet'])

X_train = tokenizer.texts_to_sequences(train_df['Tweet'])
X_train = pad_sequences(X_train, maxlen=MAX_SEQUENCE_LENGTH)

# Extract the target labels
y_train = train_df[['anger', 'anticipation', 'disgust', 'fear', 'joy', 'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust']].values

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)



# Building a simple feedforward neural network model
model = Sequential()
model.add(Embedding(MAX_NUM_WORDS, 128, input_length=MAX_SEQUENCE_LENGTH))
model.add(GlobalMaxPool1D())
model.add(Dense(64, activation='relu'))
model.add(Dense(11, activation='sigmoid'))  # 11 outputs for 11 emotions

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Custom F1 score callback
class F1ScoreCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        y_val_pred = (self.model.predict(X_val) > 0.5).astype(int)
        f1 = f1_score(y_val, y_val_pred, average='macro')
        print(f" - val_f1_score: {f1:.4f}")

# Train the model with F1 score callback
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_val, y_val),
                    callbacks=[F1ScoreCallback()])

Epoch 1/5




[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
 - val_f1_score: 0.0000
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.2370 - loss: 0.5214 - val_accuracy: 0.4447 - val_loss: 0.4677
Epoch 2/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
 - val_f1_score: 0.1926
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.3777 - loss: 0.4550 - val_accuracy: 0.4861 - val_loss: 0.4135
Epoch 3/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
 - val_f1_score: 0.3725
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5033 - loss: 0.3620 - val_accuracy: 0.5327 - val_loss: 0.3623
Epoch 4/5
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
 - val_f1_score: 0.4003
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.5948 - loss: 0.2884 - val_accuracy: 0.4

In [9]:
uploaded = files.upload()

Saving sample_submission.csv to sample_submission.csv


In [10]:
submission_df = pd.read_csv('sample_submission.csv')

# Preprocess the test data for predictions
X_test = tokenizer.texts_to_sequences(test_df['Tweet'])
X_test = pad_sequences(X_test, maxlen=MAX_SEQUENCE_LENGTH)

# Generate predictions for the test data
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)

# Prepare the submission file
submission_df.iloc[:, 1:] = y_pred_binary  # Exclude ID column, update rest
submission_df.to_csv('submission.csv', index=False)

[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [11]:
from google.colab import files

# Download the saved submission file
files.download("submission.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>