In [None]:
# Upload train.csv (read further down with pd.read_csv("train.csv")).
# The prompt is skipped when the file is already in the working directory so
# that "Restart & Run All" does not block on a manual step. Colab typically
# stores a re-upload of an existing name under a new name such as
# "train (1).csv", which nothing below would read anyway.
import os

from google.colab import files

uploaded = files.upload() if not os.path.exists("train.csv") else {}

Saving train.csv to train.csv


In [None]:
# Upload test.csv (read further down with pd.read_csv("test.csv")).
# The prompt is skipped when the file is already in the working directory so
# that "Restart & Run All" does not block on a manual step. Colab typically
# stores a re-upload of an existing name under a new name such as
# "test (1).csv", which nothing below would read anyway.
import os

from google.colab import files

uploaded = files.upload() if not os.path.exists("test.csv") else {}

Saving test.csv to test.csv


In [None]:
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from datasets import Dataset
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
import pandas as pd
import torch
from torch import nn
import numpy as np

# Pick the compute device: CUDA when one is visible, CPU otherwise
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Read the uploaded training and test CSV files into DataFrames
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Target label columns: one binary column per emotion (multi-label task)
emotion_columns = [
    'anger', 'anticipation', 'disgust', 'fear', 'joy', 'love',
    'optimism', 'pessimism', 'sadness', 'surprise', 'trust',
]

# Build TF-IDF features: the vocabulary (capped at 5000 terms) is learned from
# the training tweets only, then applied unchanged to the test tweets.
# NOTE(review): the vectorizer is fitted before the train/validation split
# below, so its vocabulary/IDF statistics also see the validation rows.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train_data["Tweet"]).toarray()
X_test = vectorizer.transform(test_data["Tweet"]).toarray()

# Labels as an integer matrix; the literal string 'NONE' is mapped to 0 first
y_train = (
    train_data[emotion_columns]
    .replace('NONE', 0)
    .astype(int)
    .values
)

# Hold out 20% of the training rows for validation (fixed seed for a stable split)
train_inputs, val_inputs, train_labels, val_labels = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Convert the splits to float32 tensors. They are deliberately kept on the CPU:
# Dataset.from_dict stores its data in host memory regardless, and the Trainer
# moves each batch to its own device, so a prior .to(device) would only cost a
# device round trip and GPU memory.
train_inputs_tensor = torch.tensor(train_inputs, dtype=torch.float32)
val_inputs_tensor = torch.tensor(val_inputs, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.float32)
val_labels_tensor = torch.tensor(val_labels, dtype=torch.float32)

# Per-label positive-class weight = (#negatives / #positives) on the training
# split. The denominator is clamped to 1 so a label with no positive example in
# the split gets a large finite weight instead of inf (which would turn the
# loss into inf/NaN).
negative_counts = np.sum(train_labels == 0, axis=0)
positive_counts = np.sum(train_labels == 1, axis=0)
class_weights = negative_counts / np.maximum(positive_counts, 1)
# This tensor is consumed inside the model's loss, so it does live on `device`.
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

# Wrap the tensors as Hugging Face datasets; the column names match the
# keyword arguments of the model's forward(input_ids, labels)
train_dataset = Dataset.from_dict({"input_ids": train_inputs_tensor, "labels": train_labels_tensor})
val_dataset = Dataset.from_dict({"input_ids": val_inputs_tensor, "labels": val_labels_tensor})

# Define the model architecture
class DeeperFeedForwardNN(nn.Module):
    """Feed-forward multi-label classifier: input -> 512 -> 256 -> 128 -> logits.

    Each hidden layer is followed by ReLU; the first two are additionally
    followed by dropout (p=0.5). The output layer produces one raw logit per
    label (no sigmoid applied).

    Args:
        input_size: Number of input features per example.
        num_labels: Number of independent binary labels (output logits).
        pos_weight: Optional per-label positive-class weights for the BCE loss
            (tensor, array or sequence of length ``num_labels``). When omitted,
            the loss falls back to the module-level ``class_weights_tensor`` so
            existing two-argument call sites behave exactly as before.
    """

    def __init__(self, input_size, num_labels, pos_weight=None):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(256, 128)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(128, num_labels)

        if pos_weight is not None:
            pos_weight = torch.as_tensor(pos_weight, dtype=torch.float32)
        # Non-persistent buffer: it follows the module through .to(device) but
        # is left out of state_dict, so the checkpoint layout is unchanged.
        self.register_buffer("pos_weight", pos_weight, persistent=False)

    def forward(self, input_ids, labels=None):
        """Run the network and, when labels are given, also compute the loss.

        Args:
            input_ids: Feature tensor whose last dimension is ``input_size``.
                (The name matches the dataset column the Trainer feeds in.)
            labels: Optional 0/1 target tensor with the same shape as the logits.

        Returns:
            ``logits`` when ``labels`` is None, otherwise the tuple
            ``(loss, logits)`` where ``loss`` is the positively-weighted
            binary cross-entropy over all labels.
        """
        x = self.dropout1(self.relu1(self.fc1(input_ids)))
        x = self.dropout2(self.relu2(self.fc2(x)))
        x = self.relu3(self.fc3(x))
        logits = self.fc4(x)

        if labels is None:
            return logits

        pos_weight = self.pos_weight
        if pos_weight is None:
            # Backward-compatible fallback to the notebook-level weights.
            pos_weight = class_weights_tensor

        # Align targets and weights with the logits themselves (not with a
        # global `device`), so the loss works wherever the model was placed.
        labels = labels.to(device=logits.device, dtype=logits.dtype)
        pos_weight = pos_weight.to(device=logits.device, dtype=logits.dtype)
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
        loss = loss_fn(logits, labels)
        return loss, logits

# Initialize the model
# Seed PyTorch's RNG first: the linear layers draw their initial parameters at
# construction time (and any later re-initialisation continues from the same
# random stream), so without a seed every run would start from different
# weights. 42 matches the random_state used for the train/validation split.
torch.manual_seed(42)
model = DeeperFeedForwardNN(input_size=X_train.shape[1], num_labels=len(emotion_columns)).to(device)

# Xavier (Glorot) uniform initialisation for the weights of linear layers
def init_weights(m):
    """Re-initialise ``m.weight`` with Xavier-uniform values if ``m`` is an ``nn.Linear``.

    Intended to be passed to ``Module.apply``. Modules of any other type are
    left untouched, and a linear layer's bias is not modified.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)

model.apply(init_weights)

# Define training arguments with early stopping
# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# (the old keyword is rejected by recent versions). Use whichever keyword the
# installed TrainingArguments actually accepts so the cell runs on both.
import inspect

_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_kwarg = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",  # Save the model at the end of each epoch
    learning_rate=1e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=30,  # Upper bound only; early stopping ends training sooner
    weight_decay=0.01,
    load_best_model_at_end=True,  # Restore the checkpoint with the best eval_loss
    metric_for_best_model="eval_loss",
    max_grad_norm=1.0,
    logging_dir="./logs",  # Directory for logs
    logging_steps=10,
    greater_is_better=False,  # Minimize eval_loss
    report_to="none",  # Disable logging to external trackers
    # Evaluate at the end of each epoch. This must match save_strategy for
    # load_best_model_at_end to work.
    **{_eval_strategy_kwarg: "epoch"},
)

# Initialize the Trainer with early stopping
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]  # Stop after 3 epochs of no improvement
)

# Train the model
trainer.train()

# Evaluate the model on the validation set
val_predictions = trainer.predict(val_dataset)
val_logits = val_predictions.predictions

# Decision threshold applied to the sigmoid probabilities. Kept deliberately
# low (0.2) to encourage more positive predictions; defined once so validation
# and test predictions can never drift apart.
PREDICTION_THRESHOLD = 0.2


def logits_to_binary(logits, threshold=PREDICTION_THRESHOLD):
    """Convert raw logits to a 0/1 integer array by thresholding their sigmoid.

    Args:
        logits: Array-like of raw model outputs (e.g. ``trainer.predict(...).predictions``).
        threshold: Probability above which a label is predicted as present.

    Returns:
        NumPy integer array of the same shape as ``logits``.
    """
    probabilities = torch.sigmoid(torch.as_tensor(logits))
    return (probabilities > threshold).numpy().astype(int)


val_binary_preds = logits_to_binary(val_logits)

# Extract the true labels for validation
true_val_labels = val_labels_tensor.cpu().numpy()

# Fail loudly on a shape mismatch instead of silently truncating the labels,
# which would produce a metric computed on misaligned rows.
if true_val_labels.shape != val_binary_preds.shape:
    raise ValueError(
        f"Validation labels {true_val_labels.shape} and predictions "
        f"{val_binary_preds.shape} are not aligned"
    )

# Compute Macro F1-Score on the validation set
macro_f1 = f1_score(true_val_labels, val_binary_preds, average='macro')
print(f"Validation Macro F1-Score: {macro_f1}")

# Prepare the test features. The tensor stays on the CPU: the dataset keeps its
# data in host memory and the Trainer moves each batch to its own device.
test_inputs_tensor = torch.tensor(X_test, dtype=torch.float32)
test_dataset = Dataset.from_dict({"input_ids": test_inputs_tensor})

# Get predictions on the test set (no labels, so the model returns logits only)
test_predictions = trainer.predict(test_dataset)
test_logits = test_predictions.predictions

# Same threshold as used for validation
test_preds_binary = logits_to_binary(test_logits)

# Prepare submission file: ID first, then one 0/1 column per emotion.
# IDs are attached by position (to_numpy) so a non-default index on test_data
# cannot misalign them with the prediction rows.
submission_df = pd.DataFrame(test_preds_binary, columns=emotion_columns)
submission_df.insert(0, "ID", test_data["ID"].to_numpy())

# Save submission file
submission_df.to_csv("submissiond.csv", index=False)
print("Submission file created: 'submissiond.csv'")




Epoch,Training Loss,Validation Loss
1,1.0655,1.092212
2,1.1075,1.07841
3,0.9375,0.973409
4,0.8399,0.935653
5,0.7665,0.926028
6,0.7805,0.932488
7,0.6728,0.938063
8,0.6362,0.975589


Validation Macro F1-Score: 0.40117439117659726


Submission file created: 'submissiond.csv'
