In [None]:
# Upload the training split. This will prompt you to select the file to upload.
# The recorded output of this cell shows train.csv being saved; that is the file
# the training cell later loads with pd.read_csv("train.csv").
from google.colab import files
uploaded = files.upload()

Saving train.csv to train.csv


In [None]:
# Upload the test split. This will prompt you to select the file to upload.
# The recorded output of this cell shows test.csv being saved; that is the file
# the training cell later loads with pd.read_csv("test.csv").
# NOTE: this rebinds `uploaded`, so the result of any earlier upload call bound
# to that name is no longer reachable through it.
from google.colab import files
uploaded = files.upload()

Saving test.csv to test.csv


In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.0.2-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading

In [None]:
import pandas as pd
import torch
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
import wandb

# Step 1: Log in to Weights & Biases.
# SECURITY: an API key used to be hardcoded on this line. Credentials must never
# be committed to a notebook; treat the previously embedded key as leaked and
# revoke it in the W&B settings page.
# Called without a key, wandb.login() uses already-configured credentials
# (e.g. the WANDB_API_KEY environment variable) or prompts for the key.
wandb.login()

# Initialize the wandb run. The config values mirror the TrainingArguments used
# further down; keep the two in sync when tuning.
wandb.init(
    project="NLPHW6",  # Replace with your project name
    name="roberta-multi-label-run2",  # Optional: Customize run name
    config={
        "learning_rate": 2e-5,
        "epochs": 20,
        "batch_size": 8,
        "weight_decay": 0.01,
    }
)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load train and test datasets
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Define the emotion columns (target labels)
emotion_columns = ['anger', 'anticipation', 'disgust', 'fear', 'joy',
                   'love', 'optimism', 'pessimism', 'sadness', 'surprise', 'trust']

# Replace 'NONE' with 0 only in the test data (not needed for train data).
# The placeholder is swapped for the string '0' and the cast to int is left to
# astype: replacing a string with an int made pandas silently downcast the
# object columns, which emits a FutureWarning on recent pandas versions.
test_data[emotion_columns] = test_data[emotion_columns].replace('NONE', '0').astype(int)

# Initialize the RoBERTa tokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Tokenize tweets for RoBERTa
def encode_tweets(data):
    """Tokenize the ``Tweet`` column of ``data`` with the notebook's tokenizer.

    Args:
        data: Table-like object whose ``"Tweet"`` entry is an iterable of texts
            (the notebook passes the train/test DataFrames).

    Returns:
        Whatever the module-level ``tokenizer`` returns for the list of tweets,
        called with padding and truncation enabled, a 128-token length cap and
        PyTorch tensors requested as the output format.
    """
    tweets = list(data["Tweet"])
    tokenizer_options = {
        "padding": True,
        "truncation": True,
        "max_length": 128,
        "return_tensors": "pt",
    }
    return tokenizer(tweets, **tokenizer_options)

# Encode train and test datasets
train_encodings = encode_tweets(train_data)
test_encodings = encode_tweets(test_data)

# Prepare labels for the training data (one column per emotion)
y_train = train_data[emotion_columns].values

# Extract input tensors from the encodings
train_input_ids = train_encodings["input_ids"]
train_attention_mask = train_encodings["attention_mask"]

# Ensure the number of samples matches with labels
print(f"Input IDs shape: {train_input_ids.shape}")
print(f"Labels shape: {y_train.shape}")

# Split the data into train and validation sets (80/20, fixed seed so the
# validation set is the same on every run)
train_input_ids, val_input_ids, train_attention_mask, val_attention_mask, train_labels, val_labels = train_test_split(
    train_input_ids, train_attention_mask, y_train, test_size=0.2, random_state=42
)

# Convert to Hugging Face datasets. Labels are stored as float32 because the
# model is configured for multi-label classification below.
train_dataset = Dataset.from_dict({
    "input_ids": train_input_ids,
    "attention_mask": train_attention_mask,
    "labels": torch.tensor(train_labels, dtype=torch.float32)
})

val_dataset = Dataset.from_dict({
    "input_ids": val_input_ids,
    "attention_mask": val_attention_mask,
    "labels": torch.tensor(val_labels, dtype=torch.float32)
})

# Seed torch's RNG before the model is built: the classification head is not in
# the roberta-base checkpoint and is randomly initialised, so without a seed at
# this point every run starts from different head weights even though the data
# split is fixed.
torch.manual_seed(42)

# Initialize the RoBERTa model for multi-label classification
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=len(emotion_columns),
    problem_type="multi_label_classification"
).to(device)

# Define metrics for evaluation
def compute_metrics(p, threshold=0.2):
    """Compute macro-averaged precision, recall and F1 for multi-label output.

    Args:
        p: Evaluation result exposing ``predictions`` (raw logits) and
            ``label_ids`` (binary ground-truth labels).
        threshold: Probability above which a label counts as predicted.
            Defaults to 0.2, the value this notebook also uses when building
            the test submission.

    Returns:
        dict with the keys ``"macro_f1"``, ``"precision"`` and ``"recall"``.
    """
    # Logits -> independent per-label probabilities -> 0/1 decisions.
    preds = torch.sigmoid(torch.tensor(p.predictions)).numpy()
    preds_binary = (preds > threshold).astype(int)

    # zero_division=0 states explicitly how a label with no predicted (or no
    # true) samples is scored; the values match the library's warn-mode
    # fallback but no warning is emitted on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        p.label_ids, preds_binary, average='macro', zero_division=0
    )

    metrics = {"macro_f1": f1, "precision": precision, "recall": recall}

    # Log metrics to wandb. The run summary in this notebook shows the same
    # values also arriving under the "eval/" prefix, so these un-prefixed keys
    # are kept only so existing dashboards keep working.
    wandb.log(metrics)

    return metrics

# Training configuration: evaluate and checkpoint once per epoch so the best
# checkpoint (highest validation macro F1) can be restored when training ends.
training_args = TrainingArguments(
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=10,
    report_to="wandb",  # Enable wandb logging
    num_train_epochs=20,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    greater_is_better=True,
)

# Early stopping with a patience of 3 evaluations
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)

# Build the Trainer around the model, datasets and metric function
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping],
)

# Fine-tune
trainer.train()

# Score the model on the validation set
val_results = trainer.evaluate()
print(f"Validation Results: {val_results}")

# The test set carries no labels here, only the encoded inputs
test_inputs = {
    "input_ids": test_encodings["input_ids"],
    "attention_mask": test_encodings["attention_mask"],
}
test_dataset = Dataset.from_dict(test_inputs)

# Raw logits for every test tweet
test_predictions = trainer.predict(test_dataset)
test_logits = test_predictions.predictions

# Logits -> probabilities -> 0/1 labels using the 0.2 threshold
test_probabilities = torch.sigmoid(torch.tensor(test_logits))
test_preds_binary = (test_probabilities > 0.2).numpy().astype(int)

# Kaggle submission: ID first, then one column per emotion
submission_df = pd.DataFrame(test_preds_binary, columns=emotion_columns)
submission_df = submission_df.assign(ID=test_data["ID"])
submission_df = submission_df[['ID'] + emotion_columns]

# Write the submission to disk
submission_df.to_csv("submission_roberta4.csv", index=False)
print("Submission file created: 'submission_roberta4.csv'")

# Close the wandb run
wandb.finish()


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmaryannekiarie98[0m ([33mmaryannekiarie98-university-of-texas-at-dallas[0m). Use [1m`wandb login --relogin`[0m to force relogin


  test_data[emotion_columns] = test_data[emotion_columns].replace('NONE', 0).astype(int)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]



Input IDs shape: torch.Size([7724, 128])
Labels shape: (7724, 11)


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Macro F1,Precision,Recall
1,0.3312,0.321387,0.508463,0.435124,0.624358
2,0.2941,0.305198,0.573599,0.522469,0.712052
3,0.2462,0.309749,0.588151,0.542837,0.677417
4,0.2238,0.310862,0.592036,0.534487,0.685302
5,0.2065,0.322582,0.593096,0.511629,0.724158
6,0.1653,0.331143,0.590901,0.525704,0.683767
7,0.1512,0.349879,0.585173,0.527607,0.661876
8,0.1363,0.372124,0.587215,0.521216,0.682587


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Validation Results: {'eval_loss': 0.32258230447769165, 'eval_macro_f1': 0.5930958363040367, 'eval_precision': 0.5116294372897398, 'eval_recall': 0.7241581731755563, 'eval_runtime': 10.8314, 'eval_samples_per_second': 142.641, 'eval_steps_per_second': 8.955, 'epoch': 8.0}
Submission file created: 'submission_roberta4.csv'


0,1
eval/loss,▃▁▁▂▃▄▆█▃
eval/macro_f1,▁▆████▇██
eval/precision,▁▇█▇▆▇▇▇▆
eval/recall,▁▇▅▅█▅▄▅█
eval/runtime,▁█▇▇█▇▇█▅
eval/samples_per_second,█▁▂▂▁▂▂▁▄
eval/steps_per_second,█▁▂▂▁▂▂▁▄
macro_f1,▁▆████▇██
precision,▁▇█▇▆▇▇▇▆
recall,▁▇▅▅█▅▄▅█

0,1
eval/loss,0.32258
eval/macro_f1,0.5931
eval/precision,0.51163
eval/recall,0.72416
eval/runtime,10.8314
eval/samples_per_second,142.641
eval/steps_per_second,8.955
macro_f1,0.5931
precision,0.51163
recall,0.72416
