<a href="https://colab.research.google.com/github/Anum-Ilyas9/Assignment3/blob/main/assignment03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets
import pandas as pd
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import os

# Install Kaggle (if not already installed)
!pip install kaggle --upgrade --quiet

# Setup Kaggle Environment
import os

# Set up Kaggle credentials securely
kaggle_username = "YOUR_KAGGLE_USERNAME"  # Replace with your Kaggle username
kaggle_key = "YOUR_KAGGLE_API_KEY"        # Replace with your Kaggle API key

# Create the Kaggle directory and API file
os.makedirs(os.path.expanduser("~/.kaggle"), exist_ok=True)
with open(os.path.expanduser("~/.kaggle/kaggle.json"), "w") as file:
    file.write(f'{{"username":"{kaggle_username}","key":"{kaggle_key}"}}')

# Set permissions for the API key file
!chmod 600 ~/.kaggle/kaggle.json

# Download the Sarcasm Detection Dataset using Kaggle API
!kaggle datasets download -d rmisra/news-headlines-dataset-for-sarcasm-detection -p ./data --unzip

# Verify the download (no extraction step is needed, since --unzip is used above)
import zipfile
import os

# Path of the already-extracted JSON file (the variable name is kept for compatibility)
dataset_zip_path = './data/Sarcasm_Headlines_Dataset_v2.json'
dataset_extract_path = './data'  # Directory the download command writes into

# Create the data directory if it doesn't exist
os.makedirs(dataset_extract_path, exist_ok=True)

# Shell commands in a notebook do not raise on failure, so confirm that the file is
# really there before reporting success.
if not os.path.isfile(dataset_zip_path):
    raise FileNotFoundError(
        f"Expected dataset file {dataset_zip_path!r} was not found. "
        "Check your Kaggle credentials and re-run the download step."
    )
print("Dataset downloaded successfully!")

# 2. Load Dataset
data = pd.read_json("./data/Sarcasm_Headlines_Dataset_v2.json", lines=True)  # One JSON record per line

# `is_sarcastic` is expected to already be 0/1 (1 for sarcastic, 0 for non-sarcastic).
# Fail loudly on anything else: a dict lookup would silently turn unexpected or missing
# values into NaN and change the label column to floats.
if not data['is_sarcastic'].isin([0, 1]).all():
    raise ValueError("Column 'is_sarcastic' must contain only the values 0 and 1.")
data['label'] = data['is_sarcastic'].astype(int)


# 3. Train-Test Split
train_texts, val_texts, train_labels, val_labels = train_test_split(
    data['headline'].tolist(),
    data['label'].tolist(),
    test_size=0.2,    # hold out 20% of the headlines for validation
    random_state=42,  # fixed seed so the split is reproducible
)

# 4. Tokenization
# Load the tokenizer published under the same checkpoint name the model is loaded from.
checkpoint_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(checkpoint_name)

def tokenize_function(examples, max_length=64):
    """Tokenize the ``'text'`` entry of a dataset example (or batch of examples).

    Args:
        examples: Mapping with a ``'text'`` entry, as handed over by ``Dataset.map``.
        max_length: Length every sequence is padded and truncated to. Defaults to 64,
            the value that used to be hard-coded, so existing callers are unaffected.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the given text.
    """
    return tokenizer(
        examples['text'],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )


# Wrap each split in a Dataset with a "text" column and a "label" column.
train_dataset, val_dataset = (
    Dataset.from_dict({"text": split_texts, "label": split_labels})
    for split_texts, split_labels in (
        (train_texts, train_labels),
        (val_texts, val_labels),
    )
)

# 5. Load Pre-trained Model
# num_labels=2 requests a two-class classification head on top of the checkpoint.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

# 6. Compute Metrics
def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` holds one score per class
            for every example (or a tuple whose first element does); ``labels`` holds
            the reference class ids.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and ``'f1'``.
    """
    logits, labels = eval_pred
    # Predictions may arrive as a tuple when a model returns extra outputs; the logits
    # are assumed to be its first element, as in the Hugging Face example scripts.
    if isinstance(logits, tuple):
        logits = logits[0]
    # Highest-scoring class per example.
    predictions = np.argmax(logits, axis=-1)
    # zero_division=0 reports 0.0 without a warning when no positive class is
    # predicted or present.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='binary', zero_division=0
    )
    acc = accuracy_score(labels, predictions)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# 7. Training Arguments
import inspect

# The evaluation-schedule keyword is `eval_strategy` in newer transformers releases and
# `evaluation_strategy` in older ones. Use whichever this installation accepts, so the
# cell runs on both.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",  # must match the evaluation schedule for load_best_model_at_end
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",  # Disable W&B reporting
    load_best_model_at_end=True,  # Load the best model at the end
    metric_for_best_model='accuracy',  # Define which metric to use for best model
    **{_eval_strategy_kwarg: "epoch"},  # Evaluate once per epoch
)



# 8. Trainer Setup
# batched=True hands the tokenizer many headlines per call instead of one. Every
# sequence is padded to the same fixed length, so the resulting features are
# identical; only the preprocessing time changes.
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_val_dataset = val_dataset.map(tokenize_function, batched=True)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    compute_metrics=compute_metrics,
)

# 9. Train Model
print("Starting model fine-tuning...")
trainer.train()

# 10. Evaluate Model
print("Evaluating model...")
metrics = trainer.evaluate()

# 11. Display Metrics
# Each row pairs the printed heading with the key under which evaluate() reports it.
print("\nFine-tuning Results:")
for heading, metric_key in (
    ("Accuracy", "eval_accuracy"),
    ("Precision", "eval_precision"),
    ("Recall", "eval_recall"),
    ("F1 Score", "eval_f1"),
):
    print(f"{heading}: {metrics[metric_key]:.4f}")


Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22895 [00:00<?, ? examples/s]

Map:   0%|          | 0/5724 [00:00<?, ? examples/s]

Starting model fine-tuning...


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.201,0.35197,0.922082,0.945377,0.887871,0.915722
2,0.1187,0.336497,0.935709,0.941639,0.922316,0.931877


Evaluating model...



Fine-tuning Results:
Accuracy: 0.9357
Precision: 0.9416
Recall: 0.9223
F1 Score: 0.9319
