In [1]:
import os 

# Switch the working directory two levels up from where the kernel started.
# The starting directory is recorded once, so re-running this cell keeps
# targeting the same directory instead of climbing two more levels each time.
# NOTE(review): presumably the target is the project root that makes `task1`
# importable and anchors relative paths such as "./results" — confirm.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, "..", ".."))

from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq, AutoModelForSequenceClassification
from peft import get_peft_model, LoraConfig, TaskType
import evaluate
import numpy as np
from task1.config import ProjectPaths
import pandas as pd
import torch

# Project path helper; its `english_data_dir` attribute is used further down
# in this cell to locate the English TSV splits.
paths = ProjectPaths()

# === 3. Set device ===
# Prefer CUDA when it is available, then Apple MPS, and fall back to the CPU.
# Without the CUDA branch the model is explicitly moved to "cpu" on any
# non-Apple machine, even when a GPU is present.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# === 4. Load and preprocess data ===
def load_datasets(path):
    """Read one tab-separated split and return it as a Hugging Face ``Dataset``.

    Args:
        path: Location of a TSV file with at least the columns ``sentence``
            and ``label``.

    Returns:
        A ``Dataset`` with exactly two columns: ``sentence`` (unchanged) and
        ``label`` encoded as ``0`` for ``OBJ`` and ``1`` for ``SUBJ``. Rows
        whose label is neither ``SUBJ`` nor ``OBJ`` are dropped.
    """
    frame = pd.read_csv(path, sep='\t')
    # Keep only rows carrying one of the two known labels.
    frame = frame[frame['label'].isin(['SUBJ', 'OBJ'])].copy()
    frame['label'] = frame['label'].map({'OBJ': 0, 'SUBJ': 1})
    frame = frame[['sentence', 'label']]
    # Filtering leaves a non-default pandas index, which from_pandas would
    # otherwise carry along as an extra `__index_level_0__` column.
    return Dataset.from_pandas(frame, preserve_index=False)

# Load the four English splits (train / dev / dev-test / labelled competition
# test) from the project data directory, in that order.
english_dir = paths.english_data_dir
train_dataset, val_dataset, test_dataset, competition_test_dataset = (
    load_datasets(english_dir / file_name)
    for file_name in (
        "train_en.tsv",
        "dev_en.tsv",
        "dev_test_en.tsv",
        "test_en_labeled.tsv",
    )
)

W0615 10:51:12.747000 28716 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
# Stage-1 corpus: the Rotten Tomatoes dataset from the Hugging Face hub.
dataset = load_dataset("rotten_tomatoes")
train_dataset_stage1 = dataset["train"]
val_dataset_stage1 = dataset["validation"]

# Size of the stage-1 subsets and the seed that makes them reproducible.
STAGE1_TRAIN_SIZE = 1000
STAGE1_VAL_SIZE = 200
STAGE1_SEED = 42

# Shuffle before truncating: taking the first N rows only gives a representative
# sample if the split is not ordered.
# NOTE(review): the hub copy of rotten_tomatoes appears to list all positive
# reviews before the negative ones, so an unshuffled head would contain a single
# class — confirm with `set(train_dataset_stage1[:1000]["label"])`.
small_train_dataset = train_dataset_stage1.shuffle(seed=STAGE1_SEED).select(
    range(min(STAGE1_TRAIN_SIZE, len(train_dataset_stage1)))
)
small_val_dataset = val_dataset_stage1.shuffle(seed=STAGE1_SEED).select(
    range(min(STAGE1_VAL_SIZE, len(val_dataset_stage1)))
)

In [4]:
# Checkpoint shared by the tokenizer and the two-class sequence classifier.
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


STAGE 1: FINE-TUNE ON ROTTEN TOMATOES

In [3]:
def tokenize_stage1(examples):
    """Tokenize a batch of stage-1 examples.

    Reads the raw strings from the ``text`` column and returns the tokenizer
    output, padded and truncated to a fixed length of 128 tokens.
    """
    texts = examples["text"]
    encoded = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    return encoded

In [5]:
# Training subset: tokenize, expose the model inputs as torch tensors, then
# rename the target column.
tokenized_train_dataset = small_train_dataset.map(tokenize_stage1, batched=True)
tokenized_train_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)
# The Trainer works best when the label column is named 'labels'
tokenized_train_dataset = tokenized_train_dataset.rename_column("label", "labels")

# Validation subset: same three steps.
tokenized_val_dataset = small_val_dataset.map(tokenize_stage1, batched=True)
tokenized_val_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)
tokenized_val_dataset = tokenized_val_dataset.rename_column("label", "labels")

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [6]:
# LoRA setup for stage 1.
#
# The checkpoint has no weights for `classifier` or `pooler.dense` (see the
# "newly initialized" warning when the model is loaded), so both start out
# random. An adapter checkpoint only stores adapter weights plus the modules
# listed in `modules_to_save`; unless the pooler is listed there, a model rebuilt
# from the saved adapter gets a different random pooler than the one trained
# against and cannot reproduce the stage-1 results.
lora_config = LoraConfig(
    r=32,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type=TaskType.SEQ_CLS,
    # A string is matched as a regular expression against the full module name.
    # This selects the same projections as before inside the encoder layers and
    # leaves `pooler.dense` out, because the pooler is trained and saved whole.
    target_modules=r".*\.encoder\..*\.(query_proj|key_proj|value_proj|dense)",
    modules_to_save=["classifier", "pooler"],
)

model = get_peft_model(model, lora_config).to(device)

In [2]:
# Metric objects from the `evaluate` library, consumed by compute_metrics.
f1, accuracy, precision, recall = (
    evaluate.load(metric_id)
    for metric_id in ("f1", "accuracy", "precision", "recall")
)

def compute_metrics(eval_pred):
    """Score one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``; the predicted class is the
    arg-max over the last axis of the logits.

    Returns:
        A dict with accuracy plus macro-averaged F1, precision and recall under
        the keys ``accuracy``, ``f1_macro``, ``precision`` and ``recall``.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    pair = {"predictions": predicted, "references": gold}

    results = {}
    results["accuracy"] = accuracy.compute(**pair)["accuracy"]
    results["f1_macro"] = f1.compute(average="macro", **pair)["f1"]
    results["precision"] = precision.compute(average="macro", **pair)["precision"]
    results["recall"] = recall.compute(average="macro", **pair)["recall"]
    return results

In [9]:
# === 8. TrainingArguments ===
# Stage-1 schedule: 5 epochs, evaluation and checkpointing once per epoch, and
# the checkpoint with the best macro-F1 restored when training ends.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=5,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    # fp16 mixed precision is rejected by TrainingArguments on MPS and CPU,
    # so it is only requested when a CUDA device is present.
    fp16=torch.cuda.is_available(),
)

In [None]:
# Stage-1 trainer over the tokenized Rotten Tomatoes subsets.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
)

# Run the fine-tuning loop.
trainer.train()

# Write the result where the stage-2 cells load it from.
trainer.save_model("./results/stage1-rotten-tomatoes/final_checkpoint")

STAGE 2: FINE-TUNE ON THE ENGLISH SUBJECTIVITY DATA (SUBJ vs. OBJ)

In [6]:
def tokenize_fn(examples):
    """Tokenize a batch of stage-2 examples.

    Reads the raw strings from the ``sentence`` column and returns the
    tokenizer output, padded and truncated to a fixed length of 128 tokens.
    """
    sentences = examples["sentence"]
    encoded = tokenizer(sentences, padding="max_length", truncation=True, max_length=128)
    return encoded

def _prepare_stage2_split(split):
    """Tokenize one SUBJ/OBJ split and expose it to the Trainer as torch tensors.

    Applies ``tokenize_fn`` in batches, renames the target column from
    ``label`` to ``labels`` and restricts the torch-formatted view to
    ``input_ids``, ``attention_mask`` and ``labels``.
    """
    split = split.map(tokenize_fn, batched=True)
    # This cell overwrites its own inputs. On a second run the column is already
    # called `labels`, and rename_column fails when the source column is missing,
    # so the rename is only applied when it is still needed.
    if "label" in split.column_names:
        split = split.rename_column("label", "labels")
    split.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return split


train_dataset = _prepare_stage2_split(train_dataset)
val_dataset = _prepare_stage2_split(val_dataset)
test_dataset = _prepare_stage2_split(test_dataset)
competition_test_dataset = _prepare_stage2_split(competition_test_dataset)

Map:   0%|          | 0/830 [00:00<?, ? examples/s]

Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/484 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [12]:
from peft import PeftModel

# Fresh two-class backbone for stage 2, with the adapter saved at the end of
# stage 1 attached on top of it.
stage1_adapter_dir = "./results/stage1-rotten-tomatoes/final_checkpoint"
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)
model = PeftModel.from_pretrained(base_model, stage1_adapter_dir)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# LoRA setup for stage 2 (same hyper-parameters as stage 1).
lora_config = LoraConfig(
    r=32,
    lora_alpha=16,
    lora_dropout=0.05,
    task_type=TaskType.SEQ_CLS,
    target_modules=["query_proj", "key_proj", "value_proj", "dense"]
)

# At this point `model` is the PeftModel carrying the stage-1 adapter. Calling
# get_peft_model on a model that is already PEFT-wrapped is not a supported way
# to continue training: PEFT warns about modifying a model a second time, and a
# new adapter injected under the same name does not start from the stage-1
# weights. Fold the stage-1 adapter into the backbone first, so that stage 2
# trains a fresh adapter on top of what stage 1 learned.
if isinstance(model, PeftModel):
    model = model.merge_and_unload()

model = get_peft_model(model, lora_config).to(device)

In [14]:
# === 8. TrainingArguments ===
# Stage-2 schedule: 10 epochs, evaluation and checkpointing once per epoch, and
# the checkpoint with the best macro-F1 restored when training ends.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model="f1_macro",
    load_best_model_at_end=True,
)

In [10]:
# Stage-2 trainer: fits on the subjectivity training split and validates on the
# dev split after every epoch.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# === 10. Train ===
trainer.train()

# === 11. Evaluate on test set ===
# Kept as the last expression so the metrics dict is shown as the cell output.
trainer.evaluate(eval_dataset=test_dataset)

NameError: name 'training_args' is not defined