In [None]:
!pip install datasets evaluate -q

In [None]:
import math
import time

import evaluate
import numpy as np
import pandas as pd
import torch
import tqdm
import wandb
from datasets import DatasetDict, load_dataset
from transformers import pipeline
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

# Single source of truth for the checkpoint id, so the tokenizer and the model
# are always loaded from the same repository.
MODEL_CHECKPOINT = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(MODEL_CHECKPOINT)

Neither PyTorch nor TensorFlow >= 2.0 have been found.Models won't be available and only tokenizers, configurationand file/data utilities can be used.


ImportError: cannot import name 'AutoModelForCausalLM' from 'transformers' (c:\Users\wchas\anaconda3\Lib\site-packages\transformers\__init__.py)

In [None]:
# Use the CUDA device when torch reports one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

Dataset

In [None]:
# TODO: "dataset(CHANGE)" is a placeholder, not a real dataset id; replace it
# with the dataset to train on before running this cell.
dataset = load_dataset(path="dataset(CHANGE)")

wandb

In [None]:
# Authenticate with Weights & Biases and open the run that the training loop
# logs its per-epoch metrics to. `wandb` is imported in the notebook's import cell.
wandb.login()
run = wandb.init(
    # Set the project where this run will be logged
    project="NLP_Final_Project",
    # Track hyperparameters and run metadata.
    # NOTE(review): these values are recorded as metadata only; they are typed in
    # separately from the TrainingArguments cell, so keep the two in sync by hand.
    config={
        "learning_rate": 2e-5,
        "epochs": 3,
        "batch_size": 32,
        # NOTE(review): the notebook loads deepseek-ai/DeepSeek-R1-Distill-Qwen-32B,
        # not distilbert — confirm which architecture should be recorded.
        "architecture": "distilbert-base-uncased",
        # NOTE(review): the dataset cell still holds a placeholder id — confirm
        # this entry once the real dataset is chosen.
        "dataset": "hate_speech_offensive",
        "weight_decay": 0.01,
        "optimizer": "Adam"
    },
)

Base Trainer


In [None]:
class CustomTrainer(Trainer):
    """Trainer subclass whose training loop is a hand-written epoch loop.

    ``_inner_training_loop`` is overridden so that every epoch makes one pass
    over the training dataloader and then one pass over the evaluation
    dataloader. The loop optimises ``torch.nn.CrossEntropyLoss`` with
    ``torch.optim.Adam`` and multiplies the learning rate by 0.9 after each
    epoch. Per-epoch loss and accuracy are printed and sent to ``wandb.log``.

    NOTE(review): the loss/accuracy code treats ``outputs.logits`` as
    (batch, num_classes) and the labels as (batch,), i.e. a classification
    head — TODO confirm the model handed to the trainer matches.
    NOTE(review): ``args.weight_decay`` is not forwarded to the optimizer —
    confirm whether that is intended.
    """

    @staticmethod
    def _custom_split_batch(batch, device):
        """Split a collated batch into model inputs and labels, both on ``device``.

        Only ``input_ids``, ``attention_mask`` and ``token_type_ids`` are
        forwarded to the model, and only when present. Labels are read from
        the ``"class"`` entry, falling back to ``"labels"``.

        Returns:
            tuple: ``(model_inputs, labels)``.

        Raises:
            ValueError: if the batch has neither a ``"class"`` nor a
                ``"labels"`` entry.
        """
        model_inputs = {
            key: batch[key].to(device)
            for key in ("input_ids", "attention_mask", "token_type_ids")
            if key in batch
        }
        for label_key in ("class", "labels"):
            if label_key in batch:
                return model_inputs, batch[label_key].to(device)
        raise ValueError("No label column ('class' or 'labels') found in batch.")

    def _custom_run_epoch(self, dataloader, criterion, device, description, training):
        """Run one pass over ``dataloader`` and return ``(mean_loss, accuracy)``.

        When ``training`` is true the model is put in train mode and the
        optimizer is stepped after every batch; otherwise the model is put in
        eval mode and gradients are disabled. ``mean_loss`` is averaged over
        batches and ``accuracy`` over samples; both are 0.0 for an empty loader.
        """
        loss_sum = 0.0
        correct = 0
        samples_seen = 0
        batches_seen = 0

        if training:
            self.model.train()
        else:
            self.model.eval()

        with torch.set_grad_enabled(training):
            with tqdm.tqdm(dataloader, unit="batch") as progress:
                progress.set_description(description)
                for batch in progress:
                    model_inputs, labels = self._custom_split_batch(batch, device)

                    if training:
                        self.optimizer.zero_grad()
                    logits = self.model(**model_inputs).logits
                    loss = criterion(logits, labels)
                    loss_sum += loss.item()

                    if training:
                        loss.backward()
                        self.optimizer.step()

                    predictions = logits.argmax(dim=1)
                    correct += (predictions == labels).sum().item()
                    samples_seen += labels.size(0)
                    # Counted here rather than via len(dataloader) so loaders
                    # without a length still work.
                    batches_seen += 1

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        return loss_sum / max(batches_seen, 1), correct / max(samples_seen, 1)

    def _inner_training_loop(
        self,
        batch_size=None,
        args=None,
        resume_from_checkpoint=None,
        trial=None,
        ignore_keys_for_eval=None
    ):
        """Train for ``args.num_train_epochs`` epochs, evaluating after each one.

        Args:
            batch_size: accepted for signature compatibility; unused here.
            args: training arguments; ``self.args`` is used when ``None``.
            resume_from_checkpoint: accepted for signature compatibility; unused.
            trial: accepted for signature compatibility; unused.
            ignore_keys_for_eval: accepted for signature compatibility; unused.

        Returns:
            None. Metrics are printed and logged through ``wandb.log``.

        Raises:
            ValueError: if a batch carries no ``"class"``/``"labels"`` entry.
        """
        if args is None:
            args = self.args

        # TrainingArguments declares num_train_epochs as a float, which range()
        # rejects; round a fractional count up to whole epochs.
        number_of_epochs = math.ceil(args.num_train_epochs)
        start = time.time()

        # Send batches to wherever the model's parameters already live so the
        # inputs and the model can never end up on different devices.
        device = next(self.model.parameters()).device
        # Define loss function
        criterion = torch.nn.CrossEntropyLoss().to(device)

        # Setup optimizer and scheduler (learning rate decays by 0.9 per epoch)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=args.learning_rate)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=1, gamma=0.9)

        # Get dataloaders from the Trainer
        train_dataloader = self.get_train_dataloader()
        eval_dataloader = self.get_eval_dataloader()

        for epoch in range(number_of_epochs):
            train_loss_per_epoch, train_acc_per_epoch = self._custom_run_epoch(
                train_dataloader, criterion, device, f"Training Epoch {epoch}", training=True
            )

            # Adjust the learning rate once per epoch, after the training pass
            self.scheduler.step()

            eval_loss_per_epoch, eval_acc_per_epoch = self._custom_run_epoch(
                eval_dataloader, criterion, device, f"Evaluation Epoch {epoch}", training=False
            )

            print(f"\tTrain Loss: {train_loss_per_epoch:.3f} | Train Acc: {train_acc_per_epoch * 100:.2f}%")
            print(f"\tEval Loss: {eval_loss_per_epoch:.3f} | Eval Acc: {eval_acc_per_epoch * 100:.2f}%")
            wandb.log({"Accuracy": (train_acc_per_epoch * 100), "Train Loss": train_loss_per_epoch, "Loss":eval_loss_per_epoch,"Evaluation Accuracy":(eval_acc_per_epoch*100)})

        total_time = (time.time() - start) / 60
        print(f"Total Training Time: {total_time:.3f} minutes")


In [None]:
# Hyperparameters and bookkeeping options handed to the trainer.
training_args = TrainingArguments(
    output_dir="NLP_HW2_Model",
    # Optimisation settings.
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Batch sizes, per device, for the training and evaluation passes.
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Evaluate and save on the same per-epoch schedule.
    # NOTE(review): newer transformers releases spell the first option
    # `eval_strategy` — confirm against the installed version.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [None]:
# Build the trainer around the model and tokenizer loaded earlier in the notebook.
# TODO: supply train_dataset / eval_dataset once the dataset has been tokenized;
# the training loop asks the trainer for both dataloaders.
# NOTE(review): `data_collator` and `compute_metrics` are not defined in the cells
# shown here — confirm they are created before this cell runs.
trainer = CustomTrainer(
    model=model,
    args=training_args,
    # train_dataset=tokenized_hate_data['train'],
    # eval_dataset=tokenized_hate_data['test'],
    # Use the tokenizer that was loaded from the same checkpoint as `model`.
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
