### Reference LLM Distillation notebook: https://github.com/simranjeet97/LLM_Distillation/blob/main/LLM_Distillation.ipynb

In [1]:
import os
import pandas as pd
import torch
from datasets import Dataset
from dotenv import load_dotenv
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    BitsAndBytesConfig,
    AutoModelForSequenceClassification
)
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

load_dotenv()
hf_token = os.getenv("HUGGINGFACE_API_KEY")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ====== Load dataset ======
def load_partition(path: str) -> Dataset:
    df = pd.read_csv(path).head(1000)
    return Dataset.from_pandas(df)

dataset = load_partition("../Student_Training_Data/GPT.csv") ## should be GPT.csv
print(f"Loaded {len(dataset)} samples from dataset.") 

Loaded 1000 samples from dataset.


In [None]:
# ====== Tokenizer & Model Setup ======
model_id = "google-bert/bert-base-uncased" #"google/gemma-3-1b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForSequenceClassification.from_pretrained(
    "google-bert/bert-base-uncased",
    num_labels=3  # background, method, result
)

# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     token=hf_token,
#     trust_remote_code=True,
#     torch_dtype=torch.float16,
# )

# model = prepare_model_for_kbit_training(model)
# lora_config = LoraConfig(
#     r=8,
#     lora_alpha=32,
#     target_modules=["q_proj", "v_proj"],
#     lora_dropout=0.05,
#     bias="none",
#     task_type=TaskType.CAUSAL_LM
# )
# model = get_peft_model(model, lora_config) # TODO Why getting PEFT model? Paper and Reference notebook did not use


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# ====== Data Formatting Fix ======
def format_for_distillation(examples):
    label_map = {"background": 0, "method": 1, "result": 2}
    
    # Process text inputs
    texts = [
        f"Classify the following scientific text as one of [background, method, result].:\nSection Name: {s}\nText: {t}\nClassification:"
        for s, t in zip(examples["sectionName"], examples["string"])
    ]
    
    # Tokenize
    tokenized = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    
    # Convert labels
    labels = torch.tensor([label_map[c] for c in examples["model_classification"]])
    
    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": labels,
        # "rationale_ids": tokenizer(
        #     examples["reasoning"],
        #     padding="max_length",
        #     truncation=True,
        #     max_length=512,
        #     return_tensors="pt"
        # )["input_ids"]
    }

# ====== Training Setup ======
tokenized_dataset = dataset.map(
    format_for_distillation,
    batched=True,
)

Map: 100%|██████████| 1000/1000 [00:00<00:00, 2136.76 examples/s]


In [5]:
# ====== Training Args ======
training_args = TrainingArguments( ## Original Training Args
    output_dir="gemma3-phase1",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_steps=10,
    max_grad_norm=1.0,
    weight_decay=0.01,
    save_strategy="epoch",
)

#### Training args in phase 1 distillation before edits
#     num_train_epochs=3,
#     per_device_train_batch_size=1,
#     gradient_accumulation_steps=1,
#     learning_rate=2e-5,
#     max_steps=10,  
#     logging_steps=1,
#     save_strategy="no",
#     remove_unused_columns=False,
#     max_grad_norm=1.0,
#     report_to="none"
# )


In [None]:
class MultiTaskTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None, **kwargs):
        
        labels = inputs.pop("labels")
        # rationale_ids = inputs.pop("rationale_ids", None)
        # print(f"Labels: {labels} | Rationale IDs: {rationale_ids}")
        
        # outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"]
        )
        # print(f"Outputs: {outputs}")
        # print(f"logits: {outputs.logits}")

        # Reshape logits to [batch_size, num_classes]
        # logits = outputs.logits[:, -1, :]  # Take last token's logits
        # logits = logits[:, :3]  # Only take logits for the 3 classes
        loss_fn = torch.nn.CrossEntropyLoss()
        label_loss = loss_fn(outputs.logits, labels)
        
        # if rationale_ids is not None:
        #     rationale_outputs = model(input_ids=rationale_ids, attention_mask=inputs["attention_mask"])
        #     rationale_loss = loss_fn(rationale_outputs.logits, rationale_ids)
        #     loss = label_loss + 0.5 * rationale_loss  # Weighted loss
        # else:
        #     loss = label_loss
        
        return (label_loss, outputs) if return_outputs else label_loss

trainer = MultiTaskTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

In [7]:
trainer.train()
trainer.save_model("bert-phase1-default")

Outputs: SequenceClassifierOutput(loss=None, logits=tensor([[-0.1419, -0.7163,  0.3923],
        [-0.1831, -0.4173,  0.2911]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
logits: tensor([[-0.1419, -0.7163,  0.3923],
        [-0.1831, -0.4173,  0.2911]], grad_fn=<AddmmBackward0>)


Step,Training Loss
10,0.7829
20,0.5141
30,0.2592
40,0.4226
50,0.2638
60,0.5179
70,0.6835
80,1.0221
90,0.5932
100,0.6221


Outputs: SequenceClassifierOutput(loss=None, logits=tensor([[ 0.0284, -0.7294,  0.0150],
        [ 0.0969, -0.6257, -0.4368]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
logits: tensor([[ 0.0284, -0.7294,  0.0150],
        [ 0.0969, -0.6257, -0.4368]], grad_fn=<AddmmBackward0>)
Outputs: SequenceClassifierOutput(loss=None, logits=tensor([[ 0.2192, -0.5671,  0.0377],
        [ 0.4241, -0.6867,  0.0030]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
logits: tensor([[ 0.2192, -0.5671,  0.0377],
        [ 0.4241, -0.6867,  0.0030]], grad_fn=<AddmmBackward0>)
Outputs: SequenceClassifierOutput(loss=None, logits=tensor([[ 0.2257, -0.3790, -0.2835],
        [ 0.6466, -0.8887, -0.2860]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
logits: tensor([[ 0.2257, -0.3790, -0.2835],
        [ 0.6466, -0.8887, -0.2860]], grad_fn=<AddmmBackward0>)
Outputs: SequenceClassifierOutput(loss=None, logits=tensor([[ 0.7138, -0.3426, -0.6297],
        [ 0

## Generate Prediction

In [22]:
# ====== Load dataset ======
df = pd.read_csv("../Student_Training_Data/GPT.csv").tail(1000)

In [23]:
# Load model and tokenizer
model_path = "./bert-phase1-default"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [24]:
label_map = {0: "background", 1: "method", 2: "result"}
predictions = []

In [25]:
# Loop through each row and predict
for _, row in df.iterrows():
    prompt = (
        "Classify the following scientific text as one of [background, method, result].\n\n"
        f"Text: {row['string']}\n"
        "Provide your classification."
    )
    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Predict
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        pred_id = logits.argmax(dim=1).item()
        classification = label_map[pred_id]

    print("Predicted classification:", classification)
    # Save prediction
    predictions.append(classification)

Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: method
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predicted classification: background
Predi

In [None]:
# Add to DataFrame
df["predicted_classification"] = predictions

# Save to CSV
output_path = "bert_classification_predictions.csv"
df.to_csv(output_path, index=False)

print(f"Saved predictions to {output_path}")

✅ Saved predictions to bert_classification_predictions.csv
