# Step 6: BERT Fine-tuning & Evaluation

This notebook loads a DistilBERT model that was fine-tuned for product categorization and evaluates it on the test set.

**Note:** Due to resource constraints, the model was trained in "Fast Mode" on a subset of the data. The code below shows the full pipeline for loading the trained model and evaluating it.

In [None]:
import os
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# Filesystem layout: the project root is the parent of the current working directory.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Processed data files (test.csv is read from here).
PROCESSED_DIR = os.path.join(PROJECT_ROOT, "data", "processed")
# Saved model, tokenizer and label encoder are loaded from here.
MODEL_DIR = os.path.join(PROJECT_ROOT, "models", "bert_final")

print(f"Model Directory: {MODEL_DIR}")

## 1. Load Trained Model and Tokenizer

In [None]:
# Load the fine-tuned model, its tokenizer and the label encoder from MODEL_DIR.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load label_encoder.joblib from a trusted source.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
    le = joblib.load(os.path.join(MODEL_DIR, "label_encoder.joblib"))
except Exception as e:
    print(f"Error loading model: {e}")
    print("Ensure training has completed successfully.")
    # Re-raise: every later cell needs tokenizer/model/le, so continuing would
    # only surface as a confusing NameError further down the notebook.
    raise
else:
    print("Model loaded successfully.")

## 2. Load Test Data

In [None]:
# Read the test data from the processed-data directory.
test_csv_path = os.path.join(PROCESSED_DIR, "test.csv")
test_df = pd.read_csv(test_csv_path)
print(f"Test set size: {len(test_df)}")

# Convert the "category" column to label ids using the label encoder loaded earlier.
encoded_labels = le.transform(test_df["category"])
test_df["label"] = encoded_labels

## 3. Evaluate on Test Set

In [None]:
from datasets import Dataset

def tokenize_function(examples, max_length=128):
    """Tokenize a batch of examples with the module-level ``tokenizer``.

    Args:
        examples: Mapping with a "text" entry holding the texts to tokenize
            (a batch dict when used with ``Dataset.map(..., batched=True)``).
        max_length: Sequence length every example is padded/truncated to.
            Defaults to 128, the value previously hard-coded here.

    Returns:
        Whatever ``tokenizer`` returns for the batch.
    """
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=max_length,
    )

# Build a tokenized, torch-formatted dataset from the test DataFrame.
# preserve_index=False keeps a non-default pandas index from being carried
# along as an extra dataset column.
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)
test_dataset = test_dataset.map(tokenize_function, batched=True)
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# Create Trainer for evaluation only. Explicit arguments stop Trainer from
# falling back to its default output directory under the working directory and
# switch off experiment-tracking integrations, which prediction does not need.
eval_args = TrainingArguments(
    output_dir=os.path.join(PROJECT_ROOT, "models", "bert_eval_tmp"),
    report_to="none",
)
trainer = Trainer(
    model=model,
    args=eval_args,
    eval_dataset=test_dataset,
    compute_metrics=None,  # metrics are computed manually in later cells
)

print("Running prediction on test set...")
predictions = trainer.predict(test_dataset)
# Trainer.predict can return a tuple of arrays when the model emits more than
# the logits; the logits are the first element in that case.
logits = predictions.predictions
if isinstance(logits, tuple):
    logits = logits[0]
preds = np.argmax(logits, axis=1)
labels = predictions.label_ids

## 4. Classification Report

In [None]:
# Pass the full label id set explicitly so the report rows always line up with
# le.classes_. Without it, a class that never occurs in the test labels or the
# predictions makes classification_report fail on the target_names length mismatch.
# Assumes label ids are 0..len(le.classes_)-1, as produced by le.transform.
all_label_ids = np.arange(len(le.classes_))
print(
    classification_report(
        labels,
        preds,
        labels=all_label_ids,
        target_names=le.classes_,
    )
)

## 5. Confusion Matrix

In [None]:
# Fix the matrix to one row/column per known class. Without labels=, a class
# missing from both the test labels and the predictions shrinks the matrix, and
# it no longer matches the le.classes_ tick labels.
# Assumes label ids are 0..len(le.classes_)-1, as produced by le.transform.
all_label_ids = np.arange(len(le.classes_))
cm = confusion_matrix(labels, preds, labels=all_label_ids)

# Explicit figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=le.classes_,
    yticklabels=le.classes_,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()