In [4]:
# Cell 1: Import Required Libraries
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
from peft import PeftModel, PeftConfig
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix,
    classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Environment report: confirms the imports above resolved and records the
# PyTorch build and CUDA availability this run was executed with.
print(
    "✅ All libraries imported successfully!",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

✅ All libraries imported successfully!
PyTorch version: 2.9.1
CUDA available: False


In [5]:
# Cell 2: Load IMDb Dataset - Recreate Assignment 7 Test Set
# Subset parameters are defined once so the shuffle/select calls and the log
# messages below cannot drift apart.
SUBSET_SEED = 42
TRAIN_SUBSET_SIZE = 2000
TEST_SUBSET_SIZE = 500

print("Loading IMDb dataset...")
dataset = load_dataset("imdb")

# Shuffle each split with a fixed seed and keep the first N rows.
# NOTE(review): this matches the Assignment 7 subset only if that notebook used
# the same seed and sizes — confirm against Assignment 7.
print(f"Using the same subset as Assignment 7 ({TRAIN_SUBSET_SIZE} train, {TEST_SUBSET_SIZE} test)")
dataset['train'] = dataset['train'].shuffle(seed=SUBSET_SEED).select(range(TRAIN_SUBSET_SIZE))
dataset['test'] = dataset['test'].shuffle(seed=SUBSET_SEED).select(range(TEST_SUBSET_SIZE))

# Our test dataset
test_dataset = dataset['test']
assert len(test_dataset) == TEST_SUBSET_SIZE, "unexpected test subset size"

print(f"\n✅ Test dataset loaded: {len(test_dataset)} examples")
print(f"✅ Using seed={SUBSET_SEED} (same as Assignment 7)")
print("\nFirst test example:")
# Only the first 100 characters of the review are shown to keep the output short.
print(f"Text: {test_dataset[0]['text'][:100]}...")
print(f"Label: {test_dataset[0]['label']} (0=Negative, 1=Positive)")

Loading IMDb dataset...
Using the same subset as Assignment 7 (2000 train, 500 test)

✅ Test dataset loaded: 500 examples
✅ Using seed=42 (same as Assignment 7)

First test example:
Text: <br /><br />When I unsuspectedly rented A Thousand Acres, I thought I was in for an entertaining Kin...
Label: 1 (0=Negative, 1=Positive)


In [6]:
# Cell 3: Load Tokenizer and Prepare Test Data
# The checkpoint name is kept in `model_name` because the summary print at the
# end of this cell reports it.
model_name = "distilbert-base-uncased"
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenization function
def tokenize_function(examples):
    """Tokenize the ``'text'`` field of ``examples`` with the notebook-level ``tokenizer``.

    Every sequence is truncated and padded to a fixed length of 512 tokens.

    Args:
        examples: Mapping with a ``'text'`` entry; it is passed to
            ``Dataset.map(..., batched=True)`` later in this cell.

    Returns:
        Whatever ``tokenizer(...)`` returns for those texts.
    """
    encode_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 512,
    }
    texts = examples['text']
    return tokenizer(texts, **encode_options)

print("Tokenizing test dataset...")
# batched=True hands tokenize_function a batch of examples per call.
tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Summary of the tokenization step, emitted as one multi-line report.
print(
    f"✅ Tokenizer loaded: {model_name}",
    f"✅ Test dataset tokenized: {len(tokenized_test)} examples",
    "\nTokenized example shape:",
    f"  - input_ids length: {len(tokenized_test[0]['input_ids'])}",
    f"  - Label: {tokenized_test[0]['label']}",
    sep="\n",
)

Loading tokenizer...
Tokenizing test dataset...


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

✅ Tokenizer loaded: distilbert-base-uncased
✅ Test dataset tokenized: 500 examples

Tokenized example shape:
  - input_ids length: 512
  - Label: 1


In [8]:
# Cell 4A: Check what files are in checkpoint-750
import os

checkpoint_path = "./checkpoint-750"
# Folder name used in the listing header, derived from the path so the message
# stays correct if checkpoint_path is changed.
checkpoint_name = os.path.basename(os.path.normpath(checkpoint_path))

# isdir rather than exists: a regular file with this name would pass an
# existence check and then make os.listdir raise NotADirectoryError.
if os.path.isdir(checkpoint_path):
    print(f"✅ Folder exists: {checkpoint_path}")
    print(f"\nFiles inside {checkpoint_name}:")
    files = os.listdir(checkpoint_path)
    for file_name in sorted(files):
        print(f"  - {file_name}")
    print(f"\nTotal files: {len(files)}")
else:
    # Show where the kernel is running and what is there, to help locate the
    # checkpoint folder.
    print(f"❌ Folder NOT found: {checkpoint_path}")
    print(f"\nCurrent working directory: {os.getcwd()}")
    print("\nLet's check what's in the current directory:")
    print(os.listdir('.'))

❌ Folder NOT found: ./checkpoint-750

Current working directory: /Users/ermi/Desktop/AI:ML/AI-ML-Assignment-8-LLM-Evaluation

Let's check what's in the current directory:
['results', '.ipynb_checkpoints', 'llm_evaluation_metrics.ipynb']
