In [1]:
# notebooks/4_classifier.ipynb

import sys
import os
import random
import numpy as np
import torch

# Make the repository root importable so the `src` package used by the
# project imports in this cell can be resolved.
# NOTE(review): ".." is resolved against the kernel's working directory —
# presumably the notebooks/ folder; confirm when launching from elsewhere.
project_root = os.path.abspath("..")
# Guard the append so re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

# Import utility functions from the project
from src.data_utils import load_datasets, create_balanced_tasks
from src.model_utils import load_model_and_tokenizer, get_embeddings
from src.classifier_utils import train_classifier, evaluate_classifier

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Reproducibility: pin every random number generator used in this notebook
# to one shared seed value.
SEED = 42

random.seed(SEED)        # Python standard-library RNG
np.random.seed(SEED)     # NumPy global RNG
torch.manual_seed(SEED)  # PyTorch RNG

In [3]:
# Checkpoint selection: these two values are joined into the model path
# by the cell that follows.
MODELS_ROOT_PATH = "../../models"  # Root folder holding the pre-trained models
MODEL_NAME = "Llama-2-7b-hf"  # Which pre-trained model to use

In [None]:
# Resolve the on-disk location of the pre-trained checkpoint.
# The lowercase aliases mirror the configuration constants and stay defined
# in the notebook namespace.
root_path, model_name = MODELS_ROOT_PATH, MODEL_NAME

# <models root>/<model name>
model_path = os.path.join(root_path, model_name)

In [4]:
# Load model and tokenizer from the checkpoint directory held in `model_path`.
# NOTE(review): `model_path` is assigned in a cell whose execution count is
# None in this saved notebook — make sure that cell runs first on a fresh kernel.
model, tokenizer = load_model_and_tokenizer(model_path)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.44s/it]


In [5]:
# Load the 'train' split of the datasets found under the processed-data folder
data_dir = '../data/processed'
datasets = load_datasets(data_dir, split='train')

# Generate balanced tasks. The sampling seed reuses the notebook-wide SEED
# constant (instead of a separate literal) so changing SEED in the seed cell
# affects this step as well.
tasks = create_balanced_tasks(datasets, balanced=True, seed=SEED)


Loaded winogrande dataset from train split, shape: (2558, 6)
Loaded obqa dataset from train split, shape: (4957, 6)
Loaded wikitext2 dataset from train split, shape: (36718, 6)
Loaded gsm8k dataset from train split, shape: (7473, 6)
Loaded ai2_arc dataset from train split, shape: (1119, 6)
Loaded hellaswag dataset from train split, shape: (39905, 6)
Loaded boolq dataset from train split, shape: (9427, 6)
Loaded piqa dataset from train split, shape: (16113, 6)


In [6]:
# Split each task record into two parallel lists, index-aligned with `tasks`:
# the text under 'input' and the name under 'task_type'.
inputs = [record['input'] for record in tasks]
task_types = [record['task_type'] for record in tasks]

In [7]:
# Generate embeddings and labels for the extracted inputs using the loaded
# model and tokenizer. The third return value, `task_type_to_label`, is later
# handed to evaluate_classifier.
# NOTE(review): the layout of `embeddings`/`labels` and the direction of the
# `task_type_to_label` mapping are defined in src.model_utils — confirm there.
embeddings, labels, task_type_to_label = get_embeddings(inputs, task_types, model, tokenizer)


Generating Embeddings: 100%|██████████| 8952/8952 [00:03<00:00, 2272.75input/s]


In [8]:
# Train the classifier on the embeddings; the helper also returns the held-out
# features and labels (X_test, y_test) used for evaluation in the next cell.
# test_size=0.2 presumably reserves 20% of the samples — confirm in src.classifier_utils.
# The random state reuses the notebook-wide SEED constant (instead of a
# separate literal) so the split follows any change made in the seed cell.
clf, X_test, y_test = train_classifier(embeddings, labels, test_size=0.2, random_state=SEED)


In [9]:
# Evaluate the classifier on the held-out split returned by train_classifier.
# `task_type_to_label` is passed along so results can be reported per task
# type; the captured output below is a precision/recall/F1 table per dataset.
evaluate_classifier(clf, X_test, y_test, task_type_to_label)

              precision    recall  f1-score   support

  winogrande       0.98      1.00      0.99       233
        obqa       0.90      0.96      0.93       228
   wikitext2       1.00      0.85      0.92       200
       gsm8k       0.94      1.00      0.97       235
     ai2_arc       0.98      0.84      0.91       228
   hellaswag       0.89      0.97      0.92       229
       boolq       0.87      0.97      0.92       221
        piqa       0.93      0.86      0.89       217

    accuracy                           0.93      1791
   macro avg       0.94      0.93      0.93      1791
weighted avg       0.94      0.93      0.93      1791

