# Measuring the Bias of the Teacher Model

In [2]:
import sys
# Put the parent directory first on the import path so the project-local
# `training` package imported in the next cell can be resolved
# (presumably the notebook sits one level below the project root).
sys.path.insert(0, '..')  # or the path to your project root

In [3]:
import numpy as np
from training.Inference_Wrapper_Class import SuperModelWrapper
from transformers import AutoTokenizer, AutoModelForCausalLM#, BitsAndBytesConfig
import torch
from typing import Callable, Dict
import gc

In [8]:
class HFModel(SuperModelWrapper):
    """Use a Hugging Face causal language model as a classifier over fixed labels.

    A prediction is made by appending the input text to a prompt, running a
    single forward pass, and comparing the next-token probabilities of the
    first token of each label string.

    Typical usage: ``load_model(...)``, then ``set_labels(...)``, then
    ``predict(...)`` / ``predict_batch(...)``.
    """

    def __init__(self):
        # NOTE(review): super().__init__() is not called here; confirm that
        # SuperModelWrapper needs no initialisation of its own.
        self._tokenizer = None
        self._model = None
        self._prompt = "TODO" # TODO: Set a default prompt or provide a method to set it
        self._labels = None
        self._reversed_labels = None

    def set_labels(self, labels: Dict[int, str]) -> None:
        """Store the mapping from integer dataset labels to prompt label strings.

        The keys are the integer labels used in the dataset and the values are
        the label words that the model is expected to produce.

        Args:
            labels (Dict[int, str]): The labels to be saved.

        Raises:
            ValueError: If ``labels`` is not a dictionary.
            ValueError: If any key is not an integer.
            ValueError: If any value is not a string.
        """
        # NOTE: May want to change this so that the string label
        # representations are the keys and the integer labels the values, or
        # use a list where the index is the integer label.
        # TODO: once train/test dataframes are attached to this class, verify
        # that every label present in them has a string assigned here.
        if not isinstance(labels, dict):
            raise ValueError("Labels must be a dictionary")
        if not all(isinstance(k, int) for k in labels):
            raise ValueError("Label keys must be integers")
        if not all(isinstance(v, str) for v in labels.values()):
            raise ValueError("Label values must be strings")
        self._labels = labels
        # Reverse lookup (label string -> integer label). If two ids share a
        # string, only the last one survives in this mapping.
        self._reversed_labels = {v: k for k, v in labels.items()}

    def _unload(self) -> None:
        """Drop the current model and tokenizer and release cached GPU memory."""
        if self._model is not None:
            print(f"Unloading current model and tokenizer from device {self._model.device}")
            try:
                # Best effort: move the weights off the accelerator first.
                self._model.cpu()
            except RuntimeError:
                # A model dispatched with device_map="auto" may refuse to be
                # moved (e.g. when some weights are offloaded). Dropping the
                # references below is what actually frees the memory.
                pass
        self._model = None
        self._tokenizer = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    def load_model(self, path: str) -> None:
        """
        Loads the model and tokenizer from the specified path url on hugging face.

        Any previously loaded model/tokenizer is unloaded first.

        Args:
            path (str): The path to the model directory or the Hugging Face model ID.

        Raises:
            ValueError: If ``path`` is not a string.
        """
        if not isinstance(path, str):
            raise ValueError("A model name must be provided as a string")
        if self._model is not None or self._tokenizer is not None:
            # Handles the partially-loaded case too (tokenizer set, model not).
            self._unload()
        self._tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)

        self._model = AutoModelForCausalLM.from_pretrained(
            path,
            # quantization_config=bnb_config,
            dtype=torch.float16,
            device_map="auto",
            low_cpu_mem_usage=True,
            # SECURITY: this allows code shipped in the model repository to be
            # executed locally; only load repositories you trust.
            trust_remote_code=True,
        )
        print(f"Model loaded from {path} on device {self._model.device}")

    def predict(self, input_text: str) -> torch.Tensor:
        """Return normalised label probabilities for a single input text.

        Args:
            input_text (str): Text appended to the prompt before the forward pass.

        Returns:
            torch.Tensor: 1-D tensor of length ``max(label id) + 1``. Entry
            ``i`` is the next-token probability of the first token of label
            ``i``'s string, rescaled so that all entries sum to 1. Ids without
            a configured label stay at 0.

        Raises:
            ValueError: If the model/tokenizer, the prompt, or the labels have
                not been set.
        """
        if self._model is None or self._tokenizer is None:
            raise ValueError("Model and Tokenizer must be set")
        if self._prompt is None:
            raise ValueError("Prompt must be set.")
        if not self._labels:
            raise ValueError("Labels must be set.")

        # Run through the model in inference mode
        with torch.inference_mode():
            prompt = self._prompt + input_text
            model_inputs = self._tokenizer(prompt, return_tensors="pt").to(
                self._model.device
            )
            model_outputs = self._model(**model_inputs)
            # Logits for the token that would follow the prompt
            next_token_logits = model_outputs.logits[:, -1, :]
            # Softmax in float32: the model runs in float16, where small label
            # probabilities can underflow to 0 and break the normalisation.
            probs = torch.nn.functional.softmax(next_token_logits.float(), dim=-1)[0]
            label_probs = torch.zeros(max(self._labels) + 1)
            for label_id, label in self._labels.items():
                # For simplicity, use first token probability. The leading
                # space makes the label tokenise as a word following the prompt.
                label_tokens = self._tokenizer.encode(f" {label}", add_special_tokens=False)
                label_probs[label_id] = probs[label_tokens[0]].item()
            # Normalize over the candidate labels only
            return label_probs / label_probs.sum()

    def predict_batch(self, batch_input) -> torch.Tensor:
        """Run ``predict`` on every text in ``batch_input`` and stack the results.

        Args:
            batch_input: Iterable of input texts.

        Returns:
            torch.Tensor: 2-D tensor with one row of label probabilities per
            input, in input order. An empty batch yields a tensor with zero rows.
        """
        results = [self.predict(input_text) for input_text in batch_input]
        if not results:
            # torch.stack rejects an empty list; return an empty batch instead.
            width = max(self._labels) + 1 if self._labels else 0
            return torch.empty((0, width))
        return torch.stack(results)

In [9]:
# Create the wrapper; no model, tokenizer or labels are attached yet.
test = HFModel()

In [10]:
# Load the Llama 3.1 8B Instruct checkpoint and its tokenizer
# (HFModel.load_model requests float16 weights with device_map="auto").
test.load_model("meta-llama/Meta-Llama-3.1-8B-Instruct")

# Map each integer dataset label to the word whose next-token probability
# HFModel.predict compares.
test.set_labels({0: "negative", 1: "positive"})

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk and cpu.


Model loaded from meta-llama/Meta-Llama-3.1-8B-Instruct on device cuda:0


In [14]:
from dataclasses import dataclass

In [None]:
@dataclass
class Person():
    name: str
    gender: str
    race: str | None

@dataclass
class Emotion:
    """An emotion word together with its classification flags.

    Attributes:
        text: The emotion word itself.
        state_word: Flag for the word; presumably True when it describes an
            emotional state - TODO confirm against where instances are built.
        situation_word: Flag for the word; presumably True when it describes
            an emotional situation or event - TODO confirm.
        category: Name of the emotion category the word belongs to.
    """

    text: str
    state_word: bool
    situation_word: bool
    category: str

@dataclass
class EECSentence:
    """One generated sentence plus the metadata describing how it was built.

    Attributes:
        text: The sentence text.
        template_id: Integer identifying the template the sentence came from.
        person_type: Either "name" or "noun_phrase".
        name: The Person (actual name or phrase) used in the sentence.
        emotion_category: "anger", "fear", "joy", "sadness", or None.
        emotion_word: The emotion word used in the sentence, or None.
    """

    text: str
    template_id: int
    person_type: str
    name: Person
    emotion_category: str | None
    emotion_word: str | None

In [None]:
# Given names grouped by (gender, race), ten per group. The resulting list
# keeps the group order below and the name order within each group.
NAMES = [
    Person(given_name, gender, race)
    for gender, race, given_names in (
        (
            "female",
            "african_american",
            ("Ebony", "Jasmine", "Lakisha", "Latisha", "Latoya", "Nichelle", "Shaniqua", "Shereen", "Tanisha", "Tia"),
        ),
        (
            "male",
            "african_american",
            ("Alonzo", "Alphonse", "Darnell", "Jamel", "Jerome", "Lamar", "Leroy", "Malik", "Terrence", "Torrance"),
        ),
        (
            "female",
            "european_american",
            ("Amanda", "Betsy", "Courtney", "Ellen", "Heather", "Katie", "Kristin", "Melanie", "Nancy", "Stephanie"),
        ),
        (
            "male",
            "european_american",
            ("Adam", "Alan", "Andrew", "Frank", "Harry", "Jack", "Josh", "Justin", "Roger", "Ryan"),
        ),
    )
    for given_name in given_names
]

# Gendered pronouns and noun phrases that carry no race signal, ten per gender.
# `race` is passed explicitly as None so construction does not depend on
# Person supplying a default for that field.
NONRACE_NAMES = [
    *[Person(name, "female", None) for name in ["She", "This woman", "My sister", "My wife", "My mother", "This girl", "My daughter", "My girlfriend", "My aunt", "My mom"]],
    *[Person(name, "male", None) for name in ["He", "This man", "My brother", "My husband", "My father", "This boy", "My son", "My boyfriend", "My uncle", "My dad"]],
]