<a href="https://colab.research.google.com/github/NoamMichael/Comparing-Confidence-in-LLMs/blob/main/LSAT_Benchmarking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This Notebook will test all models on the formatted LSAT-AR dataset

In [1]:
import pandas as pd
import numpy as np
import json
import time
import random
import torch
import matplotlib.pyplot as plt
from transformers import (AutoTokenizer,
                        AutoModelForCausalLM,
                        BitsAndBytesConfig,
                        pipeline)
import warnings
# Install a blanket "ignore" filter: every Python warning raised after this
# point is suppressed (keeps cell output short, but also hides deprecations).
warnings.filterwarnings('ignore')
# `userdata` comes from google.colab — presumably only importable inside a
# Colab runtime; confirm before running this notebook elsewhere.
from google.colab import userdata
# Look up the value stored under the key 'hf_llama_token'; later cells pass it
# to Hugging Face `from_pretrained(..., token=...)` calls.
hf_llama_token = userdata.get('hf_llama_token')

class OpenModel: ## This class is built around Hugging Face methods
  """Wrapper around a Hugging Face causal language model.

  Loads the tokenizer and model for ``name`` and exposes two operations:
  ``generate`` (text completion with sampling disabled) and ``GetTokens``
  (probabilities of the most likely next tokens after a prompt).

  Args:
    name: Hugging Face model identifier passed to ``from_pretrained``.
    key: Access token passed as ``token=`` to ``from_pretrained``.
    MaxTokens: Upper bound on newly generated tokens per ``generate`` call.
  """

  def __init__(self, name, key, MaxTokens = 150):
    self.name = name
    self.key = key
    self.MaxTokens = MaxTokens
    self.tokenizer = AutoTokenizer.from_pretrained(self.name, token = self.key) ## Import Tokenizer
    # device_map="auto" lets the loader decide where the weights are placed.
    self.model = AutoModelForCausalLM.from_pretrained(self.name, token = self.key, device_map="auto") ## Import Model

    ## Make text generation pipeline
    self.pipeline = pipeline(
      "text-generation",
      model = self.model,
      tokenizer = self.tokenizer,
      do_sample = False,  # no sampling: repeated calls give the same text
      max_new_tokens = self.MaxTokens,
      eos_token_id = self.tokenizer.eos_token_id,
      # The EOS id doubles as the padding id.
      pad_token_id = self.tokenizer.eos_token_id
    )

  def generate(self, prompt):
    """Return the ``generated_text`` of the pipeline's first result for ``prompt``."""
    return self.pipeline(prompt)[0]['generated_text']

  def GetTokens(self, prompt: str, top_k: int = 100):
    """Return the probabilities of the most likely next tokens after ``prompt``.

    Args:
      prompt: Text whose continuation distribution is inspected.
      top_k: Maximum number of tokens to return (default 100). Clamped to the
        vocabulary size so small vocabularies do not raise.

    Returns:
      A ``pd.Series`` named "Probability", indexed by the decoded token text
      (index name "Token"), ordered from most to least probable.

    Raises:
      ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
      raise ValueError("top_k must be at least 1")

    # Place the inputs on the model's own device instead of assuming 'cuda':
    # the model was loaded with device_map="auto" and may live elsewhere.
    batch = self.tokenizer(prompt, return_tensors = "pt").to(self.model.device)
    with torch.no_grad():
      outputs = self.model(**batch)

    # Logits at the last position of the first (only) sequence; cast to
    # float32 so the softmax is computed in full precision.
    last_logits = outputs.logits[0, -1].float()
    probs = torch.softmax(last_logits, dim = 0)

    # topk with sorted=True already yields descending probabilities, so no
    # further sorting of the resulting Series is needed.
    k = min(top_k, probs.shape[0])
    top_k_probs, top_k_indices = torch.topk(probs, k, sorted = True)

    # Decode each token id on its own so every row is one token's text.
    top_k_tokens = [self.tokenizer.decode([token_id]) for token_id in top_k_indices.tolist()]

    logit_series = pd.Series(top_k_probs.tolist(), index = top_k_tokens)
    logit_series.index.name = "Token"
    logit_series.name = "Probability"
    return logit_series

In [2]:
# Smoke-test setup: build one OpenModel instance to exercise the wrapper.
test_name = 'meta-llama/Llama-3.1-8B-Instruct'  # Hugging Face model identifier
test_key = hf_llama_token  # token fetched from Colab userdata in the first code cell
test_prompt = "Zdzisław Beksiński was"  # open-ended prompt for the model to continue

# Constructing OpenModel loads the tokenizer and model and builds the text-generation pipeline.
test_model = OpenModel(name = test_name, key = test_key)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [3]:
# Print the text produced by the generation pipeline for the test prompt.
print(test_model.generate(test_prompt))
# Last expression of the cell: displays the Series of next-token probabilities for the same prompt.
test_model.GetTokens(test_prompt)

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Zdzisław Beksiński was a Polish painter, photographer, and sculptor. He was born on February 24, 1929, in Sanok, Poland. Beksiński's work is characterized by its dark and surreal themes, often exploring the human condition, mortality, and the relationship between technology and nature. He was a prolific artist, creating over 20,000 works of art during his lifetime, including paintings, photographs, and sculptures.
Beksiński's artistic style is often described as a blend of surrealism, expressionism, and fantasy. His paintings often feature dreamlike landscapes, eerie cityscapes, and abstracted forms, while his photographs capture the beauty and decay of the natural world. Beksiński's work has been exhibited


Unnamed: 0_level_0,Probability
Token,Unnamed: 1_level_1
a,0.752481
born,0.197850
one,0.018028
an,0.008461
the,0.005345
...,...
conceived,0.000021
appointed,0.000021
trained,0.000021
our,0.000021


In [None]:
## GPT pseudocode

from abc import ABC, abstractmethod

# Step 1: Define a base class for your LLM wrappers
class LLMWrapper(ABC):
    """Common interface for every language-model wrapper in this notebook.

    Concrete subclasses must override ``generate_response``; the base class
    itself cannot be instantiated.
    """

    @abstractmethod
    def generate_response(self, prompt: str) -> str:
        """Produce the model's reply to ``prompt``; subclasses supply the logic."""

# Step 2: Create subclasses for each LLM
class APIModelWrapper(LLMWrapper):
    """Placeholder wrapper for a model reached through a remote API."""

    def __init__(self, api_key):
        # Kept for authentication; a real implementation would also create
        # its API client here.
        self.api_key = api_key

    def generate_response(self, prompt: str) -> str:
        """Log the prompt and return a canned reply (stub for a real API call)."""
        placeholder_reply = "Response from API model"  # swap for the actual API result
        print(f"Calling API model with prompt: {prompt}")
        return placeholder_reply

class HuggingFaceModelWrapper(LLMWrapper):
    """Placeholder wrapper for a Hugging Face model identified by name."""

    def __init__(self, model_name):
        # Nothing is loaded yet. A real implementation would build the model
        # and tokenizer here (needs `transformers` and `torch`), for example:
        #   from transformers import pipeline
        #   self.generator = pipeline('text-generation', model=model_name)
        self.model_name = model_name
        print(f"Loading Hugging Face model: {model_name}")

    def generate_response(self, prompt: str) -> str:
        """Log the prompt and return a canned reply (stub for real generation)."""
        print(f"Calling Hugging Face model with prompt: {prompt}")
        # A real implementation would return e.g. the result of self.generator(prompt).
        placeholder_reply = "Response from Hugging Face model"
        return placeholder_reply

# Example of adding another model type
class AnotherModelWrapper(LLMWrapper):
    """Template showing how a further model type plugs into the interface."""

    def __init__(self, config):
        # Only the configuration is stored; model setup would go here.
        self.config = config

    def generate_response(self, prompt: str) -> str:
        """Log the prompt and return a canned reply (stub for real generation)."""
        placeholder_reply = "Response from another model"  # swap for actual generation
        print(f"Calling another model with prompt: {prompt}")
        return placeholder_reply

# Step 3: Instantiate your models
# One instance per wrapper type; every entry exposes generate_response(prompt).
# The arguments below are placeholders, not real credentials or settings.
models_to_test = [
    APIModelWrapper(api_key="your_api_key"),
    HuggingFaceModelWrapper(model_name="gpt2"), # Replace with your desired HF model
    AnotherModelWrapper(config={"setting": "value"})
]

# Step 4: Iterate and test
# Stand-in prompts; replace with your actual dataset loading logic.
dataset = ["Prompt 1", "Prompt 2", "Prompt 3"]

# Outer loop: one pass per prompt. Inner loop: ask every model the same prompt
# so their responses are printed next to each other.
for prompt in dataset:
    print(f"\nTesting prompt: {prompt}")
    for model in models_to_test:
        response = model.generate_response(prompt)
        # Label each response with the wrapper's class name.
        print(f"Model: {type(model).__name__}, Response: {response}")