In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

local_model_path = "Dasool/mathGemma-2-9b"

tokenizer = AutoTokenizer.from_pretrained(local_model_path)
model = AutoModelForCausalLM.from_pretrained(
    local_model_path,
    device_map="auto",  
)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


In [7]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN_ID = tokenizer.eos_token_id 

In [8]:
import re

def extract_response(generated_text):
    match = re.search(r"Answer:\s*([a-eA-E])", generated_text)
    if match:
        return match.group(1)  
    else:
        return None  

In [4]:
import json
from transformers import TextStreamer

# Load the MathQA test dataset
test_data_path = "Datasets/MathQA/test.json"
with open(test_data_path, "r") as f:
    test_data = json.load(f)

total_examples = 0
correct_predictions = 0

model = model.to("cuda:1")

# Iterate over the dataset and generate predictions
for example in test_data:
    total_examples += 1
    # Extract the problem, options, and correct answer
    problem = example["Problem"]
    options = example["options"]
    correct_answer = example["correct"]

    # Prepare the input prompt using the alpaca format
    formatted_prompt = alpaca_prompt.format(
        "Please select one of the following options (a, b, c, d or e) for the given question",
        f"Problem: {problem}\nOptions: {options}\nFirst, think step by step to analyze the image and the question, then choose the best answer. Answer with only an Alphabet (a, b, c, d or e). Do not provide any explanation.",
        ""  # Leave response empty for generation
    )
    
    # Tokenize the input and move it to cuda:1
    input_ids = tokenizer(formatted_prompt, return_tensors="pt").to("cuda:1")

    # Set up the text streamer for live printing
    text_streamer = TextStreamer(tokenizer)

    # Generate the output without restricting tokens
    outputs = model.generate(
        **input_ids,
        max_new_tokens=32,  
        eos_token_id=EOS_TOKEN_ID,  
        streamer=text_streamer  
    )

    # Decode the generated response
    generated_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_letter = extract_response(generated_response)

    # Print or log the generated response
    print(f"Generated Response: {generated_letter}")
    print(f"Correct Answer: {correct_answer}\n")
    
    if generated_letter is not None:
        if generated_letter.lower() == correct_answer.lower():
            correct_predictions += 1
    else:
        print("Generated Response is None, marking as incorrect.")

# Calculate and print accuracy
accuracy = correct_predictions / total_examples * 100
print(f"Accuracy: {accuracy:.2f}%")

You shouldn't move a model that is dispatched using accelerate hooks.


<bos>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Please select one of the following options (a, b, c, d or e) for the given question

### Input:
Problem: a shopkeeper sold an article offering a discount of 5 % and earned a profit of 31.1 % . what would have been the percentage of profit earned if no discount had been offered ?
Options: a ) 38 , b ) 27.675 , c ) 30 , d ) data inadequate , e ) none of these
First, think step by step to analyze the image and the question, then choose the best answer. Answer with only an Alphabet (a, b, c, d or e). Do not provide any explanation.

### Response:


The 'max_batch_size' argument of HybridCache is deprecated and will be removed in v4.46. Use the more precisely named 'batch_size' argument instead.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Answer: a 
Rationale: "explanation : let c . p . be rs . 100 . then , s . p . = rs .
Generated Response: a
Correct Answer: a

<bos>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Please select one of the following options (a, b, c, d or e) for the given question

### Input:
Problem: what will be the difference between simple and compound interest at 14 % per annum on a sum of rs . 1000 after 4 years ?
Options: a ) 129 , b ) 130 , c ) 124 , d ) 133 , e ) 145
First, think step by step to analyze the image and the question, then choose the best answer. Answer with only an Alphabet (a, b, c, d or e). Do not provide any explanation.

### Response:
Answer: c 
Rationale: "s . i . = ( 1000 * 14 * 4 ) / 100
Generated Response: c
Correct Answer: a

<bos>Below is an instruction that describes a task, paired with an input that provides further context. Write a response th

In [5]:
accuracy

48.91122278056951