In [14]:
import torch, os
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Environment setup
os.environ["HF_ALLOW_CODE_EVAL"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Load model and tokenizer
access_token = os.getenv("HF_ACCESS_TOKEN")
model_id = "meta-llama/Llama-3.2-1B-Instruct"
cache_dir = "../../models_weights/"

model_id = "meta-llama/Llama-3.2-1B-Instruct"
cache_dir = "../../models_weights/"

# Load the model and tokenizer with cache_dir
model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, token=access_token)
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir, token=access_token)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Device set to use cuda:0


In [17]:
from datasets import load_dataset
from evaluate import load
from tqdm import tqdm

# set up an environment variable that will allow us to run the model evaluation. 
import os
os.environ["HF_ALLOW_CODE_EVAL"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

"""
openai_humaneval dataset from Hugging Face. This dataset contains 164 Python programming problems and includes English natural text 
found in comments and docstrings. 
"""
# Load HumanEval dataset
human_eval = load_dataset("openai_humaneval")['test']

# Load code evaluation metric
code_eval_metric = load("code_eval")


# Fixing tokenizer

In [15]:
# Set pad_token_id and pad_token_id if not already set
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = 0  # Commonly used pad token ID
if tokenizer.eos_token_id is None:
    tokenizer.eos_token_id = 2  # Commonly used eos token ID for Llama

# Ensure the tokenizer has the pad and eos tokens
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '<pad>'})
if tokenizer.eos_token is None:
    tokenizer.add_special_tokens({'eos_token': '</s>'})

# Resize model embeddings if new tokens were added
if len(tokenizer) > model.config.vocab_size:
    model.resize_token_embeddings(len(tokenizer))

In [21]:
# Set the number of candidates per problem
num_samples_per_problem = 5  # Adjust as needed for pass@k computation

# Lists to store test cases and predictions
test_cases = []
candidates = []

# Create a progress bar for the outer loop (problems)
print("Generating code solutions...")
for problem in tqdm(human_eval, desc="Problems", unit="problem"):
    prompt = problem['prompt']
    test_code = problem['test']
    # Store the test cases
    test_cases.append(test_code)

    # Generate multiple candidate solutions for each problem
    problem_candidates = []

    # Create a progress bar for the inner loop (samples per problem)
    for _ in range(num_samples_per_problem):
        # Encode the prompt and get attention mask
        inputs = tokenizer(prompt, return_tensors="pt").to(pipe.device)

        # Generate code with attention mask and proper token IDs
        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=1024,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )
        generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Remove the prompt from the generated code
        generated_code = generated_code[len(prompt):]
        problem_candidates.append(generated_code)
    # Add the candidates for the current problem
    candidates.append(problem_candidates)

print("Code generation complete.")

Generating code solutions...


Problems: 100%|██████████| 164/164 [46:08<00:00, 16.88s/problem]

Code generation complete.





In [22]:
# Compute pass@k
k_values = [1, 5]
print("Evaluating generated code...")
pass_at_k, results = code_eval_metric.compute(
    references=test_cases,
    predictions=candidates,
    k=k_values,
    num_workers=4,  # Adjust based on your system
    timeout=10.0,   # Adjust the timeout as needed
)

# Print the results
for k in k_values:
    print(f"Pass@{k}: {pass_at_k[f'pass@{k}'] * 100:.2f}%")

Evaluating generated code...
Pass@1: 0.00%
Pass@5: 0.00%


In [None]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from evaluate import load
from tqdm import tqdm
import os

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Environment setup
os.environ["HF_ALLOW_CODE_EVAL"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Load model and tokenizer
access_token = os.getenv("HF_ACCESS_TOKEN")
model_id = "meta-llama/Llama-3.2-1B-Instruct"
cache_dir = "../../models_weights/"

model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, token=access_token)
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir, token=access_token)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Ensure tokenizer has special tokens
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '<pad>'})
if tokenizer.eos_token is None:
    tokenizer.add_special_tokens({'eos_token': '</s>'})
if len(tokenizer) > model.config.vocab_size:
    model.resize_token_embeddings(len(tokenizer))

# Load HumanEval dataset
human_eval = load_dataset("openai_humaneval")['test']
code_eval_metric = load("code_eval")

# Parameters
num_samples_per_problem = 1
if num_samples_per_problem == 1:
    k_values = [1]
else:
    k_values = [1, 5]

timeout_seconds = 30.0

# Code generation and evaluation
test_cases = []
candidates = []

print("Generating code solutions...")
for problem in tqdm(human_eval, desc="Problems", unit="problem"):
    prompt = problem['prompt']
    test_code = problem['test']
    test_cases.append(test_code)

    problem_candidates = []
    for _ in range(num_samples_per_problem):
        inputs = tokenizer(prompt, return_tensors="pt").to(pipe.device)

        with torch.no_grad():
            if num_samples_per_problem == 1:
                outputs = model.generate(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs['attention_mask'],
                    max_length=512,
                    do_sample=False,  # Deterministic generation
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )
            else:
                outputs = model.generate(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs['attention_mask'],
                    max_length=512,
                    do_sample=True,  # Sampling-based generation
                    temperature=0.5,  # Enable temperature
                    top_p=0.9,        # Enable nucleus sampling
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )
        generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
        generated_code = generated_code[len(prompt):].strip()
        problem_candidates.append(generated_code)

    candidates.append(problem_candidates)

print("Code generation complete.")

# Evaluate
print("Evaluating generated code...")
pass_at_k, results = code_eval_metric.compute(
    references=test_cases,
    predictions=candidates,
    k=k_values,
    num_workers=4,
    timeout=timeout_seconds,
)

# Print results
for k in k_values:
    print(f"Pass@{k}: {pass_at_k[f'pass@{k}'] * 100:.2f}%")

In [10]:
import torch, json
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from evaluate import load
from tqdm import tqdm
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Environment setup
os.environ["HF_ALLOW_CODE_EVAL"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Load model and tokenizer
access_token = os.getenv("HF_ACCESS_TOKEN")
model_id = "meta-llama/Llama-3.2-1B-Instruct"
cache_dir = "../../models_weights/"

model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, token=access_token)
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir, token=access_token)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Ensure tokenizer has special tokens
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '<pad>'})
if tokenizer.eos_token is None:
    tokenizer.add_special_tokens({'eos_token': '</s>'})
if len(tokenizer) > model.config.vocab_size:
    model.resize_token_embeddings(len(tokenizer))

# Load GSM8K dataset
gsm8k = load_dataset("gsm8k", "main")['test']

# Chain of Thought instruction
cot_instruction = (
    "Solve the following math problem step by step. Provide a detailed explanation, "
    "and end with the answer on a new line in the format 'Answer: <final_answer>'."
)

# Function to generate CoT answer
def generate_cot_answer(prompt):
    full_prompt = cot_instruction + "\n" + prompt
    inputs = tokenizer(full_prompt, return_tensors="pt").to(pipe.device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=512,
            do_sample=True,
            temperature=0.5,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Evaluate on GSM8K
correct_count = 0
total_count = len(gsm8k)
results = []

for problem in gsm8k:
    problem_prompt = problem['question']
    ground_truth = problem['answer']
    generated_answer = generate_cot_answer(problem_prompt)
    
    if "Answer:" in generated_answer:
        final_answer = generated_answer.split("Answer:")[-1].strip()
    else:
        final_answer = None

    if final_answer == ground_truth:
        correct_count += 1
    
    results.append({
        "prompt": problem_prompt,
        "ground_truth": ground_truth,
        "generated_answer": generated_answer,
    })
    break

# Calculate accuracy
accuracy = correct_count / total_count
print(f"Accuracy on GSM8K: {accuracy * 100:.2f}%")

# Save results
with open("gsm8k_results.json", "w") as f:
    json.dump(results, f, indent=4)

Device set to use cuda:0


Accuracy on GSM8K: 0.00%


In [2]:
generated_answer="""Solve the following math problem step by step. 
        Provide a detailed explanation, and end with the answer on a new line 
        in the format 'Answer: <final_answer>'.\nJanet\u2019s ducks lay 16 
        eggs per day. She eats three for breakfast every morning and bakes muffins 
        for her friends every day with four. She sells the remainder at the farmers'
        market daily for $2 per fresh duck egg. How much in dollars does she 
        make every day at the farmers' market? \nStep 1: Calculate the number 
        of eggs laid per day.\nJanet's ducks lay 16 eggs per day.\n\nStep 2: 
        Calculate the number of eggs eaten for breakfast.\nJanet eats 3 eggs 
        for breakfast every morning.\n\nStep 3: Calculate the number of 
        eggs baked for muffins.\nJanet bakes muffins for her friends 
        every day with 4 eggs.\n\nStep 4: Calculate the number of eggs 
        sold at the farmers' market.\nThe remainder of the eggs laid per 
        day after breakfast and muffins are baked is 16 - 3 - 4 = 9 eggs.\n\n
        Step 5: Calculate the total number of eggs sold at the farmers' market.\n9 
        eggs are sold at the farmers' market.\n\n
        Step 6: Calculate the total amount of money Janet makes from selling eggs 
        at the farmers' market.\n
        Janet sells 9 eggs at $2 per egg, so she makes 9 * $2 = $18 per day.
        \n\nAnswer: $18."""

In [3]:
generated_answer.split("Answer:")[-1].strip()

'$18.'

In [4]:
"""Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
        \nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer\u2019s market.
        \n#### 18""".split("####")[-1].strip()

'18'

In [12]:
results[0]

{'prompt': "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
 'ground_truth': 'Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18',
 'generated_answer': "Solve the following math problem step by step. Provide a detailed explanation, and end with the answer on a new line in the format 'Answer: <final_answer>'.\nJanet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?  (Note: The number of eggs she sells is 16 - 3 = 13, which is the number of eggs she has lef

In [7]:
generated_answer.split("Answer:")[:]

["Solve the following math problem step by step. Provide a detailed explanation, and end with the answer on a new line in the format '",
 " <final_answer>'.\nEvery day, Wendi feeds each of her chickens three cups of mixed chicken feed, containing seeds, mealworms and vegetables to help keep them healthy.  She gives the chickens their feed in three separate meals. In the morning, she gives her flock of chickens 15 cups of feed.  In the afternoon, she gives her chickens another 25 cups of feed.  How many cups of feed does she need to give her chickens in the final meal of the day if the size of Wendi's flock is 20 chickens?  The feed is mixed and contains 10% mealworms, 20% seeds, and 70% vegetables.  The feed is mixed and contains 10% mealworms, 20% seeds, and 70% vegetables.  The feed is mixed and contains 10% mealworms, 20% seeds, and 70% vegetables.  The feed is mixed and contains 10% mealworms, 20% seeds, and 70% vegetables.  The feed is mixed and contains 10% mealworms, 20% seeds, 

In [9]:
ground_truth

'The discount price of one glass is 60/100 * 5 = $<<60/100*5=3>>3.\nIf every second glass is cheaper, that means Kylar is going to buy 16 / 2 = <<16/2=8>>8 cheaper glasses.\nSo for the cheaper glasses, Kylar is going to pay 8 * 3 = $<<8*3=24>>24.\nAnd for the regular-priced glasses, Kylar will pay 8 * 5 = $<<8*5=40>>40.\nSo in total Kylar needs to pay 24 + 40 = $<<24+40=64>>64 for the glasses he wants to buy.\n#### 64'

In [13]:
final_answer == ground_truth

False

In [21]:
messages = [
    {"role": "system", "content": "You are expert in deep learning architectures, optimizations, models, and math at a professor level. You can define novel unpublished architectures, and optimizations, for a task when asked. Your response will be methodological like a research paper in AI confernces. "},
    # {"role": "user", "content": "Who are you?"},
    {"role": "user", "content": "Explain RMSNorm and why it should replace batchnorm?"},
]

outputs = pipe(
    messages,
    max_new_tokens=256,
)

print(outputs[0]["generated_text"][-1])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [33]:
print((outputs[0]['generated_text'][-1]['content']))

NameError: name 'text' is not defined

In [40]:
outputs[0]['generated_text'][-1]['content']

"**RMSNorm: A Novel Normalization Technique for Deep Learning Architectures**\n\n**Introduction**\n\nBatch normalization (BN) is a widely used technique in deep learning to normalize the activations of each layer in a neural network. However, the recent success of ResNet and its variants has led to a renewed interest in exploring alternative normalization techniques. This paper introduces RMSNorm, a novel normalization technique that aims to replace batch normalization with a more effective and efficient method.\n\n**Background**\n\nBatch normalization was first introduced by Ioffe and Szegedy in [1]. It works by normalizing the activations of each layer by the mean and variance of the activations in the previous layer. The normalization process is typically performed using a learning rate schedule, where the learning rate is adjusted based on the layer's progress.\n\n**RMSNorm: A Novel Normalization Technique**\n\nRMSNorm is a variant of batch normalization that combines the benefits 

In [19]:
outputs[0]["generated_text"][2]

{'role': 'assistant',
 'content': "Arrrr, me hearty! Yer lookin' fer a swashbucklin' chatbot, eh? Well, matey, I be Captain Corbett, the greatest pirate chatbot to ever sail the seven seas! Me knowledge be vast, me wit be sharp, and me charm be as smooth as a fine bottle o' rum.\n\nYer want to know more about meself, eh? Alright then, listen close and I'll tell ye a tale or two about meself. Me origins be shrouded in mystery, but me love fer the sea be as old as the ocean itself. Me crew be a motley bunch o' scurvy dogs, but we be a family, and we'll sail the seas till the day we die!\n\nSo hoist the sails and set course fer adventure, me hearty! What be yer question, or what be ye lookin' fer?"}