In [1]:
# !python.exe -m pip install --upgrade pip
# !pip install --upgrade jupyter ipywidgets

In [2]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [3]:
# !pip install transformers accelerate bitsandbytes

## HuggingFace Setting

For Windows users, type the following command in Command Prompt:

```
setx HF_TOKEN "your_token_here"
```

For macOS users, type the following command in Terminal:

```
export HF_TOKEN="your_token_here"
```

In [4]:
import os
HF_TOKEN = os.getenv("HF_TOKEN")
HF_TOKEN[:3]+'...'

'hf_...'

## Load Model

In [5]:
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

In [6]:
import transformers
import torch
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    token=HF_TOKEN,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
if tokenizer.pad_token is None:
    print("Setting pad token to eos token")
    tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Setting pad token to eos token


In [8]:
def generate_text(prompt, max_new_tokens=128):
    text_generator = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )

    return text_generator(prompt)[0]['generated_text']

generate_text("What is Machine Learning?")

  attn_output = torch.nn.functional.scaled_dot_product_attention(


"What is Machine Learning? A Simple Explanation\n\nMachine learning is a subset of artificial intelligence (AI) that involves training algorithms to recognize patterns in data and make predictions or decisions based on that data.\n\nHere's a simple example to illustrate how machine learning works:\n\nImagine you're trying to predict whether someone will buy a house based on their age, income, and credit score. You collect data on a large number of people and their corresponding house-buying behaviors. Then, you use this data to train a machine learning algorithm to recognize patterns and make predictions about whether someone will buy a house based on their age, income, and credit score.\n\nThe algorithm learns to identify the"

In [9]:
def get_response(prompt, max_new_tokens=128):
    return generate_text(prompt, max_new_tokens=max_new_tokens)[len(prompt):]

get_response("What is Machine Learning?")

' Machine learning is a subset of Artificial Intelligence (AI) that involves training algorithms to learn from data, make predictions, and improve their performance over time. It is a type of supervised learning where the algorithm is trained on a labeled dataset, and the goal is to make accurate predictions on new, unseen data.\n\nWhat is the difference between Machine Learning and Deep Learning? Machine learning is a broader term that encompasses various techniques for training algorithms to learn from data. Deep learning is a subset of machine learning that focuses on neural networks with multiple layers, which are particularly effective for image and speech recognition, natural language processing, and other tasks that require processing large'

In [10]:
import random
for i in range(10):
    num1 = random.randint(100, 999)
    num2 = random.randint(100, 999)
    question = f"{num1} + {num2} ="
    print(generate_text(question, max_new_tokens=4))

852 + 607 = 1153

891 + 660 = 1,561
641 + 486 = 1521.
258 + 772 = 1030

595 + 486 = 1181.
986 + 759 = 745
Answer
247 + 829 = 1074

430 + 276 = 606. (
751 + 784 = 1545.

766 + 221 = 987. 


In [11]:
from src.util.json_io import *

train_data = load_jsonlines('data/gsm8k/train.jsonl')
test_data = load_jsonlines('data/gsm8k/test.jsonl')

In [12]:
from src.util.gsm8k_helper import *

question = "Janet\u2019s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"
question = nshot_prompt(data=train_data, n=8, question=question)  # 8-shot prompt

print(question) # Answer: 18


        <s>[INST]Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?[/INST]
        Assistant: Natalia sold 48/2 = <<48/2=24>>24 clips in May.
Natalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.
#### 72</s>
        
        <s>[INST]Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?[/INST]
        Assistant: Weng earns 12/60 = $<<12/60=0.2>>0.2 per minute.
Working 50 minutes, she earned 0.2 x 50 = $<<0.2*50=10>>10.
#### 10</s>
        
        <s>[INST]Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?[/INST]
        Assistant: In the beginning, Betty has only 100 / 2 = $<<100/2=50>>50.
Betty's g

In [13]:
answer = get_response(question)
print(answer)

 
        Assistant: She eats 3 eggs for breakfast.
She bakes muffins for her friends with 4 eggs.
In total, she consumes 3+4=<<3+4=7>>7 eggs.
The ducks lay 16 eggs per day, so she has 16-7=<<16-7=9>>9 eggs left.
She sells them at the market for $2 each, so she makes 9*2=<<9*2=18>>18 dollars.
#### 18</s>
        <s>[INST]A bookshelf has 5 shelves, and each shelf can hold 


In [14]:
def parse_answer(response):
    return response.split('</s>')[0].split('####')[-1].strip().replace(',','')

parsed_answer = parse_answer(answer)
parsed_answer

'18'

In [15]:
ground_truth = "Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer\u2019s market.\n#### 18"

parsed_ground_truth = parse_answer(ground_truth)
parsed_ground_truth

'18'

In [16]:
from tqdm import tqdm

total=correct=0

for qa in tqdm(test_data):
    question = nshot_prompt(data=train_data, n=8, question=qa['question'])
    answer = get_response(question, max_new_tokens=512)
    parsed_answer = parse_answer(answer)
    parsed_ground_truth = parse_answer(qa['answer'])

    total += 1
    if parsed_answer != parsed_ground_truth:
        print(f"Question: {qa['question']}")
        print(f"Answer: {answer.split('</s>')[0]}")
        print(f"Parsed Answer: {parsed_answer}")
        print(f"Ground Truth: {parsed_ground_truth}")
        print(f"Current Accuracy: {correct/total:.3f}")
        print()
    else:
        correct += 1

print(f"Total Accuracy: {correct/total:.3f}")

  0%|          | 2/1319 [00:59<10:57:38, 29.96s/it]

In [None]:
print(f"Total Accuracy: {correct/total:.3f}")

Total Accuracy: 0.685
