# 1. General Settings

### 1.1 Import libraries

In [1]:
### transformers 4.28.1 has bugs; update to the newest version (4.31.0)
from dataclasses import dataclass, field
from typing import Optional
import torch
import re
import numpy as np
import torch
import random
from accelerate import Accelerator
from datasets import load_dataset
import pandas as pd

from tqdm import tqdm
from transformers import Adafactor, AutoTokenizer, HfArgumentParser, pipeline
from transformers import LlamaForCausalLM, LlamaTokenizer
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel

[2023-09-04 23:00:04,112] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


### 1.2 Random Seed

In [2]:
def set_seed(seed: int):
    """
    Seed the random number generators of `random`, `numpy`, and `torch`.

    The same seed is handed, in order, to `random.seed`, `np.random.seed`,
    `torch.manual_seed`, and `torch.cuda.manual_seed_all`.

    Args:
        seed (`int`): The seed to set.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


set_seed(1000)

### 1.3 Metrics

In [49]:
# Reference: https://www.nltk.org/

import operator
from functools import reduce
from math import fabs
from random import shuffle

try:
    from scipy.stats.stats import betai
except ImportError:
    betai = None

from nltk.util import LazyConcatenation, LazyMap



def accuracy(reference, test):
    """
    Given a list of reference values and a corresponding list of test
    values, return the fraction of corresponding values that are
    numerically equal.  In particular, return the fraction of indices
    ``0<=i<len(test)`` such that ``float(test[i]) == float(reference[i])``.

    Values are compared after conversion with ``float``, so ``7``, ``"7"``
    and ``"7.0"`` all agree.  A pair in which either value cannot be
    converted to a float (for example the ``"[invalid]"`` placeholder used
    for responses without an extractable answer, or ``None``) is counted as
    a mismatch instead of aborting the whole evaluation.

    :type reference: list
    :param reference: An ordered list of reference values.
    :type test: list
    :param test: A list of values to compare against the corresponding
        reference values.
    :rtype: float
    :raise ValueError: If ``reference`` and ``test`` do not have the
        same length.
    :raise ZeroDivisionError: If both lists are empty.
    """
    if len(reference) != len(test):
        raise ValueError("Lists must have the same length.")

    def _as_number(value):
        # None marks "not a number" so one bad prediction cannot crash scoring.
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    matches = 0
    for ref_value, test_value in zip(reference, test):
        ref_number = _as_number(ref_value)
        test_number = _as_number(test_value)
        if ref_number is None or test_number is None:
            continue
        if ref_number == test_number:
            matches += 1
    return matches / len(test)



def precision(reference, test):
    """
    Compute the share of ``test`` values that also occur in ``reference``,
    i.e. card(``reference`` intersection ``test``)/card(``test``).
    Returns None when ``test`` is empty.

    :type reference: set
    :param reference: A set of reference values.
    :type test: set
    :param test: A set of values to compare against the reference set.
    :rtype: float or None
    :raise TypeError: If either argument has no ``intersection`` attribute.
    """
    # Both arguments must be set-like; reference is checked first.
    for candidate in (reference, test):
        if not hasattr(candidate, "intersection"):
            raise TypeError("reference and test should be sets")

    test_size = len(test)
    if test_size == 0:
        return None

    overlap = reference.intersection(test)
    return len(overlap) / test_size



def recall(reference, test):
    """
    Compute the share of ``reference`` values that also occur in ``test``,
    i.e. card(``reference`` intersection ``test``)/card(``reference``).
    Returns None when ``reference`` is empty.

    :type reference: set
    :param reference: A set of reference values.
    :type test: set
    :param test: A set of values to compare against the reference set.
    :rtype: float or None
    :raise TypeError: If either argument has no ``intersection`` attribute.
    """
    both_set_like = hasattr(reference, "intersection") and hasattr(
        test, "intersection"
    )
    if not both_set_like:
        raise TypeError("reference and test should be sets")

    reference_size = len(reference)
    if reference_size == 0:
        return None

    shared = reference.intersection(test)
    return len(shared) / reference_size



def f_measure(reference, test, alpha=0.5):
    """
    Compute the f-measure of ``test`` against ``reference``: the harmonic
    mean of ``precision`` and ``recall``, weighted by ``alpha``.

    With precision *p* and recall *r* defined as:

    - *p* = card(``reference`` intersection ``test``)/card(``test``)
    - *r* = card(``reference`` intersection ``test``)/card(``reference``)

    the result is:

    - *1/(alpha/p + (1-alpha)/r)*

    None is returned when either ``reference`` or ``test`` is empty, and
    0 is returned when the precision or the recall is zero.

    :type reference: set
    :param reference: A set of reference values.
    :type test: set
    :param test: A set of values to compare against the reference set.
    :param alpha: Weight given to the precision term (default 0.5).
    :rtype: float or None
    """
    p_score = precision(reference, test)
    r_score = recall(reference, test)

    # An empty input set makes one of the scores undefined.
    if p_score is None or r_score is None:
        return None

    # A zero score would divide by zero below; the f-measure is 0 then.
    if p_score == 0 or r_score == 0:
        return 0

    weighted_inverse = alpha / p_score + (1 - alpha) / r_score
    return 1.0 / weighted_inverse


### 1.4 Prompt format/ examples

In [4]:
# Prompt template for SVAMP questions. The single `{}` placeholder is filled
# with the question text through `str.format`.
# NOTE(review): the template spells the label "Qestion" and the recorded
# outputs in this file were generated with that spelling, so it is left as is.
# NOTE(review): the system tags appear here as «SYS» / «/SYS»; Llama-2 chat
# models presumably expect <<SYS>> / <</SYS>> - confirm against the original
# notebook before reusing this template.
svamp_format = """[INST] «SYS»\n
Given an arithmetic question, generate thoughts about the question step by step and then only give the answer as a number. Please follow the format below:

Thoughts:
<Step by Step Thoughts>
Answer:
<Number>

\n«/SYS»
Qestion:
{}
Thoughts:[/INST]"""

In [5]:
svamp_format.format('test')

'[INST] «SYS»\n\nGiven an arithmetic question, generate thoughts about the question step by step and then only give the answer as a number. Please follow the format below:\n\nThoughts:\n<Step by Step Thoughts>\nAnswer:\n<Number>\n\n\n«/SYS»\nQestion:\ntest\nThoughts:[/INST]'

### 1.5 Dataset

In [6]:
def build_test_dataset(
    dataset_name="/root/autodl-tmp/SVAMP",
    prompt_examples=svamp_format,
    prompt_format="default",
):
    """
    Build the dataset used for testing. This loads the ``test`` split with
    `load_dataset` and adds a formatted prompt to every example; customize
    this function to evaluate the model on another dataset.

    Every row gains two columns while keeping its original ones:
    ``full_question`` ("<Body> <Question>") and ``query`` (``prompt_examples``
    with its ``{}`` placeholder filled by ``full_question``).

    Args:
        dataset_name (`str`):
            The name or path of the dataset to be loaded.
        prompt_examples (`str`):
            The prompt template; must contain one ``{}`` placeholder for
            the question text.
        prompt_format (`str`):
            The format of the prompting for testing. Only ``'default'`` is
            supported.

    Returns:
        The test dataset with the added ``query`` and ``full_question``
        columns.

    Raises:
        ValueError: If `prompt_format` is not a supported format.
    """
    # Fail before loading anything; previously an unknown format surfaced as
    # an unbound `query` variable in the middle of `map`.
    if prompt_format != "default":
        raise ValueError(
            f"Unsupported prompt_format {prompt_format!r}; expected 'default'."
        )

    ds_test = load_dataset(dataset_name, split="test")

    def preprocess_function(examples):
        full_question = examples["Body"] + " " + examples["Question"]
        return {
            # Use the template passed by the caller, not the module global.
            "query": prompt_examples.format(full_question),
            "full_question": full_question,
        }

    # The original columns are kept on purpose: the "Answer" column is read
    # from the returned dataset later on.
    ds_test = ds_test.map(
        preprocess_function,
        batched=False,
    )

    print(ds_test)
    return ds_test

In [7]:
# Build the test dataset with `build_test_dataset` and pull out the columns
# that the generation and scoring steps read.
dataset = build_test_dataset(dataset_name="/root/autodl-tmp/SVAMP")
prompt_test=dataset["query"]  # formatted prompts
questions_test=dataset["full_question"]  # "<Body> <Question>" text without the template
answer_test=dataset["Answer"]  # reference answers taken from the dataset

HF google storage unreachable. Downloading and preparing it from source


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Dataset({
    features: ['Equation', 'Answer', 'Type', 'Body', 'Question', 'ID', 'query', 'full_question'],
    num_rows: 300
})


In [9]:
prompt_test[1]

'[INST] «SYS»\n\nGiven an arithmetic question, generate thoughts about the question step by step and then only give the answer as a number. Please follow the format below:\n\nThoughts:\n<Step by Step Thoughts>\nAnswer:\n<Number>\n\n\n«/SYS»\nQestion:\nPaul got a box of some crayons for his birthday. During the school year he gave 52 crayons to his friends while he lost 535 crayons. If he only had 492 crayons left How many crayons had been lost or given away?\nThoughts:[/INST]'

### 1.6 Function to Process Responses

In [53]:
# Finds the first number that follows "Answer:" (possibly on a later line,
# hence DOTALL). Group 1 is an optional "$", group 2 the number with an
# optional leading "-". The lookahead requires the captured run to contain a
# digit, so sentence punctuation such as the comma in "Answer: Well, it is 5"
# is skipped instead of being captured as the answer.
ANS_RE = re.compile(r"Answer:.*?(\$?)(\-?(?=[\.\,]*[0-9])[0-9\.\,]+)", re.DOTALL)

INVALID_ANS = "[invalid]"


# extract the numerical answer for arithmetic dataset of the default prompt format
def _extract_answer(completion: str):
    """
    Extract the numeric answer that follows "Answer:" in a model completion.

    Thousands separators (",") are removed from the captured number. If the
    result cannot be parsed by ``float`` only because of trailing periods
    (e.g. "3.5." at the end of a sentence), those periods are dropped.

    Args:
        completion (`str`): The text generated by the model.

    Returns:
        The answer as a string that ``float`` can parse, or ``INVALID_ANS``
        when no "Answer:" number is found or the capture is not a number.
    """
    match = ANS_RE.search(completion)
    if match is None:
        return INVALID_ANS

    match_str = match.group(2).strip().replace(",", "")

    # Prefer the capture as is; fall back to it without trailing periods.
    for candidate in (match_str, match_str.rstrip(".")):
        try:
            float(candidate)
        except ValueError:
            continue
        return candidate
    return INVALID_ANS



### 1.7 Tokenizer

In [10]:
# Tokenizer for the Llama checkpoint stored at `model_name`.
model_name='/root/autodl-tmp/Llama-2-7b-chat-hf'
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): prompts are generated in batches further down; decoder-only
# models usually need left padding for batched generation - confirm that
# "right" is intended here.
tokenizer.padding_side = "right"

### 1.8 Generate responses to questions

In [13]:
"""
def generate_responses(model,tokenizer,prompt_test=prompt_test,answer_test=answer_test,test_epochs=1,batch_size=2):


    # because of sampling of tokens, we can increase epochs of test for more precise evaluation
    test_epochs=test_epochs

    outputs_text=[]
    # the prompt and answer correspondong to output_text
    prompts_for_outputs=[]
    answers_for_outputs=[]

    batch_size = batch_size  

    for epoch in tqdm(range(test_epochs)):
        for i in tqdm(range(0, len(prompt_test), batch_size)):  # Increment by batch_size

            # Slice the data to create a batch
            batch_questions = prompt_test[i:i+batch_size]
            batch_answers = answer_test[i:i+batch_size]

            batch_inputs = tokenizer(batch_questions, padding=True, return_tensors="pt", truncation=True, max_length=2048).to('cuda')

            batch_outputs = model.generate(**batch_inputs,    
                                     max_new_tokens=256,
                                     top_k=50,
                                     temperature=1.0,
                                     top_p=0.95,
                                     do_sample=True,
                                     repetition_penalty=1.1)
            batch_input_length = batch_inputs.input_ids.shape[1]
            batch_generated_tokens = batch_outputs[:, batch_input_length:]
            batch_output_text = tokenizer.batch_decode(batch_generated_tokens)
            print(batch_output_text)
        
        
            outputs_text.extend(batch_output_text)
            prompts_for_outputs.extend(batch_questions)
            answers_for_outputs.extend(batch_answers)
    return outputs


outputs=generate_responses(model=base_model,tokenizer=tokenizer)
"""

In [26]:
def generate_responses(generator,prompt_test=prompt_test,answer_test=answer_test,questions_test=questions_test,test_epochs=1,batch_size=4):
    """
    Run `generator` over the test prompts in batches and collect its output.

    Args:
        generator: Callable invoked as
            ``generator(prompts, batch_size=..., return_full_text=False)``;
            each returned item is read as ``item[0]['generated_text']``.
        prompt_test: Prompts to send to the generator.
        answer_test: Reference answers, aligned with `prompt_test`.
        questions_test: Question texts, aligned with `prompt_test`.
        test_epochs (`int`): Number of passes over the prompts. With token
            sampling, more passes give a more precise evaluation (law of
            large numbers).
        batch_size (`int`): Number of prompts per generator call.

    Returns:
        A tuple ``(outputs, questions, answers, prompts)`` of equally ordered
        lists, one entry per generated response.
    """
    outputs = []
    prompts_for_outputs = []
    questions_for_outputs = []
    answers_for_outputs = []

    for _ in tqdm(range(test_epochs)):
        # Walk over the prompts in steps of batch_size.
        for start in tqdm(range(0, len(prompt_test), batch_size)):
            stop = start + batch_size

            # The three sequences are sliced identically to stay aligned.
            batch_prompts = prompt_test[start:stop]
            batch_questions = questions_test[start:stop]
            batch_answers = answer_test[start:stop]

            batch_generations = generator(
                batch_prompts,
                batch_size=batch_size,
                return_full_text=False,
            )
            print(batch_generations)

            for generation in batch_generations:
                outputs.append(generation[0]['generated_text'])
            prompts_for_outputs.extend(batch_prompts)
            questions_for_outputs.extend(batch_questions)
            answers_for_outputs.extend(batch_answers)

    return outputs, questions_for_outputs, answers_for_outputs, prompts_for_outputs

# 2. Base model

### 2.1 Load base model and build pipeline

In [11]:
# Load the base model from the local checkpoint directory.
model_name='/root/autodl-tmp/Llama-2-7b-chat-hf'
# Weights are loaded as float16; device_map=0 presumably places the whole
# model on GPU 0 - confirm against the transformers version in use.
base_model = AutoModelForCausalLM.from_pretrained(model_name,
                                                  torch_dtype=torch.float16,
                                                  device_map=0,
                                                 )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [16]:
# Sampling-based generation settings.
# NOTE(review): `gen_kwargs` is not referenced anywhere in the visible part
# of this file; the pipeline below sets its own generation arguments.
gen_kwargs=dict(
    max_new_tokens=256,
    top_k=50,
    temperature=1.0,
    top_p=0.95,
    do_sample=True,
    repetition_penalty=1.1
)


# Create the text-generation pipeline for the base model. To make results
# comparable across models, sampling is switched off (do_sample=False, i.e.
# greedy search), which removes the influence of random token sampling.
base_generator= pipeline("text-generation", model=base_model, tokenizer=tokenizer,
                         max_new_tokens=512,
                         #top_k=50,
                         temperature=1.0,
                         #top_p=0.95,
                         do_sample=False,
                         #repetition_penalty=1.1,
                         #device=0,
                        )

### 2.2 Generate responses by base model

In [27]:
# Generate base-model responses for the default test prompts, 10 prompts per
# pipeline call; the result is unpacked as (responses, questions, answers, prompts).
base_responses,base_response_questions,base_response_answers,base_prompts=generate_responses(base_generator,batch_size=10)

  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/30 [00:00<?, ?it/s][A
  3%|▎         | 1/30 [00:09<04:42,  9.75s/it][A

[[{'generated_text': "  Sure, I'd be happy to help! Here are my thoughts on the question:\nThoughts:\n1. Let's start by identifying the given information:\n* Mary has already added 5 cups of flour.\n* The recipe calls for 6 cups of flour.\n* The recipe also calls for 8 cups of sugar.\n* The recipe calls for 7 cups of salt.\n2. Now, we need to find out how many more cups of sugar Mary needs to add than cups of salt. Let's do this by subtracting the amount of flour she has already added from the total amount of flour needed:\n6 cups of flour - 5 cups of flour = 1 cup of flour remaining\nNow, we need to find out how many cups of sugar are left to add after subtracting the amount of flour remaining from the total amount of sugar needed:\n8 cups of sugar - 1 cup of flour = 7 cups of sugar remaining\n3. Finally, we can see that Mary needs to add 7 cups of sugar more than cups of salt to complete the recipe.\nAnswer: 7\n\nI hope this helps! Let me know if you have any questions."}], [{'genera


  7%|▋         | 2/30 [00:18<04:08,  8.87s/it][A

[[{'generated_text': '\nThoughts:\n\n* Step 1: Identify the given information: Lewis earns $403 per week during the 233 weeks of harvest, and he has to pay $49 rent every week.\n* Step 2: Calculate the total amount of money Lewis earns during harvest season: 233 weeks x $403 per week = $93,530.\n* Step 3: Subtract the amount of rent Lewis has to pay each week: $93,530 - $49 per week = $89,530.\nAnswer: $89,530.'}], [{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n\n* Jessie weighed 92 kilograms initially.\n* She lost 56 kilograms in the first week.\n* She lost 99 kilograms in the second week.\n\nNow, let's calculate Jessie's weight after the first week of jogging:\n\n* Initial weight = 92 kg\n* Loss in first week = 56 kg\n* New weight after first week = Initial weight - Loss in first week = 92 kg - 56 kg = 36 kg\n\nSo, Jessie weighed 36 kilograms after the first week of jogging.\nAnswer: 36 kg"}], [{'generated_text': '\nThoughts:\n\n1


 10%|█         | 3/30 [00:25<03:36,  8.02s/it][A

[[{'generated_text': "\nThoughts:\nLet's break down the information given in the question:\n\n1. Jerry had some action figures on a shelf in his room.\n2. Later, he added 2 more action figures to the shelf.\n3. He removed 7 of the old action figures from the shelf.\n4. Now, there are 10 action figures on the shelf.\n\nSo, if Jerry had initially X action figures on the shelf, and he added 2 more and removed 7, then the total number of action figures he has now is X + 2 - 7 = 10.\nTo find out how many action figures Jerry had initially on the shelf, we can solve for X in the equation:\nX + 2 - 7 = 10\nX = 10 - 7 = 3\nSo, Jerry had 3 action figures on the shelf initially.\nAnswer: 3"}], [{'generated_text': "\nThoughts:\n\n1. Let's assume the harvesting process is continuous and doesn't stop for any reason.\n2. The question asks how many days it will take to harvest 24 sacks of oranges, given that they harvest 8 sacks per day.\n3. To find out, we need to divide the number of sacks to be ha


 13%|█▎        | 4/30 [00:39<04:38, 10.70s/it][A

[[{'generated_text': "\nThoughts:\n\n1. Let's start by identifying the given information:\n\t* Mary has already added 7 cups of flour.\n\t* The recipe calls for 10 cups of flour.\n\t* The recipe also calls for 2 cups of sugar.\n\t* Mary needs to add more flour than sugar.\n2. To find out how many more cups of flour Mary needs to add, we can subtract the amount of flour she has already added from the total amount of flour needed:\n\t* 10 cups - 7 cups = 3 cups\nSo, Mary needs to add 3 more cups of flour.\nAnswer: 3"}], [{'generated_text': "\nThoughts:\nLet's break down the problem step by step:\n\nStep 1: We know that the mailman has to give 32 pieces of junk mail to each of the 55 blocks.\nStep 2: We are also given that the mailman gives 8 mails to each house in a block.\nStep 3: So, the total number of mails the mailman gives to each block is 32 x 8 = 256.\nStep 4: Since there are 55 blocks in total, the total number of mails the mailman gives is 256 x 55 = 14,000.\nStep 5: To find ou


 17%|█▋        | 5/30 [00:49<04:13, 10.16s/it][A

[[{'generated_text': "\nThoughts:\nLet's first identify the information given in the question:\n\n* Danny found 30 bottle caps at the park.\n* Danny threw away 63 old bottle caps.\n* Now Danny has 42 bottle caps in his collection.\n\nSo, the difference between the number of bottle caps Danny found and the number of bottle caps he threw away is:\n63 - 30 = 33\n\nTherefore, Danny threw away 33 more bottle caps than he found at the park.\n\nAnswer: 33"}], [{'generated_text': "\nThoughts:\nLet's break down the information given in the question:\n\n* There were 15 roses in the vase initially.\n* Jessica threw away 33 roses from the vase.\n* She cut some more new roses from her flower garden to put in the vase.\n* Now, there are 17 roses in the vase.\nFrom this information, we can conclude that Jessica cut more roses than she threw away. To find out exactly how many more roses she cut, we need to subtract the number of roses she threw away from the total number of roses she added.\nAnswer: 1


 20%|██        | 6/30 [00:55<03:36,  9.04s/it][A

[[{'generated_text': "  Sure, I'd be happy to help! Here are my thoughts on the question:\nThoughts:\n1. Bryan has 9 books in each of his 10 bookshelves, so he has a total of 9 x 10 = 90 books.\n2. Bryan also has 46 magazines in each of his 10 bookshelves, so he has a total of 46 x 10 = 460 magazines.\n3. To find the total number of magazines Bryan has, we need to add the number of magazines in each bookshelf: 460.\nAnswer: 460\n\nSo, Bryan has a total of 460 magazines."}], [{'generated_text': '\nThoughts:\n\n1. Josh had 19 marbles in his collection to start with.\n2. He lost 11 marbles, so he has 19 - 11 = 8 marbles left.\n3. He found 5 new marbles, so he has 8 + 5 = 13 marbles now.\nAnswer: 13'}], [{'generated_text': "\nThoughts:\nLet's break this question down step by step:\n\nDay 1: The ring toss game made $X dollars.\nDay 2: The ring toss game made $X dollars.\nDay 3: The ring toss game made $X dollars.\nDay 4: The ring toss game made $X dollars.\nDay 5: The ring toss game made $X


 23%|██▎       | 7/30 [01:02<03:07,  8.15s/it][A

[[{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n* There are 635 girls in the school.\n* There are 510 more boys than girls.\nSo, the total number of boys in the school is:\n635 + 510 =?\nAnswer: 1145"}], [{'generated_text': "\nThoughts:\n\n1. Let's assume that the Ferris wheel has already reached its maximum capacity of 16 people.\n2. Each seat can hold 14 people, so the total capacity of the Ferris wheel is 16 x 14 = 224 people.\n3. If there are already 16 people on the wheel, that means there are 224 - 16 = 208 people who can still join the wheel.\nAnswer: 208"}], [{'generated_text': "\nThoughts:\n\n1. Let's start by identifying the information given in the question:\n\t* The book has 193 pages.\n\t* The book is divided into 15 chapters.\n\t* It took Frank 660 days to finish the book.\n2. To find out how many chapters Frank read per day, we need to divide the number of chapters by the number of days it took him to finish the book:


 27%|██▋       | 8/30 [01:10<03:03,  8.33s/it][A

[[{'generated_text': "\nThoughts:\n\n1. Let's count the number of storks and birds initially: 6 storks + 2 birds = 8.\n2. Now, 3 more birds have come to join them, so the total number of birds is 8 + 3 = 11.\n3. Therefore, there are 6 storks - 3 birds = 3 more storks than birds sitting on the fence.\nAnswer: 3."}], [{'generated_text': "\nThoughts:\n\nLet's start by using the information given in the question:\n\n* Paul had 136 books initially.\n* He sold some books in a garage sale, leaving him with 27 books.\n\nSo, the number of books Paul sold is:\n\n136 - 27 = 109\n\nTherefore, Paul sold 109 books in the garage sale.\n\nAnswer: 109"}], [{'generated_text': "\nThoughts:\n\n1. Let's first calculate the total number of seats on the Ferris wheel: 3 small seats + 7 large seats = 10 seats.\n2. Each small seat can hold 16 people, so the total number of people who can ride the Ferris wheel on small seats is 16 x 3 = 48 people.\n3. Now, let's calculate the number of people who can ride the Fe


 30%|███       | 9/30 [01:26<03:40, 10.49s/it][A

[[{'generated_text': '\nThoughts:\n\n1. Danny already has 21 bottle caps in his collection.\n2. He found some more bottle caps at the park.\n3. The total number of bottle caps he has now is 53.\n4. To find out how many bottle caps he found at the park, we need to subtract the number of bottle caps he already had (21) from the total number of bottle caps he has now (53).\nAnswer: 32'}], [{'generated_text': "  Sure, I'd be happy to help! Here are my thoughts on the question:\nThoughts:\nStep 1: Understand the question\nThe question asks how many sacks of oranges they will have after 51 days of harvest. We are given that they harvest 74 sacks per day and discard 71 of them.\nStep 2: Identify the relevant information\nWe need to identify the number of days they will be harvesting oranges. In this case, it is 51 days.\nStep 3: Calculate the total number of sacks of oranges harvested\nWe know that they harvest 74 sacks per day, so the total number of sacks of oranges harvested in 51 days is:


 33%|███▎      | 10/30 [01:37<03:35, 10.75s/it][A

[[{'generated_text': "\nThoughts:\n\n* Let's start by identifying the information given in the question:\n\t+ Debby drank 109 water bottles per day.\n\t+ The bottles lasted for 74 days.\n* Now, let's think about how we can use this information to find the number of bottles Debby bought:\n\t+ If Debby drank 109 bottles per day and the bottles lasted for 74 days, then she must have bought a total of 74 x 109 = 7960 bottles.\n* So, the answer is 7960 bottles.\nAnswer: 7960"}], [{'generated_text': "\nThoughts:\nLet's first identify the information given in the question:\n* There are 11 different books in the 'crazy silly school' series.\n* There are 17 different movies in the 'crazy silly school' series.\n* If you read 7 of the books, you have read 7 books.\n* If you watched 21 of the movies, you have watched 21 movies.\nNow, let's calculate the difference between the number of movies you watched and the number of books you read:\nMovies watched - Books read = 21 - 7 = 14\nSo, you have rea


 37%|███▋      | 11/30 [01:45<03:07,  9.86s/it][A

[[{'generated_text': "  Sure, I'd be happy to help! Here are my thoughts on the question:\nThoughts:\nLet's start by using the information given in the question to find the total number of sacks of oranges harvested per day:\n44 sacks of ripe oranges + 25 sacks of unripe oranges = 69 sacks of oranges per day\nNow, let's find the difference between the number of ripe oranges harvested and the number of unripe oranges harvested:\nNumber of ripe oranges harvested per day - Number of unripe oranges harvested per day = 44 - 25 = 19\nSo, there are 19 more sacks of ripe oranges harvested than unripe oranges harvested per day.\nAnswer: 19\n\nI hope this helps! Let me know if you have any questions."}], [{'generated_text': '\nThoughts:\n\n1. Paul had 65 crayons at the beginning of the school year.\n2. He gave 213 crayons to his friends during the school year.\n3. He lost 16 crayons.\n\nSo, the total number of crayons that Paul had at the end of the school year is:\n65 + 213 - 16 = 294\n\nAnswer


 40%|████      | 12/30 [01:52<02:43,  9.10s/it][A

[[{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n* The number of kids who stay home during summer break in Lawrence county is 907611.\n* The number of kids who go to camp is 455682.\nWe can start by dividing the number of kids who go to camp by the number of kids who stay home to find out the percentage of kids who go to camp:\n455682 / 907611 = 0.05\nNow, we can multiply the percentage by 100 to find out the actual number of kids who go to camp:\n0.05 x 100 = 5000\nSo, there are approximately 5000 kids who go to camp in Lawrence county.\nAnswer: 5000"}], [{'generated_text': '\nThoughts:\n\n1. Danny had 12 bottle caps in his collection to start with.\n2. He found 53 bottle caps at the park.\n3. So, the total number of bottle caps he has now is 12 + 53 = 65 bottle caps.\nAnswer: 65'}], [{'generated_text': '\nThoughts:\n\n1. Randy has 36 blocks to build a tower.\n2. If he had 59 blocks, he would have 23 blocks left (59 - 36 = 23).\nAns


 43%|████▎     | 13/30 [01:59<02:23,  8.47s/it][A

[[{'generated_text': "\nThoughts:\n\n1. Let's count the number of action figures Jerry had initially: 2\n2. Jerry added 4 more action figures to the shelf, so the total number of action figures now is: 2 + 4 = 6\n3. How many books did Jerry have initially? 10\n4. How many books did Jerry add? 0 (since the question doesn't mention any books being removed)\n5. Therefore, the number of books on the shelf now is: 10 + 0 = 10\nAnswer: 10"}], [{'generated_text': "  Sure, I'd be happy to help! Here are my thoughts on the question:\nThoughts:\n1. Mary has already added 10 cups of sugar to the recipe, so she needs to add 1 cup more.\n2. The recipe calls for 9 cups of flour, and Mary has already added 12 cups. So, she needs to subtract 3 cups of flour.\n3. To find out how many more cups of sugar Mary needs to add, we need to subtract the number of cups of flour she needs to subtract from the total number of cups of sugar called for in the recipe.\n4. The recipe calls for 11 cups of sugar, and Ma


 47%|████▋     | 14/30 [02:13<02:40, 10.02s/it][A

[[{'generated_text': '\nThoughts:\n\n1. Rebecca has a total of eggs.\n2. She wants to split them into groups of 11 eggs each.\n3. Each group will have 2 eggs, so the total number of eggs in each group is 2.\n4. To find out how many eggs Rebecca has, we need to divide the total number of eggs by the number of groups she wants to make.\nAnswer: 22 eggs.'}], [{'generated_text': "\nThoughts:\n\n1. Let's assume that the number of bird families that migrated for the winter is equal to the number of bird families that remained near the mountain.\n2. Since 27 bird families flew away for the winter, the number of bird families that remained near the mountain is 41 - 27 = 14.\nAnswer: 14"}], [{'generated_text': "\nThoughts:\nLet's start by using the information given in the question to find the distance the frog jumped. We know that the grasshopper jumped 39 inches and the frog jumped 19 inches farther than the grasshopper, so the frog jumped 39 + 19 = 58 inches.\nNow, we know that the mouse jum


 50%|█████     | 15/30 [02:21<02:21,  9.43s/it][A

[[{'generated_text': "\nThoughts:\nLet's see, if there were initially 11 people on the train, and 8 more people got on, then the total number of people on the train now is... (counting on my fingers)...11 + 8 = 19 people! Yes, that's right! There are 19 people on the train now.\nAnswer: 19"}], [{'generated_text': "\nThoughts:\n\n1. Let's start by finding the total number of pages in the first chapter. We know it's 66 pages long, so the total number of pages is 66.\n2. Next, let's find the total number of pages in the second chapter. We know it's 35 pages long, so the total number of pages is now 66 + 35 = 101.\n3. Finally, let's find the total number of pages in the third chapter. We know it's 24 pages long, so the total number of pages is now 101 + 24 = 125.\nAnswer: 125 pages."}], [{'generated_text': "\nThoughts:\n\n1. Let's start by using the information given in the question: the chef had 43 apples initially and after making some pies, he had 2 apples left.\n2. To find out how many


 53%|█████▎    | 16/30 [02:29<02:07,  9.10s/it][A

[[{'generated_text': "\nThoughts:\n\n1. Let's start by identifying the information given in the question:\n\t* Baker made 155 cakes.\n\t* Baker's friend bought 140 cakes from him.\n2. Now, let's think about how much cakes Baker would still have after his friend bought some:\n\t* If Baker made 155 cakes and his friend bought 140, that means Baker has 15 - 140 = -125 cakes left.\n\t* Wait, that can't be right! Baker can't have negative cakes! Let's try again.\n\t* If Baker made 155 cakes and his friend bought 140, that means Baker has 155 - 140 = 15 cakes left.\n3. Okay, so Baker has 15 cakes left after his friend bought some.\nAnswer: 15"}], [{'generated_text': '\nThoughts:\n\n1. Bobby initially had 36 pieces of candy.\n2. He ate 17 pieces of candy, so he has 36 - 17 = 19 pieces of candy left.\n3. Then he ate 15 more pieces of candy, so he has 19 - 15 = 4 pieces of candy left.\nAnswer: 4'}], [{'generated_text': "\nThoughts:\nLet's start by using the information given in the question: Ja


 57%|█████▋    | 17/30 [02:38<01:55,  8.91s/it][A

[[{'generated_text': "\nThoughts:\nLet's start by using the information given in the question to find out how many crayons Paul had at the end of the school year. We know that he had 521 crayons initially and that he didn't lose any erasers. So, the number of crayons he had at the end of the school year is:\n521 - 66 = 455\nNow, we need to find out how many crayons Paul had at the beginning of the school year. We are told that he had 154 crayons left at the end of the school year, so the number of crayons he had at the beginning of the school year is:\n455 - 154 = 301\nTherefore, Paul had 301 crayons at the beginning of the school year.\nAnswer: 301"}], [{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n* The pickers fill 8 drums of grapes per day.\n* There are 36 pickers.\n* The total number of drums of grapes to be filled is 240.\nNow, let's think step by step how many days it will take to fill 240 drums of grapes:\nStep 1: Divide the


 60%|██████    | 18/30 [02:45<01:42,  8.55s/it][A

[[{'generated_text': "\nThoughts:\nLet's start by using the information given in the question: David did 44 push-ups in gym class today. David did 9 more push-ups than Zachary.\nSo, Zachary did:\n44 + 9 = 53 push-ups\n\nAnswer: 53"}], [{'generated_text': '\nThoughts:\n\n1. Kelly has 80 Nintendo games initially.\n2. She found 31 more Nintendo games, so now she has 80 + 31 = 111 games.\n3. To find out how many games Kelly needs to give away to have 6 games left, we need to subtract the number of games she has already given away from the total number of games she has.\n4. So, Kelly needs to give away 111 - 6 = 105 games.\nAnswer: 105'}], [{'generated_text': "\nThoughts:\n\n1. Let's start by identifying the number of girls in the school: 315 girls x 1 = 315\n2. Now, let's find the number of boys in the school: 309 boys x 1 = 309\n3. Next, let's find the total number of students in the school: 315 + 309 = 624\n4. Now, let's find the number of teachers in the school: 772 teachers x 1 = 772\n


 63%|██████▎   | 19/30 [02:55<01:37,  8.86s/it][A

[[{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n* David did 40 more push-ups than Zachary.\n* David did 17 less crunches than Zachary.\n* Zachary did 34 push-ups and 62 crunches.\nFrom these statements, we can conclude that David did:\n* 34 push-ups (since Zachary did 34)\n* 40 + 17 = 57 crunches (since David did 40 more crunches than Zachary)\nTherefore, David did 57 crunches in gym class today.\nAnswer: 57"}], [{'generated_text': "\nThoughts:\n\n1. Let's start by counting the total number of eggs Rebecca has: 9 eggs.\n2. Now, let's count the number of bananas Rebecca has: 99 bananas.\n3. Next, let's count the number of marbles Rebecca has: 27 marbles.\n4. Now, let's divide the total number of eggs by 3 to find out how many groups Rebecca can create: 9 eggs / 3 = 3 groups.\n5. So, Rebecca can create 3 groups of eggs.\nAnswer: 3"}], [{'generated_text': "\nThoughts:\nLet's first calculate the total number of sacks of oranges harveste


 67%|██████▋   | 20/30 [03:08<01:40, 10.05s/it][A

[[{'generated_text': "  Sure, I'd be happy to help! Here are my thoughts on the question:\nThoughts:\n1. Let's start by identifying the given information:\n* Mary has already added 12 cups of flour.\n* The recipe calls for 6 cups of flour.\n* The recipe also calls for 11 cups of sugar and 9 cups of salt.\n2. Now, let's think about how much sugar Mary needs to add:\n* If Mary has already added 12 cups of flour, that means she has 6 cups of flour left to add (since the recipe calls for 6 cups of flour).\n* The recipe calls for 11 cups of sugar, so Mary needs to add 5 cups of sugar (since she has 6 cups of flour left to add).\n3. Next, let's think about how much salt Mary needs to add:\n* The recipe calls for 9 cups of salt.\n* Since Mary has already added 12 cups of flour, she has 3 cups of flour left to add (since the recipe calls for 6 cups of flour).\n* Therefore, Mary needs to add 3 cups of salt (since the recipe calls for 9 cups of salt).\n4. Now, let's summarize the information:\n*


 70%|███████   | 21/30 [03:24<01:48, 12.04s/it][A

[[{'generated_text': "  Sure, I'd be happy to help you with that! Here are my thoughts on the question:\nThoughts:\nStep 1: Identify the information given in the question:\n* The Razorback t-shirt shop makes $87 per t-shirt sold.\n* They sold a total of 95 t-shirts during the Arkansas game and the Texas Tech game.\n* They sold 47 t-shirts during the Arkansas game.\nStep 2: Determine the total amount of money made from selling t-shirts during the Arkansas game:\n* Total amount of money made = Number of t-shirts sold x Price per t-shirt\n= 47 t-shirts x $87 per t-shirt\n= $4,099\nStep 3: Find out how much money the Razorback t-shirt shop made from selling t-shirts during the Texas Tech game:\n* Total amount of money made = Number of t-shirts sold x Price per t-shirt\n= 58 t-shirts x $87 per t-shirt\n= $5,262\nStep 4: Find the total amount of money the Razorback t-shirt shop made from selling t-shirts during both games:\n* Total amount of money made = Total amount of money made during the


 73%|███████▎  | 22/30 [03:33<01:27, 10.89s/it][A

[[{'generated_text': "\nThoughts:\nLet's break down the problem step by step:\n\n1. We know that Jack received 4 emails in the morning.\n2. The problem states that Jack received a total of 5 emails in the day.\n3. Since the morning is a part of the day, we can say that the total number of emails Jack received in the day is equal to the number of emails he received in the morning plus the number of emails he received in the afternoon.\n4. So, the number of emails Jack received in the afternoon is equal to 5 - 4 = 1.\nAnswer: 1"}], [{'generated_text': "\nThoughts:\n\n1. Let's count the number of pencils Faye has: 14 rows x 11 pencils in each row = 14 x 11 = 154 pencils.\n2. Let's make sure we don't count each pencil more than once: since each pencil is in 14 rows, we need to divide the total number of pencils by 14 to get the number of pencils in each row: 154 pencils / 14 rows = 11 pencils in each row.\n3. Now, let's count the number of pencils in each row: 11 pencils in each row x 14 r


 77%|███████▋  | 23/30 [03:42<01:13, 10.48s/it][A

[[{'generated_text': "\nThoughts:\nLet's break down the information given in the question:\n\n* Jack received 4 emails in the morning.\n* Jack received 5 emails in the afternoon.\n* Jack received 8 emails in the evening.\n\nSo, the total number of emails Jack received is: 4 + 5 + 8 = 17.\n\nAnswer: 17"}], [{'generated_text': '\nThoughts:\n\n1. Sofia asked 310 students to suggest specific types of food.\n2. 185 students suggested adding mashed potatoes.\n3. Therefore, the number of students who suggested bacon is 185 - 185 = 0.\n\nAnswer: 0'}], [{'generated_text': "\nThoughts:\nLet's break down the information given in the question:\n\n* Lawrence county has 898051 kids in total.\n* During summer break, 629424 kids go to camp.\n* So, the number of kids who stay home is 898051 - 629424 = 268627 kids.\nAnswer: 268627\n\nPlease let me know if you have any further questions!"}], [{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n\n* Jake has 


 80%|████████  | 24/30 [03:52<01:00, 10.16s/it][A

[[{'generated_text': '\nThoughts:\n\n1. Allan brought 5 balloons to the park.\n2. Jake brought 6 balloons to the park.\n3. Jake then bought 3 more balloons at the park.\n4. So, the total number of balloons Jake brought to the park is 6 + 3 = 9 balloons.\nAnswer: 9'}], [{'generated_text': "  Sure, I'd be happy to help you with that! Here are my thoughts on the question:\nThoughts:\n* Let's start by identifying the information given in the question:\n* The person needs a total of 49 g of colors to paint the feathers.\n* The feathers require 288 g of wax to stick them together.\n* The person currently has 260 g of wax.\n* To find out how many grams of wax the person already has, we need to subtract the amount of wax they currently have from the total amount of wax required to stick the feathers together.\n* So, the answer is 288 g - 260 g = 28 g.\nAnswer: 28 g\n\nI hope this helps! Let me know if you have any other questions."}], [{'generated_text': "\nThoughts:\nLet's break down the prob


 83%|████████▎ | 25/30 [03:59<00:46,  9.23s/it][A

[[{'generated_text': "  Sure, I'd be happy to help! Here are my thoughts on the question:\nThoughts:\nLet's start by using the information given in the question to find the total number of sacks of oranges harvested per day:\n5 sacks of ripe oranges per day + 74 sacks of unripe oranges per day = 89 sacks of oranges per day\nNow, let's find the difference between the number of sacks of unripe oranges and ripe oranges harvested per day:\n74 sacks of unripe oranges per day - 5 sacks of ripe oranges per day = 69 sacks of unripe oranges per day\nSo, there are 69 more sacks of unripe oranges than ripe oranges harvested per day.\nAnswer: 69\n\nI hope this helps! Let me know if you have any questions."}], [{'generated_text': '\nThoughts:\n\n1. Jerry had 8 action figures on the shelf to start with.\n2. He added 4 more action figures later, so now he has 8 + 4 = 12 action figures on the shelf.\n3. He removed 5 old action figures, so now he has 12 - 5 = 7 action figures on the shelf.\nAnswer: 7'}


 87%|████████▋ | 26/30 [04:07<00:35,  8.88s/it][A

[[{'generated_text': "\nThoughts:\nLet's first identify the information given in the question:\n\n* There are 8 different movies in the 'crazy silly school' series.\n* There are 21 different books in the series.\n* If you read 7 of the books, that means you have read 7/21 = 1/3 of the books.\n* If you watched 4 of the movies, that means you have watched 4/8 = 1/2 of the movies.\nNow, we need to find out how many movies you still have to watch. To do this, we need to subtract the number of movies you have watched from the total number of movies in the series:\nTotal movies = 8\nMovies watched = 4\nNumber of movies still to watch = 8 - 4 = 4\nSo, you still have 4 movies to watch from the 'crazy silly school' series.\nAnswer: 4"}], [{'generated_text': "\nThoughts:\nLet's start by using the information given in the question to find the distance jumped by the frog:\nFrog jumped 33 inches farther than the grasshopper, so the frog jumped a total of 33 + 24 = 57 inches.\nNow, let's find the di


 90%|█████████ | 27/30 [04:17<00:27,  9.27s/it][A

[[{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n* There are 384 oranges in Philip's collection.\n* There are 192 bananas in Philip's collection.\n* The bananas are organized into 345 groups.\n* The oranges are organized into 16 groups.\nNow, let's think about how we can use this information to find the size of each group of oranges:\n* If there are 16 groups of oranges, and each group contains x oranges, then the total number of oranges in all 16 groups is 16x.\n* Since there are 384 oranges in total, the number of oranges in each group is 16x / 384 = x / 24.\nSo, each group of oranges contains x / 24 oranges.\nAnswer: x / 24."}], [{'generated_text': "\nThoughts:\nLet's break down the problem step by step:\n\nStep 1: We know that Jerry added 7 more action figures to the shelf later. So, the total number of action figures on the shelf now is 10 (initially + added).\nStep 2: We want to find out how many action figures Jerry had initia


 93%|█████████▎| 28/30 [04:27<00:18,  9.45s/it][A

[[{'generated_text': "\nThoughts:\n\n1. Let's start by identifying the information given in the question:\n\t* Mom buys 28 packages.\n\t* Each package contains 56 white t-shirts in total.\n2. To find out how many white t-shirts each package has, we need to divide the total number of t-shirts in each package by the number of packages:\n\t* 56 white t-shirts / 28 packages =?\n3. Now, let's simplify the fraction:\n\t* 56 / 28 = 2\n4. So, each package has 2 white t-shirts.\nAnswer: 2"}], [{'generated_text': '\nThoughts:\n\n1. Dan spent $ 7 on the candy bar.\n2. Dan spent $ 6 on the chocolate.\n3. Dan spent a total of $ 13 on the candy bar and chocolate.\nAnswer: $ 13'}], [{'generated_text': '\nThoughts:\n\n1. Danny had 25 bottle caps in his collection earlier.\n2. He found 32 bottle caps at the park.\n3. To find out how many more bottle caps he found, we need to subtract the number he already had from the number he found.\n4. So, the number of bottle caps he found at the park is 32 - 25 = 


 97%|█████████▋| 29/30 [04:36<00:09,  9.32s/it][A

[[{'generated_text': '\nThoughts:\n\n1. Dan spent $2 to buy a candy bar.\n2. He spent $3 to buy a chocolate.\n3. So, the total amount he spent on both is $2 + $3 = $5.\n4. Therefore, he spent more money to buy the chocolate than he did to buy the candy bar.\nAnswer: $3'}], [{'generated_text': "\nThoughts:\n\n1. Let's start by identifying the information given in the question:\n\t* Tiffany had 7 bags of cans on Monday.\n\t* The next day, she found 12 more bags worth of cans.\n2. Let's convert the number of bags of cans from Monday to a numerical value:\n\t* 7 bags = 7 x 50 = 350 cans (since each bag contains 50 cans)\n3. Now, let's add the number of bags of cans Tiffany found on the next day to the number of bags she had on Monday:\n\t* 350 + 12 = 362 cans\n4. Therefore, Tiffany found 12 more bags of cans on the next day than she had on Monday.\nAnswer: 12"}], [{'generated_text': "\nThoughts:\nLet's start by identifying the information given in the question:\n\n* There are some baskets 


100%|██████████| 30/30 [04:47<00:00,  9.57s/it][A
100%|██████████| 1/1 [04:47<00:00, 287.02s/it]

[[{'generated_text': "\nThoughts:\n\n1. Let's start by counting the total number of bottles of both regular and diet soda: 22 + 61 = 83 bottles.\n2. Now, let's subtract the number of regular soda bottles from the total number of bottles: 83 - 22 = 61 bottles.\n3. Therefore, the grocery store had 61 more bottles of diet soda than regular soda.\nAnswer: 61"}], [{'generated_text': "\nThoughts:\nLet's start by using the information we know:\n* Paul had 551 crayons at some point during the school year.\n* He lost or gave away 551 crayons.\n* He only had 177 crayons left at the end of the school year.\nNow, let's think about how we can use this information to find out how many crayons Paul got for his birthday:\n* If Paul had 551 crayons at some point during the school year, and he lost or gave away 551 crayons, then he must have had 0 crayons at the beginning of the school year.\n* So, the number of crayons Paul got for his birthday is equal to the number of crayons he had at the beginning 




### 2.3 Process Responses for base_model

In [55]:
# Run the answer extractor over every raw base-model completion.
base_extracted_responses = list(map(_extract_answer, base_responses))

# Boolean mask of the completions whose extracted value is the '[invalid]' marker.
base_invalid_mask = np.array(base_extracted_responses) == '[invalid]'
print('invalid rate:', sum(base_invalid_mask) / len(base_extracted_responses))
# Last expression of the cell: the raw completions flagged by the mask.
np.array(base_responses)[base_invalid_mask]
#extracted_responses
#_extract_answer("Answer: $-12.1333,1 and 2,2")

invalid rate: 0.0


array([], dtype='<U1360')

### 2.4 Record the results for Base Model

In [41]:
import pandas as pd

# Gather the base-model evaluation columns, keyed by their CSV header.
base_svamp_columns = {
    'Prompt': base_prompts,
    'Question': base_response_questions,
    'Response': base_responses,
    'extracted_response': base_extracted_responses,
    'Answer': base_response_answers,
}
df_base_svamp = pd.DataFrame(base_svamp_columns)

# Save the table to disk; index=True writes the row index as the first column.
df_base_svamp.to_csv('test_zeroshot_base_svamp.csv', index=True)

### 2.5 Calculate metrics for base model

In [56]:
# Fraction of positions where the extracted base-model answer equals the
# reference answer (see `accuracy` defined in section 1.3).
svamp_base_accuracy = accuracy(
    reference=base_response_answers,
    test=base_extracted_responses,
)

In [57]:
svamp_base_accuracy

0.4533333333333333

# 3. Fine-tuned Model

In [8]:
# Load the fine-tuned model: base weights first, then the PEFT adapter checkpoint.
new_model_name = "/root/autodl-tmp/msc_ml/llama_2/ckpts_svamp/checkpoint_3000"

# NOTE(review): `model_name` is not defined in this cell — it is expected to
# come from an earlier cell; confirm it is set before running.
new_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half-precision weights
    device_map=0,
)

# Wrap the base model with the adapter stored at `new_model_name`, then merge
# the adapter into the base weights so a plain model object is left.
new_model = PeftModel.from_pretrained(new_model, new_model_name)
new_model = new_model.merge_and_unload()

#model.add_adapter(lora_config, adapter_name="adapter_1")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

NameError: name 'base_model' is not defined

In [9]:
# Text-generation pipeline around `new_model`, reusing the existing tokenizer.
# do_sample=False switches sampling off; top_k, top_p, repetition_penalty and
# device are deliberately not passed, so they keep their defaults.
new_generator_kwargs = {
    "max_new_tokens": 512,
    "temperature": 1.0,
    "do_sample": False,
}
new_generator = pipeline(
    "text-generation",
    model=new_model,
    tokenizer=tokenizer,
    **new_generator_kwargs,
)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [50]:
# Accuracy of the checkpoint_3000 model: gold answers are the `reference` and
# the answers extracted from the model output are the `test`, the same roles
# used for the base-model metric (`svamp_base_accuracy`).
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'response_answers' is not defined". The fine-tuned responses
# must be generated, and `response_answers` / `extracted_responses` populated,
# before this cell runs — confirm the intended variable names.
svamp_3000_accuracy = accuracy(reference=response_answers, test=extracted_responses)
svamp_3000_accuracy

NameError: name 'response_answers' is not defined

In [13]:
import rich
from rich.table import Table
# Scratch table: one "prompt" column and two identical rows containing the
# literal text [/INST][INST]; the final expression shows the table in the notebook.
# NOTE(review): rich parses square-bracket tags as console markup — verify
# that this text renders as intended.
rich_table = Table("prompt", title="1", show_lines=True)
for _ in range(2):
    rich_table.add_row("[/INST][INST]")
rich_table

In [44]:
# Load the local SVAMP copy, requesting the "all" split.
ds_all = load_dataset(
    "/root/autodl-tmp/SVAMP",
    split="all",
)

In [45]:
# Load only the "train" split of the same local SVAMP copy.
ds_train = load_dataset(
    "/root/autodl-tmp/SVAMP",
    split="train",
)

In [48]:
# Count the positions at which the train split's 'Body' text equals the
# 'Body' text of the first 700 rows of the "all" split.
train_bodies = np.array(ds_train['Body'])
leading_all_bodies = np.array(ds_all['Body'][:700])
sum(train_bodies == leading_all_bodies)

700

In [61]:
ds_train['Answer']

[145.0,
 19.0,
 3.0,
 198.0,
 63.0,
 322.0,
 30.0,
 192.0,
 26.0,
 17.0,
 1.0,
 20.0,
 4.0,
 2.0,
 7.0,
 70.0,
 345.0,
 266.0,
 54.0,
 807.0,
 3.0,
 12.0,
 23.0,
 31.0,
 1201565.0,
 89.0,
 574.0,
 8.0,
 146.0,
 1.0,
 14.0,
 2.0,
 7.0,
 12.0,
 21.0,
 14.0,
 2.0,
 51.0,
 2.0,
 15.0,
 20.0,
 3.0,
 7.0,
 42.0,
 756.0,
 11.0,
 5.0,
 12.0,
 65.0,
 1.0,
 111.0,
 1.0,
 27.0,
 4.0,
 17.0,
 13.0,
 10.0,
 77.0,
 1092.0,
 5.0,
 7.0,
 45.0,
 10.0,
 52.0,
 2.0,
 6.0,
 16.0,
 469.0,
 12518.0,
 1.0,
 2.0,
 131.0,
 92.0,
 640.0,
 42.0,
 7.0,
 25.0,
 1.0,
 2.0,
 11.0,
 15.0,
 20.0,
 8722.0,
 5.0,
 83.0,
 29.0,
 53.0,
 3.0,
 33.0,
 1.0,
 16.0,
 47.0,
 5590.0,
 1.0,
 1.0,
 49.0,
 9.0,
 78.0,
 34.0,
 7.0,
 24.0,
 34.0,
 10.0,
 14.0,
 10.0,
 15.0,
 621.0,
 1.0,
 2.0,
 14.0,
 8.0,
 2.0,
 14.0,
 89.0,
 20.0,
 7.0,
 1.0,
 16.0,
 21.0,
 3.0,
 5.0,
 9.0,
 76.0,
 28.0,
 30.0,
 19.0,
 2.0,
 22.0,
 7.0,
 42.0,
 14.0,
 24.0,
 8.0,
 27.0,
 31.0,
 2.0,
 21.0,
 4.0,
 80.0,
 3.0,
 2.0,
 7.0,
 1.0,
 4.0,
 4.0,
 32.0,
 60