In [1]:
!pip show transformers

Name: transformers
Version: 4.39.0.dev0
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /work/pi_mccallum_umass_edu/aparashar_umass_edu/.conda/envs/as01/lib/python3.10/site-packages
Editable project location: /work/pi_dhruveshpate_umass_edu/aparashar_umass_edu/transformers_local
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: 


In [1]:
# Hand-written demonstrations for few-shot prompting. Every entry is a dict
# with a "question" and an "answer"; each answer is a short reasoning chain
# that closes with "So the answer is yes." or "So the answer is no.".
# Note that the first two questions do not carry the "Yes or no: " lead-in
# that the remaining four have.
qa_pairs = [
    dict(question=question_text, answer=answer_text)
    for question_text, answer_text in (
        (
            "Do hamsters provide food for any animals?",
            "Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals. So the answer is yes.",
        ),
        (
            "Could Brooke Shields succeed at University of Pennsylvania?",
            "Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania. So the answer is yes.",
        ),
        (
            "Yes or no: Hydrogen’s atomic number squared exceeds number of Spice Girls?",
            "Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen’s atomic number squared is less than 5. So the answer is no.",
        ),
        (
            "Yes or no: Is it common to see frost during some college commencements?",
            "College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements. So the answer is yes.",
        ),
        (
            "Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?",
            "The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam. So the answer is no.",
        ),
        (
            "Yes or no: Would a pear sink in water?",
            "The density of a pear is about 0.6 g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float. So the answer is no.",
        ),
    )
]


In [2]:
def _strip_question_prefix(question, prefix):
    """Return `question` without a leading `prefix` (matched case-insensitively)."""
    if prefix and question.lower().startswith(prefix.lower()):
        return question[len(prefix):]
    return question


def run_experiments():
    """Load the model, build a few-shot yes/no prompt, and sweep `test` over N and temperature.

    The prompt is a flat list of strings: for each sampled demonstration from
    `qa_pairs` an input line ("Yes or No: <question>") followed by a target
    line ("Answer: <answer>"), and finally a bare input prefix. `test` appends
    the evaluated input and the target prefix to that list for every example.

    `test` is called once for each combination of N in (5, 10) and temperature
    in (0.5, 1.0). Returns None.
    """
    # One identifier for both tokenizer and weights so they cannot drift apart.
    model_name = "google/gemma-2b"
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    model = AutoModelForCausalLM.from_pretrained(model_name).to('cuda')
    # Pad with the EOS token id during generation.
    model.config.pad_token_id = model.config.eos_token_id
    model.generation_config.pad_token_id = model.config.eos_token_id

    # NOTE(review): the evaluation pool is still the WMT14 de-en validation
    # split while the prompt below is a yes/no QA prompt — confirm which
    # dataset this experiment is meant to run on.
    pool = load_hf_data_set('validation','wmt14','de-en')['translation']
    data = random.sample(pool, min(200, len(pool)))

    default_fwd_input_prefix = "Yes or No: "
    default_fwd_target_prefix = "Answer: "

    shots = random.sample(qa_pairs, min(5, len(qa_pairs)))
    prompt_arr = []
    for shot in shots:
        # Some demo questions already start with "Yes or no: "; drop it so the
        # prefix is not repeated.
        bare_question = _strip_question_prefix(shot["question"], default_fwd_input_prefix)
        prompt_arr.append(default_fwd_input_prefix + bare_question)
        # Demonstrations use the same target prefix the query ends with.
        prompt_arr.append(default_fwd_target_prefix + shot["answer"])
    # Trailing input prefix for the example that `test` appends.
    prompt_arr.append(default_fwd_input_prefix)

    for N in [5, 10]:
        for temp in [0.5, 1.0]:
            # Pass a copy so one run cannot leak prompt state into the next.
            test(list(prompt_arr), model, tokenizer, data, default_fwd_target_prefix, N, temp)


In [3]:
def test(prompt_arr, model, tokenizer, data, default_fwd_target_prefix, N = 10, temp = 0.5):
    output_dict = {}
    for idx, d in enumerate(tqdm(data, desc="Predicting")):
        prompt_arr.append(d['de'])
        prompt_arr.append(default_fwd_target_prefix)
        input_prompt = (' ').join(prompt_arr)
        input_ids = tokenizer(input_prompt, truncation=True, return_tensors="pt").input_ids.to('cuda')
        outputs_arith = model.generate(
            input_ids = input_ids,
            num_return_sequences = N,
            do_sample = True,
            max_new_tokens = 100,
            temperature = temp,
            num_beams = 1,
            use_arithmetic = True
            )
        outputs_sample = model.generate(
            input_ids = input_ids,
            num_return_sequences = N,
            do_sample = True,
            temperature = temp,
            num_beams = 1,
            max_new_tokens = 100,
            use_arithmetic = False
            )
        output_dict[idx] = {}
        output_dict[idx]['gt'] = d['en']
        output_dict[idx]['arithmetic'] = [i.split('English sentence: ')[-1].strip('\n') for i in tokenizer.batch_decode(outputs_arith, skip_special_tokens=True)]
        output_dict[idx]['sampling'] = [i.split('English sentence: ')[-1].strip('\n') for i in tokenizer.batch_decode(outputs_sample, skip_special_tokens=True)]
        output_dict[idx]['bleu_score_arith'], output_dict[idx]['n_gram_div_arith'] = calculate_bleu_and_ngram_diversity(output_dict[idx]['gt'], output_dict[idx]['arithmetic'])
        output_dict[idx]['bleu_score_sample'], output_dict[idx]['n_gram_div_sample'] = calculate_bleu_and_ngram_diversity(output_dict[idx]['gt'], output_dict[idx]['sampling'])
    with open(f'flan_t5_wmt14_de-en_{N}__temp_{temp}_output.json','a+') as f:
        json.dump(output_dict,f)


In [None]:
# Load the evaluation examples and drop every entry whose key matches a
# question used as a few-shot demonstration.
# NOTE(review): 'filepath' is a placeholder — point it at the real JSON file.
with open('filepath', 'r') as f:
    # Assumes the 'examples' entry is a dict keyed by question text — TODO confirm.
    strat_data = json.load(f)['examples']
# Demonstrations are sampled from qa_pairs, so excluding all of its questions
# covers whichever subset ends up in a prompt.
demo_questions = [d['question'] for d in qa_pairs]
strat_data = {k: v for k, v in strat_data.items() if k not in demo_questions}
