In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Use CPU for inference

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
from datasets import load_dataset
from tqdm import tqdm

def generate_prompt(train_examples, test_example):
    prompt_template_head = "Your task is to choose the correct completion for a given sentence from 4 available options.\n"
    prompt_template_example = "Sentence: '{}', Return the most likely ending for this sentence from these 4 options: {}. Answer: {}"

    prompt = prompt_template_head
    for idx in range(5):
        sample = train_examples[idx]
        ctx = sample["ctx"]
        endings = sample["endings"]
        answer = endings[int(sample["label"])]

        prompt += prompt_template_example.format(ctx, endings, answer)

    prompt += prompt_template_example.format(
        test_example["ctx"], test_example["endings"], ""
    )
    return prompt

def gemma_few_shot_pipeline():
    set_seed(42)

    # Load the gemma-2b model and tokenizer
    model_name = "google/gemma-2b-it"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    hellaswag = load_dataset("Rowan/hellaswag")
    train_examples = hellaswag["train"]
    test_examples = hellaswag["validation"]
#     for key in hellaswag.keys() :
#         print(key)

    correct_answers = 0
    total_examples = len(test_examples)
#     print(test_examples[3])

    # Use tqdm to show progress bar
    for test_example in tqdm(test_examples):
        prompt = generate_prompt(train_examples, test_example)
        inputs = tokenizer(prompt, return_tensors="pt")

        # Generate
        generate_ids = model.generate(inputs.input_ids, max_length=800)
        answer = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

        # Evaluate the answer
#         print(test_example["label"])
        if answer == test_example["endings"][int(test_example["label"])]:
            correct_answers += 1
#         print(correct_answers)
    accuracy = correct_answers / total_examples
    print("Accuracy:", accuracy)

if __name__ == "__main__":
    gemma_few_shot_pipeline()


## Use GPU for inference

In [5]:
!pip install transformers accelerate optimum

Collecting optimum
  Downloading optimum-1.17.1-py3-none-any.whl.metadata (18 kB)
Collecting coloredlogs (from optimum)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->optimum)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading optimum-1.17.1-py3-none-any.whl (407 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m407.1/407.1 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: humanfriendly, coloredlogs, optimum
Successfully installed coloredlogs-15.0.1 humanfriendly-10.0 opt

In [6]:
from huggingface_hub import login

login("INSERTTOKEN")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [7]:
!pip install -q -U git+https://github.com/huggingface/transformers.git

In [8]:
!pip install "torch>=2.1.1" -U
!pip install aqlm[gpu,cpu]



In [9]:
def last_occurrence_indices(sentence):
    first_index = sentence.rfind("Answer: ")
    
    if first_index != -1:
        last_index = first_index + 8
        return last_index
    else:
        return None
    
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
from datasets import load_dataset
from tqdm import tqdm
from optimum.bettertransformer import BetterTransformer

# from optimum.bettertransformer import BetterTransformer


def generate_prompt(train_examples, test_example):
    prompt_template_head = "Your task is to choose the correct completion for a given sentence from 4 available options.\n"
    prompt_template_example = "Sentence: '{}', Return the most likely ending for this sentence from these 4 options:\n{}\n{}\n{}\n{}\nAnswer: {}"

    prompt = prompt_template_head
    for idx in range(5):
        sample = train_examples[idx]
        ctx = sample["ctx"]
        endings = sample["endings"]
        answer = endings[int(sample["label"])]

        prompt += prompt_template_example.format(ctx,endings[0],endings[1],endings[2],endings[3],answer)
        prompt +="\n"
    prompt += prompt_template_example.format(
        test_example["ctx"], test_example["endings"][0], test_example["endings"][1], test_example["endings"][2], test_example["endings"][3], ""
    )
    return prompt

def gemma_few_shot_pipeline():
    set_seed(42)

    # Load the Gemma-2b model and tokenizer
    model_name = "google/gemma-2b-it"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to("cuda")
    # convert the model to BetterTransformer
    model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", device_map="auto")

# input_text = "Write me a poem about Machine Learning."
    hellaswag = load_dataset("Rowan/hellaswag")
    train_examples = hellaswag["train"]
    test_examples = hellaswag["validation"]
#     for key in hellaswag.keys() :
#         print(key)

    correct_answers = 0
    total_examples = len(test_examples)
#     print(test_examples[3])

    # Use tqdm to show progress bar
    ctr = 0
    for test_example in tqdm(test_examples):
        prompt = generate_prompt(train_examples, test_example)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

        # Generate
#         print("Prompt Length: ",inputs.data['input_ids'].size(dim=1),"\n")
        input_length = inputs.data['input_ids'].size(dim=1) + 30
        generate_ids = model.generate(inputs.input_ids, max_length = input_length)
        answer = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
#         print(answer)
        end_index = last_occurrence_indices(answer)
#         print("GENERATED ANSWER:",answer[end_index:])
#         print("ACTUAL ANSWER:",test_example["endings"][int(test_example["label"])])

        # Evaluate the answer
#         print(test_example["label"])
        if answer[end_index:] == test_example["endings"][int(test_example["label"])]:
            correct_answers += 1
        if ctr % 1000 == 0:
            print("Correct Answers:", correct_answers)
        ctr+=1
    accuracy = correct_answers / total_examples
    print("Accuracy:", accuracy)


if __name__ == "__main__":
    gemma_few_shot_pipeline()

2024-02-27 22:35:38.401557: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-27 22:35:38.401676: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-27 22:35:38.534066: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


tokenizer_config.json:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/888 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/4.36k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.53k [00:00<?, ?B/s]

Downloading and preparing dataset hellaswag/default (download: 68.18 MiB, generated: 62.18 MiB, post-processed: Unknown size, total: 130.36 MiB) to /root/.cache/huggingface/datasets/Rowan___hellaswag/default/0.1.0/512a66dd8b1b1643ab4a48aa4f150d04c91680da6a4096498a5e5f799623d5ae...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/12.1M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.04M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.14M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/39905 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10003 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10042 [00:00<?, ? examples/s]

Dataset hellaswag downloaded and prepared to /root/.cache/huggingface/datasets/Rowan___hellaswag/default/0.1.0/512a66dd8b1b1643ab4a48aa4f150d04c91680da6a4096498a5e5f799623d5ae. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 1/10042 [00:02<7:33:09,  2.71s/it]

Correct Answers: 0


 10%|▉         | 1001/10042 [17:04<2:23:22,  1.05it/s]

Correct Answers: 20


 20%|█▉        | 2001/10042 [33:45<2:07:41,  1.05it/s]

Correct Answers: 40


 30%|██▉       | 3001/10042 [50:46<1:55:05,  1.02it/s]

Correct Answers: 61


 40%|███▉      | 4001/10042 [1:12:57<2:23:14,  1.42s/it]

Correct Answers: 69


 50%|████▉     | 5001/10042 [1:37:03<1:52:43,  1.34s/it]

Correct Answers: 74


 60%|█████▉    | 6001/10042 [2:01:09<1:32:09,  1.37s/it]

Correct Answers: 78


 70%|██████▉   | 7001/10042 [2:25:16<1:14:23,  1.47s/it]

Correct Answers: 84


 80%|███████▉  | 8001/10042 [2:49:23<50:42,  1.49s/it]  

Correct Answers: 90


 90%|████████▉ | 9001/10042 [3:13:25<25:55,  1.49s/it]

Correct Answers: 98


100%|█████████▉| 10001/10042 [3:37:13<00:58,  1.42s/it]

Correct Answers: 104


100%|██████████| 10042/10042 [3:38:13<00:00,  1.30s/it]

Accuracy: 0.010356502688707428





In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
from datasets import load_dataset
from tqdm import tqdm
from optimum.bettertransformer import BetterTransformer

# from optimum.bettertransformer import BetterTransformer


def generate_prompt(train_examples, test_example):
    # prompt_template_head = "Your task is to choose the correct completion for a given sentence from 4 available options.\n"
    # prompt_template_example = "Sentence: '{}', Return the most likely ending for this sentence from these 4 options: {}. Answer: {}"
    prompt_template_head = "Your task is to choose the correct completion for a given sentence from 4 available options.\n"
    prompt_template_example = "Sentence: '{}', Return the most likely ending for this sentence from these 4 options:\n A. '{}'\n B. '{}'\n C. '{}'\n D. '{}'\n Answer: {}"

    prompt = prompt_template_head
    for idx in range(4,10):  # Adjusted to display 4 training examples
        sample = train_examples[idx]
        ctx = sample["ctx"]
        endings = sample["endings"]
        label = int(sample["label"])
        answer = chr(ord('A') + label)  # Convert numeric label to alphabetic choice
        answer+="\n"
        # Display choices as A, B, C, D
        prompt += prompt_template_example.format(ctx, endings[0], endings[1], endings[2], endings[3], answer)

    # Display the test example
    label = ""  # Test example doesn't have a label
    prompt += prompt_template_example.format(
        test_example["ctx"], test_example["endings"][0], test_example["endings"][1], test_example["endings"][2], test_example["endings"][3], label
    )
    return prompt

def gemma_few_shot_pipeline():
    set_seed(42)

    # Load the gemma-2b model and tokenizer
    model_name = "google/gemma-2b-it"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to("cuda")
    # convert the model to BetterTransformer
    model = BetterTransformer.transform(model)

    hellaswag = load_dataset("Rowan/hellaswag")
    train_examples = hellaswag["train"]
    test_examples = hellaswag["validation"]
#     for key in hellaswag.keys() :
#         print(key)

    correct_answers = 0
    total_examples = len(test_examples)
#     print(test_examples[3])

    # Use tqdm to show progress bar
    ctr = 0
    for test_example in tqdm(test_examples):
        prompt = generate_prompt(train_examples, test_example)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

        # Generate
        generate_ids = model.generate(inputs.input_ids, max_length=700)
        answer = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        print(answer)
        # Evaluate the answer
#         print(test_example["label"])
        if answer[-2] == test_example["endings"][int(test_example["label"])]:
            correct_answers += 1
#         if ctr % 1000 == 0:
#             print("Correct Answers:", correct_answers)
#         print("Correct Answers:", correct_answers)
        ctr+=1
    accuracy = correct_answers / total_examples
    print("Accuracy:", accuracy)


if __name__ == "__main__":
    gemma_few_shot_pipeline()
