In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install transformers accelerate optimum

Collecting optimum
  Downloading optimum-1.19.0-py3-none-any.whl.metadata (19 kB)
Collecting coloredlogs (from optimum)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->optimum)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading optimum-1.19.0-py3-none-any.whl (417 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.7/417.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: humanfriendly, coloredlogs, optimum
Successfully installed coloredlogs-15.0.1 humanfrien

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
from datasets import load_dataset
from tqdm import tqdm
from optimum.bettertransformer import BetterTransformer

# from optimum.bettertransformer import BetterTransformer

def last_occurrence_indices(sentence):
    first_index = sentence.rfind("Answer: ")
    
    if first_index != -1:
        last_index = first_index + 8
        return last_index
    else:
        return None
    
def generate_prompt(train_examples, test_example):
    # prompt_template_head = "Your task is to choose the correct completion for a given sentence from 4 available options.\n"
    # prompt_template_example = "Sentence: '{}', Return the most likely ending for this sentence from these 4 options: {}. Answer: {}"
    prompt_template_head = "Your task is to choose the correct completion for a given sentence from 4 available options.\n"
    prompt_template_example = "Sentence: '{}', Return the most likely ending for this sentence from these 4 options:\n A. '{}'\n B. '{}'\n C. '{}'\n D. '{}'\n Answer: {}"

    prompt = prompt_template_head
    for idx in range(4,10):  # Adjusted to display 4 training examples
        sample = train_examples[idx]
        ctx = sample["ctx"]
        endings = sample["endings"]
        label = int(sample["label"])
        answer = endings[label]  # Convert numeric label to alphabetic choice
        answer+="\n"
        # Display choices as A, B, C, D
        prompt += prompt_template_example.format(ctx, endings[0], endings[1], endings[2], endings[3], answer)

    # Display the test example
    label = ""  # Test example doesn't have a label
#     print(type(test_example))
    prompt += prompt_template_example.format(
        test_example["ctx"], test_example["endings"][0], test_example["endings"][1], test_example["endings"][2], test_example["endings"][3], label
    )
    return prompt

def opt_few_shot_pipeline():
    set_seed(42)

    # Load the OPT-350M model and tokenizer
    model_name = "facebook/opt-350m"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to("cuda")

    hellaswag = load_dataset("Rowan/hellaswag")
    
    train_examples = hellaswag["train"]
    test_examples = hellaswag["validation"]
#     test_examples = test_examples.select(range(10))
    # Create empty dataframe to store results
    results_df = pd.DataFrame(columns=["Input Prompt", "Generated Output", "Expected Output"])

    correct_answers = 0
#     test_examples = test_examples.select(range(10))
    total_examples = len(test_examples)

    # Use tqdm to show progress bar
    for test_example in tqdm(test_examples):
        prompt = generate_prompt(train_examples, test_example)
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

        # Generate
        input_length = inputs.data['input_ids'].size(dim=1) + 30
        generate_ids = model.generate(inputs.input_ids, max_length=input_length)
        answer = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

        # Extract generated answer
        end_index = last_occurrence_indices(answer)
        generated_answer = answer[end_index:]

        # Save results to dataframe
        new_row = pd.DataFrame({"Input Prompt": [prompt], "Generated Output": [answer], "Expected Output": [test_example["endings"][int(test_example["label"])]]})
        results_df = pd.concat([results_df, new_row], ignore_index=True)
        # Evaluate the answer 
        if generated_answer == test_example["endings"][int(test_example["label"])]:
            correct_answers += 1

    # Save dataframe as CSV
    results_df.to_csv("opt_few_shot_results_autocomplete.csv", index=False)

    accuracy = correct_answers / total_examples
    print("Accuracy:", accuracy)


if __name__ == "__main__":
    opt_few_shot_pipeline()


2024-04-16 19:01:04.308439: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-16 19:01:04.308542: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-16 19:01:04.444403: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Downloading data: 100%|██████████| 24.4M/24.4M [00:00<00:00, 30.2MB/s]
Downloading data: 100%|██████████| 6.11M/6.11M [00:01<00:00, 5.59MB/s]
Downloading data: 100%|██████████| 6.32M/6.32M [00:00<00:00, 15.2MB/s]


Generating train split:   0%|          | 0/39905 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10003 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10042 [00:00<?, ? examples/s]

100%|██████████| 10042/10042 [2:08:35<00:00,  1.30it/s]


Accuracy: 9.958175662218681e-05
