# Evaluate ICL Methods on Selected Datasets

In [1]:
import pandas as pd
import torch
from datasets import load_dataset, Dataset, DatasetDict
from openicl import DatasetReader, PromptTemplate, TopkRetriever, PPLInferencer, AccEvaluator

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# (Disabled) Build a Hugging Face DatasetDict from the WILDS "amazon" train/test subsets.
# from wilds import get_dataset
# from wilds.common.data_loaders import get_train_loader
# dataset = get_dataset(dataset="amazon", download=True)
# display(dataset)

# for record in dataset.get_subset("train"):
#     print(record)
#     break

# train_dict = {
#     "text": [],
#     "label": [],
#     "reviewer_id": []
# }
# for text, label, reviewer_id in dataset.get_subset("train"):
#     train_dict["text"].append(text)
#     train_dict["label"].append(label.item())
#     train_dict["reviewer_id"].append(reviewer_id.tolist())

# test_dict = {
#     "text": [],
#     "label": [],
#     "reviewer_id": []
# }
# for text, label, reviewer_id in dataset.get_subset("test"):
#     test_dict["text"].append(text)
#     test_dict["label"].append(label.item())
#     test_dict["reviewer_id"].append(reviewer_id.tolist())

# full_dataset = DatasetDict()
# full_dataset["train"] = Dataset.from_pandas(pd.DataFrame(train_dict))
# full_dataset["test"] = Dataset.from_pandas(pd.DataFrame(test_dict))
# full_dataset

# display(full_dataset["train"].to_pandas().head())
# display(full_dataset["test"].to_pandas().head())

# full_dataset

In [7]:
# full_dataset["test"].to_pandas().value_counts("label")

In [66]:
# Load AG News and keep only the first SUBSET_SIZE rows of each split.
SUBSET_SIZE = 1000
dataset = load_dataset("ag_news")
for split_name in ("train", "test"):
    dataset[split_name] = dataset[split_name].select(range(SUBSET_SIZE))

# The reader takes the "text" column as input and the "label" column as output.
data = DatasetReader(dataset, input_columns=["text"], output_column="label")

# One prompt template per label id. '</E>' is passed below as the ice_token and
# '</text>' is the placeholder mapped to the 'text' column.
tp_dict = {
    0: "</E>World (0) Article: </text>",
    1: "</E>Sports (1) Article: </text>",
    2: "</E>Business (2) Article: </text>",
    3: "</E>Sci/Tech (3) Article: </text>",
}
column_token_map = {'text': '</text>'}
template = PromptTemplate(tp_dict, column_token_map, ice_token='</E>')

# TopK retriever: ice_num=2, indexed on the 'train' split and queried with 'test'.
retriever = TopkRetriever(data, ice_num=2, index_split='train', test_split='test')

# Inference step, currently disabled:
# inferencer = PPLInferencer(model_name='distilgpt2')
# predictions = inferencer.inference(retriever, ice_template=template, output_json_filename='sst2')
# print(predictions)

Found cached dataset ag_news (/home/kyle/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)
100%|██████████| 2/2 [00:00<00:00, 21.84it/s]
[2023-04-26 02:17:55,529] [openicl.icl_retriever.icl_topk_retriever] [INFO] Creating index for index set...
  0%|          | 0/1000 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|██████████| 1000/1000 [00:11<00:00, 88.15it/s]


In [7]:
# Query text that the next cell tokenizes, embeds and searches for in the retriever index.
# The string is kept verbatim, including the stray `quot;` fragments, because it is
# passed to the retriever exactly as written.
sequence = "They've caught his eye In quot;helping themselves, quot; Ricky Bryant, Chas Gessner, Michael Jennings, and David Patten did nothing Friday night to make Bill Belichick's decision on what to do with his receivers any easier."
# retriever.tokenizer(sequence, return_tensors="pt")

In [68]:
# Tokenize the query with the retriever's tokenizer. `input_ids` is not used again
# in the visible cells; it is kept so the name stays defined.
query_encoding = retriever.tokenizer(sequence, return_tensors="pt")
input_ids = query_encoding["input_ids"]

# Embed the query as a one-element batch and search the retriever index for 3 hits.
example_embedding = retriever.model.encode([sequence], convert_to_numpy=True)
search_result = retriever.index.search(example_embedding, 3)
distances, indices = search_result

# NOTE(review): the printed values decrease across the three hits, which would fit
# similarity scores rather than distances. Confirm the index metric.
print(f"distances: {distances}")
print(f"indices: {indices}")

distances: [[0.4040242  0.34218064 0.3253032 ]]
indices: [[486 472 870]]


In [69]:
# Print the text and label of each training row returned by the index search.
train_split = dataset["train"]
for index in indices[0]:
    neighbor = train_split[int(index)]  # cast to a plain int before indexing the split
    print(neighbor["text"])
    print(neighbor["label"])

Offense Needs Work There were few offensive highlights during Virginia Tech's first scrimmage of fall practice on Saturday.
1
Throwbacks: Gannon, Collins in good form Rich Gannon , the 2002 NFL MVP who was knocked out of the Raiders' loss to Kansas City last Oct. 20 and had shoulder surgery in November, was 9 for 15 for 69 yards in visiting Oakland's 33-30 exhibition win over the San Francisco 49ers last night.
1
Patriots Sign First-Round Pick Watson (AP) AP - The New England Patriots signed first-round draft pick Benjamin Watson on Monday, ending the tight end's lengthy holdout.
1


## LLaMA 7B Test

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
import torch

LLAMA_CHECKPOINT = "decapoda-research/llama-7b-hf"

# Load the checkpoint with torch_dtype=float16, then move the module to the CUDA device.
device = torch.device("cuda")
model = AutoModelForCausalLM.from_pretrained(
    LLAMA_CHECKPOINT,
    torch_dtype=torch.float16,
)
model = model.to(device)

Loading checkpoint shards: 100%|██████████| 33/33 [00:28<00:00,  1.17it/s]


In [5]:
# Load the tokenizer from the same checkpoint id that the model cell uses.
# LlamaTokenizer is also imported in the model-loading cell; this import repeats it.
# NOTE(review): loading prints a class-mismatch warning (the checkpoint declares
# 'LLaMATokenizer', the calling class is 'LlamaTokenizer'). Check tokenization and
# special-token ids before relying on the results.
from transformers import LlamaTokenizer
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")

Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 25.2MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 2.00/2.00 [00:00<00:00, 20.5kB/s]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [18]:
input_text = "Offense Needs Work There were few offensive highlights during Virginia Tech's first scrimmage"

# Tokenize the prompt and move the tensors to the model's device.
# Named `model_inputs` rather than `input` so the builtin `input()` is not
# shadowed for the rest of the kernel session.
model_inputs = tokenizer(input_text, return_tensors="pt").to(device)

# Beam-search continuation of the prompt, capped at 100 tokens in total.
# Removed from the original call:
#   - temperature=0, top_k=50, top_p=0.95: sampling-only settings. This call never
#     sets do_sample=True, and temperature=0 is outside the documented
#     strictly-positive range.
#     NOTE(review): assumes the checkpoint's generation config does not enable sampling.
#   - decoder_start_token_id=1: only meaningful for encoder-decoder models.
#   - use_cache_for_decoding=None: not a documented `generate` argument.
output = model.generate(
    **model_inputs,
    max_length=100,
    num_beams=5,
    num_return_sequences=1,
    repetition_penalty=1.0,
    length_penalty=1.0,
    no_repeat_ngram_size=3,  # never repeat any 3-gram in the continuation
    # Special-token ids are passed explicitly instead of using the checkpoint's values.
    bos_token_id=1,
    pad_token_id=0,
    eos_token_id=2,
)

tokenizer.decode(output[0])


" ⁇  Offense Needs Work There were few offensive highlights during Virginia Tech's first scrimmage of the preseason. The Hokies' offense struggled to move the ball against the first-team defense, and quarterbacks Logan Thomas and Michael Brewer combined to go 10-for-26 with one touchdown and two interceptions. The offense did not score a touchdown until late in the third quarter, when Thomas"