# Evaluate ICL Methods on Selected Datasets

In [2]:
import pandas as pd
import torch
from datasets import load_dataset, Dataset, DatasetDict
from openicl import DatasetReader, PromptTemplate, TopkRetriever, PPLInferencer, AccEvaluator

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# from wilds import get_dataset
# from wilds.common.data_loaders import get_train_loader
# dataset = get_dataset(dataset="amazon", download=True)
# display(dataset)

# for record in dataset.get_subset("train"):
#     print(record)
#     break

# train_dict = {
#     "text": [],
#     "label": [],
#     "reviewer_id": []
# }
# for text, label, reviewer_id in dataset.get_subset("train"):
#     train_dict["text"].append(text)
#     train_dict["label"].append(label.item())
#     train_dict["reviewer_id"].append(reviewer_id.tolist())

# test_dict = {
#     "text": [],
#     "label": [],
#     "reviewer_id": []
# }
# for text, label, reviewer_id in dataset.get_subset("test"):
#     test_dict["text"].append(text)
#     test_dict["label"].append(label.item())
#     test_dict["reviewer_id"].append(reviewer_id.tolist())

# full_dataset = DatasetDict()
# full_dataset["train"] = Dataset.from_pandas(pd.DataFrame(train_dict))
# full_dataset["test"] = Dataset.from_pandas(pd.DataFrame(test_dict))
# full_dataset

# display(full_dataset["train"].to_pandas().head())
# display(full_dataset["test"].to_pandas().head())

# full_dataset

In [4]:
# full_dataset["test"].to_pandas().value_counts("label")

In [5]:
# Load AG News and keep only the first 1000 rows of the train and test splits.
dataset = load_dataset("ag_news")
for split_name in ("train", "test"):
    dataset[split_name] = dataset[split_name].select(range(1000))

# The DatasetReader records which column holds the input and which holds the output.
data = DatasetReader(dataset, input_columns=["text"], output_column="label")

# One prompt template per class id. `</E>` is the in-context-example token and
# `</text>` is the placeholder mapped to the `text` column.
tp_dict = {
    class_id: f"</E>{class_name} ({class_id}) Article: </text>"
    for class_id, class_name in enumerate(("World", "Sports", "Business", "Sci/Tech"))
}
template = PromptTemplate(tp_dict, {'text': '</text>'}, ice_token='</E>')

# TopK retriever: 2 in-context examples per test item, indexed over the train split.
retriever = TopkRetriever(data, ice_num=2, index_split='train', test_split='test')

# # Define an Inferencer
# inferencer = PPLInferencer(model_name='distilgpt2')

# # Inference
# predictions = inferencer.inference(retriever, ice_template=template, output_json_filename='sst2')
# print(predictions)

Found cached dataset ag_news (/home/kyle/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)
100%|██████████| 2/2 [00:00<00:00, 26.30it/s]
[2023-04-26 21:51:30,191] [openicl.icl_retriever.icl_topk_retriever] [INFO] Creating index for index set...
  0%|          | 0/1000 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|██████████| 1000/1000 [00:27<00:00, 36.13it/s]


In [6]:

# Test entry that we want to edit (row 999 of the test split).
input_entry = dataset["test"][999]
display(input_entry)

# Embed the entry with the retriever's encoder, then query the retriever's
# index for its 3 closest stored vectors.
example_embedding = retriever.model.encode([input_entry["text"]], convert_to_numpy=True)
distances, indices = retriever.index.search(example_embedding, 3)

print(
    f"embedding shape: {example_embedding.shape}",
    f"distances: {distances}",
    f"indices: {indices}",
    sep="\n",
)

# Print the text and label of every retrieved train example, each preceded by a blank line.
for neighbor_id in indices[0]:
    neighbor = dataset["train"][int(neighbor_id)]
    print()
    print(neighbor["text"])
    print(neighbor["label"])

{'text': 'Gunmen ambush Chalabi #39;s convoy, wound 2 BAGHDAD - Gunmen ambushed the convoy of former Iraqi governing council president Ahmed Chalabi on Wednesday, wounding two of his bodyguards, aides said.',
 'label': 0}

embedding shape: (1, 768)
distances: [[0.5458629 0.535096  0.52293  ]]
indices: [[989 505 569]]

2 More Turkish Men Taken Hostage in Iraq (AP) AP - Armed assailants attacked a convoy of Turkish trucks delivering supplies to U.S. forces in Iraq and took two Turkish drivers hostage, their company said Monday.
0

Mortars Mark Opening of Iraqi Political Conference  BAGHDAD (Reuters) - Insurgents fired mortars at a meeting  where Iraqi leaders met to pick an interim national assembly  Sunday, killing at least two people in a grim reminder of the  country's tortured path toward democracy.
0

Mortars Mark Opening of Iraqi Political Conference (Reuters) Reuters - Insurgents fired mortars at a meeting\where Iraqi leaders met to pick an interim national assembly\Sunday, killing at least two people in a grim reminder of the\country's tortuous path toward democracy.
0


In [7]:
# template.generate_label_prompt_item(entry={'text': input_entry["text"]}, label=input_entry["label"])
# Render the selected test entry as an in-context example string, using the
# template registered for the entry's own label.
template.generate_ice_item(input_entry, input_entry["label"])

'World (0) Article: Gunmen ambush Chalabi #39;s convoy, wound 2 BAGHDAD - Gunmen ambushed the convoy of former Iraqi governing council president Ahmed Chalabi on Wednesday, wounding two of his bodyguards, aides said.'

In [8]:
# Dimensionality of the sentence embeddings produced by the retriever's encoder.
retriever.model.get_sentence_embedding_dimension()

768

In [56]:
from faiss import IndexIDMap, IndexFlatIP
import numpy as np

# Side index of "edits": the key is the sentence embedding of the edited text
# and the FAISS id is the position of the corresponding record in `edits`.
embedding_dim = retriever.model.get_sentence_embedding_dimension()
edit_index = IndexIDMap(IndexFlatIP(embedding_dim))
edits = []

# Override the label of the selected test entry.
edit_entry = {
    "text": input_entry["text"],
    "label": 3,
}
edits.append(edit_entry)
display(edits)

# Encode the edit text here instead of reusing an embedding computed in an
# earlier cell: if `input_entry` was changed in between, the stored key would
# otherwise belong to a different text than the stored edit.
edit_embedding = retriever.model.encode([edit_entry["text"]], convert_to_numpy=True)

# Use an explicit int64 id array (not the platform default int dtype) and take
# the id from the list position so it always points back to the right edit.
edit_ids = np.array([len(edits) - 1], dtype=np.int64)
edit_index.add_with_ids(edit_embedding, edit_ids)


[{'text': "Fears for T N pension after talks Unions representing workers at Turner   Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.",
  'label': 3}]

In [60]:
# Query the edit index for the 3 best matches of the example embedding.
# Only one vector has been added, so the two unused result slots come back
# with id -1 and a very large negative score.
edit_index.search(example_embedding, 3)

# I can add elements to the index where the key is the embedding and the value is an id that I can use to look up the edit.
# I can't rely on the inferences, but rather must iterate through the test set myself.

(array([[ 9.9999982e-01, -3.4028235e+38, -3.4028235e+38]], dtype=float32),
 array([[ 0, -1, -1]]))

## LLaMA 7B Test

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
import torch

# Use the GPU when one is available; otherwise fall back to the CPU instead of
# failing when the model is moved to a CUDA device that does not exist.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Half precision is used on the GPU as before; on the CPU the weights are kept
# in float32 because float16 CPU kernels are not reliably available.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32
model = AutoModelForCausalLM.from_pretrained("decapoda-research/llama-7b-hf", torch_dtype=model_dtype).to(device)

  from .autonotebook import tqdm as notebook_tqdm
Downloading (…)lve/main/config.json: 100%|██████████| 427/427 [00:00<00:00, 3.56MB/s]
Downloading (…)model.bin.index.json: 100%|██████████| 25.5k/25.5k [00:00<00:00, 54.3MB/s]
Downloading (…)l-00001-of-00033.bin: 100%|██████████| 405M/405M [00:06<00:00, 66.9MB/s]
Downloading (…)l-00002-of-00033.bin: 100%|██████████| 405M/405M [00:05<00:00, 73.5MB/s]
Downloading (…)l-00003-of-00033.bin: 100%|██████████| 405M/405M [00:04<00:00, 94.7MB/s]
Downloading (…)l-00004-of-00033.bin: 100%|██████████| 405M/405M [00:05<00:00, 77.4MB/s]
Downloading (…)l-00005-of-00033.bin: 100%|██████████| 405M/405M [00:05<00:00, 75.0MB/s]
Downloading (…)l-00006-of-00033.bin: 100%|██████████| 405M/405M [00:04<00:00, 93.7MB/s]
Downloading (…)l-00007-of-00033.bin: 100%|██████████| 405M/405M [00:04<00:00, 94.2MB/s]
Downloading (…)l-00008-of-00033.bin: 100%|██████████| 405M/405M [00:05<00:00, 78.2MB/s]
Downloading (…)l-00009-of-00033.bin: 100%|██████████| 405M/405M [00:05

In [2]:
from transformers import LlamaTokenizer
# Load the tokenizer from the same checkpoint name as the model. The checkpoint
# declares its tokenizer class as 'LLaMATokenizer', so transformers emits a
# class-mismatch warning when it is loaded through LlamaTokenizer.
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")

Downloading tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 3.82MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 2.00/2.00 [00:00<00:00, 18.3kB/s]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [3]:
input_text = "Offense Needs Work There were few offensive highlights during Virginia Tech's first scrimmage"

# Named `model_inputs` rather than `input` so the Python builtin `input()` is
# not shadowed for the rest of the kernel session.
model_inputs = tokenizer(input_text, return_tensors="pt").to(device)

# Deterministically generate exactly one new token. Greedy decoding is
# requested with do_sample=False; temperature=0 is not a valid sampling
# temperature and is not how sampling is switched off.
output = model.generate(
    **model_inputs,
    max_new_tokens=1,
    num_return_sequences=1,
    do_sample=False,
)

# Decode the prompt plus the single generated token back to text.
tokenizer.decode(output[0])




" ⁇  Offense Needs Work There were few offensive highlights during Virginia Tech's first scrimmage of"