# Evaluate ICL Methods on Selected Datasets

In [2]:
from datasets import load_dataset
from openicl import DatasetReader, PromptTemplate, TopkRetriever, PPLInferencer, AccEvaluator

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Define a DatasetReader, with specified column names where input and output are stored.
datset = load_dataset("ag_news")
data = DatasetReader(datset, input_columns=["text"], output_column="label")
tp_dict = {
    0: "</E>World (0) Article: </text>",
    1: "</E>Sports (1) Article: </text>",
    2: "</E>Business (2) Article: </text>",
    3: "</E>Sci/Tech (3) Article: </text>",
}

template = PromptTemplate(tp_dict, {'text': '</text>'}, ice_token='</E>')
# display(template.generate_item(dataset[4590], output_field='label'))
# display(template.generate_item(dataset[6174], output_field='label'))
# display(template.generate_item(dataset[2190], output_field='label'))
# display(template.generate_item(dataset[4983], output_field='label'))

# TopK Retriever
retriever = TopkRetriever(data, ice_num=2, index_split='train', test_split='test')

# Define a Inferencer
inferencer = PPLInferencer(model_name='distilgpt2')

# Inference
predictions = inferencer.inference(retriever, ice_template=template, output_json_filename='sst2')
print(predictions)

Found cached dataset ag_news (/home/kyle/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)
100%|██████████| 2/2 [00:00<00:00, 523.31it/s]
[2023-04-25 17:30:37,745] [openicl.icl_retriever.icl_topk_retriever] [INFO] Creating index for index set...
  0%|          | 0/120000 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
100%|██████████| 120000/120000 [21:21<00:00, 93.65it/s] 
[2023-04-25 17:52:02,823] [openicl.icl_retriever.icl_topk_retriever] [INFO] Embedding test set...
  0%|          | 0/7600 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.

[2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 2, 3, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 3, 1, 1, 0, 3, 0, 1, 0, 1, 0, 2, 2, 2, 2, 0, 2, 2, 0, 1, 1, 3, 0, 3, 0, 0, 1, 0, 3, 3, 3, 0, 3, 1, 0, 1, 0, 1, 0, 1, 2, 3, 0, 0, 2, 0, 0, 3, 0, 2, 3, 2, 1, 1, 1, 2, 0, 2, 1, 2, 3, 3, 0, 2, 0, 1, 0, 3, 3, 3, 3, 3, 3, 1, 3, 3, 2, 1, 2, 0, 3, 0, 3, 2, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 2, 1, 1, 1, 0, 1, 0, 2, 2, 2, 1, 1, 0, 1, 2, 3, 0, 0, 2, 2, 2, 0, 0, 3, 3, 2, 3, 0, 2, 3, 1, 3, 1, 2, 1, 1, 3, 2, 0, 3, 0, 1, 3, 3, 0, 0, 0, 2, 2, 0, 1, 2, 1, 2, 3, 3, 0, 1, 1, 1, 2, 1, 3, 1, 0, 1, 1, 1, 3, 2, 2, 3, 1, 0, 0, 3, 1, 2, 2, 0, 0, 2, 0, 1, 1, 2, 3, 2, 1, 2, 1, 0, 2, 2, 1, 0, 3, 2, 2, 3, 3, 3, 0, 2, 0, 1, 2, 2, 3, 2, 3, 3, 3, 1, 0, 2, 3, 0, 3, 1, 1, 1, 1, 1, 0, 2, 3, 1, 3, 3, 1, 2, 3, 2, 1, 1, 3, 1, 0, 0, 2, 0, 1, 1, 3, 3, 0, 1, 3, 0, 2, 1, 2, 1, 2, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 3, 2, 2, 0, 2, 3, 1, 2, 2, 0, 1, 3, 0, 3, 0, 3, 3, 2, 3, 1, 0, 

In [4]:
score = AccEvaluator().score(predictions=predictions, references=data.references)
print(score)

{'accuracy': 0.8913157894736842}


In [5]:
# sst2_dataset = load_dataset('gpt3mix/sst2')

# template = PromptTemplate(template={
#                                         0: 'Positive Movie Review: </text>',
#                                         1: 'Negative Movie Review: </text>' 
#                                     },
#                           column_token_map={'text' : '</text>'} 
#            )

# entry = sst2_dataset['validation'][322]
# display(f'entry:\n{entry}\n')

# # Generate ouput
# output = template.generate_item(entry, output_field='label')
# display(f'output:\n{output}')