In [1]:
import os

# Expose only GPU 0 to this process. The variable is set before torch (and the
# project package) are imported, so the device mask is guaranteed to be in
# place before anything can initialize CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import numpy as np
import torch

from dsicl.utils import set_seed

### Load the model directly from Hugging Face

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub identifier shared by the tokenizer and the model.
model_path = "princeton-nlp/Sheared-LLaMA-1.3B"

# Tokenizer configured to pad on the right-hand side.
tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="right")

# Load the causal LM, cast it to half precision, then move it onto the GPU.
model = AutoModelForCausalLM.from_pretrained(model_path)
model = model.half()
model = model.to(torch.device('cuda'))

Read data via `get_data_reader(task_name)` (the DEmO benchmark used below is loaded with `read_demo_benchmark`). \
Data is organized in JSON format: \
{ \
   'data_info':{\
    'data_name': 'rte_train',\
    'label_space': ['yes', 'no'],\
    'columns': ['premise', 'hypothesis', 'label']\
   },\
   'data':[\
    sample1,\
    sample2,\
    ...\
   ]\
}

In [2]:
from dsicl.data_reader import read_demo_benchmark
# Task to run; later cells also use it to pick the prompt template and header.
task = 'trec'
# Read the train and test sets for the task from the local DEmO data directory (seed=0).
trainset, testset = read_demo_benchmark(data_path='./DEmO_data', task=task, seed=0)

In [3]:
# Peek at the first training sample to see which fields a record carries.
print(trainset[0])

{'sentence': 'How did serfdom develop in and then leave Russia ?', 'label': 'Description'}


### Given a template and an optional prompt header, initialize a prompter for generating context

In [12]:
from dsicl.prompter import Prompter
from dsicl.template import DEMO_TEMPLATE, DEMO_HEAD

# Look up the template and the prompt header registered for the current task.
template, head = DEMO_TEMPLATE[task], DEMO_HEAD[task]

# sep is presumably the separator placed between the pieces of the generated
# context (header, demonstrations, query) — confirm against Prompter.
prompter = Prompter(
    template=template,
    head=head,
    sep='\n\n',
)

In [None]:
# Preview the context generated from the first two training samples and the first
# test sample (presumably demonstrations first, query second — confirm).
print(prompter.generate_context(trainset[:2], testset[0]))

In [14]:
from dsicl.ranker import DEmORanker
# Call set_seed(0) before drawing the candidate demonstrations.
set_seed(0)
# Take a subset of the training set (size argument 24; balance=True presumably
# balances it across labels — confirm against DataReader.get_subset).
original_demos = trainset.get_subset(24, balance=True)

In [15]:
# Display the subset; the default repr only identifies it as a DataReader object.
original_demos

<dsicl.data_reader.DataReader at 0x7f4ef9e04100>

In [16]:
# Build the demonstration ranker from the model, tokenizer, prompter and the task's label space.
ranker = DEmORanker(model, tokenizer, prompter, trainset.data_info['label_space'])

In [17]:
# Rank the candidate demonstrations separately for every test sample.
# The third argument is the size of the candidate pool (presumably the number
# of demonstrations to return — confirm against DEmORanker.rank).
demos_l = [
    ranker.rank(original_demos, test_sample, len(original_demos))
    for test_sample in testset
]

100%|██████████| 100/100 [00:16<00:00,  6.17it/s]


### Initialize an inferencer. Currently, the direct inferencer (which directly obtains the probabilities of the labels using greedy decoding) and the generation inferencer are supported.

In [18]:
from dsicl.inferencer import DirectInferencer

# Label space of the task, read from the training set's metadata.
labels = trainset.data_info['label_space']

# The inferencer is built from the same model, tokenizer and prompter as the ranker.
direct_inferencer = DirectInferencer(model, tokenizer, prompter, labels)

### Inference

In [19]:
# Run batched inference over the test set with the per-sample demonstrations in demos_l.
y_p = direct_inferencer.batch_infer(demos_l, testset)
# Show the first five predictions.
y_p[:5]

  0%|          | 0/256 [00:00<?, ?it/s]

100%|██████████| 256/256 [00:13<00:00, 19.69it/s]


['no', 'yes', 'no', 'no', 'no']

### Evaluate

In [20]:
from dsicl.evaluator import Evaluator

evaluator = Evaluator()

# Gold labels of the test set, collected by position.
y_t = [testset[idx]['label'] for idx in range(len(testset))]

In [21]:
# Score the predictions (first argument) against the gold labels (second argument) with the accuracy metric.
evaluator.acc_evaluate(y_p, y_t)

0.55859375

In [22]:
# Score the same predictions against the gold labels with the evaluator's F1 metric
# (averaging scheme not visible here — check Evaluator.f1_evaluate).
evaluator.f1_evaluate(y_p, y_t)

0.5580475135589336