In [2]:
import os

# Pin the visible GPU *before* torch (or any package that may initialise CUDA)
# is imported: the variable is only honoured if it is set before CUDA start-up.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import torch
import numpy as np
from dsicl.utils import set_seed

### Load the model directly from Hugging Face

In [None]:
# Load the tokenizer and the causal LM directly from the Hugging Face Hub.
from transformers import AutoTokenizer, AutoModelForCausalLM
model_path = "princeton-nlp/Sheared-LLaMA-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="right")
# Request fp16 weights at load time instead of building the model in the
# default fp32 and casting afterwards with .half(); this avoids holding a
# full-precision copy of the weights in host memory while loading.
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16
).to(torch.device('cuda'))

Read data via get_data_reader(task_name) \
Data is organized in JSON format:\
{ \
   'data_info':{\
    'data_name': 'rte_train',\
    'label_space': ['entailment', 'not_entailment'],\
    'columns': ['premise', 'hypothesis', 'label']\
   },\
   'data':[\
    sample1,\
    sample2,\
    ...\
   ]\
}\

In [1]:
from dsicl.data_reader import read_demo_benchmark
# Benchmark task key; it is reused below to look up the matching template/head.
task = 'cr'
# Returns the train and test splits for the task.
# NOTE(review): seed=0 presumably fixes how the splits are sampled — confirm
# against read_demo_benchmark.
trainset, testset = read_demo_benchmark(task=task, seed=0)

In [4]:
# Peek at the first training example to see the fields each record carries.
print(trainset[0])

{'sentence': "it 's not as stylized as a sony or samsung .", 'label': 'negative'}


### Given a template and an optional prompt header, initialize a prompter for generating context

In [6]:
from dsicl.prompter import Prompter
from dsicl.template import DEMO_TEMPLATE, DEMO_HEAD
# Look up the demo template and prompt header registered for the chosen task.
template = DEMO_TEMPLATE[task]
head = DEMO_HEAD[task]

# sep='\n\n' is handed to the Prompter as its separator.
# NOTE(review): presumably inserted between the header and each demonstration
# when a context is rendered — confirm against Prompter.
prompter = Prompter(template=template, head=head, sep='\n\n')

In [7]:
# Render a sample context: the first two training examples as demonstrations,
# followed by the first test example as the query.
print(prompter.generate_context(trainset[:2], testset[0]))

Classify the reviews based on whether their sentiment type is positive or negative.

Review:it 's not as stylized as a sony or samsung .
Sentiment:negative

Review:the 6600 will provide similar service in more developed areas of the states and not as well in more remote areas .
Sentiment:negative

Review:a ) feel cheap -- the plastic is feels like it would break very easily , and it definately wouldnt survive a drop
Sentiment:


In [8]:
from dsicl.ranker import DEmORanker
# Seed before drawing the subset so the demonstration pool is repeatable.
set_seed(0)
# NOTE(review): presumably draws 8 training examples with balance=True evening
# out the label distribution — confirm against DataReader.get_subset.
original_demos = trainset.get_subset(8, balance=True)

In [9]:
# Display the sampled subset object (shown via its default repr).
original_demos

<dsicl.data_reader.DataReader at 0x7fa5b563ef80>

In [10]:
# Build the ranker from the model, tokenizer, prompter and the task's label space.
ranker = DEmORanker(model, tokenizer, prompter, trainset.data_info['label_space'])

In [12]:
# Rank the shared demonstration pool once per test sample; demos_l holds one
# ranking result for each element of testset, in test-set order.
demos_l = [
    ranker.rank(original_demos, test_sample, len(original_demos))
    for test_sample in testset
]

100%|██████████| 100/100 [00:12<00:00,  7.84it/s]


### Initialize an inferencer for inference. Currently, it supports the direct inferencer (which directly obtains the probability on the label using greedy decoding) and the generation inferencer.

In [13]:
from dsicl.inferencer import DirectInferencer

# Candidate labels for the task, taken from the dataset metadata.
labels = trainset.data_info['label_space']

# The inferencer reuses the model, tokenizer and prompter created earlier.
direct_inferencer = DirectInferencer(model, tokenizer, prompter, labels)

### Inference

In [14]:
# Run inference over the test set; demos_l supplies one ranked demonstration
# set per test sample.
y_p = direct_inferencer.batch_infer(demos_l, testset)
# Show the first five predictions as a quick sanity check.
y_p[:5]

100%|██████████| 256/256 [00:08<00:00, 30.29it/s]


['negative', 'positive', 'negative', 'negative', 'positive']

### Evaluate

In [15]:
from dsicl.evaluator import Evaluator
evaluator = Evaluator()
# Gold labels in test-set order, so that they line up index-for-index with y_p.
y_t = [testset[idx]['label'] for idx in range(len(testset))]

In [16]:
# Accuracy of the predictions (first argument) against the gold labels (second).
evaluator.acc_evaluate(y_p, y_t)

0.7421875

In [17]:
# F1 score of the predictions against the gold labels.
# NOTE(review): the averaging scheme is defined inside Evaluator.f1_evaluate
# and is not visible here.
evaluator.f1_evaluate(y_p, y_t)

0.7226162332545312