In [1]:
from transformers import pipeline
import evaluate
import dspy



In [5]:
from dspy.datasets import HotPotQA

# Load a small HotPotQA sample: 20 train / 30 dev examples and no test split.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=30, test_size=0)

# Register the ColBERTv2 endpoint as the retrieval model that dspy.Retrieve uses.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
dspy.settings.configure(rm=colbertv2_wiki17_abstracts)
retrieve = dspy.Retrieve()
p = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

# Query the retriever with the question text. The example object itself is not
# a query string, so passing it directly does not search for the question.
contexts = [retrieve(x['question']).passages for x in dataset.dev]

# The extractive reader only sees the top-ranked passage for each question.
predicted_answers = [p(question=x['question'], context=contexts[i][0])['answer'] for i, x in enumerate(dataset.dev)]
answers = [x['answer'] for x in dataset.dev]

# exact_match is the fraction of predictions identical to their reference string.
acc = evaluate.load('exact_match').compute(references=answers, predictions=predicted_answers)

acc


{'exact_match': 0.0}

In [6]:
# Swap in the TinyRoBERTa SQuAD2 reader and score it on the HotPotQA dev
# examples, reusing the `contexts` and `answers` already held by the kernel.
p = pipeline("question-answering", model="deepset/tinyroberta-squad2")
predicted_answers = [
    p(question=example['question'], context=contexts[idx][0])['answer']
    for idx, example in enumerate(dataset.dev)
]
acc = evaluate.load('exact_match').compute(
    predictions=predicted_answers,
    references=answers,
)
acc

{'exact_match': 0.0}

In [7]:
from dspy.datasets.gsm8k import GSM8K

# Build the GSM8K loader, then keep only a short prefix of each split
# (first 30 train examples, first 25 dev examples) for the cells that follow.
gsm8k = GSM8K()
gsm8k.train, gsm8k.dev = gsm8k.train[:30], gsm8k.dev[:25]

100%|██████████| 7473/7473 [00:00<00:00, 58646.94it/s]
100%|██████████| 1319/1319 [00:00<00:00, 56264.74it/s]


In [10]:
# Evaluate the TinyRoBERTa SQuAD2 reader on the GSM8K dev subset.
p = pipeline("question-answering", model="deepset/tinyroberta-squad2")

# Query the retriever with the question text. The example object itself is not
# a query string, so passing it directly does not search for the question.
contexts = [retrieve(x['question']).passages for x in gsm8k.dev]
answers = [x['answer'] for x in gsm8k.dev]

# The extractive reader only sees the top-ranked passage for each question.
predicted_answers = [p(question=x['question'], context=contexts[i][0])['answer'] for i, x in enumerate(gsm8k.dev)]

# exact_match is the fraction of predictions identical to their reference string.
acc = evaluate.load('exact_match').compute(references=answers, predictions=predicted_answers)
acc

{'exact_match': 0.0}

In [12]:
# Repeat the GSM8K evaluation with the DistilBERT SQuAD reader, reusing the
# `contexts` and `answers` already held by the kernel.
p = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
predicted_answers = [
    p(question=example['question'], context=contexts[idx][0])['answer']
    for idx, example in enumerate(gsm8k.dev)
]
acc = evaluate.load('exact_match').compute(
    predictions=predicted_answers,
    references=answers,
)
acc

{'exact_match': 0.0}

In [23]:
from imdb import Imdb

# Sentiment evaluation on the dev split of the project's Imdb dataset wrapper.
dataset = Imdb()
p = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

answers = [x['answer'] for x in dataset.dev]
predicted_answers = []
count = 0  # number of examples the classifier failed on
for x in dataset.dev:
    try:
        # For a single input string the pipeline returns a one-element list of
        # {'label', 'score'} dicts, so take element 0 before reading the label.
        # truncation=True clips reviews longer than the model's maximum input
        # length instead of letting the forward pass fail on them.
        predicted_answers.append(p(x['text'], truncation=True)[0]['label'])
    except Exception:
        # Best effort: keep predictions aligned with `answers` by recording a
        # sentinel that can never match a reference.
        count += 1
        predicted_answers.append('-1')

if count:
    print(f"{count} of {len(predicted_answers)} examples failed and were scored as '-1'")

# NOTE(review): exact_match compares strings verbatim. Confirm that the label
# strings this model emits use the same vocabulary and casing as x['answer'];
# otherwise map them before scoring.
acc = evaluate.load('exact_match').compute(references=answers, predictions=predicted_answers)
acc

100%|██████████| 25000/25000 [00:00<00:00, 72613.60it/s]
100%|██████████| 25000/25000 [00:00<00:00, 71181.35it/s]
Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


{'exact_match': 0.0}

In [24]:
# Repeat the IMDB sentiment evaluation with the SST-2 fine-tuned DistilBERT
# classifier, reusing the `dataset` and `answers` already held by the kernel.
p = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

predicted_answers = []
count = 0  # number of examples the classifier failed on
for x in dataset.dev:
    try:
        # For a single input string the pipeline returns a one-element list of
        # {'label', 'score'} dicts, so take element 0 before reading the label.
        # truncation=True clips reviews longer than the model's maximum input
        # length instead of letting the forward pass fail on them.
        predicted_answers.append(p(x['text'], truncation=True)[0]['label'])
    except Exception:
        # Best effort: keep predictions aligned with `answers` by recording a
        # sentinel that can never match a reference.
        count += 1
        predicted_answers.append('-1')

if count:
    print(f"{count} of {len(predicted_answers)} examples failed and were scored as '-1'")

# NOTE(review): exact_match compares strings verbatim. Confirm that the label
# strings this model emits use the same vocabulary and casing as x['answer'];
# otherwise map them before scoring.
acc = evaluate.load('exact_match').compute(references=answers, predictions=predicted_answers)
acc

Token indices sequence length is longer than the specified maximum sequence length for this model (1388 > 512). Running this sequence through the model will result in indexing errors


{'exact_match': 0.0}