In [9]:
import os
import torch
import spacy  # make sure to run python -m spacy download en_core_web_sm
from selfcheckgpt.modeling_selfcheck import SelfCheckNLI

In [10]:
# Device selection for PyTorch.
# Expose only GPU 0 to this process so multi-GPU machines run on a single card.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Preference order: CUDA first, then Apple MPS, then plain CPU.
if torch.cuda.is_available():
    # Pin to the first (and, after the override above, only) visible GPU.
    device_type, device_map = "cuda", "cuda:0"
elif torch.backends.mps.is_available():
    device_type, device_map = "mps", "auto"
else:
    device_type, device_map = "cpu", "auto"

print(f"Using device: {device_type}")

Using device: mps


In [11]:
# Load spaCy's small English pipeline (tokenizer, tagger, parser and NER).
# `nlp` is used further down to split the passage into sentences.
# The model has to be installed beforehand: python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

In [13]:
# Passage produced by the LLM (e.g. a GPT-3 response). It is evaluated one
# sentence at a time, so it is first split into sentences with spaCy.
passage = "Michael Alan Weiner (born March 31, 1942) is an American radio host. He is the host of The Savage Nation."
sentences = [sent.text.strip() for sent in nlp(passage).sents]
print(sentences)
# Expected output:
# ['Michael Alan Weiner (born March 31, 1942) is an American radio host.',
#  'He is the host of The Savage Nation.']

# Other samples generated by the same LLM; the self-check compares each
# sentence of `passage` against these for consistency.
sample1 = "Michael Alan Weiner (born March 31, 1942) is an American radio host. He is the host of The Savage Country."
sample2 = "Michael Alan Weiner (born January 13, 1960) is a Canadian radio host. He works at The New York Times."
sample3 = "Michael Alan Weiner (born March 31, 1942) is an American radio host. He obtained his PhD from MIT."

# Build the NLI-based checker on the device chosen in the setup cell
# ("cuda", "mps" or "cpu"). The first call downloads the model files.
selfcheck_nli = SelfCheckNLI(device=device_type)

# Score every sentence against the sampled passages; the result holds one
# score per entry of `sentences`.
sent_scores_nli = selfcheck_nli.predict(
    sentences=sentences,  # list of sentences
    sampled_passages=[sample1, sample2, sample3],  # list of sampled passages
)
print(sent_scores_nli)
# Expected output for the example above: [0.334014 0.975106]

['Michael Alan Weiner (born March 31, 1942) is an American radio host.', 'He is the host of The Savage Nation.']


tokenizer_config.json:   0%|          | 0.00/400 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/883 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

SelfCheck-NLI initialized to device mps
[0.33401403 0.97510584]
