In [None]:
from lm_polygraph.estimators import *
from lm_polygraph.utils.model import WhiteboxModel
from lm_polygraph.utils.dataset import Dataset
from lm_polygraph.utils.processor import Logger
from lm_polygraph.utils.manager import UEManager
from lm_polygraph.utils.manager import estimate_uncertainty
from lm_polygraph.ue_metrics import ReversedPairsProportion, PredictionRejectionArea, RiskCoverageCurveAUC
from lm_polygraph.generation_metrics import RougeMetric, BartScoreSeqMetric, ModelScoreSeqMetric, ModelScoreTokenwiseMetric

# Specify Hyperparameters

In [None]:
# Seed and batch size reused by the dataset-loading and splitting calls.
seed = 42
batch_size = 4

# Model identifier and device string forwarded to WhiteboxModel.from_pretrained.
model_name_or_path = "bigscience/bloomz-3b"
device = "cuda:0"

# Relative path of the CSV file handed to Dataset.load.
dataset_name = "../workdir/data/triviaqa.csv"

# Initialize Model

In [None]:
# Build the WhiteboxModel from `model_name_or_path`, forwarding the configured `device`.
model = WhiteboxModel.from_pretrained(model_name_or_path, device=device)

# Train and Eval Datasets

In [None]:
# Size requested from both subsample calls, and the value forwarded as
# `test_size` to the train/test split.
subsample_size = 16
test_size = 0.7

# Load the CSV; 'question' and 'answer' are presumably the input/target
# column names (TODO confirm against Dataset.load).
dataset = Dataset.load(dataset_name, 'question', 'answer', batch_size=batch_size)
dataset.subsample(subsample_size, seed=seed)

# NOTE(review): the meaning of split="eval" (and whether this call also
# changes `dataset` itself) is not visible here -- confirm against
# Dataset.train_test_split.
X_train, X_test, y_train, y_test = dataset.train_test_split(
    test_size=test_size, seed=seed, split="eval"
)

# Wrap the training portion in its own Dataset with the same batch size.
train_dataset = Dataset(x=X_train, y=y_train, batch_size=batch_size)
# NOTE(review): X_train is derived from the already-subsampled dataset, so it
# presumably holds fewer than `subsample_size` rows -- confirm that
# Dataset.subsample accepts a size larger than the dataset.
train_dataset.subsample(subsample_size, seed=seed)

# Metric, UE Metric, and UE Methods

In [None]:
# Uncertainty estimators handed to UEManager.
ue_methods = [
    MaximumSequenceProbability(),
    SemanticEntropy(),
    MahalanobisDistanceSeq("decoder"),
]

# Metrics handed to UEManager as `ue_metrics`.
ue_metrics = [RiskCoverageCurveAUC()]

# Generation metrics handed to UEManager as `metrics`.
metrics = [
    RougeMetric('rougeL'),
    BartScoreSeqMetric('rh'),
]

# Processors handed to UEManager as `loggers`.
loggers = [Logger()]

# Initialize UE Manager

In [None]:
# Bundle the dataset, model, estimators, metrics and loggers into one manager.
# The positional order is significant; the separately built training dataset
# is supplied through `train_data`.
man = UEManager(
    dataset, model, ue_methods, metrics, ue_metrics, loggers,
    train_data=train_dataset,
)

# Compute Results

In [None]:
# Invoke the manager. The returned mapping is keyed by indexable keys whose
# positions 1-3 are read by the printing cell that follows.
results = man()

In [None]:
# Report one line per result entry. Positions 1, 2 and 3 of each key are shown
# as the UE score name, the metric and the UE metric; position 0 is not printed.
# Each value is formatted as a float with three decimals.
for key in results:
    print(f"UE Score: {key[1]}, Metric: {key[2]}, UE Metric: {key[3]}, Score: {results[key]:.3f}")