In [1]:
from IPython.core.debugger import set_trace

### Define the dataset managers, UE method and quality metric to consider

In [2]:
from pathlib import Path

# Directory the notebook is executed from. Obtained in pure Python so the cell
# does not depend on a shell `pwd` being available.
pwd = str(Path.cwd())

# Absolute path to default Hydra config for normalization script. Take a look at this script to see how normalization is parametrized.
# The config is looked up under `examples/` in the parent of the working directory.
config_path = str(Path(pwd).parent / 'examples' / 'configs' / 'normalization' / 'fit' / 'default.yaml')

# Manager files are expected in the working directory.
# EVAL_MAN_PATH is read back when normalizing, TRAIN_MAN_PATH is used for fitting.
EVAL_MAN_PATH = str(Path(pwd) / 'polygraph_tacl_stablelm12b_wmt19.man')
TRAIN_MAN_PATH = str(Path(pwd) / 'polygraph_tacl_stablelm12b_wmt19_train.man')

# Name of the uncertainty estimation method whose scores are normalized.
UE_METHOD = 'MaximumSequenceProbability'

# A quality metric for model's outputs that is naturally bounded on [0, 1]
GEN_METRIC_NAME = 'Comet'

### Fit normalizers

In [3]:
# Download the managers matching 'polygraph_tacl_stablelm12b_wmt19*man' to the current directory.
# wget flags: -r crawls the remote directory listing, -A keeps only files matching the pattern,
# --no-parent never ascends above mans/, and -nH together with --cut-dirs=2 drop the host name
# and the two leading remote directories so files are saved directly into the working directory.
# NOTE(review): the transfer uses plain HTTP from a bare IP address, with no integrity check on the files.
!wget -r --cut-dirs=2 -nH --no-parent -A 'polygraph_tacl_stablelm12b_wmt19*man' http://209.38.249.180:8000/polygraph_data/mans/

--2024-12-03 17:02:31--  http://209.38.249.180:8000/polygraph_data/mans/
Connecting to 209.38.249.180:8000... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4500 (4.4K) [text/html]
Saving to: ‘index.html.tmp’


2024-12-03 17:02:33 (4.19 KB/s) - ‘index.html.tmp’ saved [4500/4500]

Loading robots.txt; please ignore errors.
--2024-12-03 17:02:33--  http://209.38.249.180:8000/robots.txt
Connecting to 209.38.249.180:8000... connected.
HTTP request sent, awaiting response... 404 File not found
2024-12-03 17:02:33 ERROR 404: File not found.

Removing index.html.tmp since it should be rejected.

--2024-12-03 17:02:33--  http://209.38.249.180:8000/polygraph_data/mans/polygraph_tacl_stablelm12b_wmt19.man
Connecting to 209.38.249.180:8000... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2243332 (2.1M) [application/x-troff-man]
Saving to: ‘polygraph_tacl_stablelm12b_wmt19.man’


2024-12-03 17:03:16 (51.6 KB/s) - ‘polygraph_tacl_stablelm12b_wmt19.man’ saved [2

In [5]:
import os

def format_for_hydra(param):
    """Wrap ``param`` as a single-quoted, one-element list literal.

    The value is placed inside double quotes and square brackets, and the
    whole expression is enclosed in single quotes, e.g. ``Comet`` becomes
    ``'["Comet"]'``.
    """
    template = "'[\"{}\"]'"
    return template.format(param)

import shlex

# Run polygraph_normalize to fit normalizer using train dataset.
# Format path to manager so that Hydra correctly recognizes it as override with list of paths.
# config_path is shell-quoted because it is derived from the working directory,
# which may contain spaces or other shell metacharacters.
normalize_command = (
    f'HYDRA_CONFIG={shlex.quote(config_path)} polygraph_normalize save_path="./" '
    f'man_paths={format_for_hydra(TRAIN_MAN_PATH)} '
    f'gen_metric_names={format_for_hydra(GEN_METRIC_NAME)} '
    f'ue_method_names={format_for_hydra(UE_METHOD)}'
)
exit_status = os.system(normalize_command)

# Fail loudly: otherwise a later cell could silently load a stale pickle
# left over from a previous run.
if exit_status != 0:
    raise RuntimeError(f'polygraph_normalize failed with exit status {exit_status}')

# Keep the exit status as the cell's displayed result.
exit_status

[2024-12-03 17:03:57,716][lm_polygraph][INFO] - Initializing stat calculators...
[2024-12-03 17:03:57,717][lm_polygraph][INFO] - Initializing GreedyProbsCalculator
[2024-12-03 17:03:57,717][lm_polygraph][INFO] - Stat calculators: [<lm_polygraph.stat_calculators.greedy_probs.GreedyProbsCalculator object at 0x31c5f5000>]
[2024-12-03 17:03:57,717][lm_polygraph][INFO] - Done intitializing stat calculators...


0

### Normalize UE from test sets

In [6]:
import pickle
import pprint

# Read back the pickled normalizers from the working directory.
# NOTE(review): presumably written by the polygraph_normalize run (save_path="./") — confirm.
# pickle can execute arbitrary code on load, so only open files you produced yourself.
with open('fitted_normalizers.pickle', mode='rb') as pickle_stream:
    fitted_normalizers = pickle.load(pickle_stream)

In [7]:
from lm_polygraph.normalizers.isotonic_pcc import IsotonicPCCNormalizer

# Rebuild the normalizer from its serialized form.
# Entries are keyed by (quality metric, UE method, normalizer type).
normalizer_key = (GEN_METRIC_NAME, UE_METHOD, 'isotonic_pcc')
serialized_normalizer = fitted_normalizers[normalizer_key]
normalizer = IsotonicPCCNormalizer.loads(serialized_normalizer)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from lm_polygraph.utils.manager import UEManager

# Load the evaluation manager with stored inputs, generations and uncertainty estimates.
test_man = UEManager.load(EVAL_MAN_PATH)

# Index of the evaluation example to inspect; defined once so that the source
# sentence, translation and uncertainty always refer to the same example.
sample_idx = 0

de_sentence = test_man.stats['input_texts'][sample_idx]
# Remove prompt: keep only the third line from the end of the input text.
# NOTE(review): assumes the prompt template puts the source sentence on that line — confirm.
de_sentence = de_sentence.split('\n')[-3]

translation = test_man.stats['greedy_texts'][sample_idx]

# Look up the estimate for UE_METHOD rather than a hard-coded method name, so the
# raw score always comes from the method the normalizer was restored for.
ue = test_man.estimations[('sequence', UE_METHOD)][sample_idx]

In [10]:
# Map the raw uncertainty score through the restored normalizer;
# transform takes a batch, so wrap the single value and unwrap the result.
calibrated_confidence = normalizer.transform([ue])[0]

# Show source sentence and translation, each followed by a ruler line, then the score.
ruler = '=' * 100
print(de_sentence, ruler, translation, ruler, sep='\n')
print('Confidence: ', calibrated_confidence)

Zwar werde es noch dauern, bis die bislang der Kirchengemeinde gehörende Fläche im Gemeindebesitz ist, doch gibt es jetzt kein Planungshindernis für das von einem privaten Investor zu stemmende Projekt mehr.
Although it will take some time before the land currently belonging to the church community is in the possession of the municipality, there is no longer any planning obstacle for the project to be financed by a private investor.
Confidence:  0.7774244338428103
