### Set up Prism

In [None]:
# Fetch the Prism source code from GitHub into a local `prism` directory.
!git clone https://github.com/thompsonb/prism

In [None]:
# NOTE(review): `!` commands run in a temporary subshell, so this `cd` does not
# change the notebook's working directory for later cells (`%cd` would).
# Later cells use paths relative to the current directory — confirm that
# staying in the original directory is what is intended.
!cd prism

In [None]:
# Install the Python dependencies listed in the cloned repository's requirements file.
!pip install -r '/content/prism/requirements.txt'

In [None]:
import sys
# Append '/prism' to the module search path.
# NOTE(review): the install cell reads requirements from '/content/prism', so
# the clone presumably lives there rather than at '/prism' — confirm this path
# is the one that is meant.
sys.path.append('/prism')

In [None]:
# Download the m39v1 archive (the directory later passed to Prism as its model_dir).
!wget http://data.statmt.org/prism/m39v1.tar

In [None]:
# Unpack the downloaded archive into the current directory
# (presumably creating the `m39v1/` directory used below — verify after extraction).
!tar xf m39v1.tar

In [None]:
# Expose the model directory through the MODEL_DIR environment variable;
# the Prism constructor call reads it back from os.environ.
import os

os.environ.update(MODEL_DIR='m39v1/')

In [None]:
from prism.prism import Prism

# Build the scorer from the directory named by MODEL_DIR, with lang='en',
# then print the identifier string it reports.
model_dir = os.environ['MODEL_DIR']
prism = Prism(model_dir=model_dir, lang='en')
print('Prism identifier:', prism.identifier())

In [None]:
# Toy inputs: two candidate translations with their references and sources.
cand = ['Hi world.', 'This is a Test.']
ref = ['Hello world.', 'This is a test.']
src = ['Bonjour le monde.', "C'est un test."]

# Reference-based scoring: candidates are compared against `ref`.
system_metric = prism.score(cand=cand, ref=ref)
print('System-level metric:', system_metric)
segment_metric = prism.score(cand=cand, ref=ref, segment_scores=True)
print('Segment-level metric:', segment_metric)

# Source-based scoring (QE-as-metric): candidates are compared against `src`.
system_qe = prism.score(cand=cand, src=src)
print('System-level QE-as-metric:', system_qe)
segment_qe = prism.score(cand=cand, src=src, segment_scores=True)
print('Segment-level QE-as-metric:', segment_qe)

In [None]:
# from prism.prism import Prism
# prism = Prism(model_dir=os.environ['MODEL_DIR'], lang='de')
# prism.identifier()

#### Load WMT15 data

In [None]:
import pandas as pd
import pickle

In [None]:
def _read_lines(path):
    """Return every line of the UTF-8 text file at *path*, without its newline."""
    with open(path, encoding="utf-8") as f:
        # rstrip("\n") only removes a newline that is really there; slicing
        # with [:-1] would cut a real character off a final line that has no
        # trailing newline.
        return [line.rstrip("\n") for line in f]


# One list entry per line of each file: MT output, reference, source.
hyp_snts = _read_lines("/content/mt.txt")
ref_snts = _read_lines("/content/reference.txt")
src_snts = _read_lines("/content/source.txt")

with open("/content/newstest2015.human.de-en", encoding="utf-8") as f:
    # float() ignores surrounding whitespace, so the newline needs no manual
    # stripping (and a last line without one is parsed in full).
    human_scores = [float(line) for line in f]

#### Load WMT21 data

In [None]:
# Load the pickled WMT21 sentence lists (references, MT hypotheses, sources).
# NOTE: pickle.load can execute arbitrary code while unpickling — only load
# files that come from a trusted source.
with open('/content/all_ref_snts_21.pickle', 'rb') as fp:
    ref_snts = pickle.load(fp)

with open('/content/all_mt_snts_21.pickle', 'rb') as fp:
    # Bound to `hyp_snts`, the name the length check and the scoring cells
    # read; any other name would leave them using stale or undefined
    # hypotheses alongside the new references and sources.
    hyp_snts = pickle.load(fp)

with open('/content/all_src_snts_21.pickle', 'rb') as fp:
    src_snts = pickle.load(fp)

In [None]:
# Show the sizes of the reference, hypothesis and source lists side by side;
# they are paired up item by item during scoring, so a mismatch is worth spotting here.
tuple(len(snts) for snts in (ref_snts, hyp_snts, src_snts))

In [None]:
# Metric "prism_ref": segment-level Prism scores (segment_scores=True) of the
# hypotheses against their references.

ref_metric_scores = prism.score(cand=hyp_snts, ref=ref_snts, segment_scores=True)
# ref_metric_scores  # uncomment to inspect the raw scores

In [None]:
# Metric "prism_src": segment-level Prism scores (segment_scores=True) of the
# hypotheses against the source sentences (the QE-as-metric mode).
src_metric_scores = prism.score(cand=hyp_snts, src=src_snts, segment_scores=True)
# src_metric_scores  # uncomment to inspect the raw scores

In [None]:
# Put both segment-level score collections into one table, one column per metric.
score_columns = {
    'prism_ref': ref_metric_scores,
    'prism_src': src_metric_scores,
}
prism_scores = pd.DataFrame(score_columns)
prism_scores

In [None]:
# Persist the score table to disk using the highest pickle protocol available.
with open('prism_scores_wmt15_long_form.pickle', 'wb') as out_file:
    pickle.dump(prism_scores, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Worked example: one reference, three hypotheses, and hard-coded scores
# for each hypothesis (index i of the list belongs to hyp<i>).
ref = "I never wrote this article, I just edited it."
hyp0 = "I never wrote this article, I just edited it."
hyp1 = "It is not my article, I just edited it."
hyp2 = "I never wrote this article, I never edited it."

prism_ref_example = [-0.159257, -1.621714, -1.331593]

print(f'Reference:\t{ref}')
for idx, hyp in enumerate((hyp0, hyp1, hyp2)):
    print(f'Hypothesis {idx}:\t{hyp}')
print()  # blank separator line between the sentences and the scores

for idx, score in enumerate(prism_ref_example):
    print(f'Prism score for hyp{idx}:\t{score}')

# Bare expression kept as the cell's final value; these appear to be the same
# three scores written with more digits.
[-0.15925716, -1.621714, -1.3315935]

In [None]:
# Attach the human scores as a column and move that column to the front.
column_order = ['human', 'prism_ref', 'prism_src']
prism_scores['human'] = human_scores
prism_scores = prism_scores.loc[:, column_order]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Wide, short figure with enlarged fonts for a single-row heatmap.
sns.set(font_scale=1.9, rc={'figure.figsize': (15, 1)})

# Keep only the first row of the rounded correlation matrix, i.e. the
# correlations of the first column with every column.
correlations = prism_scores.corr().round(4)
m = correlations.iloc[:1]

heatmap_options = dict(
    annot=True,
    vmax=1,
    vmin=-1,
    center=0,
    cmap='vlag',
    annot_kws={"size": 16},
    cbar_kws={'fraction': 0.1},
    linewidth=4,
)
# Despite the name, `fig` holds the object returned by sns.heatmap,
# whose tick labels are adjusted below.
fig = sns.heatmap(m, **heatmap_options)

fig.set_xticklabels(fig.get_xticklabels(), rotation=35, horizontalalignment='right')
fig.set_yticklabels(fig.get_yticklabels(), rotation=0, horizontalalignment='right')
plt.show()