In [1]:
from nenequitia.models import AttentionalModule

model = AttentionalModule.load_from_checkpoint(
    "/home/thibault/dev/Medieval-Model/explogs/AttentionalModule/0/checkpoints/sample-epoch=21.ckpt"
)
model.eval()

AttentionalModule(
  (_emb): Sequential(
    (0): Embedding(148, 128)
    (1): Dropout(p=0.1, inplace=False)
  )
  (_rnn): LSTM(128, 256, batch_first=True, bidirectional=True)
  (_rnn_dropout): Dropout(p=0.1, inplace=False)
  (_rnn_dense): Linear(in_features=512, out_features=512, bias=False)
  (_lin): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=512, out_features=4, bias=True)
  )
)

In [2]:
from pandas import read_hdf

df = read_hdf("texts.hdf5", key="df", index_col=0)
df = df[df.manuscript == "SBB_PK_Hdschr25"]
df["bin"] = ""
df.loc[df.CER < 10, "bin"] = "Good"
df.loc[df.CER.between(10, 20, inclusive="left"), "bin"] = "Acceptable"
df.loc[df.CER.between(20, 50, inclusive="left"), "bin"] = "Bad"
df.loc[df.CER >= 50, "bin"] = "Very bad"
df.tail()

Unnamed: 0,transcription,model,manuscript,page_id,line_id,CER,lang,bin
71436,nisam a labus iquis:ta,data-cremma-medieval_0.mlmodel,SBB_PK_Hdschr25,7,8,37.037036,lat,Bad
166,m,data-lat_only_3.mlmodel,SBB_PK_Hdschr25,12,2,71.428573,lat,Very bad
65002,uisti.eEt qloriamiu oims,data-bad_1.mlmodel,SBB_PK_Hdschr25,16,6,40.74074,lat,Bad
22371,bredicaut tuinairegentẽ,data-cremma-medieval_2.mlmodel,SBB_PK_Hdschr25,14,6,23.076923,lat,Bad
196,⁊ p p a,data-lat_only_3.mlmodel,SBB_PK_Hdschr25,14,5,85.714287,lat,Very bad


In [13]:
from nenequitia.metrics import computer_cer
from collections import Counter
from unicodedata import combining, normalize
from IPython.core.display import display, HTML

pages = []
lines = []
labels = ['Good', 'Acceptable', 'Bad', 'Very bad']
for (local_model, m, p), group in df.groupby(["model", "manuscript", "page_id"]):
    page = group.sort_values("line_id")
    gt = df[(df.model == "GT") & (df.manuscript == m) & (df.page_id == p)]
    page = "".join(group.transcription.tolist())
    gt = "".join(gt.transcription.tolist())
    cer = min(computer_cer(page, gt)*100, 100)
    print(f"{p} for model `{local_model}` CER is {cer}%")
    preds, attention = model(*model.encoder.pad_pred([
        model.encoder.encode_string(string, lang="lat")
        for string in group.transcription.tolist()
    ]))
    preds = preds.argmax(dim=-1).tolist()
    #print(model.encoder.ys)
    pages.append({"cer": cer, "counter": Counter([model.encoder.ys[pred] for pred in preds])})
    for line_attention, line, line_pred in zip(attention.tolist(), group.transcription.tolist(), preds):
        max_attention = max(line_attention)
        colors = [int(255*(att/max_attention)) for att in line_attention]
        if True:
            display(HTML(
                f"<b>Prediction: {model.encoder.ys[line_pred]}</b> " + "".join([
                   f'<span style="color:rgb({color}, 0, 0);" title="{char_att:.2f}">{char}</span>'
                    for color, char_att, char in zip(
                        colors,
                        line_attention,
                        (
                            ["[BOS] ", "[LANG] "] + 
                            [(normalize("NFC", "◌"+char) if combining(char) != 0 else char)for char in line]+
                            [" [EOS] "]
                        )
                    )
                ])
            ))


            

0 for model `GT` CER is 0.0%


  from IPython.core.display import display, HTML


1 for model `GT` CER is 0.0%


2 for model `GT` CER is 0.0%


3 for model `GT` CER is 0.0%


4 for model `GT` CER is 0.0%


5 for model `GT` CER is 0.0%


6 for model `GT` CER is 0.0%


7 for model `GT` CER is 0.0%


8 for model `GT` CER is 0.0%


9 for model `GT` CER is 0.0%


10 for model `GT` CER is 0.0%


11 for model `GT` CER is 0.0%


12 for model `GT` CER is 0.0%


13 for model `GT` CER is 0.0%


14 for model `GT` CER is 0.0%


15 for model `GT` CER is 0.0%


16 for model `GT` CER is 0.0%


0 for model `data-CREMMA-Medieval.mlmodel` CER is 70.3812301158905%


1 for model `data-CREMMA-Medieval.mlmodel` CER is 60.38251519203186%


2 for model `data-CREMMA-Medieval.mlmodel` CER is 71.98275923728943%


3 for model `data-CREMMA-Medieval.mlmodel` CER is 56.647396087646484%


4 for model `data-CREMMA-Medieval.mlmodel` CER is 67.67241358757019%


5 for model `data-CREMMA-Medieval.mlmodel` CER is 70.91412544250488%


6 for model `data-CREMMA-Medieval.mlmodel` CER is 63.93442749977112%


7 for model `data-CREMMA-Medieval.mlmodel` CER is 72.2857117652893%


8 for model `data-CREMMA-Medieval.mlmodel` CER is 68.09815764427185%


9 for model `data-CREMMA-Medieval.mlmodel` CER is 53.721684217453%


10 for model `data-CREMMA-Medieval.mlmodel` CER is 63.56382966041565%


11 for model `data-CREMMA-Medieval.mlmodel` CER is 65.11628031730652%


12 for model `data-CREMMA-Medieval.mlmodel` CER is 71.75925970077515%


13 for model `data-CREMMA-Medieval.mlmodel` CER is 68.94409656524658%


14 for model `data-CREMMA-Medieval.mlmodel` CER is 70.46070694923401%


15 for model `data-CREMMA-Medieval.mlmodel` CER is 75.47169923782349%


16 for model `data-CREMMA-Medieval.mlmodel` CER is 70.96773982048035%


0 for model `data-bad_0.mlmodel` CER is 73.90029430389404%


1 for model `data-bad_0.mlmodel` CER is 73.49726557731628%


2 for model `data-bad_0.mlmodel` CER is 69.3965494632721%


3 for model `data-bad_0.mlmodel` CER is 71.67630195617676%


4 for model `data-bad_0.mlmodel` CER is 70.68965435028076%


5 for model `data-bad_0.mlmodel` CER is 70.63711881637573%


6 for model `data-bad_0.mlmodel` CER is 69.3989098072052%


7 for model `data-bad_0.mlmodel` CER is 74.00000095367432%


8 for model `data-bad_0.mlmodel` CER is 74.84662532806396%


9 for model `data-bad_0.mlmodel` CER is 73.13916087150574%


10 for model `data-bad_0.mlmodel` CER is 71.54255509376526%


11 for model `data-bad_0.mlmodel` CER is 69.18604373931885%


12 for model `data-bad_0.mlmodel` CER is 73.61111044883728%


13 for model `data-bad_0.mlmodel` CER is 72.36024737358093%


14 for model `data-bad_0.mlmodel` CER is 72.899729013443%


15 for model `data-bad_0.mlmodel` CER is 73.27044010162354%


16 for model `data-bad_0.mlmodel` CER is 72.25806713104248%


0 for model `data-bad_1.mlmodel` CER is 73.60703945159912%


1 for model `data-bad_1.mlmodel` CER is 74.04371500015259%


2 for model `data-bad_1.mlmodel` CER is 67.67241358757019%


3 for model `data-bad_1.mlmodel` CER is 73.98843765258789%


4 for model `data-bad_1.mlmodel` CER is 69.82758641242981%


5 for model `data-bad_1.mlmodel` CER is 72.29917049407959%


6 for model `data-bad_1.mlmodel` CER is 70.76502442359924%


7 for model `data-bad_1.mlmodel` CER is 75.14285445213318%


8 for model `data-bad_1.mlmodel` CER is 76.99386477470398%


9 for model `data-bad_1.mlmodel` CER is 74.75728392601013%


10 for model `data-bad_1.mlmodel` CER is 71.27659320831299%


11 for model `data-bad_1.mlmodel` CER is 77.32558250427246%


12 for model `data-bad_1.mlmodel` CER is 72.22222089767456%


13 for model `data-bad_1.mlmodel` CER is 73.29192757606506%


14 for model `data-bad_1.mlmodel` CER is 69.6476936340332%


15 for model `data-bad_1.mlmodel` CER is 71.06918096542358%


16 for model `data-bad_1.mlmodel` CER is 77.4193525314331%


0 for model `data-bad_2.mlmodel` CER is 75.9530782699585%


1 for model `data-bad_2.mlmodel` CER is 70.76502442359924%


2 for model `data-bad_2.mlmodel` CER is 68.1034505367279%


3 for model `data-bad_2.mlmodel` CER is 70.52023410797119%


4 for model `data-bad_2.mlmodel` CER is 68.53448152542114%


5 for model `data-bad_2.mlmodel` CER is 72.02215790748596%


6 for model `data-bad_2.mlmodel` CER is 71.58470153808594%


7 for model `data-bad_2.mlmodel` CER is 71.71428799629211%


8 for model `data-bad_2.mlmodel` CER is 72.69938588142395%


9 for model `data-bad_2.mlmodel` CER is 74.11003112792969%


10 for model `data-bad_2.mlmodel` CER is 75.53191781044006%


11 for model `data-bad_2.mlmodel` CER is 69.4767415523529%


12 for model `data-bad_2.mlmodel` CER is 79.62962985038757%


13 for model `data-bad_2.mlmodel` CER is 71.7391312122345%


14 for model `data-bad_2.mlmodel` CER is 75.33875107765198%


15 for model `data-bad_2.mlmodel` CER is 71.38364911079407%


16 for model `data-bad_2.mlmodel` CER is 75.1612901687622%


0 for model `data-bad_3.mlmodel` CER is 67.15542674064636%


1 for model `data-bad_3.mlmodel` CER is 72.40437269210815%


2 for model `data-bad_3.mlmodel` CER is 61.63793206214905%


3 for model `data-bad_3.mlmodel` CER is 72.25433588027954%


4 for model `data-bad_3.mlmodel` CER is 68.1034505367279%


5 for model `data-bad_3.mlmodel` CER is 68.42105388641357%


6 for model `data-bad_3.mlmodel` CER is 70.49180269241333%


7 for model `data-bad_3.mlmodel` CER is 75.99999904632568%


8 for model `data-bad_3.mlmodel` CER is 69.01840567588806%


9 for model `data-bad_3.mlmodel` CER is 70.8737850189209%


10 for model `data-bad_3.mlmodel` CER is 72.07446694374084%


11 for model `data-bad_3.mlmodel` CER is 66.27907156944275%


12 for model `data-bad_3.mlmodel` CER is 75.0%


13 for model `data-bad_3.mlmodel` CER is 72.98136353492737%


14 for model `data-bad_3.mlmodel` CER is 73.44173192977905%


15 for model `data-bad_3.mlmodel` CER is 71.69811129570007%


16 for model `data-bad_3.mlmodel` CER is 74.8387098312378%


0 for model `data-cremma-medieval_0.mlmodel` CER is 68.03519129753113%


1 for model `data-cremma-medieval_0.mlmodel` CER is 71.58470153808594%


2 for model `data-cremma-medieval_0.mlmodel` CER is 65.94827771186829%


3 for model `data-cremma-medieval_0.mlmodel` CER is 74.56647157669067%


4 for model `data-cremma-medieval_0.mlmodel` CER is 69.3965494632721%


5 for model `data-cremma-medieval_0.mlmodel` CER is 73.13019633293152%


6 for model `data-cremma-medieval_0.mlmodel` CER is 68.30601096153259%


7 for model `data-cremma-medieval_0.mlmodel` CER is 70.85714340209961%


8 for model `data-cremma-medieval_0.mlmodel` CER is 62.88343667984009%


9 for model `data-cremma-medieval_0.mlmodel` CER is 57.281553745269775%


10 for model `data-cremma-medieval_0.mlmodel` CER is 75.2659559249878%


11 for model `data-cremma-medieval_0.mlmodel` CER is 74.41860437393188%


12 for model `data-cremma-medieval_0.mlmodel` CER is 73.14814925193787%


13 for model `data-cremma-medieval_0.mlmodel` CER is 73.60248565673828%


14 for model `data-cremma-medieval_0.mlmodel` CER is 75.88075995445251%


15 for model `data-cremma-medieval_0.mlmodel` CER is 71.06918096542358%


16 for model `data-cremma-medieval_0.mlmodel` CER is 62.90322542190552%


0 for model `data-cremma-medieval_1.mlmodel` CER is 63.04985284805298%


1 for model `data-cremma-medieval_1.mlmodel` CER is 68.5792326927185%


2 for model `data-cremma-medieval_1.mlmodel` CER is 73.2758641242981%


3 for model `data-cremma-medieval_1.mlmodel` CER is 68.78612637519836%


4 for model `data-cremma-medieval_1.mlmodel` CER is 65.94827771186829%


5 for model `data-cremma-medieval_1.mlmodel` CER is 70.63711881637573%


6 for model `data-cremma-medieval_1.mlmodel` CER is 65.30054807662964%


7 for model `data-cremma-medieval_1.mlmodel` CER is 69.1428542137146%


8 for model `data-cremma-medieval_1.mlmodel` CER is 77.91411280632019%


9 for model `data-cremma-medieval_1.mlmodel` CER is 61.812299489974976%


10 for model `data-cremma-medieval_1.mlmodel` CER is 72.8723406791687%


11 for model `data-cremma-medieval_1.mlmodel` CER is 67.73256063461304%


12 for model `data-cremma-medieval_1.mlmodel` CER is 66.66666865348816%


13 for model `data-cremma-medieval_1.mlmodel` CER is 73.9130437374115%


14 for model `data-cremma-medieval_1.mlmodel` CER is 73.71273636817932%


15 for model `data-cremma-medieval_1.mlmodel` CER is 81.13207817077637%


16 for model `data-cremma-medieval_1.mlmodel` CER is 67.7419364452362%


0 for model `data-cremma-medieval_2.mlmodel` CER is 65.98240733146667%


1 for model `data-cremma-medieval_2.mlmodel` CER is 70.76502442359924%


2 for model `data-cremma-medieval_2.mlmodel` CER is 66.37930870056152%


3 for model `data-cremma-medieval_2.mlmodel` CER is 71.96531891822815%


4 for model `data-cremma-medieval_2.mlmodel` CER is 71.12069129943848%


5 for model `data-cremma-medieval_2.mlmodel` CER is 64.54293727874756%


6 for model `data-cremma-medieval_2.mlmodel` CER is 74.8633861541748%


7 for model `data-cremma-medieval_2.mlmodel` CER is 77.42857336997986%


8 for model `data-cremma-medieval_2.mlmodel` CER is 69.32515501976013%


9 for model `data-cremma-medieval_2.mlmodel` CER is 59.54692363739014%


10 for model `data-cremma-medieval_2.mlmodel` CER is 69.41489577293396%


11 for model `data-cremma-medieval_2.mlmodel` CER is 75.58139562606812%


12 for model `data-cremma-medieval_2.mlmodel` CER is 75.46296119689941%


13 for model `data-cremma-medieval_2.mlmodel` CER is 71.7391312122345%


14 for model `data-cremma-medieval_2.mlmodel` CER is 69.37669515609741%


15 for model `data-cremma-medieval_2.mlmodel` CER is 75.47169923782349%


16 for model `data-cremma-medieval_2.mlmodel` CER is 72.5806474685669%


0 for model `data-lat_only_1.mlmodel` CER is 86.51026487350464%


1 for model `data-lat_only_1.mlmodel` CER is 86.61202192306519%


2 for model `data-lat_only_1.mlmodel` CER is 88.79310488700867%


3 for model `data-lat_only_1.mlmodel` CER is 84.9711000919342%


4 for model `data-lat_only_1.mlmodel` CER is 85.34482717514038%


5 for model `data-lat_only_1.mlmodel` CER is 86.1495852470398%


6 for model `data-lat_only_1.mlmodel` CER is 86.06557250022888%


7 for model `data-lat_only_1.mlmodel` CER is 86.28571629524231%


8 for model `data-lat_only_1.mlmodel` CER is 88.03681135177612%


9 for model `data-lat_only_1.mlmodel` CER is 88.99676203727722%


10 for model `data-lat_only_1.mlmodel` CER is 86.17021441459656%


11 for model `data-lat_only_1.mlmodel` CER is 85.75581312179565%


12 for model `data-lat_only_1.mlmodel` CER is 88.88888955116272%


13 for model `data-lat_only_1.mlmodel` CER is 87.57764101028442%


14 for model `data-lat_only_1.mlmodel` CER is 85.36585569381714%


15 for model `data-lat_only_1.mlmodel` CER is 86.79245114326477%


16 for model `data-lat_only_1.mlmodel` CER is 89.03225660324097%


0 for model `data-lat_only_2.mlmodel` CER is 81.23167157173157%


1 for model `data-lat_only_2.mlmodel` CER is 80.6010901927948%


2 for model `data-lat_only_2.mlmodel` CER is 81.46551847457886%


3 for model `data-lat_only_2.mlmodel` CER is 80.05780577659607%


4 for model `data-lat_only_2.mlmodel` CER is 80.6034505367279%


5 for model `data-lat_only_2.mlmodel` CER is 78.39335203170776%


6 for model `data-lat_only_2.mlmodel` CER is 77.86885499954224%


7 for model `data-lat_only_2.mlmodel` CER is 79.42857146263123%


8 for model `data-lat_only_2.mlmodel` CER is 79.75460290908813%


9 for model `data-lat_only_2.mlmodel` CER is 80.25889992713928%


10 for model `data-lat_only_2.mlmodel` CER is 80.31914830207825%


11 for model `data-lat_only_2.mlmodel` CER is 75.87209343910217%


12 for model `data-lat_only_2.mlmodel` CER is 81.4814805984497%


13 for model `data-lat_only_2.mlmodel` CER is 79.50310707092285%


14 for model `data-lat_only_2.mlmodel` CER is 78.04877758026123%


15 for model `data-lat_only_2.mlmodel` CER is 81.13207817077637%


16 for model `data-lat_only_2.mlmodel` CER is 80.0000011920929%


0 for model `data-lat_only_3.mlmodel` CER is 76.53958797454834%


1 for model `data-lat_only_3.mlmodel` CER is 77.32240557670593%


2 for model `data-lat_only_3.mlmodel` CER is 78.44827771186829%


3 for model `data-lat_only_3.mlmodel` CER is 74.85548853874207%


4 for model `data-lat_only_3.mlmodel` CER is 77.58620977401733%


5 for model `data-lat_only_3.mlmodel` CER is 75.34626126289368%


6 for model `data-lat_only_3.mlmodel` CER is 77.04917788505554%


7 for model `data-lat_only_3.mlmodel` CER is 76.85714364051819%


8 for model `data-lat_only_3.mlmodel` CER is 77.60736346244812%


9 for model `data-lat_only_3.mlmodel` CER is 75.4045307636261%


10 for model `data-lat_only_3.mlmodel` CER is 76.8617033958435%


11 for model `data-lat_only_3.mlmodel` CER is 76.16279125213623%


12 for model `data-lat_only_3.mlmodel` CER is 77.31481194496155%


13 for model `data-lat_only_3.mlmodel` CER is 75.46584010124207%


14 for model `data-lat_only_3.mlmodel` CER is 74.79674816131592%


15 for model `data-lat_only_3.mlmodel` CER is 77.67295837402344%


16 for model `data-lat_only_3.mlmodel` CER is 78.38709950447083%


0 for model `data-lat_only_4.mlmodel` CER is 74.19354915618896%


1 for model `data-lat_only_4.mlmodel` CER is 75.95628499984741%


2 for model `data-lat_only_4.mlmodel` CER is 77.15517282485962%


3 for model `data-lat_only_4.mlmodel` CER is 73.98843765258789%


4 for model `data-lat_only_4.mlmodel` CER is 75.0%


5 for model `data-lat_only_4.mlmodel` CER is 72.85318374633789%


6 for model `data-lat_only_4.mlmodel` CER is 75.40983557701111%


7 for model `data-lat_only_4.mlmodel` CER is 74.28571581840515%


8 for model `data-lat_only_4.mlmodel` CER is 76.68711543083191%


9 for model `data-lat_only_4.mlmodel` CER is 77.0226538181305%


10 for model `data-lat_only_4.mlmodel` CER is 77.1276593208313%


11 for model `data-lat_only_4.mlmodel` CER is 76.16279125213623%


12 for model `data-lat_only_4.mlmodel` CER is 75.46296119689941%


13 for model `data-lat_only_4.mlmodel` CER is 73.60248565673828%


14 for model `data-lat_only_4.mlmodel` CER is 74.79674816131592%


15 for model `data-lat_only_4.mlmodel` CER is 74.21383857727051%


16 for model `data-lat_only_4.mlmodel` CER is 74.19354915618896%


0 for model `data-lat_only_5.mlmodel` CER is 75.9530782699585%


1 for model `data-lat_only_5.mlmodel` CER is 72.40437269210815%


2 for model `data-lat_only_5.mlmodel` CER is 75.0%


3 for model `data-lat_only_5.mlmodel` CER is 75.43352842330933%


4 for model `data-lat_only_5.mlmodel` CER is 75.43103694915771%


5 for model `data-lat_only_5.mlmodel` CER is 73.13019633293152%


6 for model `data-lat_only_5.mlmodel` CER is 75.40983557701111%


7 for model `data-lat_only_5.mlmodel` CER is 73.14285635948181%


8 for model `data-lat_only_5.mlmodel` CER is 73.92638325691223%


9 for model `data-lat_only_5.mlmodel` CER is 77.0226538181305%


10 for model `data-lat_only_5.mlmodel` CER is 76.06382966041565%


11 for model `data-lat_only_5.mlmodel` CER is 76.45348906517029%


12 for model `data-lat_only_5.mlmodel` CER is 78.24074029922485%


13 for model `data-lat_only_5.mlmodel` CER is 73.9130437374115%


14 for model `data-lat_only_5.mlmodel` CER is 75.60975551605225%


15 for model `data-lat_only_5.mlmodel` CER is 76.41509175300598%


16 for model `data-lat_only_5.mlmodel` CER is 77.09677219390869%


0 for model `data-manumffrench2.mlmodel` CER is 74.48680400848389%


1 for model `data-manumffrench2.mlmodel` CER is 67.75956153869629%


2 for model `data-manumffrench2.mlmodel` CER is 75.0%


3 for model `data-manumffrench2.mlmodel` CER is 78.03468108177185%


4 for model `data-manumffrench2.mlmodel` CER is 76.29310488700867%


5 for model `data-manumffrench2.mlmodel` CER is 73.68420958518982%


6 for model `data-manumffrench2.mlmodel` CER is 71.85792326927185%


7 for model `data-manumffrench2.mlmodel` CER is 75.14285445213318%


8 for model `data-manumffrench2.mlmodel` CER is 72.69938588142395%


9 for model `data-manumffrench2.mlmodel` CER is 72.16828465461731%


10 for model `data-manumffrench2.mlmodel` CER is 73.67021441459656%


11 for model `data-manumffrench2.mlmodel` CER is 74.70930218696594%


12 for model `data-manumffrench2.mlmodel` CER is 77.31481194496155%


13 for model `data-manumffrench2.mlmodel` CER is 67.70186424255371%


14 for model `data-manumffrench2.mlmodel` CER is 77.2357702255249%


15 for model `data-manumffrench2.mlmodel` CER is 73.27044010162354%


16 for model `data-manumffrench2.mlmodel` CER is 69.35483813285828%


[('Very bad', 10)]


In [None]:

print(pages[-1]["counter"].most_common(1))