In [1]:
%load_ext autoreload
%autoreload now

In [2]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "MIG-08137aa2-e69b-5e74-8390-7997329b1336"
# os.environ["WORLD_SIZE"] = "1"

# Download and convert data

In [3]:
from tqdm import tqdm

from document_segmentation.pagexml.generale_missiven import GeneraleMissiven
from document_segmentation.settings import (
    GENERALE_MISSIVEN_DOCUMENT_DIR,
    GENERALE_MISSIVEN_SHEET,
)

N = None

GENERALE_MISSIVEN_DOCUMENT_DIR.mkdir(parents=True, exist_ok=True)

sheet = GeneraleMissiven(GENERALE_MISSIVEN_SHEET)

existing_docs = {
    path.stem
    for path in GENERALE_MISSIVEN_DOCUMENT_DIR.glob("*.json")
    if path.is_file()
}

for document in tqdm(
    sheet.to_documents(n=N, skip_ids=existing_docs),
    total=(N or len(sheet)) - len(existing_docs),
    desc="Writing documents",
    unit="doc",
):
    document_file = GENERALE_MISSIVEN_DOCUMENT_DIR / f"{document.id}.json"

    with document_file.open("xt") as f:
        f.write(document.model_dump_json())
        f.write("\n")

Writing documents:   0%|          | 0/5 [00:00<?, ?doc/s]

Skipping row with inventory number 1171 due to status message: 'Niet gedigitaliseerd.'
Skipping row with inventory number 2770 due to status message: 'Niet gedigitaliseerd.'
Skipping row with inventory number 2770 due to status message: 'Niet gedigitaliseerd.'
Skipping row with inventory number 2770 due to status message: 'Niet gedigitaliseerd.'
Skipping row with inventory number 2911 due to status message: 'Niet gedigitaliseerd.'





# Load Data

In [4]:
%autoreload now

In [5]:
TRAINING_DATA = 0.8

In [6]:
from document_segmentation.model.dataset import PageDataset
from document_segmentation.settings import MIN_REGION_TEXT_LENGTH

dataset = PageDataset.from_dir(
    GENERALE_MISSIVEN_DOCUMENT_DIR, n=100
).remove_short_regions(MIN_REGION_TEXT_LENGTH)
len(dataset)

Reading JSON files: 100%|██████████| 100/100 [00:07<00:00, 13.43file/s]


20750

In [7]:
dataset._class_counts()

Counter({<Label.IN: 2>: 20552, <Label.BEGIN: 1>: 100, <Label.END: 3>: 98})

In [8]:
dataset.class_weights()

[205.44554455445544, 1.0095849754293778, 209.59595959595958, 20750.0]

In [9]:
dataset[5000]

Page(label=<Label.IN: 2>, regions=[Region(id='region_9df849b1-56aa-48d5-b056-961b95080a78_1', types=(<RegionType.PARAGRAPH: 'paragraph'>, <RegionType.PHYSICAL_STRUCTURE_DOC: 'physical_structure_doc'>, <RegionType.TEXT_REGION: 'text_region'>, <RegionType.PAGEXML_DOC: 'pagexml_doc'>), coordinates=((927, 671), (946, 693), (949, 715), (1000, 725), (1044, 747), (1060, 766), (1047, 835), (990, 867), (930, 921), (937, 1054), (930, 1180), (943, 1221), (889, 1395), (892, 1417), (914, 1458), (895, 1528), (908, 1658), (902, 1750), (905, 1892), (883, 1974), (886, 2015), (876, 2053), (883, 2085), (867, 2155), (823, 2237), (788, 2265), (791, 2306), (807, 2329), (807, 2360), (845, 2385), (851, 2408), (848, 2461), (927, 2544), (921, 2563), (902, 2572), (873, 2604), (851, 2673), (854, 2705), (873, 2733), (876, 2851), (899, 2961), (902, 3123), (918, 3167), (918, 3192), (864, 3293), (870, 3331), (911, 3357), (952, 3366), (1206, 3366), (1338, 3379), (1598, 3376), (1772, 3385), (1971, 3376), (2091, 3392), 

In [10]:
split = int(len(dataset) * TRAINING_DATA)

training_data = dataset[:split]
training_data._class_counts()

Counter({<Label.IN: 2>: 16456, <Label.BEGIN: 1>: 73, <Label.END: 3>: 71})

In [11]:
test_data = dataset[split:]
test_data._class_counts()

Counter({<Label.IN: 2>: 4096, <Label.END: 3>: 27, <Label.BEGIN: 1>: 27})

# Train Model

In [12]:
import torch

BATCH_SIZE = 16
EPOCHS = 3
WEIGHTS = torch.Tensor(dataset.class_weights())  # For an imbalanced dataset

In [43]:
%autoreload now

In [44]:
from document_segmentation.model.page_sequence_tagger import PageSequenceTagger

tagger = PageSequenceTagger()

In [45]:
tagger._device

'mps'

In [46]:
tagger

PageSequenceTagger(
  (_page_embedding): PageEmbedding(
    (_region_model): RegionEmbedding(
      (_transformer_model): RobertaModel(
        (embeddings): RobertaEmbeddings(
          (word_embeddings): Embedding(42774, 768, padding_idx=1)
          (position_embeddings): Embedding(514, 768, padding_idx=1)
          (token_type_embeddings): Embedding(1, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): RobertaEncoder(
          (layer): ModuleList(
            (0-11): 12 x RobertaLayer(
              (attention): RobertaAttention(
                (self): RobertaSelfAttention(
                  (query): Linear(in_features=768, out_features=768, bias=True)
                  (key): Linear(in_features=768, out_features=768, bias=True)
                  (value): Linear(in_features=768, out_features=768, bias=True)
                  (dropout): Dropout(p=0.1, inplace=False)
     

In [47]:
tagger.train_(training_data, EPOCHS, BATCH_SIZE, WEIGHTS)

  0%|          | 0/1037.5 [00:01<?, ?batch/s]


RuntimeError: Given groups=1, weight of size [1, 100, 3, 3], expected input[1, 16, 4, 4] to have 100 channels, but got 16 channels instead

# Evaluate Model

In [None]:
import csv
import sys

from torcheval.metrics import (
    MulticlassAccuracy,
    MulticlassF1Score,
    MulticlassPrecision,
    MulticlassRecall,
)
from tqdm import tqdm

from document_segmentation.pagexml.datamodel.label import Label

writer = csv.DictWriter(
    sys.stdout,
    fieldnames=("Predicted", "Actual", "Page ID", "Text", "Scores"),
    delimiter="\t",
)

writer.writeheader()

accuracy = MulticlassAccuracy(num_classes=len(Label))
precision = MulticlassPrecision(average=None, num_classes=len(Label))
recall = MulticlassRecall(average=None, num_classes=len(Label))
f1_score = MulticlassF1Score(average=None, num_classes=len(Label))

for batch in tqdm(
    test_data.batches(BATCH_SIZE), total=len(test_data) / BATCH_SIZE, unit="batch"
):
    predicted = tagger(batch)
    labels = batch.labels()

    _labels = torch.Tensor([label.value - 1 for label in labels]).to(int)
    accuracy.update(predicted, _labels)
    precision.update(predicted, _labels)
    recall.update(predicted, _labels)
    f1_score.update(predicted, _labels)

    for page, pred, label in zip(batch.pages, predicted, labels):
        pred_label = Label(pred.argmax().item() + 1)
        if pred_label != Label.IN or label != Label.IN:
            writer.writerow(
                {
                    "Predicted": pred_label.name,
                    "Actual": label.name,
                    "Page ID": page.doc_id,
                    "Text": page.text(delimiter="; ")[:50],
                    "Scores": str(pred.tolist()),
                }
            )

Predicted	Actual	Page ID	Text	Scores


  2%|▏         | 3/129.6875 [00:00<00:05, 22.72batch/s]

BEGIN	IN	NL-HaNA_1.04.02_3533_0317.jpg	zouden doen dragen, dat geene dubbelde; dukatons z	[0.9711317420005798, 0.028105031698942184, 0.00037071146653033793, 0.00039246713276952505]
BEGIN	IN	NL-HaNA_1.04.02_3533_0318.jpg	aanbesteede; 'sComp„s reparatie tot Th: 100:; mede	[0.6227352619171143, 0.37679725885391235, 0.00024215613666456193, 0.00022533860465046018]
BEGIN	IN	NL-HaNA_1.04.02_3533_0349.jpg	worden gelijd, dat die frifatie/ bedongen inkoop; 	[0.9723415374755859, 0.026956947520375252, 0.00034019837039522827, 0.00036138645373284817]
BEGIN	IN	NL-HaNA_1.04.02_3533_0350.jpg	begunstigde Britsche vrienden van ons; afkeerig is	[0.598449170589447, 0.40105879306793213, 0.00025473529240116477, 0.00023724768834654242]
BEGIN	IN	NL-HaNA_1.04.02_3533_0381.jpg	„meld onder aanhaaling, dat de noodsaake„; lijkhei	[0.9586215019226074, 0.04012445732951164, 0.0006167643587104976, 0.0006373219075612724]
BEGIN	IN	NL-HaNA_1.04.02_3533_0382.jpg	dat de Comt. e en de Colonie ginter volgens der; m	[0.69552469

  7%|▋         | 9/129.6875 [00:00<00:05, 23.66batch/s]

BEGIN	IN	NL-HaNA_1.04.02_3533_0509.jpg	Dat particuliere Engelsen alleen met; sal peter na	[0.9645834565162659, 0.034492310136556625, 0.00045255740405991673, 0.00047164104762487113]
BEGIN	IN	NL-HaNA_1.04.02_3533_0510.jpg	7dd; „o of ƒ 13. 17 2/5. , dan bleek het schie„; l	[0.556537389755249, 0.44290244579315186, 0.0002930821792688221, 0.0002670731919351965]
BEGIN	IN	NL-HaNA_1.04.02_3533_0541.jpg	van'teen en ander word verder gehandeld in; de bri	[0.9697939157485962, 0.029392404481768608, 0.00039518537232652307, 0.0004184711433481425]
BEGIN	IN	NL-HaNA_1.04.02_3533_0542.jpg	missive van den 8:' october 1777. § 300:/ En; ton 	[0.5456979274749756, 0.453619122505188, 0.00035544708953239024, 0.000327510351780802]
BEGIN	IN	NL-HaNA_1.04.02_3533_0573.jpg	Een somma van 800 ropijen sijnde ƒ 1000. boven; de	[0.9778214693069458, 0.02146643027663231, 0.0003412238438613713, 0.00037098146276548505]
BEGIN	IN	NL-HaNA_1.04.02_3533_0574.jpg	elkander te krijgen was, dienen moest voor; het fo	[0.57876169681549

  9%|▉         | 12/129.6875 [00:00<00:04, 24.83batch/s]

BEGIN	IN	NL-HaNA_1.04.02_3533_0637.jpg	was gereverteerd.; uit kunnen aan ons gerigten bri	[0.9554650783538818, 0.043025724589824677, 0.0007408924866467714, 0.0007683063158765435]
BEGIN	IN	NL-HaNA_1.04.02_3533_0638.jpg	30:e aug:s naar Jaggernaike poerom hadden; gesonde	[0.5908240079879761, 0.4083344638347626, 0.00043875601841136813, 0.00040269046439789236]
BEGIN	IN	NL-HaNA_1.04.02_3533_0669.jpg	oorlog tussen Engeland en V rankeryk ont„; stond, 	[0.9506711363792419, 0.04754258319735527, 0.0008773644804023206, 0.0009088683873414993]
BEGIN	IN	NL-HaNA_1.04.02_3533_0670.jpg	bragt dat een bhaarkoper maar op 88 à 91½; N. N: P	[0.6131911277770996, 0.38599371910095215, 0.0004243984294589609, 0.0003907752688974142]
BEGIN	IN	NL-HaNA_1.04.02_3533_0701.jpg	sij vermeenen dierhalven ook dat't; 578; weder in 	[0.9417372345924377, 0.055552516132593155, 0.0013279347913339734, 0.0013822790933772922]
BEGIN	IN	NL-HaNA_1.04.02_3533_0702.jpg	vopgen, tegens Een halv procento interess; smaands	[0.64326941967010

 12%|█▏        | 15/129.6875 [00:00<00:05, 21.19batch/s]

BEGIN	IN	NL-HaNA_1.04.02_3533_0765.jpg	Indien het schip than van; Bengale op Ceilon aangi	[0.9911278486251831, 0.008677499368786812, 9.08474758034572e-05, 0.00010382452455814928]
BEGIN	IN	NL-HaNA_1.04.02_3533_0766.jpg	op dat het zoude sijn geweest die Caneel vroeg:; t	[0.7399224638938904, 0.2598026990890503, 0.00013959332136437297, 0.00013519941421691328]
BEGIN	IN	NL-HaNA_1.04.02_3533_0797.jpg	12:e november aan ons order versogt hebbende; of z	[0.976003110408783, 0.02348206751048565, 0.00024965047487057745, 0.00026518694357946515]
BEGIN	IN	NL-HaNA_1.04.02_3533_0798.jpg	aan den Politicquen Raad sal moeten addres„; seere	[0.6484968662261963, 0.3511802554130554, 0.0001672081125434488, 0.00015563216584268957]
BEGIN	IN	NL-HaNA_1.04.02_3533_0829.jpg	een honderd negen en twintig snaphanen,; alle volg	[0.9520689249038696, 0.04627956822514534, 0.0008104535518214107, 0.0008409757283516228]
BEGIN	IN	NL-HaNA_1.04.02_3533_0830.jpg	overgeleverde, en vrij overhaastig beeedigde; rapp	[0.57827311754226

 16%|█▌        | 21/129.6875 [00:01<00:05, 19.91batch/s]

BEGIN	IN	NL-HaNA_1.04.02_3533_0861.jpg	§: 24 Tot secretaris van Politie insteede van den 	[0.970413327217102, 0.028797650709748268, 0.0003831917711067945, 0.00040593123412691057]
BEGIN	IN	NL-HaNA_1.04.02_3533_0862.jpg	§ 37 Ten besluite deesor materie brengen; wij uwe 	[0.5353243947029114, 0.46393710374832153, 0.00038453287561424077, 0.00035390572156757116]
BEGIN	IN	NL-HaNA_1.04.02_3533_0893.jpg	sal himn geweest voor een enkelde keer, Conde„; mi	[0.9859957098960876, 0.013638210482895374, 0.00017319867038168013, 0.00019281634013168514]
BEGIN	IN	NL-HaNA_1.04.02_3533_0894.jpg	gewoonte verschonken, ter somma van; rop„s 1100 of	[0.5715686082839966, 0.4277229905128479, 0.0003650063299573958, 0.00034344871528446674]
BEGIN	IN	NL-HaNA_1.04.02_3533_0925.jpg	§ 14 Daar en tegen beloopt het versondene. der„; w	[0.9782046675682068, 0.02134193666279316, 0.00021919111895840615, 0.0002341189974686131]
BEGIN	IN	NL-HaNA_1.04.02_3533_0926.jpg	§ 16. van de goede uitwvecking, die in het nouwe; 	[0.5761471986

 21%|██        | 27/129.6875 [00:01<00:07, 14.39batch/s]

BEGIN	IN	NL-HaNA_1.04.02_3533_1021.jpg	Bij het Collegie van; schepenen is, ter plaatsvull	[0.9687738418579102, 0.030365020036697388, 0.00041935141780413687, 0.0004418338357936591]
BEGIN	IN	NL-HaNA_1.04.02_3533_1022.jpg	§7 In voldoening aan de resolutie van; den 4:e mai	[0.5564950704574585, 0.4428122937679291, 0.0003609151463024318, 0.00033172869007103145]
BEGIN	IN	NL-HaNA_1.04.02_3533_1053.jpg	wij de vrijheid uwe wel Edele Hoog Agtb.; in eerbi	[0.9780625700950623, 0.021477222442626953, 0.00022278404503595084, 0.00023733246780466288]
BEGIN	IN	NL-HaNA_1.04.02_3533_1054.jpg	duurte van het gean er nog is, geen gevolg; kunnen	[0.6789174675941467, 0.3207969069480896, 0.00014789852139074355, 0.00013770180521532893]
BEGIN	IN	NL-HaNA_1.04.02_3533_1085.jpg	Iaccatrasche -Boven - en Treanger- landen; omtrend	[0.9890824556350708, 0.010637151077389717, 0.00013170110469218343, 0.00014874101907480508]
BEGIN	IN	NL-HaNA_1.04.02_3533_1086.jpg	de rivier Tjimandirie, laaten hooft vatten,; door 	[0.58837980

 25%|██▍       | 32/129.6875 [00:01<00:05, 17.39batch/s]

BEGIN	IN	NL-HaNA_1.04.02_1603_0063.jpg	d' oude daatsen te probeeren, welke door ses; geco	[0.9848355650901794, 0.014892235398292542, 0.0001302896998822689, 0.00014186282351147383]
BEGIN	IN	NL-HaNA_1.04.02_1603_0064.jpg	ontvangen, maar naderhand heevd sijn E. .; jaarige	[0.6311376094818115, 0.36857086420059204, 0.000151675587403588, 0.0001398552703903988]
BEGIN	IN	NL-HaNA_1.04.02_1603_0095.jpg	d'oorlogs munitien en verdere gereedsz:; daar toe 	[0.9802466630935669, 0.019347678869962692, 0.00019583785615395755, 0.0002098510303767398]
BEGIN	IN	NL-HaNA_1.04.02_1603_0096.jpg	s der drie beloovde predikanten; te wel te pas gek	[0.8027003407478333, 0.1970936506986618, 0.000104518148873467, 0.00010153818584512919]
IN	END	NL-HaNA_1.04.02_1603_0119.jpg	den advocaat fiscaal van India tot sijne; laste sa	[0.023968417197465897, 0.9759633541107178, 3.5646702599478886e-05, 3.25499931932427e-05]
IN	BEGIN	NL-HaNA_1.04.02_2768_0021.jpg	Aan d'Edele Hoog Agtb: Heeren, de Heeren; Represen	[0.0200817231088876

 27%|██▋       | 35/129.6875 [00:01<00:05, 17.40batch/s]

BEGIN	IN	NL-HaNA_1.04.02_1382_0846.jpg	Macasser. maar ƒ 25840: 14: 7: en hunne incomsten;	[0.9416800141334534, 0.055683065205812454, 0.0012922778259962797, 0.0013446594821289182]
BEGIN	IN	NL-HaNA_1.04.02_1382_0847.jpg	Macasser. particulieren aanvoer te sluijten / sal 	[0.6336658596992493, 0.3649386167526245, 0.0007205404108390212, 0.0006750066531822085]
BEGIN	IN	NL-HaNA_1.04.02_1382_0878.jpg	Palembangh. en den pangeran depattij; En hebben; d	[0.9454439878463745, 0.05201345309615135, 0.0012471993686631322, 0.0012953290715813637]
BEGIN	IN	NL-HaNA_1.04.02_1382_0879.jpg	Palembangh, hem souden hebben geweest, wederlijdse	[0.8305928111076355, 0.16889292001724243, 0.00025930453557521105, 0.0002548944321461022]
BEGIN	IN	NL-HaNA_1.04.02_1382_0910.jpg	van een duijsend Roa:, de minste; verstreckinge me	[0.9530431628227234, 0.045372091233730316, 0.0007778365397825837, 0.0008069067262113094]
BEGIN	IN	NL-HaNA_1.04.02_1382_0911.jpg	onsen Iongste sullen vinden aangehaalt;; sijnde he	[0.581116735935211

 29%|██▊       | 37/129.6875 [00:07<01:07,  1.38batch/s]

BEGIN	IN	NL-HaNA_1.04.02_1382_0974.jpg	Cormandel, haar daarmede gesuspecteert heeft.; bel	[0.9538847804069519, 0.044573552906513214, 0.0007564668194390833, 0.0007852856651879847]
BEGIN	IN	NL-HaNA_1.04.02_1382_0975.jpg	Cormandel, gewoonte aldaar is, de goederen te late	[0.5918629765510559, 0.40737321972846985, 0.0003975749423261732, 0.00036617935984395444]
BEGIN	IN	NL-HaNA_1.04.02_1382_1006.jpg	Ceijlon. fruijten, uijt persia den 31: october op 	[0.968643069267273, 0.030483657494187355, 0.00042528484482318163, 0.00044794505811296403]
BEGIN	IN	NL-HaNA_1.04.02_1382_1007.jpg	als ons en alomme in India present; met de nodige 	[0.6354628801345825, 0.3640614449977875, 0.00024647967074997723, 0.00022918944887351245]


 30%|███       | 39/129.6875 [00:13<01:48,  1.19s/batch]

BEGIN	IN	NL-HaNA_1.04.02_1382_1038.jpg	ons maar wat Rijckelijcken daar van; mede deelen, 	[0.9557549953460693, 0.04284050688147545, 0.000688561296556145, 0.0007159532397054136]
BEGIN	IN	NL-HaNA_1.04.02_1382_1039.jpg	Souratta. volgens hun schrijven van 20: Iunij; voo	[0.559892475605011, 0.43927687406539917, 0.0004326395282987505, 0.0003979949397034943]


 31%|███       | 40/129.6875 [00:15<02:01,  1.36s/batch]

BEGIN	IN	NL-HaNA_1.04.02_1382_1070.jpg	en winsten te staan, om te meer; vertier te maacke	[0.941635012626648, 0.055687323212623596, 0.0013125929981470108, 0.0013651762856170535]
BEGIN	IN	NL-HaNA_1.04.02_1382_1071.jpg	Iavas oostcust, vermogen niet om 'tselve buijten o	[0.6460748910903931, 0.35258641839027405, 0.0006912039243616164, 0.0006474517867900431]


 32%|███▏      | 41/129.6875 [00:17<02:13,  1.50s/batch]

BEGIN	IN	NL-HaNA_1.04.02_1382_1102.jpg	Batavia. Ult=o maart 1683) nevens desen dubbel; to	[0.9646387100219727, 0.03359111398458481, 0.000855210586450994, 0.0009149403194896877]
BEGIN	IN	NL-HaNA_1.04.02_1382_1103.jpg	Batavia. beright off onderright nogh bij geschrift	[0.771961510181427, 0.2270055115222931, 0.0005194319528527558, 0.000513597798999399]


 32%|███▏      | 42/129.6875 [00:21<03:02,  2.08s/batch]

BEGIN	IN	NL-HaNA_1.04.02_1382_1134.jpg	Iongste voijagie nae sumatras weltcust,; omtrent h	[0.9712966084480286, 0.02808092162013054, 0.00030409294413402677, 0.00031838411814533174]
BEGIN	IN	NL-HaNA_1.04.02_1382_1135.jpg	is, maar Evenwel bestaat uijt swacke; En sterffeli	[0.587746798992157, 0.4118863046169281, 0.00019225322466809303, 0.00017464635311625898]


 33%|███▎      | 43/129.6875 [00:25<03:26,  2.38s/batch]

BEGIN	IN	NL-HaNA_1.04.02_1382_1166.jpg	Noch P=r de scheepen thuijs te; spijck en strijen 	[0.9860454201698303, 0.013723908923566341, 0.00011015445488737896, 0.00012048761709593236]
BEGIN	IN	NL-HaNA_1.04.02_1382_1167.jpg	wijnen &=a met de schepen vande; Equipagie des Iaa	[0.6057382822036743, 0.39396899938583374, 0.00015266861009877175, 0.00014006120909471065]
IN	END	NL-HaNA_1.04.02_1382_1189.jpg	Batavia. sij, 'tgunt wij versoecken dat uw Ho: Ed=	[0.024106426164507866, 0.9758249521255493, 3.59416808350943e-05, 3.2722273317631334e-05]
IN	BEGIN	NL-HaNA_1.04.02_2140_0043.jpg	Originele; Generale Missive van Haar; Edelens Den 	[0.024853648617863655, 0.975078821182251, 3.52835631929338e-05, 3.214679964003153e-05]


 34%|███▍      | 44/129.6875 [00:27<03:22,  2.37s/batch]

BEGIN	IN	NL-HaNA_1.04.02_2140_0051.jpg	missive door den Gouverneur, en; raad aan Cabo de 	[0.9406207203865051, 0.05659669637680054, 0.001364226802252233, 0.001418375875800848]
BEGIN	IN	NL-HaNA_1.04.02_2140_0052.jpg	en den Crinas vaerder Evengemelte maand na neder„;	[0.6673074960708618, 0.3315068483352661, 0.0006114543648436666, 0.0005741908098571002]


 35%|███▍      | 45/129.6875 [00:29<03:17,  2.33s/batch]

BEGIN	IN	NL-HaNA_1.04.02_2140_0083.jpg	men had den sulth: gelijk aangetoont, de schadelyk	[0.93804931640625, 0.05894046276807785, 0.0014762328937649727, 0.0015340198297053576]
BEGIN	IN	NL-HaNA_1.04.02_2140_0084.jpg	al Eenigen tijd te hebben huijs; gehouden op het E	[0.6626120805740356, 0.3360038995742798, 0.0007136655622161925, 0.0006703117396682501]


 35%|███▌      | 46/129.6875 [00:32<03:17,  2.36s/batch]

BEGIN	IN	NL-HaNA_1.04.02_2140_0115.jpg	in dese directie met de scheepen; oostendenaren am	[0.940448522567749, 0.05680786445736885, 0.0013455083826556802, 0.0013981197262182832]
BEGIN	IN	NL-HaNA_1.04.02_2140_0116.jpg	montant van der„ aan vaderlandse - en Indische; re	[0.6487979292869568, 0.34989917278289795, 0.0006724374834448099, 0.0006305210990831256]


 36%|███▌      | 47/129.6875 [00:34<03:10,  2.31s/batch]

BEGIN	IN	NL-HaNA_1.04.02_2140_0147.jpg	al het geene de ministers op; den; Eijsch van uEd:	[0.9437512755393982, 0.053762540221214294, 0.0012183841317892075, 0.0012677614577114582]
BEGIN	IN	NL-HaNA_1.04.02_2140_0148.jpg	de scheepen; wickenburg en; is vorderde goederen h	[0.6372575163841248, 0.36147570610046387, 0.0006543862400576472, 0.0006124170613475144]


 37%|███▋      | 48/129.6875 [00:37<01:03,  1.29batch/s]

BEGIN	IN	NL-HaNA_1.04.02_2140_0179.jpg	die nergens toe kan werden; g' Emploijeert maar oo	[0.9455181956291199, 0.052035294473171234, 0.0011997788678854704, 0.001246739993803203]
BEGIN	IN	NL-HaNA_1.04.02_2140_0180.jpg	overgaan Een monster, ter qu; titeijt van 37½ lb:,	[0.8264032602310181, 0.1730942577123642, 0.0002529376943130046, 0.0002495023945812136]





KeyboardInterrupt: 

In [None]:
writer = csv.DictWriter(
    sys.stdout,
    fieldnames=["Metric", "Average"] + [label.name for label in Label],
    delimiter="\t",
)
writer.writeheader()

for metric in (precision, recall, f1_score):
    scores = {
        label.name: f"{score:.4f}"
        for label, score in zip(Label, metric.compute().tolist())
    }
    writer.writerow(
        {"Metric": metric.__class__.__name__, "Average": str(metric.average)} | scores
    )

print(f"Accuracy ({accuracy.average} average):\t{accuracy.compute().item():.4f}")

        [3]]) classes have zero instances in both the predictions and the ground truth labels. Precision is still logged as zero.


Metric	Average	BEGIN	IN	END	OUT
MulticlassPrecision	None	0.0000	1.0000	0.0000	0.0000
MulticlassRecall	None	0.0000	0.9300	0.0000	0.0000
MulticlassF1Score	None	0.0000	0.9637	0.0000	0.0000
Accuracy (micro average):	0.9300
