In [1]:
%load_ext autoreload
%autoreload now

In [2]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "MIG-08137aa2-e69b-5e74-8390-7997329b1336"
# os.environ["WORLD_SIZE"] = "1"

# Download and convert data

In [3]:
from tqdm import tqdm

from document_segmentation.pagexml.annotations.renate_analysis import RenateAnalysis
from document_segmentation.settings import RENATE_ANALYSIS_DIR

N = None

RENATE_ANALYSIS_DIR.mkdir(parents=True, exist_ok=True)

sheet = RenateAnalysis()


existing_docs = {
    path.stem for path in RENATE_ANALYSIS_DIR.glob("Globdoc_*.json") if path.is_file()
}

for document in tqdm(
    sheet.to_documents(n=N, skip_ids=existing_docs),
    total=(N or len(sheet)) - len(existing_docs),
    desc="Writing documents",
    unit="doc",
):
    document_file = RENATE_ANALYSIS_DIR / f"{document.id}.json"

    with document_file.open("xt") as f:
        f.write(document.model_dump_json())
        f.write("\n")

Writing documents: 0doc [00:00, ?doc/s]


In [4]:
import logging

from tqdm import tqdm

from document_segmentation.pagexml.annotations.renate_analysis import RenateAnalysisInv
from document_segmentation.settings import RENATE_ANALYSIS_DIR, RENATE_ANALYSIS_SHEETS

N = None


sheet = RenateAnalysisInv(RENATE_ANALYSIS_SHEETS[0])  # TODO: use both sheets

for document in tqdm(
    sheet.to_documents(n=N), desc="Writing documents", unit="doc", total=26
):
    document_file = RENATE_ANALYSIS_DIR / f"{document.id}.json"

    if document_file.exists():
        logging.info(f"Document {document.id} already exists, skipping")
    else:
        with document_file.open("xt") as f:
            f.write(document.model_dump_json())
            f.write("\n")

Writing documents:   0%|          | 0/26 [00:00<?, ?doc/s]

Writing documents: 100%|██████████| 26/26 [00:10<00:00,  2.55doc/s]


# Load Data

In [18]:
%autoreload now

In [19]:
TRAINING_DATA = 0.8

In [20]:
from document_segmentation.model.dataset import DocumentDataset

dataset: DocumentDataset = DocumentDataset.from_dir(RENATE_ANALYSIS_DIR)
len(dataset)

Reading JSON files: 100%|██████████| 104/104 [00:00<00:00, 176.63file/s]


2184

In [21]:
dataset._class_counts()

Counter({<Label.IN: 1>: 1907,
         <Label.BEGIN: 0>: 104,
         <Label.END: 2>: 100,
         <Label.OUT: 3>: 73})

In [22]:
dataset.class_weights()

[20.8, 1.1446540880503144, 21.623762376237625, 29.513513513513512]

In [24]:
training_data, test_data = dataset.split(TRAINING_DATA)

In [26]:
training_data._class_counts()

Counter({<Label.IN: 1>: 1591,
         <Label.BEGIN: 0>: 83,
         <Label.END: 2>: 81,
         <Label.OUT: 3>: 67})

In [27]:
test_data._class_counts()

Counter({<Label.IN: 1>: 316,
         <Label.BEGIN: 0>: 21,
         <Label.END: 2>: 19,
         <Label.OUT: 3>: 6})

# Train Model

In [28]:
import torch

BATCH_SIZE = 32
EPOCHS = 10
WEIGHTS = torch.Tensor(dataset.class_weights())  # For an imbalanced dataset

In [None]:
%autoreload now

In [29]:
from document_segmentation.model.page_sequence_tagger import PageSequenceTagger

tagger = PageSequenceTagger()

In [30]:
tagger._device

'mps'

In [31]:
tagger

PageSequenceTagger(
  (_page_embedding): PageEmbedding(
    (_region_model): RegionEmbeddingSentenceTransformer(
      (_transformer_model): SentenceTransformer(
        (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: RobertaModel 
        (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
      )
      (_region_type): Embedding(9, 16)
      (_linear): Linear(in_features=784, out_features=512, bias=True)
    )
    (_rnn): LSTM(512, 256, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
    (_linear): Linear(in_features=512, out_features=256, bias=True)
  )
  (_rnn): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.1, bidirectional=True)
  (_linear): Linear(in_features=512, out_features=4, bias=True)
  (_soft

In [32]:
tagger.train_(training_data, EPOCHS, BATCH_SIZE, WEIGHTS.to(tagger._device))


116batch [01:44,  1.11batch/s]
Reading JSON files:   0%|          | 0/104 [09:17<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2906 MB
[Loss:	6.392]


116batch [00:06, 18.18batch/s]
Reading JSON files:   0%|          | 0/104 [09:23<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2860 MB
[Loss:	4.474]


116batch [00:06, 18.00batch/s]
Reading JSON files:   0%|          | 0/104 [09:30<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	4.442]


116batch [00:06, 19.33batch/s]
Reading JSON files:   0%|          | 0/104 [09:36<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	4.433]


116batch [00:05, 19.72batch/s]
Reading JSON files:   0%|          | 0/104 [09:41<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	4.131]


116batch [00:05, 19.71batch/s]
Reading JSON files:   0%|          | 0/104 [09:47<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	3.584]


116batch [00:05, 19.84batch/s]
Reading JSON files:   0%|          | 0/104 [09:53<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	3.574]


116batch [00:05, 19.80batch/s]
Reading JSON files:   0%|          | 0/104 [09:59<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	3.569]


116batch [00:05, 19.60batch/s]
Reading JSON files:   0%|          | 0/104 [10:05<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	3.568]


116batch [00:05, 19.56batch/s]
Reading JSON files:   0%|          | 0/104 [10:11<?, ?file/s]

Current allocated memory (MPS): 1182 MB
Driver allocated memory (MPS): 2842 MB
[Loss:	3.567]


# Evaluate Model

In [33]:
import csv
import sys

from torcheval.metrics import (
    MulticlassAccuracy,
    MulticlassF1Score,
    MulticlassPrecision,
    MulticlassRecall,
)
from tqdm import tqdm

from document_segmentation.pagexml.datamodel.label import Label

writer = csv.DictWriter(
    sys.stdout,
    fieldnames=("Predicted", "Actual", "Page ID", "Text", "Scores"),
    delimiter="\t",
)

writer.writeheader()

accuracy = MulticlassAccuracy(num_classes=len(Label))
precision = MulticlassPrecision(average=None, num_classes=len(Label))
recall = MulticlassRecall(average=None, num_classes=len(Label))
f1_score = MulticlassF1Score(average=None, num_classes=len(Label))

for batch in tqdm(
    test_data.batches(BATCH_SIZE), total=len(test_data) / BATCH_SIZE, unit="batch"
):
    predicted = tagger(batch)
    labels = batch.labels()

    _labels = torch.Tensor([label.value for label in labels]).to(int)
    accuracy.update(predicted, _labels)
    precision.update(predicted, _labels)
    recall.update(predicted, _labels)
    f1_score.update(predicted, _labels)

    for page, pred, label in zip(batch.pages, predicted, labels):
        pred_label = Label(pred.argmax().item())
        # if pred_label != Label.IN or label != Label.IN:
        writer.writerow(
            {
                "Predicted": pred_label.name,
                "Actual": label.name,
                "Page ID": page.doc_id,
                "Text": page.text(delimiter="; ")[:50],
                "Scores": str(pred.tolist()),
            }
        )

Reading JSON files:   0%|          | 0/104 [10:23<?, ?file/s]


Predicted	Actual	Page ID	Text	Scores


 18%|█▊        | 2/11.3125 [00:00<00:03,  2.72batch/s]

BEGIN	BEGIN	NL-HaNA_1.04.02_1060_0435.jpg	Alsoo het schip der Goes, als t'Jacht Cleijn Enckh	[0.9996637105941772, 0.0002524361771065742, 5.55374936084263e-05, 2.8267319066799246e-05]
IN	IN	NL-HaNA_1.04.02_1060_0436.jpg	Op heden den 5 feb. @ 1614, door beroep vanden E. 	[0.0012747891014441848, 0.997553288936615, 0.0011668851366266608, 5.150438937562285e-06]
IN	IN	NL-HaNA_1.04.02_1060_0437.jpg	@ 1615. Dondergeschreven te soonen; Soo Is bij d' 	[0.0007243358995765448, 0.9982724189758301, 0.0009999239118769765, 3.3418723432987463e-06]
IN	IN	NL-HaNA_1.04.02_1060_0438.jpg	Den 19 feb. . vernomen hebbende op de factorije to	[0.0006676050252281129, 0.9983349442481995, 0.0009944145567715168, 3.1268480142898625e-06]
IN	IN	NL-HaNA_1.04.02_1060_0439.jpg	Ert Dircx wttgevaren voor Soldaet, opt schip Banta	[0.0006568318349309266, 0.9983606934547424, 0.000979323056526482, 3.1010447401058627e-06]
IN	IN	NL-HaNA_1.04.02_1060_0440.jpg	Comptoir deeart sullen gaen, omme door den opperco	[0.000655361975077539

 27%|██▋       | 3/11.3125 [00:01<00:03,  2.38batch/s]

OUT	OUT	NL-HaNA_1.04.02_1547_0348.jpg		[3.767828457057476e-05, 1.168915241578361e-05, 1.6890284314285964e-05, 0.9999337196350098]
OUT	OUT	NL-HaNA_1.04.02_1547_0349.jpg		[1.3423024938674644e-05, 5.066107860329794e-06, 6.1292248574318364e-06, 0.9999754428863525]
OUT	OUT	NL-HaNA_1.04.02_1547_0350.jpg		[2.89409035758581e-05, 1.1263072337897029e-05, 1.1508109309943393e-05, 0.9999483823776245]
BEGIN	BEGIN	NL-HaNA_1.04.02_1547_0351.jpg	Translaat ola door haar E:; heer Commandeur adriaa	[0.9994751811027527, 0.00016204184794332832, 5.188388240640052e-05, 0.0003109582175966352]
IN	IN	NL-HaNA_1.04.02_1547_0352.jpg	gegeven werd op dit alle onlusten verweijderinghe 	[0.002426056656986475, 0.9959057569503784, 0.0015960998134687543, 7.202728738775477e-05]
END	END	NL-HaNA_1.04.02_1547_0353.jpg	en het Cochinisz rijk herwaarts te senden om over 	[0.0002140397991752252, 0.0006525940843857825, 0.9990953207015991, 3.8060639781178907e-05]


 35%|███▌      | 4/11.3125 [00:03<00:06,  1.06batch/s]

BEGIN	BEGIN	NL-HaNA_1.04.02_1506_1034.jpg	-; d; E; 7.; decken; 8; 2; van de; 5; ƒ; 3; E; E; 	[0.999672532081604, 0.0002436629729345441, 5.570215216721408e-05, 2.817159656842705e-05]
IN	IN	NL-HaNA_1.04.02_1506_1035.jpg	binnewater; een lamme; D95; same; uijt; ies; Cas; 	[0.0013309026835486293, 0.9972808361053467, 0.0013826104113832116, 5.614747806248488e-06]
IN	IN	NL-HaNA_1.04.02_1506_1036.jpg	30 vrs; o; Janor; 6; 5o; e; x; 116; E; 6.; 1.; :; 	[0.0011187694035470486, 0.9959751963615417, 0.0029000823851674795, 5.9029798649135046e-06]
END	END	NL-HaNA_1.04.02_1506_1037.jpg	k; „noortvelt; rogons; „1; rsame; uijt; eruijt; ƒ;	[0.00011362581426510587, 0.0002934639051090926, 0.9995860457420349, 6.863442195026437e-06]


 44%|████▍     | 5/11.3125 [00:04<00:07,  1.26s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_3060_0043.jpg	Na dat de Leeden deeser Vergaadering bij een geroe	[0.9717885255813599, 0.010821838863193989, 0.001531894551590085, 0.015857649967074394]
IN	IN	NL-HaNA_1.04.02_3060_0044.jpg		[0.004722322802990675, 0.9493709206581116, 0.0015321957180276513, 0.04437460005283356]
OUT	IN	NL-HaNA_1.04.02_3060_0045.jpg		[0.007382892072200775, 0.4826573133468628, 0.0038950678426772356, 0.5060647130012512]
OUT	IN	NL-HaNA_1.04.02_3060_0046.jpg		[0.00460227532312274, 0.07550110667943954, 0.002633779775351286, 0.917262852191925]
BEGIN	IN	NL-HaNA_1.04.02_3060_0047.jpg	Woensdag den 13: ' October A„o 1762.; Na dat de Le	[0.9990507960319519, 0.000773828593082726, 6.256939377635717e-05, 0.00011286102380836383]
IN	IN	NL-HaNA_1.04.02_3060_0048.jpg	Nagesien; G„s V„n Aken	[0.0006021875306032598, 0.998832643032074, 0.0005534070078283548, 1.1793497833423316e-05]
IN	IN	NL-HaNA_1.04.02_3060_0049.jpg	Woensdag den 13.:' October A„o 1762.; Na dat de Le	[0.0004429000255186111, 0.99891161

 53%|█████▎    | 6/11.3125 [00:06<00:07,  1.35s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_1088_0511.jpg	Naer dat den 8en. septembr 1625, het fergatt Surat	[0.9995299577713013, 0.00038027067785151303, 5.3568310249829665e-05, 3.61987404176034e-05]
IN	IN	NL-HaNA_1.04.02_1088_0512.jpg	die van Lohoe waren hem gevolcht, tot op lebeleeuw	[0.000736082496587187, 0.998572587966919, 0.0006874403916299343, 3.958030447392957e-06]
IN	IN	NL-HaNA_1.04.02_1088_0513.jpg	wederom afgesonden, naar Bouro, om te vernemen, wa	[0.00042865300201810896, 0.9989858269691467, 0.000582914159167558, 2.585274387456593e-06]
IN	IN	NL-HaNA_1.04.02_1088_0514.jpg	ende ten deele onwillich, soo dat met schoon spree	[0.00039627691148780286, 0.99903404712677, 0.0005673717241734266, 2.425034153930028e-06]
IN	IN	NL-HaNA_1.04.02_1088_0515.jpg	ons voor antwoort, dat wel waar was, dat sijn Vade	[0.0003879569412674755, 0.9990419745445251, 0.0005676839500665665, 2.3906254682515282e-06]
IN	IN	NL-HaNA_1.04.02_1088_0516.jpg	noch al wel toeginck, ende mijn hier van noch vrij	[0.00037304774741642177

 62%|██████▏   | 7/11.3125 [00:07<00:05,  1.21s/batch]

BEGIN	IN	NL-HaNA_1.04.02_1088_0543.jpg	244; voorganis, datmen ons niet en mocht vertrouwe	[0.9996716976165771, 0.0002440052921883762, 5.560545832850039e-05, 2.864483576558996e-05]
IN	IN	NL-HaNA_1.04.02_1088_0544.jpg	vrouw, den man niet mede toebehoorde, niet meer an	[0.0013026803499087691, 0.9975475668907166, 0.0011445655254647136, 5.290075478114886e-06]
IN	IN	NL-HaNA_1.04.02_1088_0545.jpg	soete middelen te werck gaen, hoewel daar mede nie	[0.000736430985853076, 0.9982445240020752, 0.0010156105272471905, 3.414582806726685e-06]
IN	IN	NL-HaNA_1.04.02_1088_0546.jpg	soude mogen bewaert leggen, maer alst nu alsoo bes	[0.0006624372908845544, 0.998327910900116, 0.0010065833339467645, 3.124250497421599e-06]
IN	IN	NL-HaNA_1.04.02_1088_0547.jpg	sij seijden dat op Hattamana, den Jongen Coninck, 	[0.0006504838238470256, 0.9983435869216919, 0.0010027886601164937, 3.0803296340309316e-06]
IN	IN	NL-HaNA_1.04.02_1088_0548.jpg	uijt de quartieren van hittoe, dit heele Mosson, m	[0.0006729831220582128, 0.

 71%|███████   | 8/11.3125 [00:08<00:03,  1.12s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_8099_0205.jpg	Van Ternaten onder dato 11:' 7ber: 1732; van alle 	[0.9996312856674194, 0.0002860643435269594, 5.3606450819643214e-05, 2.9019489375059493e-05]
IN	IN	NL-HaNA_1.04.02_8099_0206.jpg	Van Ternaten onder dato 11:' Septemb:r 1732; Lauwt	[0.0012760489480569959, 0.9975454211235046, 0.0011733275605365634, 5.199290171731263e-06]
IN	IN	NL-HaNA_1.04.02_8099_0207.jpg	Van Ternaten onder dato 11:' 7ber: 1732; door hem 	[0.0007270185160450637, 0.99822598695755, 0.0010435982840135694, 3.4044653602904873e-06]
IN	IN	NL-HaNA_1.04.02_8099_0208.jpg	Ternaten onder dato 11:' 7ber: 1732; Van; „dugting	[0.0006661674124188721, 0.9983421564102173, 0.000988578307442367, 3.148679070363869e-06]
IN	IN	NL-HaNA_1.04.02_8099_0209.jpg	Ternaten onder dato 11:' 7ber: 1732; Van; Ternaten	[0.0006602519424632192, 0.9983484745025635, 0.0009882479207590222, 3.1223669338942273e-06]
IN	IN	NL-HaNA_1.04.02_8099_0210.jpg	Ternaten onder dato 11:' Sepb: 1732; Van; voor ops	[0.000632950162980705

 80%|███████▉  | 9/11.3125 [00:08<00:02,  1.08batch/s]

BEGIN	BEGIN	NL-HaNA_1.04.02_1070_0199.jpg	2 saeckers Elck van 3000 lb; 2 halve dittos elck v	[0.9980164766311646, 0.0011951117776334286, 0.0004300513246562332, 0.00035834635491482913]
IN	IN	NL-HaNA_1.04.02_1070_0200.jpg		[0.0033359762746840715, 0.9911088943481445, 0.002743801102042198, 0.002811391605064273]
IN	END	NL-HaNA_1.04.02_1070_0201.jpg	Adriaen gerritsz van utrecht sergiant; marijn Ding	[0.008283664472401142, 0.8976438045501709, 0.09393522888422012, 0.00013724264863412827]


 88%|████████▊ | 10/11.3125 [00:10<00:01,  1.23s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_1509_1538.jpg	Monsterolle van alle sComp:s Loontreckende; Monste	[0.9996657371520996, 0.0002513462968636304, 5.458337182062678e-05, 2.8321261197561398e-05]
IN	IN	NL-HaNA_1.04.02_1509_1539.jpg	dienaren dewelcke in't Cormandelse Gouvernement bi	[0.001239656237885356, 0.9975988268852234, 0.0011563834268599749, 5.0744920372380875e-06]
IN	IN	NL-HaNA_1.04.02_1509_1540.jpg	339. en 35. persoonen p=r Transport. —; Namen, Toe	[0.0007339020376093686, 0.9982773065567017, 0.000985415535978973, 3.334223038109485e-06]
IN	IN	NL-HaNA_1.04.02_1509_1541.jpg	90; d=o.. . ..; Adsistent; Chirurgijn. . . . ƒ 36.	[0.0006549559766426682, 0.9983497858047485, 0.0009921861346811056, 3.074438154726522e-06]
IN	IN	NL-HaNA_1.04.02_1509_1542.jpg	339 en 74. persoonen P=r Transport; Namen, Toename	[0.0006838284898549318, 0.9983718991279602, 0.0009412301005795598, 3.1212073281494668e-06]
IN	IN	NL-HaNA_1.04.02_1509_1543.jpg	siekevaar. . . ƒ 20. walcheren. . . . . 1662. Zeel	[0.000647359527647495

 97%|█████████▋| 11/11.3125 [00:13<00:00,  1.77s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_1490_0583.jpg	Copije Secrete Resolutien; genomen bij de Ho: Rege	[0.9996656179428101, 0.0002538571716286242, 5.294060247251764e-05, 2.766472243820317e-05]
IN	IN	NL-HaNA_1.04.02_1490_0584.jpg	in presentie; van zijn Ed. t te; volbrengen, soo s	[0.0012668200070038438, 0.9975445866584778, 0.0011833261232823133, 5.190873253013706e-06]
IN	IN	NL-HaNA_1.04.02_1490_0585.jpg	dat 's Comp. s dienaren en wel voorna„; mentlijk p	[0.0007265448803082108, 0.9982732534408569, 0.0009968261001631618, 3.3520634588057874e-06]
IN	IN	NL-HaNA_1.04.02_1490_0586.jpg	bij ons te boeck staen voor niet wel; geintentione	[0.0006719699595123529, 0.9983534812927246, 0.0009714624029584229, 3.1505230708717136e-06]
IN	IN	NL-HaNA_1.04.02_1490_0587.jpg	goetgevonden dat dese ontbiedinge; bij ons gemeen 	[0.0006590731791220605, 0.9983616471290588, 0.0009761892142705619, 3.1130866773310117e-06]
IN	IN	NL-HaNA_1.04.02_1490_0588.jpg	schaap harder herwaerts aen te doen; overkomen, ge	[0.0006577562307938

12batch [00:15,  1.65s/batch]                          

BEGIN	IN	NL-HaNA_1.04.02_1490_0615.jpg	en ons buijten postuer te brengen,; om de dierbare	[0.9996631145477295, 0.00025345568428747356, 5.472698467201553e-05, 2.8706832381431013e-05]
IN	IN	NL-HaNA_1.04.02_1490_0616.jpg	a:o 1684. wierde geordonneert in; haere herwaerts 	[0.0012444094754755497, 0.9975848197937012, 0.0011656596325337887, 5.085044904262759e-06]
IN	IN	NL-HaNA_1.04.02_1490_0617.jpg	rijkelijk te konnen versorgen,; mitsgaders het mis	[0.0007251425413414836, 0.9982725381851196, 0.000998975709080696, 3.343217713336344e-06]
IN	IN	NL-HaNA_1.04.02_1490_0618.jpg	gepractiseert geworden is, sullen; moeten doen hou	[0.0006678376812487841, 0.9983385801315308, 0.0009903855388984084, 3.1583035706717055e-06]
IN	IN	NL-HaNA_1.04.02_1490_0619.jpg	over te steeken, agtervolgens; sodanige Zeijlaes o	[0.0006566886440850794, 0.9982996582984924, 0.0010404939530417323, 3.1756430871610064e-06]
IN	IN	NL-HaNA_1.04.02_1490_0620.jpg	gecarteert legt, maer dat oock de; stronien inde m	[0.0006548099918290973

13batch [00:15,  1.31s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_1547_0110.jpg	gemerkt &E.; Waarmeede; Edele hoog agtbaare gebied	[0.9996683597564697, 0.00023772170243319124, 6.379025580827147e-05, 3.0142153264023364e-05]
IN	IN	NL-HaNA_1.04.02_1547_0111.jpg	Minuit ola door den P„r gesaghebber ale ander wigl	[0.0016885414952412248, 0.9953450560569763, 0.002958133118227124, 8.329462616529781e-06]
END	END	NL-HaNA_1.04.02_1547_0112.jpg	l  eene heben em maede slekt e ondergeende kopmans	[0.00012205754319438711, 0.00031572484294883907, 0.9995546936988831, 7.4977456279157195e-06]


14batch [00:15,  1.00s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_8820_0069.jpg	Van Cormandel deder 24: November ao 1702.; Jck ond	[0.9996696710586548, 0.00024816161021590233, 5.3943706006975845e-05, 2.814264917105902e-05]
IN	IN	NL-HaNA_1.04.02_8820_0070.jpg	Van Cormandel onder 24: November 1702.; gevonden 6	[0.0012739149387925863, 0.9975729584693909, 0.0011480273678898811, 5.127843905938789e-06]
IN	IN	NL-HaNA_1.04.02_8820_0071.jpg	Van Cormandel onder 24: November 1702.; had geordo	[0.0007494880701415241, 0.9982807636260986, 0.0009664397803135216, 3.3273411190748448e-06]
IN	IN	NL-HaNA_1.04.02_8820_0072.jpg	Van Cormandel onder 24: November 1702:; Aen den He	[0.0006500694435089827, 0.9983468055725098, 0.0010000088950619102, 3.0433000119955977e-06]
IN	IN	NL-HaNA_1.04.02_8820_0073.jpg	Van Cormandel onder 24: November 1702.; gedoente d	[0.0006400021375156939, 0.9983522891998291, 0.0010047731921076775, 3.0109290491964202e-06]
IN	IN	NL-HaNA_1.04.02_8820_0074.jpg	Van Cormandel onder 24: November 1702.; soo sal de	[0.00064515660051

15batch [00:16,  1.22batch/s]

BEGIN	BEGIN	NL-HaNA_1.04.02_2682_0249.jpg	Met het ondergenoemde schip; vertrekken over China	[0.9980717897415161, 0.00100780522916466, 0.0007985559059306979, 0.00012173243158031255]


16batch [00:23,  2.78s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_3095_0015.jpg	Register der Papieren; werdende versonden per het 	[0.9996663331985474, 0.00024714358733035624, 5.76371603528969e-05, 2.8874936106149107e-05]
IN	IN	NL-HaNA_1.04.02_3095_0016.jpg	4.; orig: in genaagt, a:o p„o; d'Edele Groot Agtba	[0.001223199418745935, 0.997593104839325, 0.0011785266688093543, 5.070771749160485e-06]
IN	IN	NL-HaNA_1.04.02_3095_0017.jpg	N:o 7. Copia Generale Resolutien des Casteels; Bat	[0.0007239365368150175, 0.9982640147209167, 0.0010087478440254927, 3.3508467822684906e-06]
IN	IN	NL-HaNA_1.04.02_3095_0018.jpg	Commissien, Memorien,; Jnstructien en z:, welke; v	[0.0006688731373287737, 0.9983238577842712, 0.0010040155611932278, 3.1686290640209336e-06]
IN	IN	NL-HaNA_1.04.02_3095_0019.jpg	No 14. Thien. Gesloten Pacquetten, houdende; de ad	[0.0006571879494003952, 0.9983505010604858, 0.0009891919326037169, 3.1126539852266433e-06]
IN	IN	NL-HaNA_1.04.02_3095_0020.jpg	Commissien, Memorien; Jnstructien en z:, weg; van 	[0.00065413751872256

17batch [00:24,  2.23s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_8260_0061.jpg	S morgens te agt uuren nog geen bevoeging in het B	[0.9996588230133057, 0.0002552396326791495, 5.6450662668794394e-05, 2.949877489299979e-05]
IN	IN	NL-HaNA_1.04.02_8260_0062.jpg	insgelijks een bentings op te werepan, waarop ik h	[0.0012324214912950993, 0.9976182579994202, 0.0011441261740401387, 5.056373993284069e-06]
IN	IN	NL-HaNA_1.04.02_8260_0063.jpg	geschied zynde, goa zig 's morgens te negen uuren 	[0.0007091356092132628, 0.9982755184173584, 0.0010121166706085205, 3.2632633519824594e-06]
IN	IN	NL-HaNA_1.04.02_8260_0064.jpg	verlies van onsekant Heer op mij wederom met den p	[0.0006529196980409324, 0.9983458518981934, 0.0009981091134250164, 3.0607200187660055e-06]
IN	IN	NL-HaNA_1.04.02_8260_0065.jpg	Ik gaf dierhalven den provisconelen vandrig sor sc	[0.0006436578114517033, 0.998359739780426, 0.0009936420246958733, 3.0210069326130906e-06]
IN	IN	NL-HaNA_1.04.02_8260_0066.jpg	van goa lag, zullende zyn volk zuyd wertwaarts vuu	[0.0006423511076718

18batch [00:25,  1.87s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_3248_0877.jpg	Op Huijden den 5: October a„o 1761: voor mij Wolfe	[0.9996320009231567, 0.0002867208095267415, 4.938192796544172e-05, 3.196508623659611e-05]
IN	IN	NL-HaNA_1.04.02_3248_0878.jpg	op die wijze gedurende hun verblijf aldaar, en ond	[0.001041713054291904, 0.9980586171150208, 0.0008950007613748312, 4.679641733673634e-06]
IN	IN	NL-HaNA_1.04.02_3248_0879.jpg	niet krijgende ! de Xullas aandoen, en overweldige	[0.0006033992394804955, 0.9986262321472168, 0.0007672395440749824, 3.0666906241094694e-06]
IN	IN	NL-HaNA_1.04.02_3248_0880.jpg	van Christoffel dias, en Adriaan Rijkschroef clerc	[0.0005511748022399843, 0.998690664768219, 0.0007553162868134677, 2.8560384635056835e-06]
IN	IN	NL-HaNA_1.04.02_3248_0881.jpg	Heden den 8:e Maart 1762: Compareerde voor mij Ioh	[0.0005697144661098719, 0.9987142086029053, 0.0007132465834729373, 2.879525027310592e-06]
IN	IN	NL-HaNA_1.04.02_3248_0882.jpg	dog Een van derzelver prauwen bleeft op de droogte	[0.0005534894298762083

21batch [00:26,  1.09batch/s]

OUT	OUT	NL-HaNA_1.04.02_1547_0360.jpg		[4.6434310206677765e-05, 2.5492503482382745e-05, 1.813627386582084e-05, 0.9999098777770996]
OUT	OUT	NL-HaNA_1.04.02_1547_0361.jpg		[1.5475776308448985e-05, 8.291252925118897e-06, 6.33479203315801e-06, 0.9999698400497437]
OUT	OUT	NL-HaNA_1.04.02_1547_0362.jpg		[2.7481732104206458e-05, 1.5985984646249563e-05, 1.1455773346824571e-05, 0.9999450445175171]
BEGIN	BEGIN	NL-HaNA_1.04.02_1547_0363.jpg	Jnstructie voor den onder„; Sonsbeek, vertreckende	[0.9995558857917786, 0.00018646824173629284, 3.85285857191775e-05, 0.00021914199169259518]
IN	IN	NL-HaNA_1.04.02_1547_0364.jpg	op te wakkeren sijn, soo meede te Ervaaren wat; Ef	[0.002504652366042137, 0.9966664910316467, 0.0007734567625448108, 5.537641845876351e-05]
IN	IN	NL-HaNA_1.04.02_1547_0365.jpg	Van ons gering vermoogen, voort te setten onder an	[0.0008247647783719003, 0.9984316229820251, 0.0007315014954656363, 1.215212614624761e-05]
IN	IN	NL-HaNA_1.04.02_1547_0366.jpg	aanslaagen der picaten en roovers, 

23batch [00:29,  1.13s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_1083_0013.jpg	adn. 6=en augustij Anoo 1624; Octe der kerckelijck	[0.999489426612854, 0.00037690700264647603, 8.722964412299916e-05, 4.6380584535654634e-05]
IN	IN	NL-HaNA_1.04.02_1083_0014.jpg		[0.006468998268246651, 0.9929623007774353, 0.0003682940441649407, 0.0002003989793593064]
IN	IN	NL-HaNA_1.04.02_1083_0015.jpg	Int Jaer des Heeren onses Salichmaeckers Jesu Chri	[0.002301795408129692, 0.996840238571167, 0.0008491344633512199, 8.752758731134236e-06]
IN	IN	NL-HaNA_1.04.02_1083_0016.jpg	Kercken dienaers soo te lande als opde; Rhede alhi	[0.0005745316157117486, 0.9986787438392639, 0.0007434916333295405, 3.1757979286339832e-06]
IN	IN	NL-HaNA_1.04.02_1083_0017.jpg	Verlost moeten werden midtsgaders offf oock eenige	[0.0005185448681004345, 0.9987021684646606, 0.0007762503810226917, 2.94998312710959e-06]
IN	IN	NL-HaNA_1.04.02_1083_0018.jpg	Alsoo de Generale Comp. e. dageluckx boven haer ve	[0.0005135959945619106, 0.9986936450004578, 0.0007897705072537065, 2.92677

24batch [00:30,  1.29s/batch]

BEGIN	BEGIN	NL-HaNA_1.04.02_8696_0051.jpg	Van Siam onder dato 20: april 1737; Dag-register, 	[0.999670147895813, 0.0002444376586936414, 5.7068929891102016e-05, 2.8394919354468584e-05]
IN	IN	NL-HaNA_1.04.02_8696_0052.jpg	Van Siam onder dato 20:' april 1731; moesten verbl	[0.0013370545348152518, 0.9975607395172119, 0.0010970152216032147, 5.258822511677863e-06]
IN	IN	NL-HaNA_1.04.02_8696_0053.jpg	Van Siam onder dato 20: april 1737; zijn aen gesig	[0.0007060010102577507, 0.9982632994651794, 0.0010274903615936637, 3.2366833693231456e-06]
IN	IN	NL-HaNA_1.04.02_8696_0054.jpg	Siam onder dato 2 april 1737; den 8:e _=o alvroeg 	[0.0006481486489064991, 0.9983488321304321, 0.0010000128531828523, 3.0164023883116897e-06]
IN	IN	NL-HaNA_1.04.02_8696_0055.jpg	Van Siam onder dato: 20: april 1737; ons aengewees	[0.0006393356015905738, 0.9983567595481873, 0.0010009024990722537, 3.0001624509168323e-06]
IN	IN	NL-HaNA_1.04.02_8696_0056.jpg	Van Siam onder dato: 20: april 1737; Maart; die ge	[0.000636874872725




In [34]:
writer = csv.DictWriter(
    sys.stdout,
    fieldnames=["Metric"] + [label.name for label in Label],
    delimiter="\t",
)
writer.writeheader()

for metric in (precision, recall, f1_score):
    scores = {
        label.name: f"{score:.4f}"
        for label, score in zip(Label, metric.compute().tolist())
    }
    writer.writerow({"Metric": metric.__class__.__name__} | scores)

print(f"Accuracy ({accuracy.average} average):\t{accuracy.compute().item():.4f}")

Metric	BEGIN	IN	END	OUT
MulticlassPrecision	0.8400	0.9968	0.8571	0.7500
MulticlassRecall	1.0000	0.9715	0.9474	1.0000
MulticlassF1Score	0.9130	0.9840	0.9000	0.8571
Accuracy (micro average):	0.9724
