In [None]:
# Clone the Donut repository and the ICDAR-2019 SROIE dataset repository.
# This notebook fine-tunes the Donut model on the SROIE dataset.
# It is adapted from the original Donut script and is meant to run on Google Colab.
!git clone https://github.com/clovaai/donut.git
!git clone https://github.com/zzzDavid/ICDAR-2019-SROIE

Cloning into 'donut'...
remote: Enumerating objects: 268, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 268 (delta 0), reused 3 (delta 0), pack-reused 264[K
Receiving objects: 100% (268/268), 62.76 MiB | 35.66 MiB/s, done.
Resolving deltas: 100% (123/123), done.


In [None]:
# Set up the environment.
# Install Donut from the cloned checkout first; this also resolves Donut's own dependencies.
!cd donut && pip install .
# Pin transformers AFTER the Donut install so this exact version replaces whatever the step above resolved.
!pip install transformers==4.25.1
# NOTE(review): pytorch_lightning is not pinned, so the installed version depends on when this cell is run — consider pinning it.
!pip install pytorch_lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing /content/donut
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers>=4.11.3
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets[vision]
  Downloading datasets-2.10.1-py3-none-any.whl (469 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.0/469.0 KB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m 

In [None]:
import os
import json
import shutil
from tqdm.notebook import tqdm
# Build the validation split: one metadata.jsonl record per annotation file,
# plus a copy of the matching image placed next to the metadata file.
key_dir = "/content/ICDAR-2019-SROIE/data/key/"
img_dir = "/content/ICDAR-2019-SROIE/data/img/"
out_dir = "./sroie_donut/validation/"

# open(..., 'w') does not create missing parent directories, so make sure the
# target folder exists before anything is written into it.
os.makedirs(out_dir, exist_ok=True)

lines = []
images = []
# NOTE(review): os.listdir() returns entries in arbitrary order, so which 100
# files land in [500:600] depends on the filesystem listing. It is deliberately
# left unsorted here so that any other split sliced from the same listing stays
# disjoint from this one — confirm how the other splits are built.
for ann in tqdm(os.listdir(key_dir)[500:600]):
    if ann == ".ipynb_checkpoints":
        # Not an annotation file: skip it entirely so that no stale `data`
        # and no bogus image name are recorded for it.
        continue
    with open(key_dir + ann) as f:
        data = json.load(f)
    # The image shares the annotation's base name, with a .jpg extension.
    images.append(os.path.splitext(ann)[0] + ".jpg")
    lines.append({"gt_parse": data})

with open(out_dir + "metadata.jsonl", 'w') as f:
    for file_name, gt_parse in zip(images, lines):
        # "ground_truth" holds the annotation as a JSON-encoded string, so the
        # record is serialized twice on purpose.
        line = {"file_name": file_name, "ground_truth": json.dumps(gt_parse)}
        f.write(json.dumps(line) + "\n")
        shutil.copyfile(img_dir + file_name, out_dir + file_name)

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
# Make a copy of the template config file (train_cord.yaml) to adapt for this dataset.
# NOTE: the copy is spelled "train_sorie" (not "sroie"); later cells refer to this exact spelling.
!cp /content/donut/config/train_cord.yaml /content/donut/config/train_sorie.yaml

In [None]:
# Fine-tune the Donut model on the SROIE dataset using the modified config file.
# The settings are in train_sorie.yaml (copied from the train_cord.yaml template); edit it for SROIE before running this cell.
!cd donut && python train.py --config /content/donut/config/train_sorie.yaml

In [None]:
# Show the GPU attached to this runtime and its current memory usage.
!nvidia-smi

Mon Mar 13 13:21:35 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   66C    P0    28W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Load the fine-tuned checkpoint and run it on a single test image.
# (No gradio UI is built here; the cell simply displays the parsed prediction.)
from donut import DonutModel
from PIL import Image
import torch

model = DonutModel.from_pretrained("/content/donut/result/train_sorie/20230313_111731")
if torch.cuda.is_available():
    # GPU available: cast the whole model to half precision and move it to the GPU.
    model.half()
    device = torch.device("cuda")
    model.to(device)
else:
    # CPU only: just the encoder is cast to bfloat16.
    model.encoder.to(torch.bfloat16)
model.eval()

image = Image.open("/content/IMG_3517.jpg").convert("RGB")
# Inference needs no gradients; no_grad() avoids autograd bookkeeping during
# the forward pass without changing the result.
# NOTE(review): the prompt is presumably the task start token derived from the
# dataset folder name ("sroie_donut") — confirm against the training config.
with torch.no_grad():
    output = model.inference(image=image, prompt="<s_sroie_donut>")
output

{'predictions': [{'company': 'PHO 16',
   'date': '01/12/2018',
   'address': '20 RULE SCHEFFER',
   'total': '51,00'}]}

In [None]:
# Mount Google Drive so the fine-tuned model can be saved there.
from google.colab import drive
drive.mount('/content/drive')
# Recursively copy the training result directory into the top level of MyDrive.
!cp -r /content/donut/result/train_sorie/20230313_111731 /content/drive/MyDrive

Mounted at /content/drive
