In [2]:
import os
import cv2
import numpy as np
import scipy.ndimage
from skimage.filters import (threshold_niblack, threshold_sauvola)
from skimage.io import imsave
from skimage import (img_as_ubyte, exposure)
from tqdm.notebook import tqdm

## Binary with TRESH_BINARY

In [12]:
def Bin(dossier_input:str, dossier_output):
    if not os.path.isdir(dossier_output):
        os.mkdir(dossier_output) 
    files = os.listdir(dossier_input)
   
    for f in tqdm(files): 
        if f.endswith('.jpg') :
            img = os.path.join(dossier_input, f)
            image = cv2.imread(img) 
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            retval, th = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
            path_save = os.path.join(dossier_output, f)
            cv2.imwrite(path_save, th)

In [13]:
Bin(dossier_input = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/HTR/Data/illustrations_pages", dossier_output = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/HTR/Data/illustrations_pages_bin")

  0%|          | 0/1048 [00:00<?, ?it/s]

# Binary with Otsu

It automatically finds the global threshold. 
_Cf._ Nobuyuki Otsu (1979). "A threshold selection method from gray-level histograms". IEEE Trans. Sys. Man. Cyber.  9 (1): 62–66. [DOI: 10.1109/TSMC.1979.4310076](https://ieeexplore.ieee.org/document/4310076).

In [22]:
def OTSU(dossier_input:str, dossier_output):
    if not os.path.isdir(dossier_output):
        os.mkdir(dossier_output) 
    files = os.listdir(dossier_input)
   
    for f in tqdm(files):
        if f.endswith('.jpg') :
            img = os.path.join(dossier_input, f)
            image = cv2.imread(img) 
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            retval, th = cv2.threshold(gray, 0, 255,cv2.THRESH_OTSU)
            path_save = os.path.join(dossier_output, f)
            cv2.imwrite(path_save, th)

In [23]:
OTSU(dossier_input = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/HTR/Data/illustrations_pages", dossier_output = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/HTR/Data/illustrations_pages_bin")

  0%|          | 0/1048 [00:00<?, ?it/s]

# Binary with Niblack

The Niblack algorithm determines a threshold value for each pixel by dragging a window across the image. 
_Cf_. : W. Niblack, An introduction to Digital Image Processing, Prentice-Hall, 1986. 

In [17]:
def Niblack(dossier_input:str, dossier_output):
    if not os.path.isdir(dossier_output):
        os.mkdir(dossier_output) 
    files = os.listdir(dossier_input)
   
    for f in tqdm(files): 
        img = os.path.join(dossier_input, f)
        image = cv2.imread(img) 
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        thresh_niblack = threshold_niblack(gray, window_size=219, k=0.9)
        niblack = gray > thresh_niblack
        binary_niblack = img_as_ubyte(niblack)
        path_save = os.path.join(dossier_output, f)
        imsave(path_save, binary_niblack)

In [18]:
Niblack(dossier_input = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/TEST_bin", dossier_output = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/TEST_bin/Niblack")

  0%|          | 0/5 [00:00<?, ?it/s]

error: OpenCV(4.5.5) /Users/xperience/actions-runner/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


# Binary with Sauvola

Modification of the Niblack technique Niblack algorithm with calculation of the maximum standard deviation of a greyscale image. 
_Cf_. : J. Sauvola and M. Pietikainen, “Adaptive document image binarization,” Pattern Recognition 33(2), pp. 225-236, 2000. DOI:10.1016/S0031-3203(99)00055-2 [DOI:10.1109/83.366472](https://www.sciencedirect.com/science/article/abs/pii/S0031320399000552?via%3Dihub).

In [20]:
def Sauvola(dossier_input:str, dossier_output):
    if not os.path.isdir(dossier_output):
        os.mkdir(dossier_output) 
    files = os.listdir(dossier_input)
   
    for f in tqdm(files): 
        img = os.path.join(dossier_input, f)
        image = cv2.imread(img) 
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        thresh_sauvola = threshold_sauvola(gray, window_size=219)
        sauvola = gray > thresh_sauvola
        binary_sauvola = img_as_ubyte(sauvola)
        path_save = os.path.join(dossier_output, f)
        imsave(path_save, binary_sauvola)

In [21]:
Sauvola(dossier_input = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/TEST_bin", dossier_output = "/Users/pauline/Documents/Kunsthistorisches-UZH_Archivdatenbank/TEST_bin/Sauvola")

  0%|          | 0/6 [00:00<?, ?it/s]

error: OpenCV(4.5.5) /Users/xperience/actions-runner/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


# Deblur 

With a simple sharpening kernels using cv2.filter2D.

In [11]:
def Deblur(dossier_input:str, dossier_output):
    if not os.path.isdir(dossier_output):
        os.mkdir(dossier_output) 
    files = os.listdir(dossier_input)
   
    for f in tqdm(files): 
        img = os.path.join(dossier_input, f)
        image = cv2.imread(img) 
        sharpening = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
        deblur = cv2.filter2D(image, -1, sharpening)
        path_save = os.path.join(dossier_output, f)
        cv2.imwrite(path_save, deblur)

In [12]:
Deblur(dossier_input = "../Varios-GroundTruth-Test-Alto/BGR", dossier_output = "../Varios-GroundTruth-Test-Alto/GroundTruth-deblur")

  0%|          | 0/198 [00:00<?, ?it/s]

# Gamma correction
Brightness correction through gamma exposure transformation. 
Gamma1 to brighten the image; Gamma2 to darken the image.

In [13]:
def Gamma1(dossier_input:str, dossier_output):
    if not os.path.isdir(dossier_output):
        os.mkdir(dossier_output) 
    files = os.listdir(dossier_input)
   
    for f in tqdm(files): 
        img = os.path.join(dossier_input, f)
        image = cv2.imread(img) 
        image_gamma = exposure.adjust_gamma(image, gamma = 0.9)
        path_save = os.path.join(dossier_output, f)
        cv2.imwrite(path_save, image_gamma)

In [14]:
Gamma1(dossier_input = "../Varios-GroundTruth-Test-Alto/BGR", dossier_output = "../Varios-GroundTruth-Test-Alto/GroundTruth-gamma1")

  0%|          | 0/198 [00:00<?, ?it/s]

In [15]:
def Gamma2(dossier_input:str, dossier_output):
    if not os.path.isdir(dossier_output):
        os.mkdir(dossier_output) 
    files = os.listdir(dossier_input)
   
    for f in tqdm(files): 
        img = os.path.join(dossier_input, f)
        image = cv2.imread(img) 
        image_gamma = exposure.adjust_gamma(image, gamma = 2.25)
        path_save = os.path.join(dossier_output, f)
        cv2.imwrite(path_save, image_gamma)

In [16]:
Gamma2(dossier_input = "../Varios-GroundTruth-Test-Alto/BGR", dossier_output = "../Varios-GroundTruth-Test-Alto/GroundTruth-gamma2")

  0%|          | 0/198 [00:00<?, ?it/s]

# Gamma2 + Niblack

In [3]:
Niblack(dossier_input = "../Varios-GroundTruth-Test-Alto/GroundTruth-gamma2", dossier_output = "../Varios-GroundTruth-Test-Alto/GroundTruth-Gamma2+Niblack")

  0%|          | 0/198 [00:00<?, ?it/s]

# Gamma2 + Sauvola


In [3]:
Sauvola(dossier_input = "../Varios-GroundTruth-Test-Alto/GroundTruth-gamma2", dossier_output = "../Varios-GroundTruth-Test-Alto/GroundTruth-Gamma2+Sauvola")

  0%|          | 0/198 [00:00<?, ?it/s]

# Gamma2 + OTSU

In [5]:
OTSU(dossier_input = "../Varios-GroundTruth-Test-Alto/GroundTruth-gamma2", dossier_output = "../Varios-GroundTruth-Test-Alto/GroundTruth-Gamma2+OTSU")

  0%|          | 0/198 [00:00<?, ?it/s]

# Deblur + Niblack

In [4]:
Niblack(dossier_input = "../Varios-GroundTruth-Test-Alto/GroundTruth-deblur", dossier_output = "../Varios-GroundTruth-Test-Alto/GroundTruth-Deblur+Niblack")

  0%|          | 0/198 [00:00<?, ?it/s]