basics04.ipynb
# Processing a folder of images

### Import relevant libraries

In [27]:
import numpy as np
import cv2
from skimage import io, data
import napari
import matplotlib.pyplot as plt
import copy
from pathlib import Path


### Configuration

In [28]:
SHOW_FULLSIZE = False
DELAY = 2000                # in milliseconds
INPUT_PATH = 'data/'
OUTPUT_PATH = 'output/'
OUTPUT_THRESHOLD = 88       # Threshold for final output
CROPPING_THRESHOLD_UPPER = 255      # Threshold used to select the crop outline
CROPPING_THRESHOLD_LOWER = 250

### Define helper functions
* **makethumb()** is a quick rescaling function, reducing an image to 10% linear size
* **showbriefly()** shows an image for DELAY milliseconds or until a key is pressed

In [29]:
def makethumb(i):
    return cv2.resize(i,None,fx=0.1,fy=0.1,interpolation=cv2.INTER_CUBIC)

def showbriefly(i):
    WindowName = 'Image Viewer'
    view_window = cv2.namedWindow(WindowName, cv2.WINDOW_NORMAL)
    w = cv2.imshow(WindowName,i)
    # force focus
    cv2.setWindowProperty(WindowName,cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
    cv2.setWindowProperty(WindowName,cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_NORMAL)
    # wait 10 seconds
    cv2.waitKey(DELAY)
    cv2.destroyAllWindows()

def showinline(bgrimage, title):
    mpimg = cv2.cvtColor(bgrimage, cv2.COLOR_BGR2RGB)
    plt.imshow(mpimg)
    plt.title(title)
    plt.show()

## Get the list of files
We're looking at the /data folder, and as yet no filtering for just images


In [30]:
from os import listdir
from os.path import isfile, join
os.path.splitext(os.path.basename("/path/to/file.txt"))[0]
onlyfiles = [f for f in listdir(INPUT_PATH) if isfile(join(INPUT_PATH, f))]
print(onlyfiles)

NameError: name 'os' is not defined

## Now loop through each of the found files

In [31]:
for file in onlyfiles:
    filename= file.stem
    print(file)

NameError: name 'onlyfiles' is not defined

### Read image using CV2 functions


In [32]:
    img = cv2.imread(filename=INPUT_PATH+file)
    #showinline(img,'Unmodified image')
    showbriefly(makethumb(img))

NameError: name 'file' is not defined

In [33]:
grey = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

NameError: name 'img' is not defined

### Apply thresholds

In [34]:
    grey = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    ret,thresh = cv2.threshold(grey,OUTPUT_THRESHOLD,255,cv2.THRESH_OTSU)
    showbriefly(thresh)
    
    

NameError: name 'img' is not defined

#### Find the Contours
* Redo the threshold with more extreme settings
* Find the contours of the objects in the image

From: https://stackoverflow.com/questions/13538748/crop-black-edges-with-opencv

In [35]:
    _, cropthresh = cv2.threshold(thresh,CROPPING_THRESHOLD_LOWER,CROPPING_THRESHOLD_UPPER,cv2.THRESH_BINARY)
    contours,hierarchy = cv2.findContours(cropthresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
    showbriefly(cropthresh)

NameError: name 'thresh' is not defined

In [36]:
    maxc = []
    maxa = 0
    for c in contours:
        a = cv2.contourArea(c)
        if a>maxa:
            maxc=c
            maxa=a
            print('new maximum: '+str(maxa))
    cont = maxc

NameError: name 'contours' is not defined

In [None]:
    x,y,w,h = cv2.boundingRect(cont)

    print('x,y,x`,y` = ',x,y,x+w,y+h)
    print('w,h = ',w,h)

#### Show the bounding box in green for confirmation
* Note that the **rectangle()** function modifies the image it references
 * Hence the need to do a **deepcopy()**

In [37]:
    boundbox = copy.deepcopy(img)
    cv2.rectangle(boundbox,(x,y),(x+w,y+h),(0,255,0),20)
    #showinline(boundbox, 'Bounding box')
    showbriefly(boundbox)

NameError: name 'img' is not defined

Crop

In [38]:
    crop = img[y:y+h,x:x+w]
    showbriefly(crop)
    #showinline(crop, 'Cropped image')

NameError: name 'img' is not defined

In [39]:
    cropocr = thresh[y:y+h,x:x+w]
    showbriefly(cropocr)
    #showinline(cropocr,'OCR prepared image')

NameError: name 'thresh' is not defined

### Now write the images to the output folder

In [40]:
    cv2.destroyAllWindows()
    print('Writing cropped colour image')
    _ = cv2.imwrite(img=crop, filename=OUTPUT_PATH + filename + '-cropped.jpg')
    print('Writing cropped thresholded image for OCR processing')
    _ = cv2.imwrite(img=cropocr, filename=OUTPUT_PATH + filename + '-thresholded.jpg')

Writing cropped colour image


NameError: name 'crop' is not defined