## Importing Modules

In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

%load_ext autoreload
%autoreload 2

In [2]:
from boxdetect import config
from boxdetect.img_proc import draw_rects, get_image
from boxdetect.pipelines import get_boxes
import matplotlib.pyplot as plt
from pdb import set_trace
from tqdm import tqdm
import os
import cv2

In [3]:
import torch
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch import flatten

In [4]:
import glob
# from pdb import set_trace
# from collections import Counter
import random
import albumentations as A
import imagesize
from albumentations.pytorch import ToTensorV2

In [5]:
from torch.utils.data import Dataset
from sklearn.metrics import classification_report
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets import KMNIST
from torch.optim import Adam
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import argparse
import torch
import time

## Custom Funtions

In [44]:
cfg = config.PipelinesConfig()

# important to adjust these values to match the size of boxes on your image
cfg.width_range = (15,60)
cfg.height_range = (15, 60)

# the more scaling factors the more accurate the results but also it takes more time to processing
# too small scaling factor may cause false positives
# too big scaling factor will take a lot of processing time
cfg.scaling_factors = [1.0]

# w/h ratio range for boxes/rectangles filtering
cfg.wh_ratio_range = (0.5, 2.5)

# range of groups sizes to be returned
cfg.group_size_range = (1, 30)

# for this image we will use rectangles as a kernel for morphological transformations
cfg.morph_kernels_type = 'rectangles'  # 'lines'

# num of iterations when running dilation tranformation (to engance the image)
cfg.dilation_iterations = 0


# def get_CB_boundingboxes(file_path,cfg,out_dir=None):
    
#     try:
#         image = cv2.imread(file_path)


#         des_size = (1260, 1800)
#         ori_size = image.shape[1],image.shape[0]

#         ht_rat = des_size[1]/ori_size[1]
#         wd_rat = des_size[0]/ori_size[0]

#         resized_image = cv2.resize(image, des_size,interpolation = cv2.INTER_NEAREST)
#         rects, grouping_rects, pimage, output_image = get_boxes(
#             resized_image, cfg=cfg, plot=False)

#         rects[:,[0,2]] = rects[:,[0,2]]/wd_rat
#         rects[:,[1,3]] = rects[:,[1,3]]/ht_rat
        
#         if out_dir:
#             out_img = draw_rects(get_image(file_path), rects, thickness=2)
#             cv2.imwrite(f"{out_dir}/{img}", out_img)

#         rects[:,2] = rects[:,2]+rects[:,0]
#         rects[:,3] = rects[:,3]+rects[:,1]
#     except:
#         rects=np.array([])
#     return image, rects

# ## LENET: Model Architecture

class LeNet(Module):
    def __init__(self, numChannels, classes):
        # call the parent constructor
        super(LeNet, self).__init__()
        # initialize first set of CONV => RELU => POOL layers
        self.conv1 = Conv2d(in_channels=numChannels, out_channels=20,
            kernel_size=(5, 5))
        self.relu1 = ReLU()
        self.maxpool1 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # initialize second set of CONV => RELU => POOL layers
        self.conv2 = Conv2d(in_channels=20, out_channels=50,
            kernel_size=(5, 5))
        self.relu2 = ReLU()
        self.maxpool2 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # initialize first (and only) set of FC => RELU layers 64x450 and 800x500)
        self.fc1 = Linear(in_features=450, out_features=500)
        self.relu3 = ReLU()
        # initialize our softmax classifier
        self.fc2 = Linear(in_features=500, out_features=classes)
        self.logSoftmax = LogSoftmax(dim=1)

    def forward(self, x):
        # pass the input through our first set of CONV => RELU =>
        # POOL layers
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        # pass the output from the previous layer through the second
        # set of CONV => RELU => POOL layers
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        # flatten the output from the previous layer and pass it
        # through our only set of FC => RELU layers
        x = flatten(x, 1)
        x = self.fc1(x)
        x = self.relu3(x)
        # pass the output to our softmax classifier to get our output
        # predictions
        x = self.fc2(x)
        output = self.logSoftmax(x)
        # return the output predictions
        return output

## Create Custom Dataset

In [7]:
Data_dir = "./CheckboxClassificationData"

In [8]:
classes = []
image_paths = []

## collect image paths 
for sub_dir in glob.glob(Data_dir+'/*'):
    cls = sub_dir.split('/')[-1]
    for file in glob.glob(sub_dir+'/*'):
        classes.append(cls)
        image_paths.append(file)
        
        
## Random shuffle 
img_lab_list = list(zip(image_paths, classes))
random.shuffle(img_lab_list)

train_img_lab_list, valid_img_lab_list = img_lab_list[:int(0.8*len(img_lab_list))], img_lab_list[int(0.8*len(img_lab_list)):] 


# train_image_paths, train_classes = zip(*train_img_lab_list)
# valid_image_paths, valid_classes = zip(*valid_img_lab_list)



In [9]:
idx_to_class = {'not_a_checkbox':0, 'not_selected':1, 'selected':2}
class_to_idx = {y: x for x, y in idx_to_class.items()}

transforms = A.Compose(
    [
        A.Resize(25,25),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

class CustomDataset(Dataset):
    def __init__(self, imglab_ziplist, transform=False):
        self.image_paths,self.labels = zip(*imglab_ziplist)
        self.transform = transform
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_filepath = self.image_paths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        #label = image_filepath.split('/')[-2]
        label = idx_to_class[self.labels[idx]]
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image, label

In [11]:
train_dataset = CustomDataset(train_img_lab_list,transforms)
valid_dataset = CustomDataset(valid_img_lab_list,transforms)

## Model Tarining

### Model config

In [12]:
# define training hyperparameters
INIT_LR = 1e-3
BATCH_SIZE = 64
EPOCHS = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [13]:
trainDataLoader = DataLoader(train_dataset, shuffle=True,batch_size=BATCH_SIZE)
valDataLoader = DataLoader(valid_dataset, batch_size=BATCH_SIZE)

trainSteps = len(train_dataset) // BATCH_SIZE
valSteps = len(train_dataset) // BATCH_SIZE

In [14]:
model = LeNet(
    numChannels=3,
    classes=len(idx_to_class)).to(device)

# initialize our optimizer and loss function
opt = Adam(model.parameters(), lr=INIT_LR)
lossFn = nn.NLLLoss()

# initialize a dictionary to store training history
H = {
    "train_loss": [],
    "train_acc": [],
    "val_loss": [],
    "val_acc": []
}

# measure how long training is going to take
print("[INFO] training the network...")
startTime = time.time()

[INFO] training the network...


In [15]:
from tqdm import tqdm

In [16]:
#PyTorch: Training your first Convolutional Neural Network (CNN)
# loop over our epochs
for e in range(0, EPOCHS):
    
    # set the model in training mode
    model.train()
    
    # initialize the total training and validation loss
    totalTrainLoss = 0
    totalValLoss = 0
    
    # initialize the number of correct predictions in the training
    # and validation step
    trainCorrect = 0
    valCorrect = 0
    
    # loop over the training set
    for (x, y) in trainDataLoader:
        # send the input to the device
        (x, y) = (x.to(device), y.to(device))
        # perform a forward pass and calculate the training loss
        pred = model(x)
        loss = lossFn(pred, y)
        # zero out the gradients, perform the backpropagation step,
        # and update the weights
        opt.zero_grad()
        loss.backward()
        opt.step()
        # add the loss to the total training loss so far and
        # calculate the number of correct predictions
        totalTrainLoss += loss
        trainCorrect += (pred.argmax(1) == y).type(
            torch.float).sum().item()
        
    # switch off autograd for evaluation
    with torch.no_grad():
        # set the model in evaluation mode
        model.eval()
        # loop over the validation set
        for (x, y) in valDataLoader:
            # send the input to the device
            (x, y) = (x.to(device), y.to(device))
            # make the predictions and calculate the validation loss
            pred = model(x)
            totalValLoss += lossFn(pred, y)
            # calculate the number of correct predictions
            valCorrect += (pred.argmax(1) == y).type(
            torch.float).sum().item()
            
    # calculate the average training and validation loss
    avgTrainLoss = totalTrainLoss / trainSteps
    avgValLoss = totalValLoss / valSteps
    # calculate the training and validation accuracy
    trainCorrect = trainCorrect / len(trainDataLoader.dataset)
    valCorrect = valCorrect / len(valDataLoader.dataset)
    # update our training history
    H["train_loss"].append(avgTrainLoss.cpu().detach().numpy())
    H["train_acc"].append(trainCorrect)
    H["val_loss"].append(avgValLoss.cpu().detach().numpy())
    H["val_acc"].append(valCorrect)
    # print the model training and validation information
    print("[INFO] EPOCH: {}/{}".format(e + 1, EPOCHS))
    print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(
        avgTrainLoss, trainCorrect))
    print("Val loss: {:.6f}, Val accuracy: {:.4f}\n".format(
        avgValLoss, valCorrect))

[INFO] EPOCH: 1/10
Train loss: 0.206155, Train accuracy: 0.9380
Val loss: 0.027941, Val accuracy: 0.9730

[INFO] EPOCH: 2/10
Train loss: 0.072177, Train accuracy: 0.9839
Val loss: 0.013751, Val accuracy: 0.9890

[INFO] EPOCH: 3/10
Train loss: 0.049994, Train accuracy: 0.9882
Val loss: 0.014093, Val accuracy: 0.9890

[INFO] EPOCH: 4/10
Train loss: 0.041544, Train accuracy: 0.9891
Val loss: 0.010136, Val accuracy: 0.9920

[INFO] EPOCH: 5/10
Train loss: 0.037816, Train accuracy: 0.9911
Val loss: 0.012797, Val accuracy: 0.9877

[INFO] EPOCH: 6/10
Train loss: 0.036024, Train accuracy: 0.9903
Val loss: 0.015628, Val accuracy: 0.9847

[INFO] EPOCH: 7/10
Train loss: 0.031473, Train accuracy: 0.9920
Val loss: 0.015209, Val accuracy: 0.9883

[INFO] EPOCH: 8/10
Train loss: 0.031539, Train accuracy: 0.9923
Val loss: 0.010670, Val accuracy: 0.9914

[INFO] EPOCH: 9/10
Train loss: 0.030068, Train accuracy: 0.9920
Val loss: 0.009800, Val accuracy: 0.9908

[INFO] EPOCH: 10/10
Train loss: 0.039406, Trai

In [18]:
# finish measuring how long training took
endTime = time.time()
print("[INFO] total time taken to train the model: {:.2f}s".format(
    endTime - startTime))
# we can now evaluate the network on the test set
print("[INFO] evaluating network...")
# turn off autograd for testing evaluation
with torch.no_grad():
    # set the model in evaluation mode
    model.eval()
    
    # initialize a list to store our predictions
    preds = []
    # loop over the test set
    for (x, y) in valDataLoader:
        # send the input to the device
        x = x.to(device)
        # make the predictions and add them to the list
        pred = model(x)
        preds.extend(pred.argmax(axis=1).cpu().numpy())
# generate a classification report
print(classification_report(np.vectorize(idx_to_class.get)(np.array(valid_dataset.labels)),np.array(preds)))

[INFO] total time taken to train the model: 41.97s
[INFO] evaluating network...
              precision    recall  f1-score   support

           0       0.90      0.93      0.92        58
           1       0.99      1.00      0.99      1154
           2       1.00      0.97      0.98       417

    accuracy                           0.99      1629
   macro avg       0.96      0.97      0.96      1629
weighted avg       0.99      0.99      0.99      1629



In [26]:
torch.save(model.state_dict(), "CB_detection_weights.pth")

## Check Box detction

In [23]:
idx_to_class = {'not_a_checkbox':0, 'not_selected':1, 'selected':2}
class_to_idx = {y: x for x, y in idx_to_class.items()}

transforms = A.Compose(
    [
        A.Resize(25,25),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

device='cpu'
# model = LeNet(
#     numChannels=3,
#     classes=len(idx_to_class)).to(device)
# model.load_state_dict(torch.load("CB_detection_weights.pth"))
model = model.to(device) 
model.eval();

In [45]:
def get_cb_prediction(file_path, cnn_transforms,model):
    
    class_to_idx = {0: 'not_a_checkbox', 1: 'not_selected', 2: 'selected'}

    image,cb_boxes = get_CB_boundingboxes(file_path,cfg)
    
    cb_boxescounter = 0
    
    cb_items = {} 
    for bb in cb_boxes:
        crop_img=image[bb[1]:bb[3], bb[0]:bb[2]]
        crop_ten = cnn_transforms(image=crop_img)["image"]
        predict = model(crop_ten.unsqueeze_(0))
        
        pred_class = class_to_idx[predict.argmax(axis=1).item()]
        
        if pred_class != 'not_a_checkbox':
            cb_items[f"cb_{cb_boxescounter+1}"] = [pred_class, bb]
            cb_boxescounter+=1
    return cb_items

def get_CB_boundingboxes(file_path,cfg,out_dir=None):
    
    #try:

    image = cv2.imread(file_path)


    des_size = (1260, 1800)
    ori_size = image.shape[1],image.shape[0]

    ht_rat = des_size[1]/ori_size[1]
    wd_rat = des_size[0]/ori_size[0]

    resized_image = cv2.resize(image, des_size,interpolation = cv2.INTER_NEAREST)
    rects, grouping_rects, pimage, output_image = get_boxes(
        resized_image, cfg=cfg, plot=False)

    rects[:,[0,2]] = rects[:,[0,2]]/wd_rat
    rects[:,[1,3]] = rects[:,[1,3]]/ht_rat

    if out_dir:
        out_img = draw_rects(get_image(file_path), rects, thickness=2)
        cv2.imwrite(f"{out_dir}/{img}", out_img)

    rects[:,2] = rects[:,2]+rects[:,0]
    rects[:,3] = rects[:,3]+rects[:,1]
#     except:
#         rects=np.array([])
    return image, rects

### Sample 1

In [47]:

testpath = 'ACORD_139_2004_03_p1_80_brit_page1.png'

cb_items = get_cb_prediction(testpath, transforms, model)

image = Image.open(testpath)
image = image.convert("RGB")

draw = ImageDraw.Draw(image, "RGBA")
    
for k,v in cb_items.items():
    draw.rectangle(list(v[1]), outline='green' if v[0]=='selected' else 'red', width=2)
image.save(f"Results/{testpath}")

### Sample 2

In [26]:
testpath = 'ACORD_140_2016_03_p1_127_noi_page1.png'

In [28]:
cb_items = get_cb_prediction(testpath, transforms, model)

In [32]:
from PIL import Image, ImageDraw, ImageFont

In [39]:
image = Image.open(testpath)
image = image.convert("RGB")

draw = ImageDraw.Draw(image, "RGBA")
    
for k,v in cb_items.items():
    draw.rectangle(list(v[1]), outline='green' if v[0]=='selected' else 'red', width=2)
image.save(f"Results/{testpath}")

### Sample 3

In [41]:

testpath = 'Acord_823_2011_10_p1_479_noi_brit_page1.png'

cb_items = get_cb_prediction(testpath, transforms, model)

image = Image.open(testpath)
image = image.convert("RGB")

draw = ImageDraw.Draw(image, "RGBA")
    
for k,v in cb_items.items():
    draw.rectangle(list(v[1]), outline='green' if v[0]=='selected' else 'red', width=2)
image.save(f"Results/{testpath}")

### Sample 4

In [42]:
testpath = 'Acord_129_p1_291_neg02rot_page1.png'

cb_items = get_cb_prediction(testpath, transforms, model)

image = Image.open(testpath)
image = image.convert("RGB")

draw = ImageDraw.Draw(image, "RGBA")
    
for k,v in cb_items.items():
    draw.rectangle(list(v[1]), outline='green' if v[0]=='selected' else 'red', width=2)
image.save(f"Results/{testpath}")