# Inference

This notebook loads the existing model parameters, takes a checkbox image as input, and predicts whether the checkbox is checked, unchecked, or not a checkbox at all.

## Recreating the model and loading the parameters

In [1]:
import torch
from torch import nn
from torchvision import transforms
from PIL import Image, ImageOps
import cv2
import numpy as np
from collections import Counter


class CheckboxClassifier(nn.Module):
    """Small CNN: two conv + max-pool stages followed by a two-layer MLP head.

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        # Both convolutions use a 3x3 kernel with padding 1 and stride 1,
        # so they keep the spatial size; only the pooling layer shrinks it.
        conv_opts = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(3, 32, **conv_opts)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, **conv_opts)
        # 64 channels * 16 * 16: the pooling layer is applied twice, so this
        # size corresponds to a 64x64 input (64 -> 32 -> 16).
        self.fc1 = nn.Linear(64 * 16 * 16, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for a batch ``x``."""
        relu = nn.functional.relu

        features = self.pool(relu(self.conv1(x)))
        features = self.pool(relu(self.conv2(features)))

        # Collapse everything except the batch dimension.
        flat = torch.flatten(features, start_dim=1)

        hidden = self.dropout(relu(self.fc1(flat)))
        return self.fc2(hidden)


# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CheckboxClassifier().to(device)

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
# map_location places the loaded tensors on `device`, which lets a checkpoint
# saved on a GPU machine load on a CPU-only one.
model.load_state_dict(
    torch.load("checkbox_nn_parameters.pth", map_location=device, weights_only=True)
)

# Switch to evaluation mode so dropout is disabled during inference. As the
# last expression of the cell this also displays the model architecture.
model.eval()


CheckboxClassifier(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=16384, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=3, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

## Recreating the transforms

In [2]:
class CustomPad:
    """Pad an image equally on all four sides using its dominant edge colour.

    Args:
        padding: Number of pixels added to each side of the image.
    """

    def __init__(self, padding):
        self.padding = padding

    def _find_border_color(self, image):
        """Return the most frequent pixel value on the image's outer edge.

        The pixel sequence from ``image.getdata()`` is indexed as a flat,
        row-by-row list. Corner pixels are counted twice because they belong
        to both a row and a column.
        """
        flat = list(image.getdata())
        width, height = image.size

        # Tally in a fixed order (top row, bottom row, left column, right
        # column) so ties resolve to the colour that was seen first.
        tally = Counter()
        tally.update(flat[:width])
        tally.update(flat[-width:])
        tally.update(flat[row * width] for row in range(height))
        tally.update(flat[row * width - 1] for row in range(1, height + 1))

        winner, _count = tally.most_common(1)[0]
        return winner

    def __call__(self, img):
        """Return a new image grown by ``self.padding`` pixels on every side."""
        fill_color = self._find_border_color(img)
        border = (self.padding,) * 4  # (left, top, right, bottom)
        return ImageOps.expand(img, border=border, fill=fill_color)


class LargestContourCrop:
    """Crop an image to the padded bounding box of its largest edge contour.

    Args:
        padding: Margin in pixels kept around the detected bounding box.
        sigma: Standard deviation of the Gaussian blur applied before edge
            detection.
    """

    def __init__(self, padding=10, sigma=0.5):
        self.padding = padding
        self.sigma = sigma

    def __call__(self, image):
        """Return the cropped image, or ``image`` itself if no contour is found."""
        pixels = np.array(image)

        # Grayscale -> blur -> Canny edges -> outer contours only.
        # A (0, 0) kernel size lets OpenCV derive the kernel from sigma.
        grayscale = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
        smoothed = cv2.GaussianBlur(grayscale, (0, 0), self.sigma)
        edge_map = cv2.Canny(smoothed, threshold1=30, threshold2=100)
        contours, _hierarchy = cv2.findContours(
            edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        if not contours:
            return image

        biggest = max(contours, key=cv2.contourArea)
        box_x, box_y, box_w, box_h = cv2.boundingRect(biggest)

        margin = self.padding
        left = max(box_x - margin, 0)
        top = max(box_y - margin, 0)
        # Width and height are measured from the clamped origin, so when the
        # box sits closer than `margin` to the left/top edge the crop extends
        # further on the opposite side instead of shrinking.
        crop_w = min(box_w + 2 * margin, pixels.shape[1] - left)
        crop_h = min(box_h + 2 * margin, pixels.shape[0] - top)

        region = pixels[top : top + crop_h, left : left + crop_w]
        return Image.fromarray(region)


## Inference script

In [3]:
def classify_image(image_path, model, device):
    """Classify a single image as a checked / unchecked checkbox or "other".

    Args:
        image_path: Path (or file object) passed to ``PIL.Image.open``.
        model: Callable network that returns one row of three logits for a
            batch of one image. It is used as-is; call ``model.eval()``
            beforehand if it contains dropout or similar layers.
        device: Torch device the input tensor is moved to before inference.

    Returns:
        dict with two keys:
            ``"probs"``: mapping of class name to softmax probability.
            ``"predicted_class"``: name of the most probable class.
    """
    # NOTE(review): the label order must match the class indices used when the
    # model was trained -- confirm against the training code.
    checkbox_classes = ["checked", "unchecked", "other"]

    transform = transforms.Compose(
        [
            transforms.Lambda(lambda img: img.convert("RGB")),
            CustomPad(padding=20),
            LargestContourCrop(padding=10, sigma=0.5),
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
        ]
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle once the pipeline has produced the tensor.
    with Image.open(image_path) as source_image:
        batch = transform(source_image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        probabilities = torch.nn.functional.softmax(logits, dim=1)

    predicted_index = probabilities.argmax(dim=1).item()
    probs = probabilities.squeeze().tolist()

    return {
        "probs": dict(zip(checkbox_classes, probs)),
        "predicted_class": checkbox_classes[predicted_index],
    }


## Run the script!

Pass the path to an image as input and get back the per-class probabilities and the predicted class.

In [4]:
# Classify the sample image; the returned dict is displayed as the cell output.
classify_image(image_path="checkbox.png", model=model, device=device)

{'probs': {'checked': 0.3265308737754822,
  'unchecked': 0.6046016812324524,
  'other': 0.0688675045967102},
 'predicted_class': 'unchecked'}