In [0]:
import torch
from torchvision import transforms
import torch.nn.functional as F
import torch.nn as nn

from PIL import Image
import imageio
import os

from google.colab import drive

In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
class YOLO(nn.Module):
    def __init__(self, img_width, row_size):
        super(YOLO, self).__init__()
        self.row_size = row_size
        self.conv1 = nn.Conv2d(1, 16, 7, stride=2)
        self.mp1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, (3, 3), stride=1)
        self.mp2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(32, 64, (3, 3), stride=1)
        self.mp3 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(64*53*36, 4096)
        self.fc2 = nn.Linear(4096, row_size * 5)
        self.dropout = nn.Dropout()

    def forward(self, x):
        # Conv + ReLU + max pooling for two layers
        x = F.relu(self.conv1(x))
        x = self.mp1(x)
        x = F.relu(self.conv2(x))
        x = self.mp2(x)
        x = F.relu(self.conv3(x))
        x = self.mp3(x)
        x = x.view(-1, 64*53*36)
        x = F.relu(self.dropout(self.fc1(x)))
        x = self.fc2(x)
        x = x.view(-1, self.row_size, 5)
        x = torch.sigmoid(x)
        return x

In [0]:
def calc_x_y(row, tensor):
    """calc coordinates"""

    x = tensor[1] * 619
    y = tensor[2] * (885 / 50) + row * (885 / 50)
    width = tensor[3] * 619
    height = tensor[4] * 885
    return torch.FloatTensor([1, x, y, width, height])

In [0]:
def calc_box(tensor):
    """calc box for output line"""
    x1 = max(0, tensor[1] - 0.5 * tensor[3])
    y1 = max(0, tensor[2] - 0.5 * tensor[4])
    x2 = min(619, tensor[1] + 0.5 * tensor[3])
    y2 = min(885, tensor[2] + 0.5 * tensor[4])

    box = [x1, y1, x2, y2]
    return box

In [0]:
def non_maximum_suppression(tensor, percent):
    """choose predicted lines by highest propability. 
    Lines who overlap a actual choosen line by percent or higher will delete."""
    
    for j in range(tensor.size(1)):
        if(tensor[j,0].item() < 0.5):
            tensor[j,0] = torch.tensor(0)
    found = []
    while(True):
        maximum = 0
        index = 0
        for j in range(tensor.size(1)):
            if(tensor[j,0].item() > maximum and j not in found):
                maximum = tensor[j,0].item()
                index = j

        if(maximum == 0):
            break

        found.append(index)
        tensor[index,0] = torch.tensor(1)
            
        for j in range(tensor.size(1)):
            if(j != index and tensor[j,0] >= 0.5):
                x_y_max = calc_x_y(index, tensor[index])
                x_y_other = calc_x_y(j, tensor[j])
                box1 = calc_box(x_y_max)
                box2 = calc_box(x_y_other)
                if(calc_iou(box1, box2) > percent):
                    tensor[j,0] = 0

In [19]:
imgs_path = "drive/My Drive/data_small/forms/forms_train_small/"
imgs_paths = os.listdir(imgs_path)
weight_path = "drive/My Drive/evaluation_small/weights_small.pt"
predict_path = "drive/My Drive/testlines_predicted_small/"

transform = transforms.Compose([transforms.Resize((885, 619)),
                                    transforms.ToTensor()])

# set a boolean flag that indicates whether a cuda capable GPU is available
is_gpu = torch.cuda.is_available()
print("GPU is available:", is_gpu)
print("If you are receiving False, try setting your runtime to GPU")

# set the device to cuda if a GPU is available
device = torch.device("cuda" if is_gpu else "cpu")
model = torch.load(weight_path)

print(model)

GPU is available: True
If you are receiving False, try setting your runtime to GPU
YOLO(
  (conv1): Conv2d(1, 16, kernel_size=(7, 7), stride=(2, 2))
  (mp1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (mp2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (mp3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=122112, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=250, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [0]:
def predict_lines(model,imgs_path, predict_path):
    """ predict images to lines from image path to predict_path"""
    img_count = 0
    for path in imgs_paths:
        count = 0
        img_tensor = transform(Image.open(imgs_path + path))
        output = model(torch.stack([img_tensor]).to(device))[0]
        # find right boxes
        non_maximum_suppression(output, 0.5)
        img = imageio.imread(imgs_path + path)
        yscale = round(img.shape[0] / 885)
        xscale = round(img.shape[1] / 619)
        print(xscale, xscale)
        for i in range(50):
            if(output[i][0] > 0.5):
                print(output[i])
                box = calc_box(calc_x_y(i, output[i]))
                x1 = (int(box[0])) * xscale
                x2 = (int(box[2])) * xscale
                y1 = (int(box[1])) * yscale
                y2 = (int(box[3])) * yscale
                print(box)
                imageio.imwrite(predict_path + "pic" + str(img_count) + "line" + str(count) + '.jpg', img[y1:y2, x1:x2])
                count += 1
        img_count += 1

In [24]:
predict_lines(model, imgs_path, predict_path)

4 4
tensor([0.9624, 0.5155, 0.2458, 0.7337, 0.0322], device='cuda:0',
       grad_fn=<SelectBackward>)
[tensor(92.0245), tensor(184.8153), tensor(546.1631), tensor(213.2853)]
tensor([0.9611, 0.5311, 0.7832, 0.7651, 0.0283], device='cuda:0',
       grad_fn=<SelectBackward>)
[tensor(91.9314), tensor(231.4322), tensor(565.5098), tensor(256.4917)]
tensor([0.9774, 0.5564, 0.3270, 0.7862, 0.0289], device='cuda:0',
       grad_fn=<SelectBackward>)
[tensor(101.0668), tensor(276.1984), tensor(587.7156), tensor(301.7772)]
tensor([0.9356, 0.5376, 0.7916, 0.7765, 0.0302], device='cuda:0',
       grad_fn=<SelectBackward>)
[tensor(92.4154), tensor(319.2509), tensor(573.0753), tensor(345.9702)]
tensor([0.9861, 0.5391, 0.4071, 0.7786, 0.0331], device='cuda:0',
       grad_fn=<SelectBackward>)
[tensor(92.7140), tensor(364.2780), tensor(574.6712), tensor(393.5328)]
tensor([0.9241, 0.5441, 0.8382, 0.7767, 0.0290], device='cuda:0',
       grad_fn=<SelectBackward>)
[tensor(96.4169), tensor(409.0951), tenso