In [None]:
import torch
import torchvision
from torchvision.io import read_image
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn as nn

import os
from os import listdir
from os.path import isfile, join

import csv

from timeit import default_timer as timer

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(device)

In [None]:
def collater_function(batch):
    """Collate dataset samples into an ``(images, targets)`` pair of lists.

    Every sample is read positionally: index 0 is the image and index 1 is a
    mapping with ``'boxes'`` and ``'labels'`` entries. Both entries are turned
    into tensors on the module-level ``device`` and the image is moved there
    too, so the returned lists can be handed to the model directly.

    Args:
        batch: Sequence of samples as described above.

    Returns:
        Tuple ``(images, targets)`` where ``images`` is a list of image tensors
        and ``targets`` is a list of ``{'boxes': ..., 'labels': ...}`` dicts,
        both in the same order as ``batch``.
    """
    images_on_device = []
    targets_on_device = []

    for sample in batch:
        # A sample is (image, annotation); read by position, not by unpacking.
        annotation = sample[1]
        picture = sample[0]

        targets_on_device.append({
            'boxes': torch.tensor(annotation['boxes'], device=device),
            'labels': torch.tensor(annotation['labels'], device=device),
        })
        images_on_device.append(picture.to(device))

    return (images_on_device, targets_on_device)

In [None]:
# Transform pipeline handed to both datasets; currently a single ToTensor step.
image_transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Mini-batch size shared by the training and testing loaders.
batch_size = 16

# --- Training split: the annotations CSV sits inside the image folder. ---
training_dataset = RCNNDataset(
    img_dir='D:\\GitHub\\Hindi-Handwriting-Recognition\\CNN_test_sandbox\\Dataset\\Local (Training)',
    annotations='D:\\GitHub\\Hindi-Handwriting-Recognition\\CNN_test_sandbox\\Dataset\\Local (Training)\\annotations.csv',
    transform=image_transform,
    device=device,
)
training_loader = DataLoader(
    training_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collater_function,
)

# --- Testing split: same layout, separate folder. ---
testing_dataset = RCNNDataset(
    img_dir='D:\\GitHub\\Hindi-Handwriting-Recognition\\CNN_test_sandbox\\Dataset\\Local (Testing)',
    annotations='D:\\GitHub\\Hindi-Handwriting-Recognition\\CNN_test_sandbox\\Dataset\\Local (Testing)\\annotations.csv',
    transform=image_transform,
    device=device,
)
testing_loader = DataLoader(
    testing_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collater_function,
)

In [None]:
def area_of_intersection(bb1, bb2):
    """Return the area shared by two axis-aligned boxes.

    Both boxes are indexed as ``[x_min, y_min, x_max, y_max]``.

    Args:
        bb1: First box.
        bb2: Second box.

    Returns:
        ``0.0`` when the boxes do not overlap on one of the axes; otherwise the
        width times the height of the overlapping rectangle (which is zero for
        boxes that merely touch along an edge).
    """
    # The overlap rectangle is bounded by the innermost edge on every side.
    left = max(bb1[0], bb2[0])
    top = max(bb1[1], bb2[1])
    right = min(bb1[2], bb2[2])
    bottom = min(bb1[3], bb2[3])

    boxes_are_disjoint = right < left or bottom < top
    if boxes_are_disjoint:
        return 0.0

    overlap_width = right - left
    overlap_height = bottom - top
    return overlap_width * overlap_height

In [None]:
def test_and_evaluate(model, device, optimizer, scheduler, epochs, batch_size, training_loader, testing_loader, testing_dataset, overlap_threshold):
    n_total_steps = len(training_loader)
    print('Training')
    model.train()
    for epoch in range(epochs):
        start = timer()
        for i, (images, targets) in enumerate(training_loader):

            loss_dict = model(images, targets)
            loss_accumulated = sum(loss for loss in loss_dict.values())

            loss_value = loss_accumulated.item()

            optimizer.zero_grad()
            loss_accumulated.backward()
            optimizer.step()

        scheduler.step()
        end = timer()
        print(f'Time for Epoch {epoch+1}: {end-start:.4f} seconds, loss:{loss_value}')

    #Evaluate
    print('Evaluating')
    model.eval()

    #tracking variables
    possible_bounding_boxes = 0
    correct_bounding_boxes = 0

    possible_labels = 0
    correct_labels = 0

    avg_score = 0

    with torch.no_grad():
        for i, (images, targets) in enumerate(testing_loader):
            outputs = model(images)
            for output in outputs:
              boxes = output['boxes'].tolist()
              labels = output['labels'].tolist()
              score = output['scores'].tolist()

              t_boxes = targets['boxes'].tolist()
              t_labels = targets['labels'].tolist()

              #select the first 35 returned boxes only
              boxes = boxes[0:35]
              labels = labels[0:35]
              score = score[0:35]

              #average score
              avg_scr = sum(score) / len(score)
              if avg_score == 0:
                avg_score = avg_scr
              else:
                avg_score = (avg_score + avg_scr) / 2

              #check bbox and classification accuracy
              for x in range(len(labels)):
                pred_box = boxes[x]
                pred_label = labels[x]

                possible_bounding_boxes += 1

                #find closest matching box if any
                for k in range(len(t_labels)):
                  t_box = t_boxes[k]
                  area_of_overlap = area_of_intersection(pred_box, t_box)
                  area_of_truth = (t_box[2] - t_box[0]) * (t_box[3] - t_box[1])
                  percentage_overlap = area_of_overlap / area_of_truth
                  if percentage_overlap >= overlap_threshold:
                    #bounding box match
                    correct_bounding_box += 1
                    possible_labels += 1
                    #check label
                    if pred_label == t_labels[k]:
                      correct_labels +=1
                    break

        bounding_perf = correct_bounding_boxes / possible_bounding_boxes
        label_perf = correct_labels / possible_labels

        print(f'Performance at {epochs}=> BBOX: {bounding_perf}, LABEL: {label_perf}, AVG CONFIDENCE: {avg_score}')

        return (bounding_perf, label_perf, avg_score)


In [None]:
# Faster R-CNN experiment: build the detector, its optimizer and LR schedule,
# then run a single train-and-evaluate pass with a fixed epoch count.
learning_rate = 0.005
num_epochs = 10
overlap_threshold = 0.90

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=48, progress=True)
model = model.to(device)

optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.0005,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=num_epochs,
    eta_min=0.001,
)

bbox_perf, label_perf, conf = test_and_evaluate(
    model=model,
    device=device,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=num_epochs,
    batch_size=batch_size,
    training_loader=training_loader,
    testing_loader=testing_loader,
    testing_dataset=testing_dataset,
    overlap_threshold=overlap_threshold,
)
