# AI Engine

# Import and functions

In [14]:
import torch
import cv2
import os
import time
import datetime
import math
import random
from torch.utils.data import Dataset, DataLoader
import sys
from PIL import Image
import torchvision.transforms.functional as transform
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.ssd import SSDClassificationHead
from torch.utils.tensorboard import SummaryWriter
import numpy
from matplotlib import pyplot as plt
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# Calculates the Intersection Over Union for two specified bounding boxes
def calc_iou(bb1, bb2):
    # Get the coordinates of the intersecting box
    inter_x = max(bb1[0], bb2[0])
    inter_y = max(bb1[1], bb2[1])
    inter_x2 = min(bb1[2], bb2[2])
    inter_y2 = min(bb1[3], bb2[3])
    
    if inter_x2 < inter_x or inter_y2 < inter_y:
        return 0.0
    
    inter_area = (inter_x2 - inter_x) * (inter_y2 - inter_y)

    # If intersection area is or lower than 0 we dont have an intersection
    #if inter_area <= 0:
    #    return 0.0

    bb1_area = (bb1[2] - bb1[0]) * (bb1[3] - bb1[1])
    bb2_area = (bb2[2] - bb2[0]) * (bb2[3] - bb2[1])
    iou = inter_area / float(bb1_area + bb2_area - inter_area)
    return iou

def calc_intersection(bb1, bb2):
    inter_x = max(bb1[0], bb2[0])
    inter_y = max(bb1[1], bb2[1])
    inter_x2 = min(bb1[2], bb2[2])
    inter_y2 = min(bb1[3], bb2[3])
    
    if inter_x2 < inter_x or inter_y2 < inter_y:
        return 0.0
    
    return (inter_x2 - inter_x) * (inter_y2 - inter_y)

# Calculates the area of a bounding box
def calc_area(bb):
    return (bb[2] - bb[0]) * (bb[3] - bb[1])

def collate_fn(batch):
    return tuple(zip(*batch))

# Padds a bounding box by a specific number, doubles the padding if text is specified
def pad_bb(bb, pad, text=False):
    x,y,x2,y2 = bb
    if text:
        return [x-pad*2, y-pad, x2+pad*2, y2+pad]
    return [x-pad, y-pad, x2+pad, y2+pad]

# Returns the smallest bounding box between two specified boxes
def return_smallest(bb1, bb2):
    bb1_x,bb1_y,bb1_x2,bb1_y2 = bb1
    bb2_x,bb2_y,bb2_x2,bb2_y2 = bb2
    bb1_size = (bb1_x2-bb1_x)*(bb1_y2-bb1_y)
    bb2_size = (bb2_x2-bb2_x)*(bb2_y2-bb2_y)
    
    return bb2 if bb1_size > bb2_size else bb1

# Gets the bounding boxes from an image by processing the image
def get_bbs_from_image(im, clean=True, pad=0, text=False, ignore_padding=10, combine_all=False):
    imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 100, 255, cv2.THRESH_BINARY_INV)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST , cv2.CHAIN_APPROX_SIMPLE)
    bbs = []
    for cntr in contours:
        x,y,w,h = cv2.boundingRect(cntr)
        #cv2.rectangle(im, (x, y), (x+w, y+h), (0, 255, 255), 2)
        if x > ignore_padding and y > ignore_padding and x < im.shape[:2][1]-ignore_padding and y < im.shape[:2][0]-ignore_padding:
            bbs.append([x,y,x+w,y+h])
                
    t_bbs = []
    [t_bbs.append(x) for x in bbs if x not in t_bbs]
    
    t_bbs = remove_small_bb_list(t_bbs, 10000)
    combined_bbs = combine_bb_list(t_bbs, pad=pad, text=text)
    
    if clean:
        combined_bbs = clean_bb_list(combined_bbs, pad=pad)
    
    if combine_all:
        temp_bb = combined_bbs[0]
        for bb in combined_bbs:
            if temp_bb[0] > bb[0]:
                temp_bb[0] = bb[0]
            if temp_bb[1] > bb[1]:
                temp_bb[1] = bb[1]
            if temp_bb[2] < bb[2]:
                temp_bb[2] = bb[2]
            if temp_bb[3] < bb[3]:
                temp_bb[3] = bb[3]
        return [temp_bb]
    return combined_bbs

def combine_bb_list(bb_list, pad=0, text=False):
    bbs = bb_list.copy()
    iou_non_zero = True
    while iou_non_zero:
        iou_non_zero = False
        for i in range(len(bbs)-1):
            for c in range(i, len(bbs)):
                if bbs[i] == bbs[c]:
                    continue
                    
                iou = calc_iou(pad_bb(bbs[i], pad, text=text), bbs[c])
                
                if iou != 0:
                    iou_non_zero = True
                    bb = combine_bb(bbs[i], bbs[c])
                    bb1 = bbs[i].copy()
                    bb2 = bbs[c].copy()
                    
                    bbs.remove(bb1)
                    bbs.remove(bb2)
                    bbs.append(bb)
                    break;
            if iou_non_zero:
                break;
    return bbs

def combine_bb(bb1, bb2):
    bb1_x,bb1_y,bb1_x2,bb1_y2 = bb1
    bb2_x,bb2_y,bb2_x2,bb2_y2 = bb2

    if bb2_x < bb1_x:
        bb1_x = bb2_x
    if bb2_y < bb1_y:
        bb1_y = bb2_y
    if bb2_x2 > bb1_x2:
        bb1_x2 = bb2_x2
    if bb2_y2 > bb1_y2:
        bb1_y2 = bb2_y2
        
    return [bb1_x, bb1_y, bb1_x2, bb1_y2]

def clean_bb_list(bb_list, pad=0, text=False):
    bbs = bb_list.copy()
    iou_non_zero = True
    while iou_non_zero:
        iou_non_zero = False
        for i in range(len(bbs)):
            if i == len(bbs)-1:
                break;
                
            iou = calc_iou(pad_bb(bbs[i], pad, text=text), bbs[i+1])

            if iou == 0:
                continue

            iou_non_zero = True
            bb = return_smallest(bbs[i], bbs[i+1])
            bbs.remove(bb)
            break;
                
    return bbs

def remove_small_bb_list(bb_list, size):
    cleaned_list = []
    for bb in bb_list:
        x,y,x2,y2 = bb
        w = x2-x
        h = y2-y
        if w*h > size:
            cleaned_list.append(bb)
            
    return cleaned_list

# Normalizes a pixel specific bounding box [x, y, x2, y2] to normalized bounding box [x, y, w, h]
def normalize_bb(bb, shape):
    h_img,w_img = shape
    x,y,x2,y2 = bb
    norm_w,norm_h = [(x2-x)/w_img, (y2-y)/h_img]
    return [((x+x2)/2)/w_img, ((y+y2)/2)/h_img, norm_w, norm_h]

# Denormalizes a normalized bounding box [x, y, w, h] to pixel specific bounding box [x, y, x2, y2]
def denormalize_bb(bb, shape):
    h_img,w_img = shape
    x,y,w,h = bb
    x_min,y_min = [int(x*w_img-(w*w_img)/2), int(y*h_img-(h*h_img)/2)]
    return [x_min, y_min, x_min+int(w*w_img), y_min+int(h*h_img)]

# Stringifies a bounding box for output
def bb_to_str(bb):
    return str(bb[0])+' '+str(bb[1])+' '+str(bb[2])+' '+str(bb[3])

# Destringifies a bounding box
def str_to_bb(bb_str):
    str_arr = bb_str.split(' ')
    return [float(str_arr[0]), float(str_arr[1]), float(str_arr[2]), float(str_arr[3]), float(str_arr[4])]

# Generates dataset structure by generating boundingbox labels, spliting data into train and validition sets
# also providing the found boundingboxes for verification of labeling being successfull 
def generate_dataset(root_folder, labels=[], split_components=True, train_val_ratio=0.8, combine_all=False):
    os.mkdir('./'+root_folder+'_generated/')
    os.mkdir('./'+root_folder+'_generated/images/')
    os.mkdir('./'+root_folder+'_generated/images/train/')
    os.mkdir('./'+root_folder+'_generated/images/val/')
    os.mkdir('./'+root_folder+'_generated/images/bbs/')
    os.mkdir('./'+root_folder+'_generated/labels/')

    if (split_components):    
        for component in os.listdir('./'+root_folder):
            os.mkdir('./'+root_folder+'_generated/images/train/'+component+'/')
            os.mkdir('./'+root_folder+'_generated/images/val/'+component+'/')
            images = os.listdir('./'+root_folder+'/'+component)
            for i in range(len(images)):
                image = images[i]
                img_type = 'val' if i > math.floor(len(images)*train_val_ratio) else 'train'
                im = cv2.imread('./'+root_folder+'/'+component+'/'+image)
                cv2.imwrite('./'+root_folder+'_generated/images/'+img_type+'/'+component+'/'+image, im)
                bbs = get_bbs_from_image(im, clean=True, pad=30, text=True, combine_all=combine_all)
                bbs_str = '' 
                for bb in bbs:
                    bbs_str += str(labels[component])+' '+bb_to_str(normalize_bb(bb, im.shape[:2]))+'\n'
                    x,y,x2,y2 = pad_bb(bb, 5)
                    cv2.rectangle(im, (x, y), (x2, y2), (0, 0, 255), 2)
                cv2.imwrite('./'+root_folder+'_generated/images/bbs/'+image, im)
                f = open('./'+root_folder+'_generated/labels/'+image[:-3]+"txt", "a")
                f.write(bbs_str[:-1])
                f.close()
    else:
        images = os.listdir('./'+root_folder)
        for i in range(len(images)):
            image = images[i]
            img_type = 'val' if i > math.floor(len(images)*train_val_ratio) else 'train'
            im = cv2.imread('./'+root_folder+'/'+image)
            cv2.imwrite('./'+root_folder+'_generated/images/'+img_type+'/'+image, im)
            bbs = get_bbs_from_image(im, clean=True, pad=30, text=True, combine_all=combine_all)
            
            bbs_str = '' 
            c = 0
            for bb in bbs:
                c = c + 1
                bbs_str += str(c)+' '+bb_to_str(normalize_bb(bb, im.shape[:2]))+'\n'
                x,y,x2,y2 = pad_bb(bb, 5)
                cv2.rectangle(im, (x, y), (x2, y2), (0, 0, 255), 2)
                cv2.putText(im, str(c), (int((x+x2)/2)-50,int((y+y2)/2)), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 255, 255), 5, cv2.LINE_AA)
            
            cv2.imwrite('./'+root_folder+'_generated/images/bbs/'+image, im)
            f = open('./'+root_folder+'_generated/labels/'+image[:-3]+"txt", "a")
            f.write(bbs_str[:-1])
            f.close()
            
def threshold_output(prediction, threshold=0.5):
    output = {'boxes':[], 'scores':[], 'labels':[]}
    for i in range(len(prediction['scores'])):
        if prediction['scores'][i] > threshold:
            output['boxes'].append(prediction['boxes'][i])
            output['scores'].append(prediction['scores'][i])
            output['labels'].append(prediction['labels'][i])
    return output

def interpol_precision(precision, fptp):
    inter_prec = []
    curr_prec = precision[0]
    for i in range(len(precision)):
        if fptp[i]:
            curr_prec = precision[i]
        inter_prec.append(curr_prec)
    return inter_prec

def calc_ap(precision, recall, fptp):
    inter_prec = interpol_precision(precision, fptp)
    AP = 0
    inter_prec.append(0)
    
    p = 0
    for i in range(0, 11):
        for c in range(p, len(recall)):
            if recall[c] < i*0.1:
                if p != len(recall):
                    p += 1
            else:
                break
        AP += inter_prec[p]
    
    return AP/11

def calc_map(model, data_loader, device, num_classes, threshold=0.5, IoU=0.5):
    precision = [[] for k in range(0,num_classes)]
    recall = [[] for k in range(0,num_classes)]
    fptp = [[] for k in range(0,num_classes)] # 0 = false positive, 1 = true positive
    fptp_p = [1 for k in range(0,num_classes)]
    tpfn = [0 for k in range(0,num_classes)]
    AP = [0 for k in range(0,num_classes)]
    mAP = 0
    predictions = []
    batch_nr = 0
    epoch_time = time.time()
    
    for images, targets in data_loader:
        batch_nr += 1
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        preds = model(images)
        for i in range(len(preds)):
            thres = threshold_output(preds[i], threshold)
            for c in range(len(thres['labels'])):
                b_label = False
                for g in range(len(targets[i]['labels'])):
                    if device.type == 'cpu':
                        iou = calc_iou(targets[i]['boxes'][g].detach().numpy(), thres['boxes'][c].detach().numpy())
                    else:
                        iou = calc_iou(targets[i]['boxes'][g].cpu().detach().numpy(), thres['boxes'][c].cpu().detach().numpy())
                    if iou > IoU and thres['labels'][c].item() == targets[i]['labels'][g].item():
                        fptp[thres['labels'][c].item()].append(1)
                        b_label = True
                        break;
                if not b_label:
                        fptp[thres['labels'][c].item()].append(0)
                        tpfn[thres['labels'][c].item()] += 1

        for tar in targets:
            for lab in tar['labels']:
                tpfn[lab.item()] += 1
        
        print(
            '\r[Eval] mAP [{}/{}]\tEpoch time elapsed: {}'.format(
                batch_nr, len(data_loader), str(datetime.timedelta(seconds=round(time.time()-epoch_time)))
            ),
            end=''
        )
    
    for i in range(len(fptp)):
        for c in range(1,len(fptp[i])+1):
            precision[i].append(sum(fptp[i][:c])/len(fptp[i][:c]))
            recall[i].append(sum(fptp[i][:c])/tpfn[i])
    
    for i in range(len(recall)):
        if precision[i] != []:
            AP[i] = calc_ap(precision[i], recall[i], fptp[i])*100
            mAP += AP[i]
    mAP = mAP/(num_classes-1)
    return AP[1:], mAP
            
def predict_and_save(model, root_dir, save_dir, labels=[], threshold=0.5, IoU=0, mask=False, unique_name='', skip_component=''):
    # Check if the 4th final character is a dot aka if the input directory is a file
    if root_dir[-4] == '.':
        im = root_dir
        img = cv2.imread(im)
        cv2_img = cv2.imread(im)
        if mask:
            imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            ret, img = cv2.threshold(imgray, 100, 255, cv2.THRESH_BINARY_INV)
        tensor_img = torch.tensor(transform.to_tensor(img))
        tensor_img = torch.reshape(tensor_img, (1, tensor_img.size(0), tensor_img.size(1), tensor_img.size(2)))

        predictions = model(tensor_img)
        dont_print_id = []
        for i in range(len(predictions[0]['boxes'])):
            score = predictions[0]['scores'][i].item()
            if IoU > 0:
                if i in dont_print_id:
                    continue
                bb1 = predictions[0]['boxes'][i].detach().numpy()
                for c in range(i, len(predictions[0]['boxes'])):
                    bb2 = predictions[0]['boxes'][c].detach().numpy()
                    if calc_intersection(bb1, bb2) > calc_area(bb1)*IoU:
                        if labels[predictions[0]['labels'][i].item()-1] != skip_component and labels[predictions[0]['labels'][c].item()-1] == skip_component: 
                            dont_print_id.append(c)
            if score > threshold:
                x,y,x2,y2 = predictions[0]['boxes'][i].detach().numpy()
                cv2.rectangle(cv2_img, (int(x), int(y)), (int(x2), int(y2)), (0, 0, 255), 2)
                cv2.putText(cv2_img, str(score*100)[:5], (int((x+x2)/2)-200,int((y+y2)/2)), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 255, 255), 5, cv2.LINE_AA)
                
                if len(labels) > 1:
                    cv2.putText(cv2_img, labels[predictions[0]['labels'][i].item()-1], (int((x+x2)/2)-250,int((y+y2)/2)-150), cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 255, 255), 5, cv2.LINE_AA)

        cv2.imwrite(save_dir+unique_name+(root_dir.split("/")[-1]), cv2_img)
    else:
        for image in os.listdir(root_dir):
            im = root_dir+image
            img = Image.open(im)
            cv2_img = cv2.imread(im)
            tensor_img = torch.tensor(transform.to_tensor(img))
            tensor_img = torch.reshape(tensor_img, (1, tensor_img.size(0), tensor_img.size(1), tensor_img.size(2)))

            predictions = model(tensor_img)
            for i in range(len(predictions[0]['boxes'])):
                score = predictions[0]['scores'][i].item()
                if score > threshold:
                    x,y,x2,y2 = predictions[0]['boxes'][i].detach().numpy()
                    cv2.rectangle(cv2_img, (int(x), int(y)), (int(x2), int(y2)), (0, 0, 255), 2)
                    cv2.putText(cv2_img, str(score*100)[:5], (int((x+x2)/2)-200,int((y+y2)/2)), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 255, 255), 5, cv2.LINE_AA)

                    if len(labels) > 1:
                        cv2.putText(cv2_img, labels[predictions[0]['labels'][i].item()-1], (int((x+x2)/2)-250,int((y+y2)/2)-150), cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 255, 255), 5, cv2.LINE_AA)

            cv2.imwrite(save_dir+unique_name+image, cv2_img)

def save_best_bb(pred1, pred2, IoU=0.5):
    remove_pred1_ids = []
    remove_pred2_ids = []
    
    # Go through pred1 checking which elements to remove and also pred2
    for i in range(len(pred1['boxes'])):
        score1 = pred1['scores'][i].item()
        if pred1['boxes'][i].device.type == 'cpu':
            bb1 = pred1['boxes'][i].detach().numpy()
        else:
            bb1 = pred1['boxes'][i].cpu().detach().numpy()
        for c in range(len(pred2['boxes'])):
            score2 = pred2['scores'][c].item()
            
            if pred2['boxes'][c].device.type == 'cpu':
                bb2 = pred2['boxes'][c].detach().numpy()
            else:
                bb2 = pred2['boxes'][c].cpu().detach().numpy()
            intersect = calc_intersection(bb1, bb2)
            if intersect > calc_area(bb1)*IoU or intersect > calc_area(bb2)*IoU:
                if score1 > score2:
                    remove_pred2_ids.append(c)
                else:
                    remove_pred1_ids.append(i)
            
    # Remove the elements that should not be in the refined final list
    final_pred = {'boxes':[], 'scores':[], 'labels':[]}
    for i in range(len(pred1['boxes'])):
        if i not in remove_pred1_ids:
            final_pred['boxes'].append(pred1['boxes'][i])
            final_pred['scores'].append(pred1['scores'][i])
            final_pred['labels'].append(pred1['labels'][i])
    for i in range(len(pred2['boxes'])):
        if i not in remove_pred2_ids:
            final_pred['boxes'].append(pred2['boxes'][i])
            final_pred['scores'].append(pred2['scores'][i])
            final_pred['labels'].append(pred2['labels'][i])
    
    return final_pred
    
def frcnn_load_singular_models(model_name, components, root_dir):
    models = []
    device = torch.device('cpu')#torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    for component in components:
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

        num_classes = 2
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        model.to(device)
        checkpoint = torch.load(root_dir+'/'+component+'/'+model_name+'.pt')
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()
        models.append(model)
        
    return models


def ssd_load_singular_models(model_name, components, root_dir, device):
    models = []

    for component in components:
        model_ssd = torchvision.models.detection.ssd300_vgg16(pretrained=True)

        num_classes = 2
        in_channels = [512, 1024, 512, 256, 256, 256]
        num_anchors = [4, 6, 6, 6, 4, 4]
        model_ssd.head.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)
        
        model_ssd.to(device)
        
        checkpoint = torch.load(root_dir+'/'+component+'/'+model_name+'.pt')
        model_ssd.load_state_dict(checkpoint['model_state_dict'])
        model_ssd.eval()
        models.append(model_ssd)
        
    return models

def predict_models(models, images, IoU=0.5):
    predictions = [0 for k in range(len(images))]
    for i in range(len(images)):
        preds = []
        for model in models:
            pred = model([images[i]])
            preds.append(pred[0])

        for c in range(len(preds)):
            preds[c]['labels'] *= (c+1)

        final_pred = preds[0]
        for c in range(1, len(preds)):
            final_pred = save_best_bb(final_pred, preds[c], IoU=IoU)
        predictions[i] = final_pred
    return predictions
    
def predict_and_save_models(models, root_dir, save_dir, labels=[], threshold=0.5, IoU=0.5, mask=False):
    if root_dir[-4] == '.':
        im = root_dir
        img = cv2.imread(im)
        cv2_img = cv2.imread(im)
        if mask:
            imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            ret, img = cv2.threshold(imgray, 100, 255, cv2.THRESH_BINARY_INV)
        tensor_img = torch.tensor(transform.to_tensor(img))
        tensor_img = torch.reshape(tensor_img, (1, tensor_img.size(0), tensor_img.size(1), tensor_img.size(2)))
        predictions = []
        for model in models:
            predictions.append(model(tensor_img)[0])

        for i in range(len(predictions)):
            predictions[i]['labels'] *= (i+1)
            
        final_pred = predictions[0]
        for i in range(1, len(predictions)):
            final_pred = save_best_bb(final_pred, predictions[i], IoU=IoU)
            
        for i in range(len(final_pred['boxes'])):
            score = final_pred['scores'][i].item()
            if score > threshold:
                x,y,x2,y2 = final_pred['boxes'][i].detach().numpy()
                cv2.rectangle(cv2_img, (int(x), int(y)), (int(x2), int(y2)), (0, 0, 255), 2)
                cv2.putText(cv2_img, str(score*100)[:5], (int((x+x2)/2)-200,int((y+y2)/2)), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 255, 255), 5, cv2.LINE_AA)

                if len(labels) > 1:
                    cv2.putText(cv2_img, labels[final_pred['labels'][i].item()-1], (int((x+x2)/2)-250,int((y+y2)/2)-150), cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 255, 255), 5, cv2.LINE_AA)

        cv2.imwrite(save_dir+(root_dir.split("/")[-1]), cv2_img)
    else:
        for image in os.listdir(root_dir):
            im = root_dir+image
            img = Image.open(im)
            cv2_img = cv2.imread(im)
            tensor_img = transform.to_tensor(img)
            tensor_img = torch.reshape(tensor_img, (1, tensor_img.size(0), tensor_img.size(1), tensor_img.size(2)))

            predictions = []
            for model in models:
                predictions.append(model(tensor_img)[0])

            for i in range(len(predictions)):
                predictions[i]['labels'] *= (i+1)

            final_pred = predictions[0]
            for i in range(1, len(predictions)):
                final_pred = save_best_bb(final_pred, predictions[i], IoU=IoU)

            for i in range(len(final_pred['boxes'])):
                score = final_pred['scores'][i].item()
                if score > threshold:
                    x,y,x2,y2 = final_pred['boxes'][i].detach().numpy()
                    cv2.rectangle(cv2_img, (int(x), int(y)), (int(x2), int(y2)), (0, 0, 255), 2)
                    cv2.putText(cv2_img, str(score*100)[:5], (int((x+x2)/2)-200,int((y+y2)/2)), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 255, 255), 5, cv2.LINE_AA)

                    if len(labels) > 1:
                        cv2.putText(cv2_img, labels[final_pred['labels'][i].item()-1], (int((x+x2)/2)-250,int((y+y2)/2)-150), cv2.FONT_HERSHEY_SIMPLEX, 4, (0, 255, 255), 5, cv2.LINE_AA)

            cv2.imwrite(save_dir+image, cv2_img)

def mixup_data(x, y, alpha=1.0, use_cuda=False):
    batch_size = len(x)
    if (batch_size < 2):
        return x, [0], y
    
    # Get a random lambda
    if alpha > 0:
        lam = numpy.clip(numpy.random.beta(alpha, alpha), 0.4, 0.6)
    else:
        lam = 1

    # convert tensor array to numpy for mixup
    t_x = numpy.empty((batch_size, x[0].size()[1], x[0].size()[2]))
    for i in range(len(x)):
        t_x[i] = x[i].numpy().astype(numpy.float)
    
    # convert tuple to numpy array for easier indexing
    t_y = numpy.empty((batch_size), dtype=numpy.object)
    for i in range(len(y)):
        t_y[i] = {}
        for var in y[i]:
            t_y[i][var] = y[i][var].numpy().astype(numpy.double)
        
    # Get a random set of indicies
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)
        
    # Mix the two images and make them % transparent based on lambda
    t_mixed_x = lam * t_x + (1 - lam) * t_x[index, :]
    y_a, y_b = t_y, t_y[index]

    # Zip together the bounding boxes of the zipped images
    mixedup_bboxes = []
    i = 0
    for bbox, s_bbox in zip(y_a, y_b):
        # If the two images zipped are the same, keep one of the boundingbox infos
        if (bbox['boxes'][0] == s_bbox['boxes'][0]).all():
            mix_len = len(bbox['boxes'])
            mixedup_bboxes.append({'boxes':torch.zeros([mix_len, 4], dtype=torch.double), 'labels':torch.zeros([mix_len], dtype=torch.int64), 'image_id':torch.zeros([1], dtype=torch.int64), 'area':torch.zeros([mix_len], dtype=torch.float), 'iscrowd':torch.zeros([mix_len], dtype=torch.int64)})
            for c in range(len(bbox['boxes'])):
                mixedup_bboxes[i]['boxes'][c] = torch.from_numpy(bbox['boxes'][c])
                mixedup_bboxes[i]['labels'][c] = bbox['labels'][c]
                mixedup_bboxes[i]['area'][c] = bbox['area'][c]
                mixedup_bboxes[i]['iscrowd'][c] = bbox['iscrowd'][c]
            i += 1
            continue;
            
        mix_len = len(bbox['boxes'])+len(s_bbox['boxes'])
        mixedup_bboxes.append({'boxes':torch.zeros([mix_len, 4], dtype=torch.double), 'labels':torch.zeros([mix_len], dtype=torch.int64), 'image_id':torch.zeros([1], dtype=torch.int64), 'area':torch.zeros([mix_len], dtype=torch.float), 'iscrowd':torch.zeros([mix_len], dtype=torch.int64)})
        for c in range(len(bbox['boxes'])):
            mixedup_bboxes[i]['boxes'][c] = torch.from_numpy(bbox['boxes'][c])
            mixedup_bboxes[i]['labels'][c] = bbox['labels'][c]
            mixedup_bboxes[i]['area'][c] = bbox['area'][c]
            mixedup_bboxes[i]['iscrowd'][c] = bbox['iscrowd'][c]
        
        for j in range(len(s_bbox['boxes'])):
            mixedup_bboxes[i]['boxes'][c+j+1] = torch.from_numpy(s_bbox['boxes'][j])
            mixedup_bboxes[i]['labels'][c+j+1] = s_bbox['labels'][j]
            mixedup_bboxes[i]['area'][c+j+1] = s_bbox['area'][j]
            mixedup_bboxes[i]['iscrowd'][c+j+1] = s_bbox['iscrowd'][j]
            
        mixedup_bboxes[i]['image_id'][0] = bbox['image_id'][0]
        i += 1
        
    mixed_x = []
    for v in t_mixed_x:
        mixed_x.append(torch.FloatTensor([v]))
    
    return mixed_x, index, tuple(mixedup_bboxes)

# Generates new combined data based on how many components it can fit into the screen without overlapping too much
def generate_combined_data(root_folder, max_nr_components=3, IoU=0.05, start_index_filename=0):
    os.mkdir('./'+root_folder+'_generated/')
    os.mkdir('./'+root_folder+'_generated/images/')
    os.mkdir('./'+root_folder+'_generated/labels/')
    components = os.listdir('./'+root_folder+'/images')
    images = []
    for component in components:
        images.append(os.listdir('./'+root_folder+'/images/'+component))

    for i in range(len(components)):
        print('Making dataset for:',components[i])
        for img in images[i]:
            image = []
            labels = []
            im = cv2.imread('./'+root_folder+'/images/'+components[i]+'/'+img)
            imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            ret, image = cv2.threshold(imgray, 100, 255, cv2.THRESH_BINARY_INV)

            f = open('./'+root_folder+'/labels/'+img[:-3]+'txt', 'r')
            data = f.read().split('\n')
            f.close()
            for line in data:
                labels.append(str_to_bb(line))

            nr_components = 0
            components_checked = [i]
            for c in range(len(images)):
                if max_nr_components == nr_components:
                    break;

                choices = []
                for y in range(len(components)):
                    if not y in components_checked:
                        choices.append(y)
                if len(choices) == 0:
                    break;
                c = numpy.random.choice(choices)
                components_checked.append(c)
                
                idxs = [y for y in range(len(images[c]))]
                random.shuffle(idxs)
                for y in idxs:
                    f = open('./'+root_folder+'/labels/'+images[c][y][:-3]+'txt', 'r')
                    data = f.read().split('\n')
                    f.close()

                    intersects = False
                    new_labels = labels.copy()
                    for line in data:
                        line_bb = str_to_bb(line)
                        denorm_line_bb = denormalize_bb(line_bb[1:], image.shape[:2])
                        for bb in labels:
                            denorm_bb = denormalize_bb(bb[1:], image.shape[:2])
                            if calc_iou(denorm_bb, denorm_line_bb) > IoU or calc_iou(denorm_line_bb, denorm_bb) > IoU:
                                intersects = True
                                break;
                        if intersects:
                            break;
                        new_labels.append(line_bb)
                    if intersects:
                        continue;

                    labels = new_labels.copy()
                    im2 = cv2.imread('./'+root_folder+'/images/'+components[c]+'/'+images[c][y])
                    imgray2 = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
                    ret, image2 = cv2.threshold(imgray2, 100, 255, cv2.THRESH_BINARY_INV)
                    image = image+image2
                    nr_components += 1
                    break;

            str_labels = ''
            for bb in labels:
                str_labels += str(int(bb[0]))+' '+bb_to_str(bb[1:])+'\n'
            cv2.imwrite('./'+root_folder+'_generated/images/'+str(start_index_filename)+'.jpg', image)
            f = open('./'+root_folder+'_generated/labels/'+str(start_index_filename)+'.txt', 'w')
            f.write(str_labels[:-1])
            f.close()
            start_index_filename += 1
            
class SketchDataset(Dataset):
    def __init__(self, root_dir, set_type, single_component=False, combined=False, preprocessed=False):
        self.images = []
        self.labels = []
        self.component_names = []
        if combined:
            for image in os.listdir(root_dir+"/images/"+set_type):
                self.images.append(root_dir+"/images/"+set_type+'/'+image)
                self.labels.append(root_dir+"/labels/"+(image.split('.')[0]+'.txt'))
        elif not single_component:
            for component in os.listdir(root_dir+"/images/"+set_type):
                if component == 'Combined':
                    continue
                self.component_names.append(component)
                for image in os.listdir(root_dir+"/images/"+set_type+"/"+component):
                    self.images.append(root_dir+"/images/"+set_type+"/"+component+"/"+image)
                    self.labels.append(root_dir+"/labels/"+(image.split('.')[0]+'.txt'))
        else:
            self.component_names.append(single_component)
            for image in os.listdir(root_dir+"/images/"+set_type+"/"+single_component):
                self.images.append(root_dir+"/images/"+set_type+"/"+single_component+"/"+image)
                self.labels.append(root_dir+"/labels/"+(image.split('.')[0]+'.txt'))
            
        self.root = root_dir
        self.single_component = single_component
        self.combined = combined
        self.preprocessed = preprocessed
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        im = cv2.imread(self.images[idx])
        if not self.preprocessed:
            imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            ret, img = cv2.threshold(imgray, 100, 255, cv2.THRESH_BINARY_INV)
        else:
            img = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        
        f = open(self.labels[idx], "r")
        data = f.read().split('\n')
        f.close()

        N = len(data)
        boxes = torch.zeros([N, 4], dtype=torch.double)
        labels = torch.zeros([N], dtype=torch.int64)
        areas = torch.zeros([N])
        
        for i in range(N):
            bb = denormalize_bb(str_to_bb(data[i])[1:], img.shape[:2])
            boxes[i][0],boxes[i][1],boxes[i][2],boxes[i][3] = bb
            areas[i] = calc_area(bb)
            
            if not self.single_component:
                labels[i] = int(data[i].split(' ')[0])+1
                continue
                
            labels[i] = 1
                
        return transform.to_tensor(img), {'boxes':boxes, 'labels':labels, 'image_id':torch.LongTensor([idx]), 'area':areas, 'iscrowd':torch.zeros([N], dtype=torch.int64)}

def train_model(model, optimizer, data_loader, data_loader_val, device, num_epochs, model_type, model_name, lr_scheduler=False, folder_name=datetime.datetime.now().strftime("%b-%d_%H-%M"), mixup=False, begin_epoch=0):
    writer = SummaryWriter()
    total_time = time.time()
    
    if not os.path.exists('./models/'+model_type+'/'+folder_name+'/'):
        os.mkdir('./models/'+model_type+'/'+folder_name+'/')

    for epoch in range(begin_epoch, num_epochs):
        epoch_time = time.time()
        epoch_loss = []
        batch_nr = 0
        
        for images, targets in data_loader:
            batch_time = time.time()
            if mixup:
                images, _, targets = mixup_data(images, targets)

            # Send them to device if using GPU
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            
            pred = model(images, targets)
            losses = sum(loss for loss in pred.values())
            
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            
            epoch_loss.append(losses.item())
            
            writer.add_scalars(model_type+'_'+model_name, {
                'train_loss': losses.item(),
            }, epoch*len(data_loader)+batch_nr)
            
            batch_nr = batch_nr + 1
            print_loss = losses.item()
            
            if batch_nr == epoch+1:
                print_loss = numpy.average(epoch_loss)
                
            print(
                '\r[Train] Epoch {} [{}/{}] - Loss: {} \tProgress [{}%] \tEpoch time elapsed: {}'.format(
                    epoch+1, batch_nr, len(data_loader), print_loss, round(((epoch/num_epochs)+(1/num_epochs*batch_nr/len(data_loader)))*100, 2), str(datetime.timedelta(seconds=round(time.time()-epoch_time)))
                ),
                end=''
            )
        
            
        writer.add_scalars(model_type+'_'+model_name, {
            'avg_epoch_loss': numpy.average(epoch_loss),
        }, (epoch+1))
            
        if lr_scheduler:
            lr_scheduler.step()
        
        print()
        evaluate_model(model, data_loader_val, device, writer, model_type, model_name, epoch+1)
        #model.train()
        print()
        
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            }, './models/'+model_type+'/'+folder_name+'/'+model_name+'-'+str(epoch+1)+'.pt')

    print(
        '\rTraining completed! Loss: {} \tTotal time elapsed: {}'.format(
            losses.item(), str(datetime.timedelta(seconds=round(time.time()-total_time)))
        ),
        end=''
    )
    
def evaluate_model(model, data_loader, device, writer, model_type, model_name, epoch):
    with torch.no_grad():
        epoch_time = time.time()
        avg_loss = []
        batch_nr = 0
        for images, targets in data_loader:
            # Send them to device if using GPU
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            pred = model(images, targets)
            losses = sum(loss for loss in pred.values())
            avg_loss.append(losses.item())
            
            batch_nr = batch_nr + 1
            print_loss = losses.item()
            
            if batch_nr == epoch+1:
                print_loss = numpy.average(avg_loss)
            print(
                '\r[Val] [{}/{}] - Loss: {} \tEpoch time elapsed: {}'.format(
                    batch_nr, len(data_loader), print_loss, str(datetime.timedelta(seconds=round(time.time()-epoch_time)))
                ),
                end=''
            )

        writer.add_scalars(model_type+'_'+model_name, {
            'val_loss': numpy.average(avg_loss),
        }, epoch)

def train_multi_frcnn(model_name, model_type, epochs, components=[]):
    if components == []:
        f = open('./dataset/labels.txt', "r")
        data = f.read().split('\n')
        f.close()
        components = {data[i]:i for i in range(len(data))}

    for component in components:
        dataset_train = SketchDataset('./dataset', 'train', single_component=component)
        dataset_val = SketchDataset('./dataset', 'val', single_component=component)
        data_loader = torch.utils.data.DataLoader(
                dataset_train, batch_size=5, shuffle=True, num_workers=0,
                collate_fn=collate_fn)
        data_loader_val = torch.utils.data.DataLoader(
                dataset_val, batch_size=2, shuffle=False, num_workers=0,
                collate_fn=collate_fn)

        device = torch.device('cpu')#torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        num_classes = 2
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

        model.to(device)

        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.005, 
                                    momentum=0.9, weight_decay=0.0005)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=3,
                                                       gamma=0.1)

        train_model(model, optimizer, data_loader, data_loader_val, device, epochs, model_type, model_name, lr_scheduler, folder_name=component+'_'+model_name)
        
def train_multi_ssd(model_name, model_type, epochs, components=[]):
    if components == []:
        f = open('./dataset/labels.txt', "r")
        data = f.read().split('\n')
        f.close()
        components = {data[i]:i for i in range(len(data))}

    for component in components:
        print('Training:',component)
        print()
        dataset_train = SketchDataset('./dataset', 'train', single_component=component)
        dataset_val = SketchDataset('./dataset', 'val', single_component=component)
        data_loader = torch.utils.data.DataLoader(
                dataset_train, batch_size=5, shuffle=True, num_workers=0,
                collate_fn=collate_fn)
        data_loader_val = torch.utils.data.DataLoader(
                dataset_val, batch_size=2, shuffle=False, num_workers=0,
                collate_fn=collate_fn)
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

        model_ssd = torchvision.models.detection.ssd300_vgg16(pretrained=True)

        num_classes = 2
        in_channels = [512, 1024, 512, 256, 256, 256]
        num_anchors = [4, 6, 6, 6, 4, 4]
        model_ssd.head.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)


        model_ssd.to(device)


        params = [p for p in model_ssd.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.00005, 
                                    momentum=0.9, weight_decay=0.0005)

        # and a learning rate scheduler
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                       step_size=3,
                                                       gamma=0.1)
        

        train_model(model_ssd, optimizer, data_loader, data_loader_val, device, epochs, model_type, model_name, lr_scheduler, folder_name=component+'_'+model_name)
    
def load_frcnn(date, model_name):
    device = torch.device('cpu')#torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    num_classes = 13
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    model.to(device)
    
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, 
                                momentum=0.9, weight_decay=0.0005)

    checkpoint = torch.load('./models/Faster-RCNN/'+date+'/'+model_name+'.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dic(checkpoint['optimizer_state_dict'])

    return model,optimizer
    
def load_ssd(date, model_name):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    model_ssd = torchvision.models.detection.ssd300_vgg16(pretrained=True)

    num_classes = 13
    in_channels = [512, 1024, 512, 256, 256, 256]
    num_anchors = [4, 6, 6, 6, 4, 4]
    model_ssd.head.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)

    model_ssd.to(device)

    params = [p for p in model_ssd.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005, 
                                momentum=0.9, weight_decay=0.0005)

    checkpoint = torch.load('./models/SSD/'+date+'/'+model_name+'.pt')
    model_ssd.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dic(checkpoint['optimizer_state_dict'])

    return model_ssd,optimizer

def handle_references(results):
    return results
def post_process_results(results, IoU=0.5):
    return results

### Generate combined data

In [41]:
time_now = time.time()
generate_combined_data('temp/all_data_march', max_nr_components=7, start_index_filename=3124)
str(datetime.timedelta(seconds=round(time.time()-time_now)))

Making dataset for: Button
Making dataset for: Checkbox
Making dataset for: Combobox
Making dataset for: Datagrid
Making dataset for: Dropdown
Making dataset for: Header
Making dataset for: Input
Making dataset for: List
Making dataset for: Object
Making dataset for: Radiobutton
Making dataset for: Reference


'0:35:41'

### Predict and save model based on threshold and IoU

In [14]:
print(labels) 
predict_and_save(model_retina, './dataset_mAP/images/', './results/all-retina/v8_map/', labels=labels, threshold=0.5, IoU=0, mask=True)

['Header', 'Input', 'Button', 'List', 'Datagrid', 'Dropdown', 'Combobox', 'Checkbox', 'Radiobutton', 'Reference', 'Object', 'ReferenceHead']




### mAP

In [29]:
# This was evaluating the single network that trains on single components

dataset_val = SketchDataset('./dataset_mAP', '', combined=True)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=3, shuffle=False, num_workers=0,
        collate_fn=collate_fn)


#IoU = 0.75
model.eval()

frcnn_metric90 = calc_map(model_single, data_loader_val, torch.device('cpu'), 13, IoU=0.9)
print()
frcnn_metric75 = calc_map(model_single, data_loader_val, torch.device('cpu'), 13, IoU=0.75)
print()
frcnn_metric50 = calc_map(model_single, data_loader_val, device, 13, IoU=0.5)
#print()
#frcnn_combined_metric90 = calc_map(combined_frcnn, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()
#ssd_combined_metric90 = calc_map(combined_ssd, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()
f = open('./dataset/labels.txt', "r")
data = f.read().split('\n')
f.close()
labels = [data[i] for i in range(len(data))]

print()
print('Values @90:')
print(frcnn_metric90[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric90[0][i])

print()
print('Values @75:')
print(frcnn_metric75[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric75[0][i])

print()
print('Values @50:')
print(frcnn_metric50[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric50[0][i])


[Eval] mAP [27/27]	Epoch time elapsed: 0:04:31
[Eval] mAP [27/27]	Epoch time elapsed: 0:04:27
[Eval] mAP [27/27]	Epoch time elapsed: 0:04:28
Values @90:
8.756188245921875

Header 		-> 0.0
Input 		-> 18.484848484848484
Button 		-> 4.27807486631016
List 		-> 28.98989898989899
Datagrid 	-> 14.498834498834501
Dropdown 	-> 11.255411255411255
Combobox 	-> 13.363636363636363
Checkbox 	-> 0.0
Radiobutton 	-> 9.090909090909092
Reference 	-> 0.0
Object 		-> 5.112645401213658
ReferenceHead 	-> 0.0

Values @75:
40.09377861697465

Header 		-> 0.0
Input 		-> 75.25381923617523
Button 		-> 72.72727272727273
List 		-> 46.51230788985441
Datagrid 	-> 30.797422274695002
Dropdown 	-> 74.81485141652607
Combobox 	-> 83.46977096977099
Checkbox 	-> 30.89572192513369
Radiobutton 	-> 29.350840336134453
Reference 	-> 2.840909090909091
Object 		-> 23.50647852408416
ReferenceHead 	-> 10.95594901313993

Values @50:
49.946678353157445

Header 		-> 28.530105300770913
Input 		-> 75.25381923617523
Button 		-> 72.7272727

In [32]:
# This evaluation was made on a model that used combined data

dataset_val = SketchDataset('./dataset_mAP', '', combined=True)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=3, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

#IoU = 0.75
model.eval()

frcnn_metric90 = calc_map(model, data_loader_val, torch.device('cpu'), 13, IoU=0.9)
print()
frcnn_metric75 = calc_map(model, data_loader_val, torch.device('cpu'), 13, IoU=0.75)
print()
frcnn_metric50 = calc_map(model, data_loader_val, device, 13, IoU=0.5)
#print()
#frcnn_combined_metric90 = calc_map(combined_frcnn, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()
#ssd_combined_metric90 = calc_map(combined_ssd, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()
f = open('./dataset/labels.txt', "r")
data = f.read().split('\n')
f.close()
labels = [data[i] for i in range(len(data))]

print()
print('Values @90:')
print(frcnn_metric90[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric90[0][i])

print()
print('Values @75:')
print(frcnn_metric75[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric75[0][i])

print()
print('Values @50:')
print(frcnn_metric50[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric50[0][i])


[Eval] mAP [27/27]	Epoch time elapsed: 0:04:30
[Eval] mAP [27/27]	Epoch time elapsed: 0:04:27
[Eval] mAP [27/27]	Epoch time elapsed: 0:04:27
Values @90:
4.940642320241888

Header 		-> 0.0
Input 		-> 10.795454545454545
Button 		-> 0.0
List 		-> 12.662337662337661
Datagrid 	-> 20.458077276259097
Dropdown 	-> 2.1212121212121215
Combobox 	-> 11.395338668065941
Checkbox 	-> 0.0
Radiobutton 	-> 0.0
Reference 	-> 0.0
Object 		-> 1.855287569573284
ReferenceHead 	-> 0.0

Values @75:
33.56378356901082

Header 		-> 9.090909090909092
Input 		-> 40.71087976226316
Button 		-> 54.7068428647376
List 		-> 72.7730411374612
Datagrid 	-> 56.66666666666666
Dropdown 	-> 37.471164769773715
Combobox 	-> 52.23120134761088
Checkbox 	-> 33.608815426997246
Radiobutton 	-> 27.737458619811562
Reference 	-> 0.0
Object 		-> 10.42759324009324
ReferenceHead 	-> 7.3408299018055105

Values @50:
50.772714410379486

Header 		-> 49.61436728988911
Input 		-> 47.873952960517315
Button 		-> 77.35834154679372
List 		-> 83.27537

In [6]:
# This evaluation was made on 1 single epoch from a gigantic combined dataset

dataset_val = SketchDataset('./dataset_mAP', '', combined=True)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=3, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

#IoU = 0.75

frcnn_metric90 = calc_map(model_giga, data_loader_val, torch.device('cpu'), 13, IoU=0.9)
print()
frcnn_metric75 = calc_map(model_giga, data_loader_val, torch.device('cpu'), 13, IoU=0.75)
print()
frcnn_metric50 = calc_map(model_giga, data_loader_val, device, 13, IoU=0.5)
#print()
#frcnn_combined_metric90 = calc_map(combined_frcnn, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()
#ssd_combined_metric90 = calc_map(combined_ssd, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()
f = open('./dataset/labels.txt', "r")
data = f.read().split('\n')
f.close()
labels = [data[i] for i in range(len(data))]

print()
print('Values @90:')
print(frcnn_metric90[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric90[0][i])

print()
print('Values @75:')
print(frcnn_metric75[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric75[0][i])

print()
print('Values @50:')
print(frcnn_metric50[1])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric50[0][i])


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[Eval] mAP [27/27]	Epoch time elapsed: 0:04:33
[Eval] mAP [27/27]	Epoch time elapsed: 0:04:29
[Eval] mAP [27/27]	Epoch time elapsed: 0:04:29
Values @90:
1.1521464646464645

Header 		-> 0.0
Input 		-> 0.0
Button 		-> 0.0
List 		-> 0.0
Datagrid 	-> 12.12121212121212
Dropdown 	-> 1.7045454545454544
Combobox 	-> 0.0
Checkbox 	-> 0.0
Radiobutton 	-> 0.0
Reference 	-> 0.0
Object 		-> 0.0
ReferenceHead 	-> 0.0

Values @75:
30.716062972690015

Header 		-> 0.0
Input 		-> 44.0311826669088
Button 		-> 18.181818181818183
List 		-> 37.87419651056015
Datagrid 	-> 86.09658893033786
Dropdown 	-> 42.10376492194675
Combobox 	-> 73.16993747940039
Checkbox 	-> 44.54545454545455
Radiobutton 	-> 11.436950146627565
Reference 	-> 0.0
Object 		-> 9.499969727242453
ReferenceHead 	-> 1.6528925619834711

Values @50:
49.8419526628721

Header 		-> 36.36363636363637
Input 		-> 51.75385109479878
Button 		-> 18.181818181818183
List 		-> 73.80998512994582
Datagrid 	-> 100.0
Dropdown 	-> 58.089413600823335
Combobox 	-> 

In [20]:
# Evaluation of retina net for combined data 

dataset_val = SketchDataset('./dataset_mAP', '', combined=True)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=1, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

IoU = 0.50
model_retina.eval()

frcnn_metric902 = calc_map(model_retina, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
print()
#ssd_metric90 = calc_map(model_ssd, data_loader_val, device, 13, IoU=IoU)
#print()
#frcnn_combined_metric90 = calc_map(combined_frcnn, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()
#ssd_combined_metric90 = calc_map(combined_ssd, data_loader_val, torch.device('cpu'), 13, IoU=IoU)
#print()

f = open('./dataset/labels.txt', "r")
data = f.read().split('\n')
f.close()
labels = [data[i] for i in range(len(data))]

print('Values:')
print(frcnn_metric902[1])
print(frcnn_metric902[0])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric902[0][i])


print('Values:')
print(frcnn_metric90[1])
print(frcnn_metric90[0])
print()

formating = [2,2,2,2,1,1,1,1,1,1,2,1]
for i in range(len(labels)):
    print(labels[i],'\t'*formating[i]+'->',frcnn_metric90[0][i])
#print(ssd_metric90[1])
#print(frcnn_combined_metric90[1])

#print(ssd_combined_metric90[1])

[Eval] mAP [79/79]	Epoch time elapsed: 0:04:18
Values:
37.07613552521579
[45.45454545454545, 47.59099152813845, 20.299586776859506, 55.90242284092552, 44.40356969812863, 28.603930828529755, 45.65400285988522, 56.364126070008425, 2.6392961876832843, 7.142857142857142, 43.760308166841746, 47.097988748186374]

Header 		-> 45.45454545454545
Input 		-> 47.59099152813845
Button 		-> 20.299586776859506
List 		-> 55.90242284092552
Datagrid 	-> 44.40356969812863
Dropdown 	-> 28.603930828529755
Combobox 	-> 45.65400285988522
Checkbox 	-> 56.364126070008425
Radiobutton 	-> 2.6392961876832843
Reference 	-> 7.142857142857142
Object 		-> 43.760308166841746
ReferenceHead 	-> 47.097988748186374
Values:


NameError: name 'frcnn_metric90' is not defined

In [5]:
# Evaluation for ssd

dataset_val = SketchDataset('./dataset', 'val', combined=True)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=1, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

model_metric90 = calc_map(model_ssd, data_loader_val, device, 13, IoU=0.9)
print(model_metric90)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[Eval] mAP [298/298]	Epoch time elapsed: 0:02:14([0.0, 53.57175087190761, 33.182628423270124, 53.59379750684098, 64.45360630313608, 15.18595041322314, 54.76190476190476, 53.261637352546444, 19.138941866214594, 79.11838819941586, 0, 0], 35.52238380820497)


### mAP on many-network combination

In [8]:
f = open('./dataset/labels.txt', "r")
data = f.read().split('\n')
f.close()
labels = [data[i] for i in range(len(data))]
device = torch.device('cpu') #torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

models_frcnn = frcnn_load_singular_models('v5_SGD-StepLR-5', labels[:-2], './models/Faster-RCNN/Singular_v5')
combined_frcnn = lambda images: predict_models(models_frcnn, images)
print('Loaded models')

Loaded models


In [None]:
dataset_val = SketchDataset('./dataset_mAP', '', combined=True)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=2, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

model_metric90_frcnn = calc_map(combined_frcnn, data_loader_val, device, 13, IoU=0.9)
print()
model_metric75_frcnn = calc_map(combined_frcnn, data_loader_val, device, 13, IoU=0.75)
print()
model_metric50_frcnn = calc_map(combined_frcnn, data_loader_val, device, 13, IoU=0.5)
print()
print(model_metric90_frcnn)
print(model_metric75_frcnn)
print(model_metric50_frcnn)

[Eval] mAP [40/40]	Epoch time elapsed: 1:05:18
[Eval] mAP [40/40]	Epoch time elapsed: 1:04:52
[Eval] mAP [21/40]	Epoch time elapsed: 0:36:57

In [123]:
root_dir = './temp/object_rotated/Object/'
for image in os.listdir(root_dir):
    im = root_dir+image
    img = Image.open(im)
    for i in range(-3, 4):
        if i == 0:
            continue
        rot_img = torchvision.transforms.functional.rotate(img, i, fill=255)
        rot_img.save(root_dir+'rotated_'+str(i)+'_'+image)

KeyboardInterrupt: 

## Object detection

### Train multi

In [95]:
train_multi_ssd('v5_SGD-StepLR', 'SSD', 5)

Training: Header

[Train] Epoch 1 [21/21] - Loss: 7.545849323272705 	Progress [20.0%] 	Epoch time elapsed: 0:00:5352
[Val] [12/12] - Loss: 4.624712944030762 	Epoch time elapsed: 0:00:11
[Train] Epoch 2 [21/21] - Loss: 5.076709747314453 	Progress [40.0%] 	Epoch time elapsed: 0:00:5172
[Val] [12/12] - Loss: 3.2668211460113525 	Epoch time elapsed: 0:00:10
[Train] Epoch 3 [21/21] - Loss: 3.775595188140869 	Progress [60.0%] 	Epoch time elapsed: 0:00:5049
[Val] [12/12] - Loss: 3.1986827850341797 	Epoch time elapsed: 0:00:09
[Train] Epoch 4 [21/21] - Loss: 3.2427618503570557 	Progress [80.0%] 	Epoch time elapsed: 0:00:508
[Val] [12/12] - Loss: 2.826225757598877 	Epoch time elapsed: 0:00:098
[Train] Epoch 5 [21/21] - Loss: 2.7989330291748047 	Progress [100.0%] 	Epoch time elapsed: 0:00:49
[Val] [12/12] - Loss: 2.8225202560424805 	Epoch time elapsed: 0:00:09
Training completed! Loss: 2.7989330291748047 	Total time elapsed: 0:05:03Training: Input

[Train] Epoch 1 [23/23] - Loss: 4.75054502487182

FileNotFoundError: [WinError 3] The system cannot find the path specified: './dataset/images/train/ReferenceHead'

In [87]:
train_multi_frcnn('v5_SGD-StepLR', 'Faster-RCNN', 5, components=['Radiobutton', 'Reference'])

KeyboardInterrupt: 

### Retrain

In [78]:
dataset_train = SketchDataset('./dataset', 'train')
dataset_val = SketchDataset('./dataset', 'val')

data_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=1, shuffle=True, num_workers=0,
        collate_fn=collate_fn)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=2, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

model.train()
train_model(model, optimizer, data_loader, data_loader_val, device, 10, 'Faster-RCNN', 'All-Comp_v6_SGD-StepLR', lr_scheduler, begin_epoch=5)

[Train] Epoch 2 [1268/1268] - Loss: 0.36831164360046387 	Progress [20.0%] 	Epoch time elapsed: 3:53:1329
[Val] [150/150] - Loss: 0.18582996726036072 	Epoch time elapsed: 0:20:24
[Train] Epoch 3 [1268/1268] - Loss: 0.11197745054960251 	Progress [30.0%] 	Epoch time elapsed: 3:48:5106
[Val] [150/150] - Loss: 0.15591025352478027 	Epoch time elapsed: 0:20:222
[Train] Epoch 4 [1268/1268] - Loss: 0.07354184240102768 	Progress [40.0%] 	Epoch time elapsed: 3:49:2313
[Val] [150/150] - Loss: 0.07237938046455383 	Epoch time elapsed: 0:20:235
[Train] Epoch 5 [55/1268] - Loss: 0.01926172338426113 	Progress [40.43%] 	Epoch time elapsed: 0:09:565

KeyboardInterrupt: 

### Faster-RCNN Train model

In [None]:
dataset_train = SketchDataset('./dataset_combined', 'train', combined=True, preprocessed=True)
dataset_val = SketchDataset('./dataset_combined', 'val', combined=True, preprocessed=True)

data_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=3, shuffle=True, num_workers=0,
        collate_fn=collate_fn)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=3, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

device = torch.device('cpu') #torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
num_classes = 13
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, 
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

train_model(model, optimizer, data_loader, data_loader_val, device, 10, 'Faster-RCNN', 'All-Comp_v9_SGD-StepLR_notpre', lr_scheduler)

[Train] Epoch 1 [1619/1619] - Loss: 0.13450743522699468 	Progress [10.0%] 	Epoch time elapsed: 11:03:48
[Val] [464/464] - Loss: 0.2626046294270004 	Epoch time elapsed: 1:15:581
[Train] Epoch 2 [68/1619] - Loss: 0.2409842579561231 	Progress [10.42%] 	Epoch time elapsed: 0:27:567

### SSD train model

In [4]:
dataset_train = SketchDataset('./dataset', 'train')
dataset_val = SketchDataset('./dataset', 'val')

data_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=5, shuffle=True, num_workers=0,
        collate_fn=collate_fn)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=2, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model_ssd = torchvision.models.detection.ssd300_vgg16(pretrained=True)

num_classes = 13
in_channels = [512, 1024, 512, 256, 256, 256]
num_anchors = [4, 6, 6, 6, 4, 4]
model_ssd.head.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)


model_ssd.to(device)


params = [p for p in model_ssd.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0005, 
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

train_model(model_ssd, optimizer, data_loader, data_loader_val, device, 15, 'SSD', 'All_v5_SGD-StepLR', lr_scheduler)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[Train] Epoch 1 [254/254] - Loss: 1.4496783018112183 	Progress [6.67%] 	Epoch time elapsed: 0:08:59
[Val] [149/149] - Loss: 1.0815856456756592 	Epoch time elapsed: 0:01:42
[Train] Epoch 2 [254/254] - Loss: 0.6185829043388367 	Progress [13.33%] 	Epoch time elapsed: 0:09:03
[Val] [149/149] - Loss: 0.9633197784423828 	Epoch time elapsed: 0:01:439
[Train] Epoch 3 [254/254] - Loss: 0.4855213165283203 	Progress [20.0%] 	Epoch time elapsed: 0:09:0059
[Val] [149/149] - Loss: 0.7084689140319824 	Epoch time elapsed: 0:01:442
[Train] Epoch 4 [254/254] - Loss: 0.5170818567276001 	Progress [26.67%] 	Epoch time elapsed: 0:09:037
[Val] [149/149] - Loss: 0.554409384727478 	Epoch time elapsed: 0:01:4241
[Train] Epoch 5 [254/254] - Loss: 0.2824884057044983 	Progress [33.33%] 	Epoch time elapsed: 0:09:018
[Val] [149/149] - Loss: 0.5354295372962952 	Epoch time elapsed: 0:01:428
[Train] Epoch 6 [254/254] - Loss: 0.23927098512649536 	Progress [40.0%] 	Epoch time elapsed: 0:08:541
[Val] [149/149] - Loss: 0.5

### Retina train model

In [5]:
dataset_train = SketchDataset('./dataset_combined', 'train', combined=True)
dataset_val = SketchDataset('./dataset_combined', 'val', combined=True)

data_loader = torch.utils.data.DataLoader(
        dataset_train, batch_size=3, shuffle=True, num_workers=0,
        collate_fn=collate_fn)
data_loader_val = torch.utils.data.DataLoader(
        dataset_val, batch_size=3, shuffle=False, num_workers=0,
        collate_fn=collate_fn)

device = torch.device('cpu')# if torch.cuda.is_available() else torch.device('cpu')

model_retina = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
model_retina.head = torchvision.models.detection.retinanet.RetinaNetHead(256, 9, 13)

model_retina.to(device)


params = [p for p in model_retina.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, 
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

train_model(model_retina, optimizer, data_loader, data_loader_val, device, 15, 'RetinaNet', 'All_v8_SGD-StepLR', lr_scheduler)

[Train] Epoch 1 [415/415] - Loss: 0.5971544981002808 	Progress [6.67%] 	Epoch time elapsed: 2:36:22
[Val] [106/106] - Loss: 0.7871170043945312 	Epoch time elapsed: 0:16:35
[Train] Epoch 2 [415/415] - Loss: 0.4725760221481323 	Progress [13.33%] 	Epoch time elapsed: 2:36:533
[Val] [106/106] - Loss: 0.445965051651001 	Epoch time elapsed: 0:16:2721
[Train] Epoch 3 [415/415] - Loss: 0.24580880999565125 	Progress [20.0%] 	Epoch time elapsed: 2:36:496
[Val] [106/106] - Loss: 0.3202489912509918 	Epoch time elapsed: 0:16:103
[Train] Epoch 4 [415/415] - Loss: 0.14127951860427856 	Progress [26.67%] 	Epoch time elapsed: 2:33:04
[Val] [106/106] - Loss: 0.19828936457633972 	Epoch time elapsed: 0:16:12
[Train] Epoch 5 [415/415] - Loss: 0.1403563916683197 	Progress [33.33%] 	Epoch time elapsed: 2:33:153
[Val] [106/106] - Loss: 0.1775968074798584 	Epoch time elapsed: 0:16:114
[Train] Epoch 6 [415/415] - Loss: 0.09419110417366028 	Progress [40.0%] 	Epoch time elapsed: 2:33:008
[Val] [106/106] - Loss: 0.

KeyboardInterrupt: 

## Optical character recognition


## Post processing engine