In [1]:
#imports
import numpy as np
import pandas as pd
from pathlib import Path
import os
import random
from PIL import Image, ImageDraw
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import time
import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from google.colab import files
import helper
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


Code for getting the annotations into a usable form

In [2]:
import re
import datetime
from itertools import groupby
from skimage import measure
import pycocotools
from pycocotools import mask
from math import sqrt

convert = lambda text: int(text) if text.isdigit() else text.lower()
natrual_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]

def resize_binary_mask(array, new_size):
    image = Image.fromarray(array.astype(np.uint8)*255)
    image = image.resize(new_size)
    return np.asarray(image).astype(np.bool_)

def close_contour(contour):
    if not np.array_equal(contour[0], contour[-1]):
        contour = np.vstack((contour, contour[0]))
    return contour

def binary_mask_to_rle(binary_mask):
    rle = {'counts': [], 'size': list(binary_mask.shape)}
    counts = rle.get('counts')
    for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))):
        if i == 0 and value == 1:
                counts.append(0)
        counts.append(len(list(elements)))

    return rle

def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Converts a binary mask to COCO polygon representation

    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to approximated
            polygonal chain. If tolerance is 0, the original coordinate array is returned.

    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
    contours = measure.find_contours(padded_binary_mask, 0.5)
    contours = np.subtract(contours, 1)
    for contour in contours:
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # after padding and subtracting 1 we may get -0.5 points in our segmentation
        segmentation = [0 if i < 0 else i for i in segmentation]
        polygons.append(segmentation)

    return polygons

def create_image_info(image_id, file_name, image_size,
                      date_captured=datetime.datetime.utcnow().isoformat(' '),
                      license_id=1, coco_url="", flickr_url=""):

    image_info = {
            "id": image_id,
            "file_name": file_name,
            "width": image_size[0],
            "height": image_size[1],
            "date_captured": date_captured,
            "license": license_id,
            "coco_url": coco_url,
            "flickr_url": flickr_url
    }

    return image_info

def create_annotation_info(annotation_id, image_id, category_info, binary_mask=None,
                           image_size=None, tolerance=2, bounding_box=None):
    assert(binary_mask is not None or image_size is not None)

    if binary_mask is not None:
        if image_size is not None:
            binary_mask = resize_binary_mask(binary_mask, image_size)
        binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
        area = mask.area(binary_mask_encoded)
        bounding_box = mask.toBbox(binary_mask_encoded)
    else:
        area = np.array(bounding_box[2] * bounding_box[3], dtype=int)
    if area < 20:
        print("Area of this annotation is less than 20, Skip it! image_id:", image_id, "area:", area, "bbox:", bounding_box)
        return None
    if category_info["is_crowd"]:
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else :
        is_crowd = 0
        if binary_mask is not None:
            binary_mask_encoded = mask.encode
            segmentation = binary_mask_to_polygon(binary_mask, tolerance)
            if not segmentation:
                return None
    if binary_mask is not None:
        annotation_info = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": category_info["id"],
            "iscrowd": is_crowd,
            "area": area.tolist(),
            "bbox": bounding_box.tolist(),
            "segmentation": segmentation,
            "width": binary_mask.shape[1],
            "height": binary_mask.shape[0],
        }
    else:
        annotation_info = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": category_info["id"],
            "iscrowd": is_crowd,
            "area": area.tolist(),
            "bbox": bounding_box.tolist(),
            "width": image_size[0],
            "height": image_size[1],
        }

    return annotation_info

'''
This file is for creating [MSCOCO style] json annotation files for convenient
training in popular detection frameworks, such as mmdetection, detectron,
maskrcnn-benchmark, etc. Before running this file, please follow the instructions
in https://github.com/waspinator/coco to install the COCO API or use the following
commands for installation:
    pip install cython
    pip install git+git://github.com/waspinator/coco.git@2.1.0

Usage: python3 code/make_json_anno.py --list_path /path/to/img/list/ [--tb_only]
'''

import os
import json
import numpy as np
import xml.etree.ElementTree as ET
from tqdm import tqdm
from argparse import ArgumentParser

'''
ActiveTuberculosis: Active TB
ObsoletePulmonaryTuberculosis: Latent TB
PulmonaryTuberculosis: Unknown TB
'''
def cat2label(cls_name):
    x = {'ActiveTuberculosis': 1, 'ObsoletePulmonaryTuberculosis': 2, 'PulmonaryTuberculosis': 3}
    return x[cls_name]


'''
Load annotations in the XML format
Input:
       xml_path: (string), xml annoation (relative) path
       size    : (int, int), align with the actual image size
'''
def load_annotation(xml_path, resized=(512, 512)):
    if not os.path.exists(xml_path):
        return None, None
    tree = ET.parse(xml_path)
    root = tree.getroot()
    bboxes = []
    labels = []
    bboxes_ignore = []
    labels_ignore = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        label = cat2label(name)
        difficult = int(obj.find('difficult').text)
        bnd_box = obj.find('bndbox')
        bbox = [
            int(bnd_box.find('xmin').text),
            int(bnd_box.find('ymin').text),
            int(bnd_box.find('xmax').text),
            int(bnd_box.find('ymax').text)
        ]
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)
        width_ratio = width / resized[1]
        height_ratio = height / resized[0]
        ignore = False
        bbox[0] /= width_ratio; bbox[2] /= width_ratio
        bbox[1] /= height_ratio; bbox[3] /= height_ratio
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        if w < 1 or h < 1:
            ignore = 1
        if difficult or ignore:
            bboxes_ignore.append(bbox)
            labels_ignore.append(label)
        else:
            bboxes.append(bbox)
            labels.append(label)
    if not bboxes:
        bboxes = None #np.zeros((0, 4))
        labels = None #np.zeros((0, ))
    else:
        bboxes = np.array(bboxes, ndmin=2) - 1
        labels = np.array(labels)
    if not bboxes_ignore:
        bboxes_ignore = np.zeros((0, 4))
        labels_ignore = np.zeros((0, ))
    else:
        bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
        labels_ignore = np.array(labels_ignore)
    if bboxes is not None:
        return bboxes.astype(np.float32), labels.astype(np.int64)
    else:
        return None, None


def dataset_info():
    INFO = [
        {
            'contributor': 'Yun Liu, Yu-Huan Wu, Yunfeng Ban, Huifang Wang, Ming-Ming Cheng',
            'date_created': '2020/06/22',
            'description': 'TBX11K Dataset',
            'url': 'http://mmcheng.net/tb',
            'version': '1.0',
            'year': 2020
        }
    ]


    LICENSES = [
        {
            'id': 1,
            'name': 'Attribution-NonCommercial-ShareAlike License',
            'url': 'http://creativecommons.org/licenses/by-nc-sa/2.0/'
        }
    ]

    CATEGORIES = [
        {'id': 1, 'name': 'ActiveTuberculosis', 'supercategory': 'Tuberculosis'},
        {'id': 2, 'name': 'ObsoletePulmonaryTuberculosis', 'supercategory': 'Tuberculosis'},
        {'id': 3, 'name': 'PulmonaryTuberculosis', 'supercategory': 'Tuberculosis'},
    ]

    return INFO, LICENSES, CATEGORIES


#Here's what getData Does:
#input: filepath to target xml file
#output: 2xn array where n is the number of boxes
#1st entry is the dimensions with structure [xmin, ymin, xmax, ymax]
#2nd entry is the classification of the box 1 = ActiveTB, 2 = ObseletePulmonaryTB 3 = PulmonaryTB
def getData (XML_filepath):

  return load_annotation(XML_filepath)

In [3]:
#/content/drive/MyDrive/APS360 Project/archive/TBX11K/annotations/xml
#annotations are stored in a dict with the image name as key and the output of getData as the value
annotation_dir_path = '/content/drive/MyDrive/APS360 Project/archive/TBX11K/annotations/xml'
annotations = [[str(path.parts[-1:])[2:-7] , getData(path)[0], getData(path)[1]] for path in Path(annotation_dir_path).rglob('*.xml')]

#d = [{'image_id':annotation[0], 'bboxes':annotation[1][0], 'label':annotation[1][1]} for annotation in annotations]
# d = {'image_id':annotations[:][0], 'bboxes':annotations[:][1][0], 'label':annotations[:][1][1]}
ann_df = pd.DataFrame(annotations, columns = ['image_id', 'bboxes', 'labels'])
ann_df.head()

# print(len(ann_df))
# ann_df

Unnamed: 0,image_id,bboxes,labels
0,tb0020,"[[173.36876, 57.23937, 276.3796, 147.13057], [...","[1, 1]"
1,tb0006,"[[79.21699, 93.91188, 241.2955, 329.41318], [2...","[1, 1]"
2,tb0033,"[[291.71613, 151.18199, 411.99307, 317.544], [...","[1, 1]"
3,tb0007,"[[307.30734, 62.050446, 442.81104, 208.66173],...","[2, 2]"
4,tb0012,"[[307.45193, 81.31111, 451.6408, 265.4889]]",[1]


In [4]:
#transforms = transforms.Compose([transforms.Resize(512), transforms.ToTensor()])

image_dir_path_healthy = '/content/drive/MyDrive/APS360 Project/archive/TBX11K/imgs/health'
paths = [path.parts[-2:] for path in
         Path(image_dir_path_healthy).rglob('*.png')]
df_healthy = pd.DataFrame(data=paths, columns=['Condition', 'image_id'])

image_dir_path_sick = '/content/drive/MyDrive/APS360 Project/archive/TBX11K/imgs/sick'
paths = [path.parts[-2:] for path in
         Path(image_dir_path_sick).rglob('*.png')]
df_sick = pd.DataFrame(data=paths, columns=['Condition', 'image_id'])

image_dir_path_tb = '/content/drive/MyDrive/APS360 Project/archive/TBX11K/imgs/tb'
paths = [path.parts[-2:] for path in
         Path(image_dir_path_tb).rglob('*.png')]
df_tb = pd.DataFrame(data=paths, columns=['Condition', 'image_id'])


#df_healthy['Images']
#print(df_healthy.head())
#print(df_sick.head())
#df_tb.head()

#had some difficulty since if there are no boxes or boxes of area 0 in an image the training breaks
#the handeled by having a box over the whole image with a class of 0: healthy or 4: sick
df_healthy = df_healthy['image_id'].to_list()
healthy_image_ids = [[string[:-4],[[0,0,512,512]],[0]] for string in df_healthy]
df_healthy = pd.DataFrame(healthy_image_ids, columns = ['image_id', 'bboxes', 'labels'])

df_sick = df_sick['image_id'].to_list()
sick_image_ids = [[string[:-4],[[0,0,512,512]],[4]] for string in df_sick]
df_sick = pd.DataFrame(sick_image_ids, columns = ['image_id', 'bboxes', 'labels'])
print(len(ann_df))

df_mixed = pd.concat([df_sick , df_healthy , ann_df] , ignore_index=True)
print(df_mixed.shape)


#get dataset - from all subdirectories (subdir names become labels automatically)
#dataset = datasets.ImageFolder('/content/drive/MyDrive/APS360 Project/archive/TBX11K/imgs', transform=transforms)

799
(8399, 3)


In [5]:
from operator import index

df_mixed = df_mixed.dropna(axis=0,inplace=False)
#df_mixed = df_mixed.drop(axis=0, index=df_mixed[df_mixed.bboxes == None].index, inplace=False)
print(len(df_mixed))


8399


In [6]:
class CustomDataset(torch.utils.data.Dataset):
  def __init__(self, df, indices):
      self.df = df
      self.indices = indices
  def __len__(self):
      return len(self.indices)
  def __getitem__(self, idx):
      image_name = self.df.image_id[self.indices[idx]]
      target = {}
      boxes = self.df.bboxes[self.indices[idx]]
      labels = self.df.labels[self.indices[idx]]
      try:
        a = torch.tensor(boxes)
        b = torch.tensor(labels)
        target["boxes"] = torch.tensor(boxes)
        target["label"] = torch.tensor(labels)
      except:
        print("died" + str(image_name))
        print(boxes)
        print(labels)
        return

      img_path = ''
      if labels[0] == 0:
        img_path = 'health'
      elif labels[0] == 4:
        img_path = 'sick'
      else:
        img_path = 'tb'
      img_path = '/content/drive/MyDrive/APS360 Project/archive/TBX11K/imgs/' + img_path+ '/'+ image_name + '.png'
      img = Image.open(img_path).convert('RGB')
      return transforms.ToTensor()(img),target



In [7]:
train_inds, val_inds = train_test_split(range(df_mixed.shape[0]),test_size = 0.3)
val_inds, test_inds = train_test_split(val_inds,test_size = 0.5)

added = list()
for ts in train_inds:
    if df_mixed["image_id"][ts][0] == 't':
      added.append(ts)

train_inds = train_inds + 4*added
print(len(train_inds))
print(len(val_inds))
print(len(test_inds))


8183
1260
1260


In [8]:
def custom_collate(data):
  return data

In [9]:
train_dl = torch.utils.data.DataLoader(CustomDataset(df_mixed, train_inds), batch_size=32,
                                       shuffle = True,
                                       collate_fn = custom_collate,
                                       pin_memory = True if torch.cuda.is_available() else False)

val_dl = torch.utils.data.DataLoader(CustomDataset(df_mixed, val_inds), batch_size=64,
                                       shuffle = True,
                                       collate_fn = custom_collate,
                                       pin_memory = True if torch.cuda.is_available() else False)

test_dl = torch.utils.data.DataLoader(CustomDataset(df_mixed, test_inds), batch_size=1260,
                                       shuffle = True,
                                       collate_fn = custom_collate,
                                       pin_memory = True if torch.cuda.is_available() else False)



In [10]:
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(trainable_backbone_layers=3, pretrained = True)

Downloading: "https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth
100%|██████████| 74.2M/74.2M [00:00<00:00, 118MB/s]


In [11]:
#model = torchvision.models.detection.fasterrcnn_resnet50_fpn(trainable_backbone_layers=1,pretrained = True)
num_classes = 5
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features,num_classes)
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(320,), max_size=640, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): FrozenBatchNorm2d(16, eps=1e-05)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
            (2): ReLU(inplace=True)
          )
          (1): Conv2dNormActivation(
            (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
          )
        )
      )
      (2): InvertedResidual(
        (block): 

In [12]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
device

device(type='cpu')

In [13]:
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001, momentum=0.9, weight_decay = 0.0005)
num_epochs = 5

In [14]:
import torch
from collections import Counter

In [15]:
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct Labels of Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)

    Returns:
        tensor: Intersection over union for all examples
    """

    # Slicing idx:idx+1 in order to keep tensor dimensionality
    # Doing ... in indexing if there would be additional dimensions
    # Like for Yolo algorithm which would have (N, S, S, 4) in shape
    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2

    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # Need clamp(0) in case they do not intersect, then we want intersection to be 0
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    return intersection / (box1_area + box2_area - intersection + 1e-6)

In [16]:
def mean_average_precision(predictions, truths, iou_threshold=0.5, box_format = "corners", num_classes=5):
    #pred_boxes(list): [[train_idx, class_pred, prob_score, x1, y1, x2, y2], ....]
    #print(truths)
    pred_boxes = list()
    true_boxes = list()
    #print('------------------')
    #print(range(len(predictions)))
    for img_num in range(len(predictions)):
        #print("boxes: "+ str(range(len(predictions[img_num]["boxes"]))))
        for box_num in range(len(predictions[img_num]["boxes"])):
            pred_boxes.append([img_num,
                               predictions[img_num]["labels"][box_num],
                               predictions[img_num]["scores"][box_num],
                               predictions[img_num]["boxes"][box_num][0],
                               predictions[img_num]["boxes"][box_num][1],
                               predictions[img_num]["boxes"][box_num][2],
                               predictions[img_num]["boxes"][box_num][3]
                               ])
            #print(pred_boxes)
    tr_num = 0
    for data in truths:
        for box_num in range(len(data[1]["boxes"])):
            true_boxes.append([tr_num, data[1]["label"][box_num], 1,
                               data[1]["boxes"][box_num][0],
                               data[1]["boxes"][box_num][1],
                               data[1]["boxes"][box_num][2],
                               data[1]["boxes"][box_num][3],
                               ])

    average_precisions = []
    average_recalls = []
    epsilon = 1e-6

    for c in range(num_classes):
        detections = []
        ground_truths = []

        for detection in pred_boxes:
            if detection[1] ==c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)
        #dict {imagenum:num of bboxes}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        detections.sort(key=lambda x : x[2], reverse = True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        for detection_idx, detection in enumerate(detections):
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]
            num_gts = len(ground_truth_img)
            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
              iou = intersection_over_union(
                  torch.tensor(detection[3:]),
                  torch.tensor(gt[3:]),
                  box_format = box_format,
                  )
              if iou > best_iou:
                  best_iou = iou
                  best_gt_idx = idx

            if best_iou > iou_threshold:
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum/(total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum+FP_cumsum + epsilon))
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]),recalls))
        average_precisions.append(torch.trapz(precisions,recalls))
        average_recalls.append(sum(recalls)/len(recalls))
    return sum(average_precisions)/len(average_precisions), average_precisions, average_recalls



In [17]:
def get_model_name(name, batch_size, learning_rate, partial_epoch):
    """ Generate a name for the model consisting of all the hyperparameter values

    Args:
        config: Configuration object containing the hyperparameters
    Returns:
        path: A string with the hyperparameter name and value concatenated
    """
    path = "model_{0}_bs{1}_lr{2}_partial_epoch{3}".format(name,
                                                   batch_size,
                                                   learning_rate,
                                                   partial_epoch)
    return path

In [21]:
#since TBX11 has so much data, one epoch will take about 60 hours to train
#to fix this I am using a sub_epoch system which makes 4 batchs = 1 epoch
#all graphics and statistics will use this system instead of actual epochs
model.to(device)
model.train()
partial_epoch_num = 0
batch_num = 0
partial_epoch_loss = 0
train_loss = np.zeros(int(num_epochs*len(train_dl)/5.0))
val_mAP = np.zeros(int(num_epochs*len(train_dl)/5.0))
val_loss = np.zeros(int(num_epochs*len(train_dl)/5.0))
val_average_precisions = list()
val_average_recalls = list()
start_time = time.time()
for epochs in range(num_epochs):
    valbatch = iter(val_dl)
    for batch in train_dl:
        #end of each partial_epoch
        if batch_num%10 == 9:
          #evaluate with validation set
          model.eval()
          try:
            vbatch =  next(valbatch)
          except:
            valbatch = iter(val_dl)
            vbatch =  next(valbatch)

          for d in vbatch:
            imgs.append(d[0].to(device))
            targ = {}
            targ["boxes"] = d[1]["boxes"].to(device)
            targ["labels"] = d[1]["label"].to(device)
            targets.append(targ)
          predicted = model(imgs, targets)
          mapoutput = mean_average_precision(predicted, vbatch)
          val_mAP[partial_epoch_num] = mapoutput[0]
          val_average_precisions.append(mapoutput[1])
          val_average_recalls.append(mapoutput[2])
          model.train()
          #out_loss = model(imgs, targets)
          #out_loss = sum(values for values in out_loss.values())
          #val_loss[partial_epoch_num] = out_loss
          train_loss[partial_epoch_num] = partial_epoch_loss

          print(("Partial epoch {}: Train loss: {} |"+
               "Validation mAP: {}").format(
                   partial_epoch_num + 1,
                   train_loss[partial_epoch_num],
                   val_mAP[partial_epoch_num]))
          model_path = get_model_name("fasterrcnn_mobilenet", 16, .003, partial_epoch_num + 1)
          torch.save(model.state_dict(), model_path)
          partial_epoch_num += 1
          partial_epoch_loss = 0
          np.savetxt("{}_train_loss.csv".format(model_path), train_loss)
          np.savetxt("{}_val_mAP.csv".format(model_path), val_mAP)
          np.savetxt("{}_valAPs.csv".format(model_path), val_average_precisions)
          np.savetxt("{}_valrecalls.csv".format(model_path), val_average_recalls)
          print("time: " + str(time.time()-start_time))
          start_time = time.time()
          model_path = get_model_name("fasterrcnn_mobilenet", 16, .003, partial_epoch_num + 1)
        imgs = []
        targets = []
        for d in batch:
            imgs.append(d[0].to(device))
            targ = {}
            targ["boxes"] = d[1]["boxes"].to(device)
            targ["labels"] = d[1]["label"].to(device)
            targets.append(targ)
        loss_dict = model(imgs, targets)
        loss = sum(values for values in loss_dict.values())
        partial_epoch_loss += loss.cpu().detach().numpy()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_num += 1

    #print(epoch_loss)

Partial epoch 1: Train loss: 1.762515977025032 |Validation mAP: 0.0
time: 7.4733195304870605
Partial epoch 2: Train loss: 1.8422484993934631 |Validation mAP: 0.0018518507713451982
time: 7.919113636016846
Partial epoch 3: Train loss: 1.852149710059166 |Validation mAP: 0.0007142855320125818
time: 7.650855541229248
Partial epoch 4: Train loss: 2.1915039718151093 |Validation mAP: 0.0
time: 7.866865158081055
Partial epoch 5: Train loss: 1.88109590113163 |Validation mAP: 0.0
time: 7.776778697967529
Partial epoch 6: Train loss: 2.191684365272522 |Validation mAP: 0.0
time: 7.911764621734619
Partial epoch 7: Train loss: 1.9710239619016647 |Validation mAP: 0.0027777766808867455
time: 8.448320865631104
Partial epoch 8: Train loss: 2.307223930954933 |Validation mAP: 0.0
time: 8.152947187423706
Partial epoch 9: Train loss: 2.392590433359146 |Validation mAP: 0.00020833332382608205
time: 8.12332820892334
Partial epoch 10: Train loss: 2.1290668547153473 |Validation mAP: 0.0008928569150157273
time: 7.8

In [28]:
model_path = get_model_name("fasterrcnn_mobilenet", 16, .003, 109)
df = pd.read_csv(model_path+'_valrecalls.csv')

print(df.to_string())

     0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
0    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.492062538862228394e-02
1    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.124998882412910461e-02
2    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
3    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
4    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
5    0.000000000000000000e+00 7.471264898777008057e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
6    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.00000000

In [27]:
model_path = get_model_name("fasterrcnn_mobilenet", 16, .003, 109)
df = pd.read_csv(model_path+'_valAPs.csv')

print(df.to_string())

     0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
0    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 9.259253740310668945e-03
1    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.571427660062909126e-03
2    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
3    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
4    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
5    0.000000000000000000e+00 1.388888340443372726e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00
6    0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.00000000

In [None]:
#original train code without partial epochs
# model.to(device)
# for epochs in range(num_epochs):
#     epoch_loss = 0
#     for batch in train_dl:
#         imgs = []
#         targets = []
#         for d in batch:
#             imgs.append(d[0].to(device))
#             targ = {}
#             targ["boxes"] = d[1]["boxes"].to(device)
#             targ["labels"] = d[1]["label"].to(device)
#             targets.append(targ)
#         loss_dict = model(imgs, targets)
#         loss = sum(values for values in loss_dict.values())
#         epoch_loss += loss.cpu().detach().numpy()
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#     print(epoch_loss)

KeyboardInterrupt: ignored