Another model to predict the type of each plaque in the rois
Model: EfficientNet (B1/B2) - a type of CNN
  - Input: cropped images of plaques (cropped according to the bounding box)
  - Output: type of each plaque

Model format (EfficientNet B2):
  - Input: (N, 3, 260, 260) - (batch size, C, H, W)
  - Output: (1, # of classes)

In [1]:
import torch
import torchvision
import os
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
from utils import im_to_txt_path, imread, isfile, imwrite
from dataset_operations import clip_value
import pickle


parent_dir = 'rois2/'
object_dir = parent_dir + 'objects/'
img_dir = parent_dir + 'images/'
label_dir = parent_dir + 'labels/'
model_dir = parent_dir + 'models/'
tile_dir = parent_dir + 'tiles/'

obj_train_dir = object_dir + 'train/'
obj_val_dir = object_dir + 'val'
false_positive_label_dir = tile_dir + '/false_positives/labels/'
if not os.path.exists(object_dir):
    os.makedirs(object_dir)


In [2]:
class_num = 2

In [5]:
# clean up the directory
for dir, subdirs, files in os.walk(obj_train_dir):
    for file in files:
        os.remove(os.path.join(dir, file))

for dir, subdirs, files in os.walk(obj_val_dir):
    for file in files:
        os.remove(os.path.join(dir, file))            

if os.path.exists(obj_train_dir):       
    for dir in os.listdir(obj_train_dir):
        os.removedirs(obj_train_dir + dir)
        
if os.path.exists(obj_val_dir):
    for dir in os.listdir(obj_val_dir):
        os.removedirs(obj_val_dir + dir)


In [9]:
train_roi_paths = []
if isfile(img_dir + 'train_rois.txt'):
    with open(img_dir + 'train_rois.txt', 'r') as f:
        train_roi_paths = f.read().splitlines()
print(len(train_roi_paths))

val_roi_paths = []
if isfile(img_dir + 'val_rois.txt'):
    with open(img_dir + 'val_rois.txt', 'r') as f:
        val_roi_paths = f.read().splitlines()
print(len(val_roi_paths))

test_roi_paths = []
if isfile(img_dir + 'test_rois.txt'):
    with open(img_dir + 'test_rois.txt', 'r') as f:
        test_roi_paths = f.read().splitlines()
print(len(test_roi_paths))

334
71
71


In [10]:
print(len(train_roi_paths + val_roi_paths + test_roi_paths))

476


Calculate mean and std for each ROI

In [11]:
def calculate_mean_std_ROI(roi_paths):
    mean_std = {}
    mean = 0
    std = 0
    for roi_path in roi_paths:
        roi_name = roi_path.split('/')[-1].split('.')[0]    
        img = imread(roi_path)
        pixels = img.shape[0] * img.shape[1]
        temp_img = img / 255.0
        mean = np.sum(temp_img, axis=(0, 1)) / pixels
        std = np.sqrt(np.sum(temp_img ** 2, axis=(0, 1)) / pixels - mean ** 2)
        mean_std[roi_name] = (mean, std)
    return mean_std

In [12]:
# mean_std = calculate_mean_std_ROI(train_roi_paths + val_roi_paths + test_roi_paths)
# with open(img_dir + 'mean_std.pkl', 'wb') as f:
#     pickle.dump(mean_std, f)

In [13]:
mean_std = pickle.load(open(img_dir + 'mean_std.pkl', 'rb'))
print(len(mean_std))

476


Crop objects from ground truth bounding boxes

In [52]:
def crop_positive_objects(roi_paths, outer_size_percent, save_dir, class_num = 2):
    tot_num = 0
    for roi_fp in roi_paths:
        roi_name = roi_fp.split('/')[-1].split('.')[0]
        if isfile(im_to_txt_path(roi_fp)):
            roi_img = imread(roi_fp).copy()
            lines = open(im_to_txt_path(roi_fp), 'r').readlines()
            for line in lines:
                line = line.split()
                x1,y1,x2,y2 = int(line[1]), int(line[2]), int(line[3]), int(line[4])
                pixel_outter_x = int((x2 - x1) * outer_size_percent)
                pixel_outter_y = int((y2 - y1) * outer_size_percent)
                x1 = clip_value(x1 - pixel_outter_x, 0, roi_img.shape[1])
                y1 = clip_value(y1 - pixel_outter_y, 0, roi_img.shape[0])
                x2 = clip_value(x2 + pixel_outter_x, 0, roi_img.shape[1])
                y2 = clip_value(y2 + pixel_outter_y, 0, roi_img.shape[0])
                if class_num == 3:  
                    type = str(line[0])
                elif class_num == 2:
                    type = 'object'
                obj_crop_dir = save_dir + type + '/'
                if not os.path.exists(obj_crop_dir):
                    os.makedirs(obj_crop_dir)
                crop = roi_img[y1:y2,x1:x2]
                filename = roi_name + '__' + str(x1) + '_' + str(y1) + '_' + str(x2) + '_' + str(y2)
                # print(obj_crop_dir + filename + '.png')
                imwrite(obj_crop_dir + filename + '.png', crop)
                tot_num += 1
    return tot_num

In [53]:
tot_pos_obj_train = crop_positive_objects(train_roi_paths, 0.1, obj_train_dir, class_num)
print('Total number of train positive objects: ', tot_pos_obj_train)


Total number of train positive objects:  14312


In [54]:
tot_pos_obj_val = crop_positive_objects(test_roi_paths, 0.1, obj_val_dir, class_num)
print('Total number of test positive objects: ', obj_val_dir)

Total number of test positive objects:  2912


Crop objects from negative bounding boxes

In [55]:
import random
def crop_negative_objects(roi_paths, save_dir, nega_obj_num, nega_obj_size, mean_std_dict, false_positive_ratio = 0.5):
    # crop negative objects on background
    print('Cropping negative objects on background...')
    for num, roi_path in enumerate(roi_paths):
        img = imread(roi_path).copy()
        roi_name =  roi_path.split('/')[-1].split('.')[0]
        # get mean
        mean = mean_std_dict[roi_name][0]
        # read labels of current roi
        labels = []
        if isfile(im_to_txt_path(roi_path)):
            f = open(im_to_txt_path(roi_path), 'r')
            lines = f.readlines()
            for line in lines:
                line = line.split()
                x1,y1,x2,y2 = int(line[1]), int(line[2]), int(line[3]), int(line[4])
                labels.append([x1,y1,x2,y2])
        # cover true boxes with mean
        for label in labels:
            img[label[1]:label[3], label[0]:label[2]] = mean*255
        # randomly crop 
        for i in range(int(nega_obj_num * false_positive_ratio / len(roi_paths))):
            x1 = np.random.randint(0, img.shape[1] - nega_obj_size)
            y1 = np.random.randint(0, img.shape[0] - nega_obj_size)
            x2 = x1 + nega_obj_size
            y2 = y1 + nega_obj_size
            obj = img[y1:y2, x1:x2]
            filename = roi_name + '__' + str(x1) + '_' + str(y1) + '_' + str(x2) + '_' + str(y2)
            if not os.path.exists(save_dir + 'background/'):
                os.makedirs(save_dir + 'background/')
            imwrite(save_dir + 'background/' + filename + '.png', obj)
        print('     Cropped ROI ', num + 1, '/', len(roi_paths), ' with ', i, ' objects')
    
    # crop false positives
    print('Cropping false positives...')
    for dir, subdirs, files in os.walk(false_positive_label_dir):
        for num, file in enumerate(files):
            
            # read the labels
            labels = open(os.path.join(dir, file), 'r').readlines()
            # read the image
            # print(img_dir + file.split('.')[0] + '.png')
            img = imread(img_dir + file.split('.')[0] + '.png').copy()
            sample_labels = random.sample(labels, int(nega_obj_num * (1 - false_positive_ratio) / len(files)))
            total_num = len(sample_labels)
            for label in sample_labels:
                label = label.split()
                x1,y1,x2,y2 = int(float(label[1])), int(float(label[2])), int(float(label[3])), int(float(label[4]))
                x1 = clip_value(x1, 0, img.shape[1])
                y1 = clip_value(y1, 0, img.shape[0])
                x2 = clip_value(x2, 0, img.shape[1])
                y2 = clip_value(y2, 0, img.shape[0])
                obj = img[y1:y2, x1:x2]
                if not (x2 > x1 and y2 > y1):  
                    total_num -= 1
                    continue
                filename = file.split('.')[0] + '__' + str(x1) + '_' + str(y1) + '_' + str(x2) + '_' + str(y2)
                if not os.path.exists(save_dir + 'background/'):
                    os.makedirs(save_dir + 'background/')
                imwrite(save_dir + 'background/' + filename + '.png',obj)
            print('     Cropped ROI ', num + 1, '/', len(files), ' with ', total_num, ' objects')
            
        

In [56]:
if os.path.exists(obj_train_dir + 'background'):
    for file in os.listdir(obj_train_dir + 'background'):
        os.remove(os.path.join(obj_train_dir + 'background', file))


In [58]:
crop_negative_objects(train_roi_paths, obj_train_dir, tot_pos_obj_train*5, 260, mean_std, 0.5)

Cropping negative objects on background...
     Cropped ROI  1 / 334  with  106  objects
     Cropped ROI  2 / 334  with  106  objects
     Cropped ROI  3 / 334  with  106  objects
     Cropped ROI  4 / 334  with  106  objects
     Cropped ROI  5 / 334  with  106  objects
     Cropped ROI  6 / 334  with  106  objects
     Cropped ROI  7 / 334  with  106  objects
     Cropped ROI  8 / 334  with  106  objects
     Cropped ROI  9 / 334  with  106  objects
     Cropped ROI  10 / 334  with  106  objects
     Cropped ROI  11 / 334  with  106  objects
     Cropped ROI  12 / 334  with  106  objects
     Cropped ROI  13 / 334  with  106  objects
     Cropped ROI  14 / 334  with  106  objects
     Cropped ROI  15 / 334  with  106  objects
     Cropped ROI  16 / 334  with  106  objects
     Cropped ROI  17 / 334  with  106  objects
     Cropped ROI  18 / 334  with  106  objects
     Cropped ROI  19 / 334  with  106  objects
     Cropped ROI  20 / 334  with  106  objects
     Cropped ROI  21 / 334

In [59]:
if os.path.exists(obj_val_dir + 'background'):
    for file in os.listdir(obj_val_dir + 'background'):
        os.remove(os.path.join(obj_val_dir + 'background', file))

In [60]:
crop_negative_objects(test_roi_paths, obj_val_dir, tot_pos_obj_val*5, 260, mean_std, 0.5)

Cropping negative objects on background...
     Cropped ROI  1 / 71  with  101  objects
     Cropped ROI  2 / 71  with  101  objects
     Cropped ROI  3 / 71  with  101  objects
     Cropped ROI  4 / 71  with  101  objects
     Cropped ROI  5 / 71  with  101  objects
     Cropped ROI  6 / 71  with  101  objects
     Cropped ROI  7 / 71  with  101  objects
     Cropped ROI  8 / 71  with  101  objects
     Cropped ROI  9 / 71  with  101  objects
     Cropped ROI  10 / 71  with  101  objects
     Cropped ROI  11 / 71  with  101  objects
     Cropped ROI  12 / 71  with  101  objects
     Cropped ROI  13 / 71  with  101  objects
     Cropped ROI  14 / 71  with  101  objects
     Cropped ROI  15 / 71  with  101  objects
     Cropped ROI  16 / 71  with  101  objects
     Cropped ROI  17 / 71  with  101  objects
     Cropped ROI  18 / 71  with  101  objects
     Cropped ROI  19 / 71  with  101  objects
     Cropped ROI  20 / 71  with  101  objects
     Cropped ROI  21 / 71  with  101  objects


In [61]:
if class_num == 2:
    print('train: ')    
    print('     object: ', len(os.listdir(obj_train_dir + 'object/')))
    print('     background: ', len(os.listdir(obj_train_dir + 'background/')))
    print('test: ')
    print('     object: ', len(os.listdir(obj_val_dir + 'object/')))
    print('     background: ', len(os.listdir(obj_val_dir + 'background/')))
elif class_num == 3:
    print('train: ')    
    print('     0: ', len(os.listdir(obj_train_dir + '0/')))
    print('     1: ', len(os.listdir(obj_train_dir + '1/')))
    print('     background: ', len(os.listdir(obj_train_dir + 'background/')))
    print('test: ')
    print('     0: ', len(os.listdir(obj_val_dir + '0/')))
    print('     1: ', len(os.listdir(obj_val_dir + '1/')))
    print('     background: ', len(os.listdir(obj_val_dir + 'background/')))

train: 
     0:  2871
     1:  11436
     background:  67429
test: 
     0:  653
     1:  2259
     background:  13653
