In [7]:
import pandas as pd
import numpy as np 
import sys
from skimage.io import imread
from tqdm import tqdm
import datetime    

### MASK CATEGORIES

In [8]:
# Object categories that receive a YOLO annotation; rows with any other
# category are skipped by the annotation loops.
MASK_CATEGORIES = [
    'trophozoite',
    'ring',
    'schizont',
    'gametocyte',
]
# Single-category alternative:
#MASK_CATEGORIES = ['trophozoite']

In [9]:
# YOLO class id written for each category. 'trophozoite' and 'ring' share
# class id 0, so only three distinct ids (0, 1, 2) are produced.
key_cat_dict = {
    'trophozoite': 0,
    'ring': 0,
    'schizont': 1,
    'gametocyte': 2,
}

## Load TRAIN dataset

In [10]:
# Directory holding the bounding-box annotation files (training.json / test.json).
BOUNDING_BOX_PATH = '../../data/raw_data/malaria/'
# Raw image directory, located directly under the annotation directory.
RAW_IMAGES_PATH = BOUNDING_BOX_PATH + 'images/'
# darknet data directory: images are read from here and label files are written here.
BOUNDING_BOX_YOLO_PATH = '../YOLO/darknet/data/malaria_yolo/'
# Number of channels expected per image.
IMG_CHANNELS = 3

In [11]:
# Load the training annotations from training.json.
train_bounding_box_df = pd.read_json(BOUNDING_BOX_PATH + 'training.json')

# Derive a relative image path by dropping the first character of each
# record's 'pathname' (the resulting paths start with "images/").
train_bounding_box_df['path'] = [
    image_info['pathname'][1:] for image_info in train_bounding_box_df['image']
]

n_train_images = train_bounding_box_df.shape[0]
print(n_train_images, 'images')
print(n_train_images, 'images available')
train_bounding_box_df.sample(5)

1208 images
1208 images available


Unnamed: 0,image,objects,path
195,{'checksum': '65ac80ea8d0d43e027977208251c2d2b...,"[{'bounding_box': {'minimum': {'r': 899, 'c': ...",images/0901f9d1-be4f-4c19-8e83-dd724dded9f5.png
531,{'checksum': 'e6ea7ea21a343f85bd503de6dd6d644d...,"[{'bounding_box': {'minimum': {'r': 143, 'c': ...",images/1012abf2-cb05-4337-86c5-513ec4ca904d.png
83,{'checksum': '487d5566ea4614e671b37cbb4ad36381...,"[{'bounding_box': {'minimum': {'r': 749, 'c': ...",images/729733ff-a349-4a96-b636-ccabe7d7e167.png
1097,{'checksum': 'a27cbfca2858acd1ce8d9bebc95689ef...,"[{'bounding_box': {'minimum': {'r': 416, 'c': ...",images/a15e0fa8-2620-4f66-896e-175115c9dc3d.png
763,{'checksum': 'fcd2b53e1826188f7a4d84a1f76d79e2...,"[{'bounding_box': {'minimum': {'r': 30, 'c': 1...",images/99b13257-b9d9-422f-8262-802da0bdc510.png


In [12]:
# Flatten the per-image annotation lists into one record per annotated object,
# tagging every record with its source image index and pixel dimensions.
objects = []
for im_index, c_row in train_bounding_box_df.iterrows():
    # The image is read only to learn its size. The first two axes of the
    # returned array are taken as (height, width); using shape[:2] also works
    # for images that come back without a channel axis.
    img_path = BOUNDING_BOX_YOLO_PATH + c_row['path']
    im_height, im_width = imread(img_path).shape[:2]

    for c_item in c_row['objects']:
        # Work on a copy so the dicts stored inside train_bounding_box_df are
        # left untouched (the original code added keys to them in place).
        record = dict(c_item)
        record.update({'im_index': im_index,
                       'im_height': im_height,
                       'im_width': im_width})
        objects.append(dict(image=c_row['path'], **record))

object_df = pd.DataFrame(objects)

In [13]:
# Count objects per category once and reuse the result for both the lookup
# table and the printout.
train_category_counts = object_df['category'].value_counts()

# Number the categories from 1, following the order of the value counts.
cat_dict = {
    name: rank
    for rank, name in enumerate(train_category_counts.index, start=1)
}
print(train_category_counts)
object_df.sample(5)

red blood cell    77420
trophozoite        1473
difficult           441
ring                353
schizont            179
gametocyte          144
leukocyte           103
Name: category, dtype: int64


Unnamed: 0,bounding_box,category,im_height,im_index,im_width,image
23708,"{'minimum': {'r': 930, 'c': 1387}, 'maximum': ...",red blood cell,1200,369,1600,images/79d7a014-21ff-40f1-9ac4-a69839390654.png
709,"{'minimum': {'r': 923, 'c': 162}, 'maximum': {...",red blood cell,1200,14,1600,images/dcc1a193-7b74-44f8-ad8d-813751d8fa6b.png
5237,"{'minimum': {'r': 436, 'c': 175}, 'maximum': {...",red blood cell,1200,83,1600,images/729733ff-a349-4a96-b636-ccabe7d7e167.png
48194,"{'minimum': {'r': 94, 'c': 1115}, 'maximum': {...",red blood cell,1200,748,1600,images/913099c5-8085-4908-bf4b-1f098bbfcf66.png
55533,"{'minimum': {'r': 11, 'c': 186}, 'maximum': {'...",red blood cell,1200,853,1600,images/a27b451d-44a6-4740-861a-9e7e99915563.png


In [17]:
# Display the category -> rank lookup built from the training value counts.
cat_dict

{'red blood cell': 1,
 'trophozoite': 2,
 'difficult': 3,
 'ring': 4,
 'schizont': 5,
 'gametocyte': 6,
 'leukocyte': 7}

In [15]:
# Index -> category name, in the order the names appear in MASK_CATEGORIES.
CATEGORIES_DICT = dict(enumerate(MASK_CATEGORIES))
CATEGORIES_DICT

{0: 'trophozoite', 1: 'ring', 2: 'schizont', 3: 'gametocyte'}

In [27]:
# Write one YOLO label file per annotated training image. Each line has the
# form "<class id> <x centre> <y centre> <width> <height>", with all four
# box values divided by the image width/height.
print('Generating YOLO Training Data Anotation ... ')
sys.stdout.flush()
object_df_count = object_df.shape[0]
object_class_log = []   # class id of every annotation produced, in row order
training_set = []       # image path of every annotated object (paths repeat per object)
label_lines = {}        # label file name -> annotation lines collected for that image

for n, row in tqdm(object_df.iterrows(), total=object_df_count):
    current_category = row['category']
    # Only categories listed in MASK_CATEGORIES are annotated.
    if current_category not in MASK_CATEGORIES:
        continue

    training_set.append(row["image"])
    # Drop the 7-character "images/" prefix and replace the 3-letter
    # extension with "txt".
    file_name = row["image"][7:-3] + "txt"

    min_val = row['bounding_box']['minimum']
    max_val = row['bounding_box']['maximum']
    im_height = row['im_height']
    im_width = row['im_width']

    # Box size and centre in pixels: 'r' is the vertical coordinate,
    # 'c' the horizontal one.
    absolute_height = max_val['r'] - min_val['r']
    absolute_width = max_val['c'] - min_val['c']
    absolute_y = (max_val['r'] + min_val['r']) / 2.
    absolute_x = (max_val['c'] + min_val['c']) / 2.

    # Express everything as a fraction of the image size.
    y = absolute_y / im_height
    x = absolute_x / im_width
    height = absolute_height / im_height
    width = absolute_width / im_width

    category = key_cat_dict[current_category]
    object_class_log.append(category)

    out = "{0} {1} {2} {3} {4}".format(category, x, y, width, height)
    label_lines.setdefault(file_name, []).append(out + "\n")

# Each label file is written exactly once, in "w" mode, through a context
# manager: every handle is closed, and re-running the cell rewrites the
# labels instead of appending duplicate lines to the files from a previous run.
for file_name, lines in label_lines.items():
    with open(BOUNDING_BOX_YOLO_PATH + "images/" + file_name, "w") as f:
        f.writelines(lines)

    

   

Generating YOLO Training Data Anotation ... 


100%|██████████| 80113/80113 [00:09<00:00, 8342.99it/s]


In [29]:
# Number of annotations produced for each YOLO class id (0, 1, 2).
for class_id in (0, 1, 2):
    print(object_class_log.count(class_id))

1826
179
144


# TEST DATA SET

In [30]:
# Load the test annotations from test.json.
test_bounding_box_df = pd.read_json(BOUNDING_BOX_PATH + 'test.json')

# Derive a relative image path by dropping the first character of each
# record's 'pathname'.
test_bounding_box_df['path'] = [
    image_info['pathname'][1:] for image_info in test_bounding_box_df['image']
]

n_test_images = test_bounding_box_df.shape[0]
print(n_test_images, 'images')
print(n_test_images, 'images available')

120 images
120 images available


In [31]:
# Flatten the per-image test annotation lists into one record per annotated
# object, tagging every record with its source image index and pixel dimensions.
objects_test = []
for im_index, c_row in test_bounding_box_df.iterrows():
    # The image is read only to learn its size. The first two axes of the
    # returned array are taken as (height, width); using shape[:2] also works
    # for images that come back without a channel axis.
    img_path = BOUNDING_BOX_YOLO_PATH + c_row['path']
    im_height, im_width = imread(img_path).shape[:2]

    for c_item in c_row['objects']:
        # Work on a copy so the dicts stored inside test_bounding_box_df are
        # left untouched (the original code added keys to them in place).
        record = dict(c_item)
        record.update({'im_index': im_index,
                       'im_height': im_height,
                       'im_width': im_width})
        objects_test.append(dict(image=c_row['path'], **record))

object_test_df = pd.DataFrame(objects_test)

In [32]:
# Count test objects per category once and reuse the result for both the
# lookup table and the printout.
test_category_counts = object_test_df['category'].value_counts()

# Number the categories from 1, following the order of the value counts.
cat_dict_test = {
    name: rank
    for rank, name in enumerate(test_category_counts.index, start=1)
}
print(test_category_counts)
object_test_df.sample(5)

red blood cell    5614
ring               169
trophozoite        111
gametocyte          12
schizont            11
difficult            5
Name: category, dtype: int64


Unnamed: 0,bounding_box,category,im_height,im_index,im_width,image
4676,"{'minimum': {'r': 612, 'c': 1019}, 'maximum': ...",red blood cell,1383,90,1944,images/6d9a2241-2cce-42b3-a11d-a3a6b432dd89.jpg
2404,"{'minimum': {'r': 804, 'c': 472}, 'maximum': {...",red blood cell,1383,48,1944,images/604478c7-1259-441a-8ad0-7059e191c9e4.jpg
3911,"{'minimum': {'r': 563, 'c': 707}, 'maximum': {...",red blood cell,1383,75,1944,images/8448ac8c-fa7a-475a-9ca2-dc0174f78a39.jpg
1472,"{'minimum': {'r': 763, 'c': 997}, 'maximum': {...",red blood cell,1383,30,1944,images/78ae52cf-8dad-4070-a40e-9af02faec1f0.jpg
5900,"{'minimum': {'r': 95, 'c': 528}, 'maximum': {'...",red blood cell,1383,119,1944,images/887cc81a-bae3-4360-a115-23ae05ac3a4f.jpg


In [33]:
# Write one YOLO label file per annotated test image. Each line has the form
# "<class id> <x centre> <y centre> <width> <height>", with all four box
# values divided by the image width/height.
print('Generating YOLO Data Anotation ... ')
sys.stdout.flush()
object_df_count = object_test_df.shape[0]
object_class_log = []   # class id of every annotation produced, in row order
test_set = []           # image path of every annotated object (paths repeat per object)
label_lines = {}        # label file name -> annotation lines collected for that image

for n, row in tqdm(object_test_df.iterrows(), total=object_df_count):
    current_category = row['category']
    # Only categories listed in MASK_CATEGORIES are annotated.
    if current_category not in MASK_CATEGORIES:
        continue

    test_set.append(row["image"])
    # Drop the 7-character "images/" prefix and replace the 3-letter
    # extension with "txt".
    file_name = row["image"][7:-3] + "txt"

    min_val = row['bounding_box']['minimum']
    max_val = row['bounding_box']['maximum']
    im_height = row['im_height']
    im_width = row['im_width']

    # Box size and centre in pixels: 'r' is the vertical coordinate,
    # 'c' the horizontal one. The centre is the midpoint of the two corners,
    # the same expression the training cell uses.
    absolute_height = max_val['r'] - min_val['r']
    absolute_width = max_val['c'] - min_val['c']
    absolute_y = (max_val['r'] + min_val['r']) / 2.
    absolute_x = (max_val['c'] + min_val['c']) / 2.

    # Express everything as a fraction of the image size.
    y = absolute_y / im_height
    x = absolute_x / im_width
    height = absolute_height / im_height
    width = absolute_width / im_width

    category = key_cat_dict[current_category]
    object_class_log.append(category)

    out = "{0} {1} {2} {3} {4}".format(category, x, y, width, height)
    label_lines.setdefault(file_name, []).append(out + "\n")

# Each label file is written exactly once, in "w" mode, through a context
# manager: every handle is closed, and re-running the cell rewrites the
# labels instead of appending duplicate lines to the files from a previous run.
for file_name, lines in label_lines.items():
    with open(BOUNDING_BOX_YOLO_PATH + "images/" + file_name, "w") as f:
        f.writelines(lines)
        #print(out)
    

   

Generating YOLO Data Anotation ... 


100%|██████████| 5922/5922 [00:00<00:00, 8741.88it/s]


In [34]:
# Number of annotations produced for each YOLO class id (0, 1, 2).
for class_id in (0, 1, 2):
    print(object_class_log.count(class_id))

280
11
12


In [35]:
# test_set holds one image path per annotated object; de-duplicate it to get
# the distinct annotated test images, then show how many there are.
test = np.unique(test_set)
test.shape

(115,)

In [36]:
# training_set holds one image path per annotated object; de-duplicate it to
# get the distinct annotated training images, then show how many there are.
train = np.unique(training_set)
train.shape

(888,)

### Creating training objects

#### train.txt

In [21]:
## TRAIN
for t in tqdm(train, total=train.shape[0]):
    train_item = "data/malaria_yolo/"+t 
    f = open("train.txt", "a")
    f.write(train_item + "\n")
f.close()    

100%|██████████| 888/888 [00:00<00:00, 33711.75it/s]


#### test.txt

In [22]:
## TEST
for t in tqdm(test, total=test.shape[0]):
    train_item = "data/malaria_yolo/"+t 
    f = open("test.txt", "a")
    f.write(train_item + "\n")
f.close()    

100%|██████████| 115/115 [00:00<00:00, 11863.47it/s]


#### obj.names

In [27]:
# Write the class-name file: one name per line, in CATEGORIES_DICT order,
# echoing each name as it is written.
# NOTE(review): CATEGORIES_DICT lists four names, but key_cat_dict assigns
# only three class ids (trophozoite and ring both map to 0, schizont to 1,
# gametocyte to 2). Line 1 of this file is therefore 'ring' while label id 1
# means schizont -- confirm which naming is intended before training.
# The file is opened once through a context manager so the handle is always
# closed; mode "w" makes a re-run rewrite the file instead of appending
# duplicate names.
with open("obj.names", "w") as f:
    for class_name in CATEGORIES_DICT.values():
        print(class_name)
        f.write(class_name + "\n")

trophozoite
ring
schizont
gametocyte


# Training Commands

### Split training-validation set

In [39]:
# Open the list that the split cell below iterates over; that cell is also the
# one that closes this handle. Presumably the file holds the full training
# image list -- this notebook does not show how it is created.
all_training_dataset = open("all_train_data_set.txt", "r")

In [40]:
# Split the lines of all_training_dataset at random into train.txt and
# valid.txt (about 80% / 20%), then print how many lines went to each.
trainig_count = 0
validation_count = 0

# Fixed seed on a private generator so the split is identical on every run
# and does not depend on the global NumPy random state.
SPLIT_SEED = 42
split_rng = np.random.RandomState(SPLIT_SEED)

# Read everything before touching the output files: if the input handle is
# already closed this raises before train.txt / valid.txt are truncated.
try:
    all_lines = all_training_dataset.readlines()
finally:
    all_training_dataset.close()

# Mode "w": each run replaces the previous split. Appending would mix lines
# from different random splits, so one image could end up in both lists.
# The context manager guarantees both output handles are closed.
with open("train.txt", "w") as train_file, open("valid.txt", "w") as valid_file:
    for l in all_lines:
        if split_rng.rand() > 0.2:
            ## Training
            trainig_count += 1
            train_file.write(l)
        else:
            ## Validation
            validation_count += 1
            valid_file.write(l)

print(trainig_count)
print(validation_count)
    

703
185


### MODEL NAME

##### How to improve object detection:
- Increase the network resolution in your .cfg file (height=608, width=608, or any other multiple of 32); this increases precision.

In [42]:
# Join the selected categories with underscores; used as part of the model name.
str_masks_name = '_'.join(MASK_CATEGORIES)

In [59]:
# Network input size and a minute-resolution timestamp, both embedded in the
# model name.
IMG_WIDTH = 512
IMG_HEIGHT = 512
TODAY = datetime.datetime.now().strftime("%Y%m%d%H%M")

# Model name layout: YOLOv2-malaria_<timestamp>_<categories>_<width>_<height>
nn_name = "_".join([
    "YOLOv2-malaria",
    TODAY,
    str_masks_name,
    str(IMG_WIDTH),
    str(IMG_HEIGHT),
])
nn_name

'YOLOv2-malaria_201908120022_trophozoite_ring_schizont_gametocyte_512_512'

In [41]:
# Hard-coded model name; assigning it here replaces any nn_name value that was
# generated earlier in the session.
nn_name = "YOLOv2-malaria_20190818_trofozoito_esquizonte_gametocito_512_512"
# Alternative name for a 1024x1024 configuration:
#nn_name = "YOLOv2-malaria_20190818_trofozoito_esquizonte_gametocito_1024_1024"

In [42]:
# Render the darknet training command for the current nn_name. The cell only
# displays the string; nothing is executed here.
"./darknet detector train data/malaria_yolo/obj.data data/malaria_yolo/{0}.cfg -map > {0}.log".format(nn_name)

'./darknet detector train data/malaria_yolo/obj.data data/malaria_yolo/YOLOv2-malaria_20190818_trofozoito_esquizonte_gametocito_512_512.cfg -map > YOLOv2-malaria_20190818_trofozoito_esquizonte_gametocito_512_512.log'

In [58]:
# Render the command for following the training log of the current nn_name.
# The cell only displays the string; nothing is executed here.
"tail -f {0}.log".format(nn_name)

'tail -f YOLO-malaria_201908112336_trophozoite_ring_schizont_gametocyte_800_800.log'

### MODELS LOG

- YOLO-malaria_201908112336_trophozoite_ring_schizont_gametocyte_800_800


# TEST COMMANDS

### Generate video

In [20]:
import cv2
import os
import matplotlib.pyplot as plt

In [21]:
# Show the directory the images are read from in the video cell below.
BOUNDING_BOX_YOLO_PATH

'../YOLO/darknet/data/malaria_yolo/'

In [22]:
# Concatenate the test images into a video, one frame per image.
# This cell no longer resets objects_test: that list belongs to the cell that
# builds object_test_df and is not used here.
video_name = 'video_2.mp4'
# Frame size handed to the writer; images of any other size are skipped below.
# NOTE(review): these assignments overwrite the IMG_WIDTH / IMG_HEIGHT values
# set in the model-name cell.
IMG_WIDTH = 1944
IMG_HEIGHT = 1383
# NOTE(review): a frame rate of 0 looks unintended and may prevent the writer
# from producing a playable file -- confirm the desired value.
fps = 0.

video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'DIVX'), fps, (IMG_WIDTH, IMG_HEIGHT))

try:
    for im_index, c_row in test_bounding_box_df.iterrows():
        img_path = BOUNDING_BOX_YOLO_PATH + c_row['path']

        frame = cv2.imread(img_path)
        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising, so check before touching frame.shape.
        if frame is None:
            print('Skipping unreadable image:', img_path)
            continue

        height, width = frame.shape[:2]
        if (height == IMG_HEIGHT) and (width == IMG_WIDTH):
            video.write(frame)
finally:
    # Release the writer even if the loop fails, so the output file is closed.
    cv2.destroyAllWindows()
    video.release()