In [35]:
import pandas as pd
import numpy as np 
import sys
from skimage.io import imread
from tqdm import tqdm
import datetime    

### MASK CATEGORIES

In [36]:
# Annotation categories that are exported as YOLO classes. The position of a
# name in this list becomes its class id; every other category is skipped.
MASK_CATEGORIES = [
    'trophozoite',
    'ring',
    'schizont',
    'gametocyte',
]
# Single-class variant:
#MASK_CATEGORIES = ['trophozoite']

## Load TRAIN dataset

In [37]:
# Number of leading channels kept by the `[:, :, :IMG_CHANNELS]` slice that is
# applied to every image loaded with imread.
IMG_CHANNELS = 3

# Folder containing the annotation files (training.json / test.json).
BOUNDING_BOX_PATH = '../../data/raw_data/malaria/'

# Folder the images are read from and the YOLO label files are written to.
BOUNDING_BOX_YOLO_PATH = '../YOLO/darknet/data/malaria_yolo/'

# Raw image folder (not referenced in the cells visible here).
RAW_IMAGES_PATH = '../../data/raw_data/malaria/images/'

In [38]:
# Load the training annotations: each row carries an 'image' metadata dict and
# the list of annotated 'objects' for that image.
train_bounding_box_df = pd.read_json(BOUNDING_BOX_PATH + 'training.json')

# Keep the image location without the first character of 'pathname'
# (presumably a leading '/'), so it can be appended to a base directory.
train_bounding_box_df['path'] = train_bounding_box_df['image'].apply(
    lambda image_info: image_info['pathname'][1:]
)

print(len(train_bounding_box_df), 'images')
print(len(train_bounding_box_df), 'images available')
train_bounding_box_df.sample(5)

1208 images
1208 images available


Unnamed: 0,image,objects,path
20,{'checksum': '864f8a5ac3578cd53bc486f6f5b33540...,"[{'bounding_box': {'minimum': {'r': 628, 'c': ...",images/edcb4d77-a4d3-465d-9b59-eb3087766b1c.png
681,{'checksum': 'a54e2809d66b2d6b97e4a6e9dd26e916...,"[{'bounding_box': {'minimum': {'r': 1055, 'c':...",images/e0ef520d-7b65-485d-9888-e6859f189f70.png
676,{'checksum': '71e7a72c2fa68945a426829cbcd00ff9...,"[{'bounding_box': {'minimum': {'r': 220, 'c': ...",images/36569710-a574-40be-ad42-897a6e74a128.png
331,{'checksum': '2b565a42cd70e892b8040296894c7b24...,"[{'bounding_box': {'minimum': {'r': 641, 'c': ...",images/49efc89d-9cba-44f7-8257-7f911b73f68a.png
518,{'checksum': 'd51a8e5680a2ef056201bbb617ac6bf9...,"[{'bounding_box': {'minimum': {'r': 379, 'c': ...",images/5721e41b-90ef-4851-8587-8dc71227ef90.png


In [39]:
# Flatten the per-image annotation lists into one record per annotated object,
# tagging every record with the index and pixel size of its image.
objects = []
for im_index, c_row in train_bounding_box_df.iterrows():
    # The image is loaded only to obtain its height and width.
    img = imread(BOUNDING_BOX_YOLO_PATH + c_row['path'])[:, :, :IMG_CHANNELS]
    im_height, im_width = img.shape[:2]

    for c_item in c_row['objects']:
        # update() modifies the annotation dict held by the dataframe in place.
        c_item.update(im_index=im_index, im_height=im_height, im_width=im_width)
        objects.append(dict(image=c_row['path'], **c_item))

object_df = pd.DataFrame(objects)

In [40]:
# Rank the training categories by frequency (1 = most frequent) and show the
# counts alongside a random sample of object records.
category_counts = object_df['category'].value_counts()
cat_dict = {name: rank for rank, name in enumerate(category_counts.index, start=1)}
print(category_counts)
object_df.sample(5)

red blood cell    77420
trophozoite        1473
difficult           441
ring                353
schizont            179
gametocyte          144
leukocyte           103
Name: category, dtype: int64


Unnamed: 0,bounding_box,category,im_height,im_index,im_width,image
55839,"{'minimum': {'r': 1037, 'c': 1486}, 'maximum':...",red blood cell,1200,855,1600,images/68f34a9c-f73d-4238-8f55-4b05a8a2154c.png
19096,"{'minimum': {'r': 418, 'c': 1245}, 'maximum': ...",red blood cell,1200,285,1600,images/6e39fb0a-aa41-4b16-b783-7589e97bf289.png
67921,"{'minimum': {'r': 561, 'c': 1353}, 'maximum': ...",red blood cell,1200,1034,1600,images/41ebf9a1-24dc-454e-8bf8-508218f5ee57.png
31830,"{'minimum': {'r': 979, 'c': 522}, 'maximum': {...",red blood cell,1200,496,1600,images/f62d4fb6-df7c-446c-94a8-d5aede0b1e0c.png
47453,"{'minimum': {'r': 479, 'c': 1089}, 'maximum': ...",red blood cell,1200,736,1600,images/f7231477-f2c1-439a-9c8b-204de07686a4.png


In [41]:
# Display the category -> frequency-rank mapping built from the training objects.
cat_dict

{'red blood cell': 1,
 'trophozoite': 2,
 'difficult': 3,
 'ring': 4,
 'schizont': 5,
 'gametocyte': 6,
 'leukocyte': 7}

In [42]:
# YOLO class id -> category name, following the order of MASK_CATEGORIES.
CATEGORIES_DICT = dict(enumerate(MASK_CATEGORIES))
CATEGORIES_DICT

{0: 'trophozoite', 1: 'ring', 2: 'schizont', 3: 'gametocyte'}

In [43]:
# Reverse lookup: category name -> YOLO class id.
key_cat_dict = dict(zip(CATEGORIES_DICT.values(), CATEGORIES_DICT.keys()))
key_cat_dict

{'trophozoite': 0, 'ring': 1, 'schizont': 2, 'gametocyte': 3}

In [44]:
# Export the training objects whose category is in MASK_CATEGORIES as YOLO
# label lines: "<class_id> <x_center> <y_center> <width> <height>", with all
# four geometry values expressed as fractions of the image size.
# NOTE(review): label files are opened in append mode, so re-running this cell
# adds the same lines again; clear the old .txt files before a re-run.
print('Generating YOLO Data Anotation ... ')
sys.stdout.flush()
object_df_count = object_df.shape[0]

# One entry per exported object, so image paths repeat in this list.
training_set = []
for n, row in tqdm(object_df.iterrows(), total=object_df_count):
    current_category = row['category']
    if current_category not in MASK_CATEGORIES:
        # No class id exists for this category; nothing is written for it.
        continue

    min_val = row['bounding_box']['minimum']
    max_val = row['bounding_box']['maximum']
    im_height = row['im_height']
    im_width = row['im_width']

    training_set.append(row["image"])
    # Label file name: drop the first 7 characters and replace the last 3 with
    # "txt" (assumes paths look like 'images/<name>.<3-letter extension>').
    file_name = row["image"][7:-3] + "txt"

    # Box size and centre in pixels; 'r' is scaled by the image height and
    # 'c' by the image width below.
    absolute_height = max_val['r'] - min_val['r']
    absolute_width = max_val['c'] - min_val['c']
    absolute_y = (max_val['r'] + min_val['r']) / 2.
    absolute_x = (max_val['c'] + min_val['c']) / 2.

    # Normalise by the image dimensions.
    y = absolute_y / im_height
    x = absolute_x / im_width
    height = absolute_height / im_height
    width = absolute_width / im_width

    out = "{0} {1} {2} {3} {4}".format(key_cat_dict[current_category], x, y, width, height)
    # `with` flushes and closes the handle for every line, so no descriptor is
    # left open and no write is left sitting in an unflushed buffer.
    with open(BOUNDING_BOX_YOLO_PATH + file_name, "a") as f:
        f.write(out + "\n")
    

   

Generating YOLO Data Anotation ... 


100%|██████████| 80113/80113 [00:09<00:00, 8883.45it/s]


# TEST DATA SET

In [45]:
# Load the test annotations: each row carries an 'image' metadata dict and the
# list of annotated 'objects' for that image.
test_bounding_box_df = pd.read_json(BOUNDING_BOX_PATH + 'test.json')

# Keep the image location without the first character of 'pathname'
# (presumably a leading '/'), so it can be appended to a base directory.
test_bounding_box_df['path'] = test_bounding_box_df['image'].apply(
    lambda image_info: image_info['pathname'][1:]
)

print(len(test_bounding_box_df), 'images')
print(len(test_bounding_box_df), 'images available')

120 images
120 images available


In [46]:
# Flatten the per-image test annotation lists into one record per annotated
# object, tagging every record with the index and pixel size of its image.
objects_test = []
for im_index, c_row in test_bounding_box_df.iterrows():
    # The image is loaded only to obtain its height and width.
    img = imread(BOUNDING_BOX_YOLO_PATH + c_row['path'])[:, :, :IMG_CHANNELS]
    im_height, im_width = img.shape[:2]

    for c_item in c_row['objects']:
        # update() modifies the annotation dict held by the dataframe in place.
        c_item.update(im_index=im_index, im_height=im_height, im_width=im_width)
        objects_test.append(dict(image=c_row['path'], **c_item))

object_test_df = pd.DataFrame(objects_test)

In [47]:
# Rank the test categories by frequency (1 = most frequent) and show the
# counts alongside a random sample of object records.
test_category_counts = object_test_df['category'].value_counts()
cat_dict_test = {name: rank for rank, name in enumerate(test_category_counts.index, start=1)}
print(test_category_counts)
object_test_df.sample(5)

red blood cell    5614
ring               169
trophozoite        111
gametocyte          12
schizont            11
difficult            5
Name: category, dtype: int64


Unnamed: 0,bounding_box,category,im_height,im_index,im_width,image
5412,"{'minimum': {'r': 1039, 'c': 1486}, 'maximum':...",red blood cell,1383,108,1944,images/4da8f6c5-5f85-4280-92d0-1ba008e8c404.jpg
907,"{'minimum': {'r': 536, 'c': 953}, 'maximum': {...",red blood cell,1383,19,1944,images/8874ea02-d263-4830-99d1-e1b14230a56b.jpg
5525,"{'minimum': {'r': 564, 'c': 1520}, 'maximum': ...",red blood cell,1383,110,1944,images/623982a0-1f73-4246-92cb-28f7c219efaf.jpg
405,"{'minimum': {'r': 931, 'c': 872}, 'maximum': {...",red blood cell,1383,6,1944,images/d9ecacb0-14c7-4862-930b-18bb51d5f392.jpg
3932,"{'minimum': {'r': 1015, 'c': 1218}, 'maximum':...",red blood cell,1383,75,1944,images/8448ac8c-fa7a-475a-9ca2-dc0174f78a39.jpg


In [48]:
# Export the test objects whose category is in MASK_CATEGORIES as YOLO label
# lines: "<class_id> <x_center> <y_center> <width> <height>", with all four
# geometry values expressed as fractions of the image size.
# NOTE(review): label files are opened in append mode, so re-running this cell
# adds the same lines again; clear the old .txt files before a re-run.
print('Generating YOLO Data Anotation ... ')
sys.stdout.flush()
object_df_count = object_test_df.shape[0]

# One entry per exported object, so image paths repeat in this list.
test_set = []
for n, row in tqdm(object_test_df.iterrows(), total=object_df_count):
    current_category = row['category']
    if current_category not in MASK_CATEGORIES:
        # No class id exists for this category; nothing is written for it.
        continue

    min_val = row['bounding_box']['minimum']
    max_val = row['bounding_box']['maximum']
    im_height = row['im_height']
    im_width = row['im_width']

    test_set.append(row["image"])
    # Label file name: drop the first 7 characters and replace the last 3 with
    # "txt" (assumes paths look like 'images/<name>.<3-letter extension>').
    file_name = row["image"][7:-3] + "txt"

    # Box size and centre in pixels; 'r' is scaled by the image height and
    # 'c' by the image width below. The centre is the midpoint of the two
    # corners, the same formula the training export uses.
    absolute_height = max_val['r'] - min_val['r']
    absolute_width = max_val['c'] - min_val['c']
    absolute_y = (max_val['r'] + min_val['r']) / 2.
    absolute_x = (max_val['c'] + min_val['c']) / 2.

    # Normalise by the image dimensions.
    y = absolute_y / im_height
    x = absolute_x / im_width
    height = absolute_height / im_height
    width = absolute_width / im_width

    out = "{0} {1} {2} {3} {4}".format(key_cat_dict[current_category], x, y, width, height)
    # `with` flushes and closes the handle for every line, so no descriptor is
    # left open and no write is left sitting in an unflushed buffer.
    with open(BOUNDING_BOX_YOLO_PATH + file_name, "a") as f:
        f.write(out + "\n")
    

   

Generating YOLO Data Anotation ... 


100%|██████████| 5922/5922 [00:00<00:00, 8222.78it/s]


In [18]:
# Collapse the per-object list into the distinct test images that received at
# least one label line, then show how many there are.
test = np.unique(test_set)
test.shape

(115,)

In [19]:
# Collapse the per-object list into the distinct training images that received
# at least one label line, then show how many there are.
train = np.unique(training_set)
train.shape

(888,)

### Creating training objects

#### train.txt

In [21]:
## TRAIN
for t in tqdm(train, total=train.shape[0]):
    train_item = "data/malaria_yolo/"+t 
    f = open("train.txt", "a")
    f.write(train_item + "\n")
f.close()    

100%|██████████| 888/888 [00:00<00:00, 33711.75it/s]


#### test.txt

In [22]:
## TEST
for t in tqdm(test, total=test.shape[0]):
    train_item = "data/malaria_yolo/"+t 
    f = open("test.txt", "a")
    f.write(train_item + "\n")
f.close()    

100%|██████████| 115/115 [00:00<00:00, 11863.47it/s]


#### obj.names

In [27]:
# Write the class names, one per line, in the iteration order of
# CATEGORIES_DICT, echoing each name as it is written.
# The file is opened once; `with` flushes and closes it even if the loop fails.
# NOTE(review): append mode is kept, so re-running this cell adds the same
# names to obj.names again.
with open("obj.names", "a") as f:
    for v in CATEGORIES_DICT.values():
        print(v)
        f.write(v + "\n")

trophozoite
ring
schizont
gametocyte


## MODEL NAME

In [33]:
# Underscore-joined list of the exported categories, used as a suffix of the model name.
str_masks_name = '_'.join(MASK_CATEGORIES)

In [34]:
# Timestamp with minute resolution (YYYYMMDDHHMM) taken when the cell runs.
TODAY = datetime.datetime.now().strftime("%Y%m%d%H%M")
# Model name: fixed prefix (which ends in a double underscore), timestamp,
# then the exported category names.
nn_name = "YOLOv3-malaria__{}_{}".format(TODAY, str_masks_name)
nn_name

'YOLOv3-malaria__201908081202_trophozoite_ring_schizont_gametocyte'

In [58]:
import cv2
import os
import matplotlib.pyplot as plt

In [62]:
# Assemble the test images into a video, one image per frame.
video_name = 'video.mp4'
# Only images with exactly this size are written; the writer is created with a
# fixed frame size, so every other image is skipped.
IMG_WIDTH = 1944
IMG_HEIGHT = 1383
fps = 0.5

video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'DIVX'), fps, (IMG_WIDTH, IMG_HEIGHT))
try:
    for im_index, c_row in test_bounding_box_df.iterrows():
        img_path = BOUNDING_BOX_YOLO_PATH + c_row['path']

        frame = cv2.imread(img_path)
        if frame is None:
            # cv2.imread signals a missing or unreadable file by returning None.
            print('Skipping unreadable image:', img_path)
            continue

        height, width = frame.shape[:2]
        if (height == IMG_HEIGHT) and (width == IMG_WIDTH):
            video.write(frame)
finally:
    # Release the writer even when the loop fails so the output file is finalised.
    video.release()
    cv2.destroyAllWindows()

In [None]:
# Build a video from every image file found in `pathIn` and save it to `pathOut`.
# NOTE(review): `pathIn` and `pathOut` are not defined in any visible cell;
# they must be set before this cell is run.
fps = 0.5

files = [f for f in os.listdir(pathIn) if os.path.isfile(os.path.join(pathIn, f))]
# Order the files by their name with the first 5 and last 4 characters removed
# (presumably a fixed prefix and the extension -- confirm against the real names).
files.sort(key=lambda x: x[5:-4])

frame_array = []
size = None
for file_name in files:
    filename = os.path.join(pathIn, file_name)
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        print('Skipping unreadable image:', filename)
        continue

    height, width = img.shape[:2]
    # The writer below uses the size of the last image that was read.
    size = (width, height)
    frame_array.append(img)

if frame_array:
    out = cv2.VideoWriter(pathOut, cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
    try:
        for frame in frame_array:
            out.write(frame)
    finally:
        # Release the writer even when a write fails so the file is finalised.
        out.release()
else:
    print('No readable images found in', pathIn)
