In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import patches
import pandas as pd
import os 
import shutil 
import numpy as np 
import random
from PIL import Image, ImageDraw
from sklearn.model_selection import train_test_split


Load matlab parquet

In [None]:

# Read the bounding-box table exported from MATLAB and replace the generic
# VarN column names with descriptive ones.
df = pd.read_parquet('EyeLabelling.parquet').rename(
    columns={
        'Var1': 'Image',
        'Var2': 'Box_Type',
        'Var3': 'Xmin',
        'Var4': 'Ymin',
        'Var5': 'Xmax',
        'Var6': 'Ymax',
    }
)

# The four box-corner columns are stored as integers.
df = df.astype({'Xmin': int, 'Xmax': int, 'Ymin': int, 'Ymax': int})
print(df)


Pull the MATLAB annotations for one image out of the annotation table and put them into their own dict for that image

In [None]:
def convert_matlab_to_yolov5(image_name):
    """Collect the bounding boxes and image size recorded for one image.

    Looks up every row of the module-level ``df`` table whose ``Image``
    column equals ``image_name`` and opens ``./images/<image_name>`` to read
    its dimensions.

    Args:
        image_name: File name of the image as it appears in ``df.Image``.

    Returns:
        dict with three keys:
            'bboxes'     - list of dicts, one per row, with the keys
                           'class', 'xmin', 'ymin', 'xmax' and 'ymax'.
            'filename'   - ``image_name`` unchanged.
            'image_size' - ``(width, height, 3)``.

    Raises:
        FileNotFoundError: if ``./images/<image_name>`` does not exist.
    """
    # Select the matching rows directly. The previous detour through an empty
    # frame used DataFrame.append, which was removed in pandas 2.0.
    rows = df.loc[df.Image == image_name, :]

    target_image = "./images/" + image_name
    # The context manager closes the file handle once the size has been read.
    with Image.open(target_image) as im:
        # NOTE: the channel count is hard-coded to 3; it is not read from the image.
        size_w_dim = im.size + (3,)

    bboxes = [
        {
            "class": row['Box_Type'],
            'xmin': row['Xmin'],
            'ymin': row['Ymin'],
            'xmax': row['Xmax'],
            'ymax': row['Ymax'],
        }
        for _, row in rows.iterrows()
    ]

    return {
        'bboxes': bboxes,
        'filename': image_name,
        'image_size': size_w_dim,
    }


Convert to the required YOLO format

In [None]:
# Dictionary that maps class names to IDs
class_name_to_id_mapping = {"Haemorrhage": 0,
                           "Exudate": 1}

def convert_to_yolov(info_dict):
    """Write one image's bounding boxes as a YOLO-format label file.

    Each box becomes one line ``<class_id> <x_center> <y_center> <width>
    <height>`` with the four geometry values divided by the image width or
    height and formatted to three decimals. The file is written to
    ``./annotations/<image stem>.txt`` (the directory must already exist).

    Args:
        info_dict: dict with the keys
            'bboxes'     - iterable of dicts with 'class', 'xmin', 'ymin',
                           'xmax', 'ymax' (pixel coordinates),
            'filename'   - image file name, used to derive the label name,
            'image_size' - sequence whose first two items are width, height.

    Boxes whose class is not in ``class_name_to_id_mapping`` are reported and
    skipped. Previously such a box either raised NameError or was silently
    written with the class id of the preceding box. If no valid box remains,
    an empty label file is written.
    """
    # Only width and height are needed; any further items (channels) are ignored.
    image_w, image_h = info_dict["image_size"][:2]
    print_buffer = []

    # For each bounding box
    for b in info_dict["bboxes"]:
        try:
            class_id = class_name_to_id_mapping[b["class"]]
        except KeyError:
            print("Invalid Class. Must be one from ", class_name_to_id_mapping.keys())
            # Without a valid class id there is nothing meaningful to write.
            continue

        # Transform the bbox co-ordinates as per the format required by YOLO v5
        b_center_x = (b["xmin"] + b["xmax"]) / 2
        b_center_y = (b["ymin"] + b["ymax"]) / 2
        b_width    = (b["xmax"] - b["xmin"])
        b_height   = (b["ymax"] - b["ymin"])

        # Normalise the co-ordinates by the dimensions of the image
        b_center_x /= image_w
        b_center_y /= image_h
        b_width    /= image_w
        b_height   /= image_h

        print_buffer.append("{} {:.3f} {:.3f} {:.3f} {:.3f}".format(class_id, b_center_x, b_center_y, b_width, b_height))

    # Swap only the extension: str.replace("jpg", "txt") would also rewrite a
    # "jpg" that happens to occur inside the file name itself.
    label_name = os.path.splitext(info_dict["filename"])[0] + ".txt"

    # Write once, after all boxes are collected, and close the handle.
    # Every line ends with a newline, as the reader of these files expects.
    with open('./annotations/' + label_name, 'w') as label_file:
        label_file.write("".join(line + "\n" for line in print_buffer))


Create an annotation file for each image

In [None]:
# Create one YOLO annotation file per annotated image.
# convert_to_yolov writes into ./annotations/, so make sure the folder exists;
# on a fresh checkout the first write would otherwise fail.
os.makedirs('annotations', exist_ok=True)

unique_images = df.Image.unique()

for name in unique_images:
    info_dict = convert_matlab_to_yolov5(name)
    convert_to_yolov(info_dict)

# Paths of every label file now present in the annotations folder.
annotations = [os.path.join('annotations', x) for x in os.listdir('annotations') if x.endswith(".txt")]

Plot bounding box on image

In [None]:
# Fix the seed so the randomly chosen example is the same on every run.
random.seed(1)

# Reverse lookup: class ID -> class name.
class_id_to_name_mapping = {
    class_id: class_name
    for class_name, class_id in class_name_to_id_mapping.items()
}

def plot_bounding_box(image, annotation_list):
    """Draw YOLO-format boxes and class names on an image and display it.

    Args:
        image: PIL image. The boxes are drawn directly onto this object
            (``ImageDraw.Draw`` draws in place).
        annotation_list: list of rows ``[class_id, x_center, y_center,
            width, height]`` with the geometry normalised by the image
            width/height. May be empty, in which case the bare image is shown.
    """
    # reshape(-1, 5) keeps the array two-dimensional even for an empty list,
    # which previously raised IndexError on the column indexing below.
    annotations = np.array(annotation_list, dtype=float).reshape(-1, 5)
    w, h = image.size

    plotted_image = ImageDraw.Draw(image)

    # Scale the normalised values back to pixels.
    transformed_annotations = np.copy(annotations)
    transformed_annotations[:, [1, 3]] = annotations[:, [1, 3]] * w
    transformed_annotations[:, [2, 4]] = annotations[:, [2, 4]] * h

    # Convert (centre, size) to corner coordinates: columns 1-2 become the
    # top-left corner, columns 3-4 the bottom-right corner.
    transformed_annotations[:, 1] = transformed_annotations[:, 1] - (transformed_annotations[:, 3] / 2)
    transformed_annotations[:, 2] = transformed_annotations[:, 2] - (transformed_annotations[:, 4] / 2)
    transformed_annotations[:, 3] = transformed_annotations[:, 1] + transformed_annotations[:, 3]
    transformed_annotations[:, 4] = transformed_annotations[:, 2] + transformed_annotations[:, 4]

    for ann in transformed_annotations:
        obj_cls, x0, y0, x1, y1 = ann
        plotted_image.rectangle(((x0, y0), (x1, y1)))

        # Label is placed 10 px above the top-left corner of the box.
        plotted_image.text((x0, y0 - 10), class_id_to_name_mapping[(int(obj_cls))])

    plt.imshow(np.array(image))
    plt.show()



Show example file:

In [None]:
# Pick one annotation file at random
annotation_file = random.choice(annotations)
print(annotation_file)

# Each line of the file is one box: split it on spaces and parse the numbers.
# The text after the final newline is dropped.
with open(annotation_file, "r") as label_handle:
    label_lines = label_handle.read().split("\n")[:-1]
annotation_list = [
    [float(token) for token in label_line.split(" ")]
    for label_line in label_lines
]

# Derive the matching image path from the annotation path
image_file = annotation_file.replace("annotations", "images").replace("txt", "jpg")
assert os.path.exists(image_file)

# Load the image and draw its boxes
image = Image.open(image_file)
plot_bounding_box(image, annotation_list)

In [None]:
# Read annotations and pair each one with its image by file stem.
# Sorting the two directory listings independently only lines the pairs up
# when both folders hold exactly matching files; an un-annotated image, a
# stray file or the train/val/test sub-folders (present on a re-run) would
# shift the pairing or make the lists differ in length.
image_by_stem = {
    os.path.splitext(x)[0]: os.path.join('images', x)
    for x in os.listdir('images')
    if os.path.isfile(os.path.join('images', x))
}
annotation_names = sorted(x for x in os.listdir('annotations') if x[-3:] == "txt")
annotation_stems = [os.path.splitext(x)[0] for x in annotation_names]

missing_images = [stem for stem in annotation_stems if stem not in image_by_stem]
if missing_images:
    raise FileNotFoundError(
        "No image found in 'images' for annotation(s): " + ", ".join(missing_images)
    )

annotations = [os.path.join('annotations', x) for x in annotation_names]
images = [image_by_stem[stem] for stem in annotation_stems]

# Split the dataset into train-valid-test splits (80% / 10% / 10%)
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotations, test_size = 0.2, random_state = 1)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.5, random_state = 1)

In [None]:
# Create the train/val/test folders for images and annotations.
# exist_ok=True lets the cell be re-run; os.mkdir raised FileExistsError as
# soon as the folders were already there.
for split_dir in (
    'images/train',
    'images/val',
    'images/test',
    'annotations/train',
    'annotations/val',
    'annotations/test',
):
    os.makedirs(split_dir, exist_ok=True)

In [194]:
#Utility function to move images 
def move_files_to_folder(list_of_files, destination_folder):
    """Move every path in ``list_of_files`` into ``destination_folder``.

    Args:
        list_of_files: iterable of source paths.
        destination_folder: path handed to ``shutil.move`` as the destination.

    Raises:
        Whatever ``shutil.move`` raises (e.g. ``FileNotFoundError`` for a
        missing source). The offending path is printed first and the original
        exception then propagates unchanged.
    """
    for f in list_of_files:
        try:
            shutil.move(f, destination_folder)
        except Exception:
            # Show which file failed, then re-raise the real error. The old
            # bare ``except`` + ``assert False`` hid the cause, also trapped
            # KeyboardInterrupt, and is stripped when Python runs with -O.
            print(f)
            raise

# Send each split to its destination folder, images first, then annotations.
for split_files, split_folder in (
    (train_images, 'images/train'),
    (val_images, 'images/val/'),
    (test_images, 'images/test/'),
    (train_annotations, 'annotations/train/'),
    (val_annotations, 'annotations/val/'),
    (test_annotations, 'annotations/test/'),
):
    move_files_to_folder(split_files, split_folder)

Train using YoloV5 repository (https://github.com/ultralytics/yolov5) to generate model and weights

In [6]:
# Run the YOLOv5 detect.py script on the test images with the best.pt weights
# from the eye_det12 training run (confidence threshold 0.25, image size 512).
# --project/--name select the output location (./detect, eye_det).
!python ../example/yolov5/detect.py --source ./images/test/ --weights ../example/yolov5/runs/train/eye_det12/weights/best.pt --conf 0.25 --name eye_det --line-thickness 1 --hide-conf --imgsz 512 --project ./detect

[34m[1mdetect: [0mweights=['../example/yolov5/runs/train/eye_det12/weights/best.pt'], source=./images/test/, data=..\example\yolov5\data\coco128.yaml, imgsz=[512, 512], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=./detect, name=eye_det, exist_ok=False, line_thickness=1, hide_labels=False, hide_conf=True, half=False, dnn=False
YOLOv5  v6.1-135-g7926afc torch 1.11.0 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12287MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs
image 1/69 C:\Users\sheff\Desktop\code\yolov5project\images\test\1035_right.jpg: 512x512 Done. (0.006s)
image 2/69 C:\Users\sheff\Desktop\code\yolov5project\images\test\1094_right.jpg: 512x512 Done. (0.007s)
image 3/69 C:\Users\sheff\Desktop\code\yolov5project\images\test\1157_right.jpg: 512x512 5 Haemorrhages