In [None]:
pip install ultralytics


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
import pandas as pd
import os
from skimage.measure import label, regionprops
import shutil
from tqdm import tqdm
import glob
from skimage import exposure

In [None]:
class CFG:
    """Notebook-wide configuration: dataset locations, split ratios, image size."""

    # --- Dataset locations (Kaggle input layout) ---
    # Root folder that the relative paths stored in the spreadsheet are joined to.
    path_img = "../input/miniddsm2/MINI-DDSM-Complete-JPEG-8/"
    # Spreadsheet listing every image together with its mask paths.
    path_xlsx = path_img + "DataWMask.xlsx"
    # Folder holding the additional masks (Mask3, Mask4, ...) of multi-lesion images.
    path_more_two_mask = "../input/miniddsm2/Data-MoreThanTwoMasks/"

    # --- Dataset split fractions ---
    ratio_train = 0.7
    ratio_valid = 0.2
    ratio_test = 0.1

    # --- Square side length (pixels) used when resizing and for `--img` at training ---
    IMG_SIZE = 1280

In [None]:
!pip install openpyxl
data = pd.read_excel(CFG.path_xlsx, engine='openpyxl')

In [None]:
# Peek at the first five rows of the spreadsheet.
data.head(5)

In [None]:
import cv2
# Function for CLAHE (Contrast Limited Adaptive Histogram Equalization)
def apply_clahe(image, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Return a contrast-enhanced copy of `image` using OpenCV's CLAHE.

    Parameters
    ----------
    image : numpy.ndarray
        Image to equalize. OpenCV's CLAHE works on single-channel images,
        so convert colour images to grayscale before calling this.
    clip_limit : float, optional
        Contrast-limiting threshold forwarded as `clipLimit` (default 2.0).
    tile_grid_size : tuple of int, optional
        Grid of tiles forwarded as `tileGridSize` (default (8, 8)).

    Returns
    -------
    numpy.ndarray
        The equalized image returned by `clahe.apply`.

    Raises
    ------
    ValueError
        If `image` is None, which is what `cv2.imread` returns for a
        missing or unreadable file.
    """
    if image is None:
        raise ValueError("apply_clahe received None instead of an image; check that the file was read correctly")
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    return clahe.apply(image)

In [None]:
%pylab inline
import matplotlib.pyplot as plt
import numpy as np


# Jupyter Cell 5

def show_img(img):
    """Display `img` in a large matplotlib figure.

    Parameters
    ----------
    img : array-like
        Image data accepted by `plt.imshow`.

    Returns
    -------
    None
    """
    # Use the explicit `plt.` namespace: a bare `figure(...)` only resolves
    # when `%pylab inline` has injected pylab names into the notebook globals.
    plt.figure(figsize=(30, 15), dpi=50)
    plt.imshow(img)
    plt.show()

In [None]:
# First five rows of the spreadsheet (5 is the default row count of `head`).
data.head()

In [None]:
import random

In [None]:
# ---------------------------------------------------------------------------
# Convert every spreadsheet row into
#   * a pre-processed image : ./data/images/<fullPath>
#   * a YOLO label file     : ./data/labels/<fullPath with .txt extension>
# This is the layout the train/valid split cell below globs and moves.
# ---------------------------------------------------------------------------
path_result_img = "./data/images"
path_result_label = "./data/labels"

# Class ids follow the order of `names` in the dataset YAML written later on.
# NOTE(review): assumes the 'Status' column only holds these three values.
mapping = {'Benign': 0, 'Cancer': 1, 'Normal': 2}

# Set to True to plot each image and its boxes (very slow on the full dataset).
visualize = False

# Every row is handled exactly once; the rows themselves carry their status,
# so no outer loop over the distinct statuses is needed.
for index in tqdm(range(data.shape[0]), total=data.shape[0]):
    point = data.iloc[index]
    rel_path = point['fullPath'].replace('\\', '/')
    path_img = CFG.path_img + rel_path
    status = point['Status']
    mask1 = CFG.path_img + point['Tumour_Contour'].replace('\\', '/')
    mask2 = CFG.path_img + point['Tumour_Contour2'].replace('\\', '/')
    patient_id = point['fullPath'].split("\\")[1]

    # --- Collect the mask files of this image ('-' marks "no mask") ---
    mask_path = []
    if mask1.split('/')[-1] != '-':
        mask_path.append(mask1)
    if mask2.split('/')[-1] != '-':
        mask_path.append(mask2)
        # Images with a second mask may have further masks (Mask3..Mask9)
        # stored in a separate folder.
        path_more_two_mask = CFG.path_more_two_mask + status + "/" + patient_id + "/"
        specific_type = point['fullPath'].split("\\")[2].replace('.jpg', '')
        for mask_id in range(3, 10):
            path_extend_mask = path_more_two_mask + specific_type + '_Mask' + str(mask_id) + ".png"
            if os.path.exists(path_extend_mask):
                mask_path.append(path_extend_mask)

    # --- Output locations ---
    path_image_new = path_result_img + "/" + rel_path
    path_txt_new = path_result_label + "/" + rel_path.replace('.jpg', '.txt')
    os.makedirs(os.path.dirname(path_image_new), exist_ok=True)
    os.makedirs(os.path.dirname(path_txt_new), exist_ok=True)

    # --- Pre-process and save the image ---
    img_main = cv2.imread(path_img)
    if img_main is None:
        # cv2.imread returns None instead of raising for a missing/corrupt file.
        tqdm.write("Skipping unreadable image: " + path_img)
        continue
    img_main = cv2.cvtColor(img_main, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    img_main = apply_clahe(img_main)  # Apply CLAHE
    img_main = cv2.resize(img_main, (CFG.IMG_SIZE, CFG.IMG_SIZE))  # Resize
    # Keep the image as 8-bit: scaling it to floats in [0, 1] before
    # cv2.imwrite would store an almost completely black JPEG.
    if visualize:
        show_img(img_main)
    cv2.imwrite(path_image_new, img_main)

    # --- Build the YOLO label lines: "<class> <cx> <cy> <w> <h>", all normalised ---
    yolo_txt = []
    for path_mask_image in mask_path:
        if not os.path.exists(path_mask_image):
            continue
        mask = cv2.imread(path_mask_image)
        if mask is None:
            continue
        # Binarise: any non-zero pixel counts as lesion.
        mask[(mask > 0) & (mask < 255)] = 255
        cancer_one_channel = mask[:, :, 0]

        # Factors mapping mask pixel coordinates onto the resized image.
        scale_x = CFG.IMG_SIZE / mask.shape[1]
        scale_y = CFG.IMG_SIZE / mask.shape[0]
        w = h = CFG.IMG_SIZE

        for prop in regionprops(cancer_one_channel):
            # bbox is (min_row, min_col, max_row, max_col)
            bb = list(prop.bbox)
            bb[0], bb[2] = bb[0] * scale_y, bb[2] * scale_y
            bb[1], bb[3] = bb[1] * scale_x, bb[3] * scale_x

            if visualize:
                top_left = (int(bb[1]), int(bb[0]))
                bottom_right = (int(bb[3]), int(bb[2]))
                mask_rs = cv2.resize(mask, dsize=(CFG.IMG_SIZE, CFG.IMG_SIZE), interpolation=cv2.INTER_AREA)
                # Draw on a fresh colour copy so the saved image stays untouched.
                img_main_rs = cv2.cvtColor(img_main, cv2.COLOR_GRAY2BGR)
                complete_rec = cv2.rectangle(img_main_rs, top_left, bottom_right, (255, 0, 0), 2)
                mask_rec = cv2.rectangle(mask_rs, top_left, bottom_right, (255, 0, 0), 2)
                show_img(img=complete_rec)
                show_img(img=mask_rec)

            center_x = (bb[1] + bb[3]) / (2 * w)
            center_y = (bb[0] + bb[2]) / (2 * h)
            height_norm = (bb[2] - bb[0]) / h
            width_norm = (bb[3] - bb[1]) / w
            # Discard boxes that fall outside the normalised [0, 1] range.
            if center_x > 1 or center_y > 1 or width_norm > 1 or height_norm > 1:
                continue
            yolo_txt.append("{} {} {} {} {}".format(mapping[status],
                                                    str(np.round(center_x, 4)),
                                                    str(np.round(center_y, 4)),
                                                    str(np.round(width_norm, 4)),
                                                    str(np.round(height_norm, 4))))

    # An image without boxes gets an empty label file. The joined text is
    # written as-is: slicing off its last character would truncate the final
    # number of the last label line.
    with open(path_txt_new, 'w') as file_txt:
        file_txt.write("\n".join(yolo_txt))

In [None]:
# Share of the dataset that belongs to each status.
n_rows = data.shape[0]
for breast_type in data['Status'].unique():
    n_of_type = data[data['Status'] == breast_type].shape[0]
    print(breast_type, ' ', n_of_type / n_rows)

In [None]:
# Fraction of rows that come with at least one tumour mask ('-' means none).
has_mask = data['Tumour_Contour'] != '-'
len(data[has_mask]) / len(data)

In [None]:
# ---------------------------------------------------------------------------
# Split the converted dataset into YOLO train/valid folders:
#   ./yolo_data/images/{train,valid}/  and  ./yolo_data/labels/{train,valid}/
# The split is done per class so each class keeps the same train share.
# ---------------------------------------------------------------------------
original = "./data/"
path_yolo = "./yolo_data/"
SPLIT_SEED = 42  # fixed seed so re-running the notebook gives the same split

os.makedirs(path_yolo, exist_ok=True)
for phase in ['images', 'labels']:
    os.makedirs(path_yolo + phase, exist_ok=True)

# Private generator: reproducible without touching the global `random` state.
split_rng = random.Random(SPLIT_SEED)

for type_data in ['Benign', 'Cancer', 'Normal']:
    path_image_folder = original + 'images/' + type_data + "/"
    # Sort first: glob order is filesystem dependent, which would otherwise
    # make the shuffle irreproducible even with a fixed seed.
    data_set = sorted(glob.glob(path_image_folder + '*/*.jpg'))
    split_rng.shuffle(data_set)

    # CFG defines `ratio_train` (there is no `CFG.ratio`). Everything that is
    # not used for training goes to validation; `ratio_valid` / `ratio_test`
    # are not consumed by this two-way split.
    len_train = int(CFG.ratio_train * len(data_set))
    splits = {
        'train': data_set[:len_train],
        'valid': data_set[len_train:],
    }

    for phase, phase_paths in splits.items():
        path_txt_new = path_yolo + "labels" + "/" + phase + "/"
        path_img_new = path_yolo + "images" + "/" + phase + "/"
        os.makedirs(path_txt_new, exist_ok=True)
        os.makedirs(path_img_new, exist_ok=True)
        for path_image in tqdm(phase_paths, total=len(phase_paths), desc=type_data + " " + phase):
            name = path_image.split('/')[-1]
            txt = name.replace('.jpg', '.txt')
            path_new_image = path_img_new + name
            # The label lives at the same relative path under labels/ instead of images/.
            path_old_txt = path_image.replace('.jpg', '.txt').replace('images', 'labels')
            path_new_txt = path_txt_new + txt
            shutil.move(path_image, path_new_image)
            # Tolerate an image whose label file is missing instead of
            # aborting the whole split half-way through.
            if os.path.exists(path_old_txt):
                shutil.move(path_old_txt, path_new_txt)

In [None]:
!tar -czvf yolo_data.tar.gz ./yolo_data

In [None]:
from IPython.display import FileLink

# Render a clickable download link for the archive created above.
FileLink(path='yolo_data.tar.gz')

In [None]:
!mv -v ./yolo_data ./yolov5/

In [None]:
cd yolov5

In [None]:
# Create .yaml file 
import yaml
data_yaml = dict(
    train = './yolo_data/images/train',
    val = './yolo_data/images/valid',
    nc = 3,
    names = [ 'Benign','Cancer','Normal']
)
# Note that I am creating the file in the yolov5/data/ directory.
with open('./data/breast_cancer.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)
%cat ./data/breast_cancer.yaml

In [None]:
!pip install -r requirements.txt

In [None]:
import torch
# Ask PyTorch to release the CUDA memory it has cached in this kernel before
# the training script is launched in the next cell.
torch.cuda.empty_cache()

In [None]:
!python train.py --img $CFG.IMG_SIZE \
                 --batch 8 \
                 --epochs 1000 \
                 --data ./data/breast_cancer.yaml \
                 --weights yolov5m.pt \
                 --save_period 1\
                 --project breast_cancer\
                 --name runrun\
                 --workers  8