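**About** : This notebook prepares the extravasation detection dataset (YOLO images and labels, per-fold train/valid splits, data YAML files, and COCO annotations for YoloX) that is then used to train models.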

In [None]:
# Enable IPython's autoreload extension (mode 2) so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch the working directory to ../src/ (presumably so the project-local imports below resolve).
cd ../src/

## Initialization

### Imports

In [None]:
import os
import sys
import cv2
import glob
import yaml
import shutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

# Widen pandas' text display so wide frames and long values are not truncated.
for option_name, option_value in (
    ('display.width', 1000),
    ('display.max_columns', 30),
    ('max_colwidth', 100),
):
    pd.set_option(option_name, option_value)

In [None]:
from params import *
from util.plots import *
from data.preparation import *

## Data

In [None]:
# Load the patient-level and image-level tables from the dataset root.
# NOTE(review): `prepare_data` comes from a star import (presumably data.preparation);
# the exact columns of both frames are defined there — confirm before relying on them.
df_patient, df_img = prepare_data(DATA_PATH)

In [None]:
# Attach the fold assignment to both tables, unless `prepare_data` already provided it.
fold_already_present = "fold" in df_patient.columns
if not fold_already_present:
    folds = pd.read_csv(DATA_PATH + "folds_4.csv")
    # Natural merge: joins on whichever columns each table shares with `folds`.
    df_img, df_patient = (table.merge(folds) for table in (df_img, df_patient))

In [None]:
# Load the annotated extravasation boxes and align the identifier column names
# with the ones used by the image table (patient_id / series / instance).
df = (
    pd.read_csv('../input/active_extravasation_bounding_boxes.csv')
    .rename(columns={"pid": "patient_id", "series_id": "series", "instance_number": "instance"})
    .loc[:, ['patient_id', "series", "instance", "x1", "y1", "x2", "y2"]]
)

In [None]:
# Negative pool: every image belonging to a patient WITHOUT an extravasation injury.
is_extrav_patient = df_patient['extravasation_injury'] == 1
patients_extrav = df_patient[is_extrav_patient].patient_id.values
df_no_extrav = df_img[~df_img['patient_id'].isin(patients_extrav)]
df_no_extrav = df_no_extrav[['patient_id', "series", "instance"]]

# Draw up to 10000 negatives with a fixed seed so that re-running the notebook
# exports the same dataset; the cap avoids a ValueError when the pool is smaller.
n_negatives = min(10000, len(df_no_extrav))
df_no_extrav_sample = (
    df_no_extrav
    .sample(n_negatives, random_state=42)
    .reset_index(drop=True)
)

In [None]:
# Append the sampled negatives to the annotated boxes. The negatives only carry
# patient_id / series / instance, so their x1..y2 columns become NaN after concat.
# NOTE: this rebinds `df`; re-running this cell alone appends the negatives again.
df = pd.concat([df, df_no_extrav_sample], ignore_index=True)

In [None]:
# Attach per-image metadata (frame, label, file path, fold) to every row.
# Natural inner merge on the shared identifier columns.
image_metadata_columns = [
    'patient_id', 'series', 'instance',
    'frame', 'extravasation_injury', 'path', 'fold',
]
df = df.merge(df_img[image_metadata_columns])

In [None]:
# Number of rows (annotated boxes + sampled negatives) left after the metadata merge.
len(df)

In [None]:
# Visual sanity check: show 4 randomly picked rows side by side with their box
# (coordinates are still in pascal_voc pixel format at this point).
plt.figure(figsize=(20, 5))

image_paths = df['path'].values
box_coords = df[["x1", "y1", "x2", "y2"]].values

for panel in range(4):
    plt.subplot(1, 4, panel + 1)

    idx = np.random.choice(len(df))
    img = cv2.imread(image_paths[idx])
    boxes = box_coords[idx]

    # plot_boxes receives a batch of boxes, hence the added leading axis.
    plot_boxes(img, boxes[np.newaxis], bbox_format="pascal_voc")

plt.show()

### Yolo prep

In [None]:
# Detection classes written to the data yaml (nc / names). There is a single class,
# which is why the label files always use class index 0.
CLASSES = ['extravasation']

In [None]:
# Output locations: YOLO_PATH receives the data yaml files, DATA_PATH the exported dataset.
# NOTE: DATA_PATH is rebound here. Earlier cells used the previous value to load
# the raw data and folds, so they must not be re-run after this cell without
# restoring it first.
YOLO_PATH, DATA_PATH = '../yolov7/', '../input/yolo/'

for output_root in (YOLO_PATH, DATA_PATH):
    os.makedirs(output_root, exist_ok=True)

In [None]:
# Version tag of the exported dataset: used in the output folder (v{VERSION})
# and in the data yaml file names.
VERSION = 2

In [None]:
# PLOT: preview every exported image (when False, a preview is still shown every 200 iterations).
# SAVE: reset the per-fold folders and write the images / label files to disk.
PLOT = False
SAVE = True

In [None]:
# Start every export from empty per-fold folders so files left over from a
# previous run of the same VERSION cannot leak into the new dataset.
if SAVE:
    os.makedirs(f'{DATA_PATH}/v{VERSION}', exist_ok=True)
    for fold in [0, 1, 2, 3]:
        yolo_img_dir = f'{DATA_PATH}/v{VERSION}/{fold}/images/'
        yolo_label_dir = f'{DATA_PATH}/v{VERSION}/{fold}/labels/'

        print(yolo_img_dir, yolo_label_dir)

        for out_dir in (yolo_img_dir, yolo_label_dir):
            # Drop the previous content (if any), then recreate the empty folder.
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)
            os.makedirs(out_dir, exist_ok=True)

In [None]:
# Export every image once, together with ALL of its boxes.
#
# `df` holds one row per box, so an image annotated with several boxes spans
# several rows. Rows are grouped by image path so that the label file is written
# a single time with every box. Opening it in 'w' mode once per row would keep
# only the last box of each image.
#
# For each image:
#   1. read it as grayscale and center-crop it to 384x384,
#   2. shift the pascal_voc boxes into the crop's coordinate frame,
#   3. convert them to normalized YOLO format (x_center, y_center, width, height),
#   4. write the image and its label file into the folder of its fold.
# Negative images (NaN boxes) get an empty label file.
#
# NOTE(review): boxes reaching outside the 384x384 crop are not clipped, so
# their normalized coordinates can fall outside [0, 1] — confirm this cannot
# happen with the annotations at hand.
box_columns = ["x1", "y1", "x2", "y2"]
crop_size = 384

rows_per_image = df.groupby('path', sort=False)

for i, (img_file, rows) in enumerate(tqdm(rows_per_image, total=rows_per_image.ngroups)):
    name = img_file.split('/')[-1][:-4]
    fold = rows['fold'].values[0]  # all rows of one image share the same fold

    yolo_img_dir = f'{DATA_PATH}/v{VERSION}/{fold}/images/'
    yolo_label_dir = f'{DATA_PATH}/v{VERSION}/{fold}/labels/'

    # (n_boxes, 4) pascal_voc pixel coordinates; NaN rows for negatives
    boxes = rows[box_columns].values.astype(float)

    img = cv2.imread(img_file, 0)
    if img is None:
        # cv2.imread returns None instead of raising on a missing / unreadable file
        raise FileNotFoundError(f"Could not read image: {img_file}")
    h, w = img.shape

    # Crop: express the boxes relative to the top-left corner of the center crop
    assert h > crop_size and w > crop_size
    dh = (h - crop_size) / 2
    dw = (w - crop_size) / 2
    boxes[:, [0, 2]] -= dw
    boxes[:, [1, 3]] -= dh

    img = center_crop_pad(img, crop_size)

    # To yolo: normalize, then (x1, y1, x2, y2) -> (xc, yc, w, h)
    h, w = crop_size, crop_size
    boxes[:, [0, 2]] /= w
    boxes[:, [1, 3]] /= h
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    boxes[:, 0] += boxes[:, 2] / 2
    boxes[:, 1] += boxes[:, 3] / 2

    if SAVE:
        cv2.imwrite(f'{yolo_img_dir}/{name}.png', img)
        written = set()
        with open(f'{yolo_label_dir}/{name}.txt', 'w') as f:
            for box in boxes:
                if np.isnan(box).any():
                    continue  # negative sample: no box to write
                str_bbox = "0 " + ' '.join([f"{b:.4g}" for b in box])
                if str_bbox not in written:  # skip exact duplicate annotations
                    f.write(str_bbox + '\n')
                    written.add(str_bbox)

    # Always preview one image out of 200 as a sanity check, or all of them when PLOT is set
    if PLOT or not (i % 200):
        plt.figure(figsize=(5, 5))
        plot_boxes(img, boxes)
        plt.show()

### Create folds

In [None]:
# Assemble one train/valid dataset per held-out fold, plus a "fullfit" dataset
# that trains on every fold and re-uses fold 0 as its validation set.
for fold in [0, 1, 2, 3, "fullfit"]:
    print(f'\n ->  Creating fold {fold}\n')

    dest = f'{DATA_PATH}/v{VERSION}/{fold}_train/'

    # Wipe any previous export of this split, then rebuild the empty layout.
    os.makedirs(dest, exist_ok=True)
    shutil.rmtree(dest)
    for content in ('images', 'labels'):
        for subset in ('train', 'valid'):
            os.makedirs(f'{dest}/{content}/{subset}/', exist_ok=True)

    for fold_c in tqdm([0, 1, 2, 3]):
        yolo_img_dir = f'{DATA_PATH}/v{VERSION}/{fold_c}/images/'
        yolo_label_dir = f'{DATA_PATH}/v{VERSION}/{fold_c}/labels/'

        # The held-out fold is the validation data, every other fold is training data.
        subsets = ['valid' if fold == fold_c else 'train']
        # "fullfit" never matches a fold index, so fold 0 is additionally copied to valid.
        if fold == "fullfit" and fold_c == 0:
            subsets.append('valid')

        for subset in subsets:
            shutil.copytree(yolo_img_dir, dest + f'/images/{subset}/', dirs_exist_ok=True)
            shutil.copytree(yolo_label_dir, dest + f'/labels/{subset}/', dirs_exist_ok=True)

    # Report the resulting sizes; image and label counts should match per subset.
    for caption, subfolder in (
        ("- Number of training images :", '/images/train/'),
        ("- Number of training labels :", '/labels/train/'),
        ("- Number of validation images :", '/images/valid/'),
        ("- Number of validation labels :", '/labels/valid/'),
    ):
        print(caption, len(os.listdir(dest + subfolder)))


### Data yaml

In [None]:
# Write one data config per split into YOLO_PATH: image folders, number of
# classes and class names.
for fold in [0, 1, 2, 3, "fullfit"]:
    dest = f'{DATA_PATH}/v{VERSION}/{fold}_train/'
    yaml_file = f'{YOLO_PATH}/data_{VERSION}_{fold}.yaml'

    data_yaml = {
        "train": dest + '/images/train/',
        "val": dest + '/images/valid/',
        "nc": len(CLASSES),
        "names": CLASSES,
    }

    # Dump config file
    with open(yaml_file, 'w') as outfile:
        yaml.dump(data_yaml, outfile, default_flow_style=True)

### YoloX

In [None]:
# %pip install globox  # one-off install of the package imported in the next cell

In [None]:
from globox import AnnotationSet

# Convert every split to a COCO-style layout next to the YOLO one:
#   {dest}/annotations/{train2017,val2017}.json  and  {dest}/{train2017,val2017}/
# (presumably the layout the YoloX setup expects — confirm against its config).
#
# Each tuple is (tag used in the console messages, YOLO subset folder, COCO name).
coco_subsets = [
    ("train", "train", "train2017"),
    ("val", "valid", "val2017"),
]

for fold in tqdm([0, 1, 2, 3, "fullfit"]):
    print(f'\n ->  Creating fold {fold}\n')

    dest = f'{DATA_PATH}/v{VERSION}/{fold}_train/'
    os.makedirs(f"{dest}/annotations/", exist_ok=True)

    for tag, subset, coco_name in coco_subsets:
        print(f"- Create {tag} labels")
        yolo = AnnotationSet.from_yolo_v5(
            folder=f"{dest}/labels/{subset}/",
            image_folder=f"{dest}/images/{subset}/"
        )
        yolo.save_coco(f"{dest}/annotations/{coco_name}.json", auto_ids=True)

        print(f"- Copy {tag} images")
        coco_img_dir = f"{dest}/{coco_name}"
        # Without this reset shutil.copytree raises FileExistsError when the cell
        # is re-run; removing the folder first keeps it an exact mirror of the
        # current images.
        if os.path.isdir(coco_img_dir):
            shutil.rmtree(coco_img_dir)
        shutil.copytree(f"{dest}/images/{subset}/", coco_img_dir)

In [None]:
# yolo.show_stats()

Done!