In [1]:
## YOLOv5

In [2]:
# Access WANDB account
!pip install -q --upgrade wandb
 
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mjutraman[0m (use `wandb login --relogin` to force relogin)


True

In [3]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import os
import shutil
import tensorflow as tf
import yaml

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import ast

import matplotlib.pyplot as plt
import cv2
import PIL

import warnings
warnings.filterwarnings('ignore')

In [5]:
TRAIN_PATH = "/content/drive/MyDrive/siim/tmp/train/"
HEIGHT,WIDTH = 256,256
CHANNELS = 3
BATCH_SIZE = 16
EPOCHS  = 20
SEED  =2021

In [6]:
# Get image path from image_id
def get_path(image_id):
    path = tf.io.gfile.glob(TRAIN_PATH + f"*{image_id}.jpg")[0]
    return path

image_dict = {
    "opacity" : 1,
    "none" : 0
}
df = pd.read_csv("/content/drive/MyDrive/siim/summary/train_info.csv")

df["image_label"] = df["image_label"].map(lambda x : x.split(" ")[0])
df["image_label_id"] = df["image_label"].map(lambda x  : image_dict[x])
df["filepath"] = df["image_id"].map(get_path)
df.head()

Unnamed: 0,image_id,study_id,pneumonia,pneumonia_class,height,width,boxes,image_label,image_label_id,filepath
0,000a312787f2,5776db0cec75,Typical Appearance,1,3488,4256,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity,1,/content/drive/MyDrive/siim/tmp/train/000a3127...
1,000c3a3f293f,ff0879eb20ed,Negative for Pneumonia,0,2320,2832,"[{'x': 0, 'y': 0, 'width': 1, 'height': 1}]",none,0,/content/drive/MyDrive/siim/tmp/train/000c3a3f...
2,0012ff7358bc,9d514ce429a7,Typical Appearance,1,2544,3056,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity,1,/content/drive/MyDrive/siim/tmp/train/0012ff73...
3,001398f4ff4f,28dddc8559b2,Atypical Appearance,3,3520,4280,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity,1,/content/drive/MyDrive/siim/tmp/train/001398f4...
4,001bd15d1891,dfd9fdd85a3e,Typical Appearance,1,2800,3408,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity,1,/content/drive/MyDrive/siim/tmp/train/001bd15d...


In [7]:
train_df,val_df = train_test_split(df,
                                    test_size=0.2,
                                    random_state = SEED,
                                    stratify = df.image_label.values
                                    )

train_df.loc[:,"data"] = "train"
val_df.loc[:,"data"] = "val"
df = pd.concat([train_df, val_df]).reset_index(drop=True)

In [8]:
os.makedirs('/content/drive/MyDrive/siim/version1/dataset/images/train', exist_ok=True)
os.makedirs('/content/drive/MyDrive/siim/version1/dataset/images/val', exist_ok=True)

os.makedirs('/content/drive/MyDrive/siim/version1/dataset/labels/train', exist_ok=True)
os.makedirs('/content/drive/MyDrive/siim/version1/dataset/labels/val', exist_ok=True)
print("Created folder structure")

Created folder structure


In [9]:
IMAGE_PATH = "/content/drive/MyDrive/siim/version1/dataset/images"
for i in df.values:
    data = i[10]
    img_name = i[9].split("/")[-1]
    shutil.copyfile(i[9],f"{IMAGE_PATH}/{data}/{img_name}")

In [9]:
# REF :  https://www.kaggle.com/ayuraj/train-covid-19-detection-using-yolov5

yaml_dict = dict(
    train = "/content/drive/MyDrive/siim/version1/dataset/images/train",
    val = "/content/drive/MyDrive/siim/version1/dataset/images/val",
    nc = 2,
    names = ["none","opacity"]
)

with open("/content/drive/MyDrive/siim/version1/yolov5/data/data.yaml", "w") as f:
    yaml.dump(yaml_dict,f,default_flow_style=True)

In [10]:
df["boxes"] = df["boxes"].map(lambda x : ast.literal_eval(x))

def preprocess_bbox(row):
    factor_x = 1/row[5]
    factor_y = 1/row[4]
    bboxes = []
    
    if row[7] == "opacity":
        for box in row[6]:
            x = box["x"]*factor_x
            y = box["y"]*factor_y
            w = box["width"]*factor_x
            h = box["height"]*factor_y
            xc = x + w/2
            yc = y + h/2
        
            bboxes.append([xc,yc,w,h])
    return bboxes

In [11]:
# Prepare txt files
LABEL_PATH = "/content/drive/MyDrive/siim/version1/dataset/labels"
for row in df.values:
    filename = row[9].split("/")[-1][:-4]
    filepath = f"{LABEL_PATH}/{row[10]}/{filename}.txt"
    
    if row[7] == "opacity":
        bbox = preprocess_bbox(row)
        with open(filepath, "w") as f:
            for box in bbox:
                box = [1] + box
                box = [str(i) for i in box]
                box = ' '.join(box)
                f.write(box)
                f.write('\n')

In [11]:
!python /content/drive/MyDrive/siim/version1/yolov5/train.py --img {HEIGHT} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data data.yaml \
                 --weights yolov5s.pt \
                 --save_period 1\
                 --project siim_covid19_yolov5

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=data.yaml, hyp=data/hyps/hyp.scratch.yaml, epochs=20, batch_size=16, imgsz=256, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, adam=False, sync_bn=False, workers=8, project=siim_covid19_yolov5, entity=None, name=exp, exist_ok=False, quad=False, linear_lr=False, label_smoothing=0.0, upload_dataset=False, bbox_interval=-1, save_period=1, artifact_alias=latest, local_rank=-1, freeze=0, patience=30
[34m[1mgithub: [0mskipping check (not a git repository), for updates see https://github.com/ultralytics/yolov5
YOLOv5 🚀 v5.0-405-gfad57c2 torch 1.9.0+cu102 CUDA:0 (Tesla T4, 15109.75MB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.

In [12]:
TEST_PATH = "/content/drive/MyDrive/siim/tmp/test"
BEST_MODEL_PATH  ="/content/drive/MyDrive/siim/version1/yolov5/exp/weights/best.pt"

In [13]:
!python /content/drive/MyDrive/siim/version1/yolov5/detect.py --weights {BEST_MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img {HEIGHT} \
                  --conf 0.3 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf

[34m[1mdetect: [0mweights=['/content/drive/MyDrive/siim/version1/yolov5/exp/weights/best.pt'], source=/content/drive/MyDrive/siim/tmp/test, imgsz=[256, 256], conf_thres=0.3, iou_thres=0.5, max_det=3, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False
[31m[1mrequirements:[0m /content/requirements.txt not found, check failed.
YOLOv5 🚀 v5.0-405-gfad57c2 torch 1.9.0+cu102 CUDA:0 (Tesla T4, 15109.75MB)

Traceback (most recent call last):
  File "/content/drive/MyDrive/siim/version1/yolov5/detect.py", line 287, in <module>
    main(opt)
  File "/content/drive/MyDrive/siim/version1/yolov5/detect.py", line 282, in main
    run(**vars(opt))
  File "/usr/local/lib/python3.7/dist-packages/torch/autograd/grad_mode.py", line 28, in decorate_context
    return func(*arg

In [18]:
PREDICTIONS_PATH = "/content/drive/MyDrive/siim/version1/dataset/labels/train/"
PRED_FILES = os.listdir(PREDICTIONS_PATH)

print("Sample prediction (in txt file) : \n")

with open(PREDICTIONS_PATH + PRED_FILES[0], "r") as f:
    ls = f.read().strip("\n").split(" ")
    print(f"LABEL : {ls[0]} \nX_CENTER : {ls[1]} \nY_CENTER : {ls[2]} \nWIDTH : {ls[3]} \nHEIGHT : {ls[4]} \nCONFIDENCE : {ls[5]}")

Sample prediction (in txt file) : 

LABEL : 1 
X_CENTER : 0.7437499983638745 
Y_CENTER : 0.6952043926886793 
WIDTH : 0.15916666557591624 
HEIGHT : 0.19320232311320756
1 
CONFIDENCE : 0.2970833229712042
