In [None]:
#imports
!pip install ultralytics -q
from ultralytics import YOLO
import os
import pandas as pd
from glob import glob
import cv2
import csv
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import wandb
import shutil
import yaml
from IPython.display import Image
from datetime import datetime

In [None]:
#configure wandb
# Read the API key from the environment rather than storing it in the notebook,
# so the secret is never committed or shared together with the code.
# When WANDB_API_KEY is not set this is None and wandb.login() is left to use
# its own credential lookup.
WANDB_API_KEY = os.environ.get("WANDB_API_KEY")
wandb.login(key=WANDB_API_KEY)
wandb.init(project="Animal Type Detection")

In [None]:
#download dataset
os.makedirs('animal_type_dataset', exist_ok=True)
!git clone -b animal_type_detection_dataset https://github.com/MVet-Platform/M-Vet_Hackathon24.git ./animal_type_dataset 

In [None]:
#load label data
# Relative path, matching the ./animal_type_dataset folder the dataset was
# cloned into, so the cell does not depend on the notebook running from
# /kaggle/working.
df = pd.read_csv('animal_type_dataset/label_train.csv')
df.head()

In [None]:
# Share of label rows belonging to each value of the 'class' column
df['class'].value_counts(normalize=True)

In [None]:
# True when at least one filename occurs on more than one label row
df['filename'].duplicated().any()

In [None]:
def drawBoundingBoxAroundImage(image_path, bbox, class_):
    """Show an image with one labelled bounding box drawn on it.

    Args:
        image_path: Path of the image file to read.
        bbox: Box corners as ``[xmin, ymin, xmax, ymax]`` in pixel coordinates.
        class_: Caption drawn next to the box.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        raise FileNotFoundError(f'Could not read image: {image_path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # OpenCV drawing calls expect plain integer pixel coordinates
    xmin, ymin, xmax, ymax = (int(round(value)) for value in bbox)
    cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255,0,0), 2)
    # Keep the caption inside the image when the box touches the top edge
    label_y = max(ymin - 10, 15)
    cv2.putText(img, class_, (xmin, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    plt.axis('off')
    plt.show()

In [None]:
#get file path for image files
# NOTE: glob is called without recursive=True, so '**' matches exactly one
# directory level below animal_type_dataset/.
train_matches = df['filename'].map(lambda name: glob(f'animal_type_dataset/**/{name}'))
# Report every label row whose image is missing instead of failing with a bare
# IndexError on the first empty glob result.
train_missing = df.loc[train_matches.map(len) == 0, 'filename'].unique()
if len(train_missing) > 0:
    raise FileNotFoundError(
        f'{len(train_missing)} image(s) not found under animal_type_dataset/, '
        f'e.g. {list(train_missing[:5])}'
    )
# As before, the first match is used when a name is found more than once
df['filepath'] = train_matches.map(lambda found: found[0])

In [None]:
# Number of distinct file names vs. distinct resolved paths (missing values counted too)
df['filename'].nunique(dropna=False), df['filepath'].nunique(dropna=False)

In [None]:
# Preview the first rows of the label table
df.head()

In [None]:
# Draw three randomly chosen labelled boxes. A plain loop is used because the
# drawing function returns nothing; running it through DataFrame.apply made the
# cell end with a meaningless Series of None values.
for _, row in df.sample(3).iterrows():
    drawBoundingBoxAroundImage(row.filepath, [row.xmin, row.ymin, row.xmax, row.ymax], row['class'])

In [None]:
def convert_voc_to_yolo(xmin, ymin, xmax, ymax, img_width=640, img_height=640):
    """Convert a corner-style box into a normalised centre-style box.

    Args:
        xmin, ymin: Top-left corner of the box.
        xmax, ymax: Bottom-right corner of the box.
        img_width: Image width used to normalise the horizontal values.
        img_height: Image height used to normalise the vertical values.

    Returns:
        ``[center_x, center_y, width, height]`` with each value divided by the
        corresponding image dimension.
    """
    # Midpoint of the box along each axis
    x_mid = (xmin + xmax) / 2.0
    y_mid = (ymin + ymax) / 2.0

    # Extent of the box along each axis
    box_w = xmax - xmin
    box_h = ymax - ymin

    # Scale everything by the image size
    return [
        x_mid / img_width,
        y_mid / img_height,
        box_w / img_width,
        box_h / img_height,
    ]

In [None]:
# Split by image rather than by label row. The label files are written once per
# image, so a row-level split can place boxes of the same image in both sets:
# the image then leaks into validation and each copy gets an incomplete label file.
train_files, valid_files = train_test_split(df['filename'].unique(), test_size=0.3, random_state=32)
df_train = df[df['filename'].isin(train_files)]
df_valid = df[df['filename'].isin(valid_files)]

In [None]:
# Class names; a name's position in this list is the numeric class id written to the label files
labels = ['cow', 'goat', 'pig']

In [None]:
os.makedirs('yolo_dataset/train/labels', exist_ok=True)
os.makedirs('yolo_dataset/train/images', exist_ok=True)
train_grouping = df_train.groupby('filepath')
for index, group in enumerate(train_grouping):
    bboxes = group[1].apply(lambda row: [row.xmin, row.ymin, row.xmax, row.ymax] , axis=1).to_list()   
    classes = group[1].apply(lambda row: labels.index(row['class']) , axis=1).to_list() 
    file_path = group[0]
    filename = os.path.basename(file_path)
    with open(f'yolo_dataset/train/labels/{os.path.splitext(filename)[0]}.txt', 'w') as file:
        for class_, bbox in zip(classes, bboxes):
            center_x, center_y, width, height = convert_voc_to_yolo(*bbox)
            file.write(f'{class_} {center_x} {center_y} {width} {height}\n')
    shutil.copy(file_path, 'yolo_dataset/train/images')

In [None]:
os.makedirs('yolo_dataset/valid/labels', exist_ok=True)
os.makedirs('yolo_dataset/valid/images', exist_ok=True)
valid_grouping = df_valid.groupby('filepath')
for index, group in enumerate(valid_grouping):
    bboxes = group[1].apply(lambda row: [row.xmin, row.ymin, row.xmax, row.ymax] , axis=1).to_list()   
    classes = group[1].apply(lambda row: labels.index(row['class']) , axis=1).to_list() 
    file_path = group[0]
    filename = os.path.basename(file_path)
    with open(f'yolo_dataset/valid/labels/{os.path.splitext(filename)[0]}.txt', 'w') as file:
        for class_, bbox in zip(classes, bboxes):
            center_x, center_y, width, height = convert_voc_to_yolo(*bbox)
            file.write(f'{class_} {center_x} {center_y} {width} {height}\n')
    shutil.copy(file_path, 'yolo_dataset/valid/images')

In [None]:
# Relative path, matching the ./animal_type_dataset folder the dataset was
# cloned into, so the cell does not depend on the notebook running from
# /kaggle/working.
df_sample_submission = pd.read_csv('animal_type_dataset/sample_submission.csv')
df_sample_submission.head()

In [None]:
#get file path for test image files
# NOTE: glob is called without recursive=True, so '**' matches exactly one
# directory level below animal_type_dataset/.
test_matches = df_sample_submission['filename'].map(lambda name: glob(f'animal_type_dataset/**/{name}'))
# Report every row whose image is missing instead of failing with a bare
# IndexError on the first empty glob result.
test_missing = df_sample_submission.loc[test_matches.map(len) == 0, 'filename'].unique()
if len(test_missing) > 0:
    raise FileNotFoundError(
        f'{len(test_missing)} test image(s) not found under animal_type_dataset/, '
        f'e.g. {list(test_missing[:5])}'
    )
# As before, the first match is used when a name is found more than once
df_sample_submission['filepath'] = test_matches.map(lambda found: found[0])
df_sample_submission.head()

In [None]:
# Distinct test image paths, in order of first appearance
test_file_paths = df_sample_submission['filepath'].unique()

In [None]:
#create test directory
# exist_ok=True so that re-running the cell does not raise FileExistsError,
# in line with how the train and valid folders are created.
os.makedirs('yolo_dataset/test/images', exist_ok=True)
for file_path in test_file_paths:
    shutil.copy(file_path, 'yolo_dataset/test/images')

In [None]:
# Dataset description handed to the trainer: image folders for each split,
# the number of classes and their names.
data = dict(
    train='./train/images',
    val='./valid/images',
    test='./test/images',
    nc=len(labels),
    names=labels,
)
with open('yolo_dataset/data.yaml', 'w') as yaml_file:
    yaml.dump(data, yaml_file, default_flow_style=False)

In [None]:
#train model
model = YOLO("yolov8n.pt")
# Build the absolute path from the current working directory instead of
# hard-coding /kaggle/working, so it always points at the data.yaml written
# by this notebook.
data_yaml = os.path.abspath('yolo_dataset/data.yaml')
# exist_ok=True reuses the same run folder on every run; without it a re-run is
# saved to a new, numbered folder and the cells below would keep showing the
# artefacts of the first run.
model.train(data=data_yaml, epochs=1, imgsz=640, plots=True, exist_ok=True)

In [None]:
# List the files found in the training run folder
os.listdir("runs/detect/train/")

In [None]:
# Display results.png from the training run folder
Image(filename='runs/detect/train/results.png', width=600)

In [None]:
# Display val_batch0_pred.jpg from the training run folder
# (judging by its name, predictions drawn on a validation batch)
Image(filename='runs/detect/train/val_batch0_pred.jpg', width=600)

In [None]:
#make predictions
# Run the trained model on the test images; annotated copies are saved to runs/detect/predict
results = model.predict(
    source='yolo_dataset/test/images',
    conf=0.25,
    save=True,
    project="runs/detect",
    name="predict",
    exist_ok=True,
)

In [None]:
#display predictions
# Show the first two annotated images found in the prediction folder
predicted_images = glob('runs/detect/predict/*.jpg')
for image_path in predicted_images[:2]:
    preview = Image(filename=image_path, width=1000, height=1000)
    display(preview)
    print("\n")

In [None]:
#create submission file
data = []
for result in results:
    image_name = os.path.basename(result.path)
    class_names = result.names
    # Convert the detections to plain Python numbers once per image. The
    # confidence was previously appended unconverted, so the CSV received the
    # tensor's text representation instead of a number.
    boxes = result.boxes.xyxy.cpu().tolist()
    class_ids = result.boxes.cls.cpu().tolist()
    confidences = result.boxes.conf.cpu().tolist()
    for bbox, cls, conf in zip(boxes, class_ids, confidences):
        class_name = class_names[int(cls)]
        data.append([image_name, class_name, conf] + bbox)

# The timestamp in the file name keeps earlier submissions from being overwritten
with open(f'submission{int(datetime.now().timestamp())}.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    header = ['filename', 'class', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax']
    writer.writerow(header)
    writer.writerows(data)