In [None]:
from ultralytics import YOLO
import torch
import os
import shutil
from pathlib import Path
import numpy as np
import random
import zipfile
import pandas as pd
import cv2

In [None]:
# Fix one seed and hand it to every RNG this notebook relies on
# (Python's `random`, NumPy's legacy global RNG, and PyTorch).
random_seed = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(random_seed)

# Load dataset from Kaggle

In [None]:
# this cell is for running in Kaggle
# load kaggle.json from google drive
# !gdown <item id>

In [None]:
# Make sure the Kaggle API credentials are installed where this notebook
# expects them: <home>/.kaggle/kaggle.json.
#   Windows      : C:\Users\<Windows-username>\.kaggle\kaggle.json
#   Linux / macOS: ~/.kaggle/kaggle.json
# If the file is missing, it is copied from ./kaggle.json in the working directory.
from sys import platform

# sys.platform value -> label printed for the user.
platform_labels = {
    "win32": "Windows",
    "linux": "Linux",
    "linux2": "Linux",
    "darwin": "macOS",
}

# Unrecognised platforms are left untouched.
if platform in platform_labels:
    print(platform_labels[platform])
    kaggle_json_path = os.path.join(os.path.expanduser("~"), ".kaggle", "kaggle.json")
    if os.path.exists(kaggle_json_path):
        print("kaggle.json exists")
    else:
        print("kaggle.json does not exist")
        # Create the .kaggle directory on every platform (Windows included);
        # shutil.copy fails if the destination directory is missing.
        os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)
        shutil.copy("./kaggle.json", kaggle_json_path)
        print("kaggle.json is copied to the directory")

In [None]:
# download and unzip the dataset
if(os.path.exists("./rice-image-dataset-for-object-detection")):
    print("The dataset is already downloaded")
else:
    !kaggle datasets download -d alikhalilit98/rice-image-dataset-for-object-detection
    with zipfile.ZipFile("rice-image-dataset-for-object-detection.zip", 'r') as zip_ref:
        zip_ref.extractall("./rice-image-dataset-for-rice-germ-detection")

In [None]:
# Where the raw Kaggle archive was extracted, and where the
# classification dataset (train/val x class folders) is assembled.
raw_dataset_path = './rice-image-dataset-for-rice-germ-detection'
dataset_path = './rice-germ-cls-dataset'

# One folder per split and class; parents=True also creates dataset_path itself.
for split_name in ("train", "val"):
    for class_name in ("Good", "Moderate", "Bad"):
        Path(dataset_path, split_name, class_name).mkdir(parents=True, exist_ok=True)

In [None]:
# Download the label archive with gdown (by file id) and save it inside the
# dataset folder under the name the extraction cell below opens.
!gdown 1Gk73cVdNwL4PYF37XRBKa-lybbdE0k7O -O ./rice-germ-cls-dataset/rice-germ-1050labels.zip

In [None]:
# Unpack the label archive next to where it was downloaded.
labels_archive = f"{dataset_path}/rice-germ-1050labels.zip"
with zipfile.ZipFile(labels_archive) as archive:
    archive.extractall(dataset_path)

# Splitting dataset

In [None]:
# Load the label table, order it by image name, and preview the first rows.
labels_csv = f"{dataset_path}/rice-germ-1050labels.csv"
df = pd.read_csv(labels_csv).sort_values(by=['image_name'])
df.head(10)

In [None]:
from sklearn.model_selection import train_test_split

raw_labels_path = f"{dataset_path}/rice-germ-1050labels"
raw_images_path = f"{raw_dataset_path}/Rice_Image_Dataset_OD/Jasmine"

# Sort the listing: directory order is arbitrary, and an unsorted input would
# make the split differ between machines even with a fixed random_state.
label_files = sorted(Path(raw_labels_path).glob("*.txt"))

# Sample names are the file names without the ".txt" suffix. `.stem` keeps any
# dots inside the name itself, matching the f"{x}.txt" lookup used when copying.
data_lists = [label_file.stem for label_file in label_files]
print(data_lists[:10])

# Stratify on the class id stored as the first character of each label file
# (the same value used later to choose the class folder). Reading it per file
# keeps labels aligned with `data_lists`, which a separately ordered table
# cannot guarantee.
class_ids = [label_file.read_text()[:1] for label_file in label_files]

train, val = train_test_split(data_lists, test_size=0.2, random_state=random_seed, stratify=class_ids)
print(f"train: {len(train)}, val: {len(val)}")

In [None]:
# First character of a label file -> class folder name.
label_to_class = {"0": "Good", "1": "Moderate", "2": "Bad"}


def copy_split(names, split):
    """Copy the image of every sample in `names` into its class folder.

    Args:
        names: sample names (file names without extension).
        split: name of the split folder under `dataset_path` ("train" or "val").

    Samples whose label file does not start with a known class id are not
    copied; they are reported once the split has been processed.
    """
    skipped = []
    for name in names:
        img_path = f"{raw_images_path}/{name}.jpg"
        label_path = f"{raw_labels_path}/{name}.txt"
        with open(label_path, "r") as f:
            label = f.read(1)  # 0: Good, 1: Moderate, 2: Bad
        class_name = label_to_class.get(label)
        if class_name is None:
            skipped.append(name)
            continue
        shutil.copy(img_path, f"{dataset_path}/{split}/{class_name}")
    if skipped:
        print(f"{split}: skipped {len(skipped)} sample(s) with an unknown label, e.g. {skipped[:10]}")


copy_split(train, "train")
copy_split(val, "val")

# Create YAML file

In [None]:
import yaml

# Describe the dataset: absolute root, split folder names, and the class list.
full_path = os.path.abspath(dataset_path)
yaml_data = {
    "path": full_path,
    "train": 'train',
    "val": 'val',
    "nc": 3,
    "names": ['Good', 'Moderate', 'Bad'],
}

# sort_keys=False keeps the keys in the order written above.
yaml_file = f"{dataset_path}/data.yaml"
with open(yaml_file, "w") as f:
    yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)

# Train model

In [None]:
# Hyper-parameters forwarded to model.train() in the next cell.
imgsz = 128   # `imgsz` argument
batch = 320   # `batch` argument
epochs = 400  # `epochs` argument

In [None]:
# Use the first CUDA device when one is available and fall back to the CPU
# otherwise, so the cell does not fail on a machine without a GPU.
device = "0" if torch.cuda.is_available() else "cpu"

model = YOLO("yolov8x-cls.pt")
# NOTE: the project folder name is spelled exactly as the export section's
# `run_path` expects; change both together if it is ever renamed.
trained_model = model.train(data=dataset_path, epochs=epochs, batch=batch, imgsz=imgsz, device=device, project="rice-germ-clssification", name="run", seed=random_seed)

# Export model as ONNX

In [None]:
# Locate the training run whose weights should be exported.
run_path = "./rice-germ-clssification"
run_name = "runxxx" # change this to the model path you want to use
model_path = os.path.join(run_path, run_name)
model_path_pt = os.path.join(model_path, "weights", "best.pt")

In [None]:
# Expected location of the exported file: next to best.pt in the weights folder.
model_path_onnx = os.path.join(model_path, "weights", "best.onnx")

# Reload the selected checkpoint and export it in ONNX format.
to_save_model = YOLO(model_path_pt)
to_save_model.export(format="onnx")

In [None]:
import datetime

to_save_path = "./saved_model"
Path(to_save_path).mkdir(parents=True, exist_ok=True)

# Timestamp in ddmmyyHHMM form, used to keep archives from different runs apart.
time = datetime.datetime.now().strftime("%d%m%y%H%M")

# Zip the selected run folder as rice-germ-cls-model-<ddmmyyHHMM>_imgsz<imgsz>.zip.
# The name must not start with a space, otherwise the archive ends up with a
# leading blank in its file name.
zip_name = f"rice-germ-cls-model-{time}"

shutil.make_archive(f"{to_save_path}/{zip_name}_imgsz{imgsz}", 'zip', model_path)