### Import

In [None]:
import random
import pandas as pd
import time
import json
import yaml
from ultralytics import YOLO
from pathlib import Path

### Yolo model parameters

**EPOCHS**: The number of times the entire training dataset is passed through the model during training. More epochs can help the model learn better, but too many can lead to overfitting.

**IMG_SIZE** (image size): The target size (usually width and height in pixels) to which all training and inference images are resized. Larger sizes can improve detection accuracy but use more memory and computation.

**BATCH** (batch size): The number of images the model processes at once before updating its internal parameters. A larger batch size can speed up training (if you have enough GPU memory) but may require more resources.

In [13]:
DATASET_YAML = Path("../data/silver/")  # point to your dataset.yaml
BASE_MODEL = "yolov8n-cls.pt"
EPOCHS = 50
IMG_SIZE = 512
FRACTION = 1
BATCH = 16
DEVICE = "cpu"
PROJECT_PATH = "../runs/classify"

CLASSES = {
    "pizza": 76,
    "spaghetti_bolognese": 90,
    "spaghetti_carbonara": 91,
}

### First run of the pre-trained model

In [None]:
model = YOLO(BASE_MODEL)
random.seed(42)
N = 8 
subset = []
for cls in CLASSES.keys():
    cls_dir = Path(DATASET_YAML) / "val" / cls
    all_imgs = list(cls_dir.glob("*.jpg"))
    if len(all_imgs) >= N:
        subset.extend(random.sample(all_imgs, N))
    else:
        subset.extend(all_imgs)
results = model.predict(source=subset, imgsz=IMG_SIZE, device=DEVICE, save=True, verbose=False, project=PROJECT_PATH, name="food101_base_model",exist_ok=True)
for r in results:
    fname = Path(r.path).name
    pred_class = r.names[r.probs.top1]
    conf = r.probs.top1conf.item()
    print(f"{fname}: {pred_class} ({conf:.2f})")

Results saved to [1m/workspaces/marktguru-home-assignment/runs/base_model[0m
002620_76_pizza.jpg: pizza (0.99)
002667_76_pizza.jpg: pizza (0.86)
002673_76_pizza.jpg: pizza (1.00)
002610_76_pizza.jpg: pizza (0.72)
002675_76_pizza.jpg: pizza (0.96)
002650_76_pizza.jpg: pizza (0.98)
002577_76_pizza.jpg: pizza (0.96)
002504_76_pizza.jpg: pizza (0.99)
025088_90_spaghetti_bolognese.jpg: crayfish (0.91)
025101_90_spaghetti_bolognese.jpg: carbonara (0.38)
025187_90_spaghetti_bolognese.jpg: carbonara (0.80)
025023_90_spaghetti_bolognese.jpg: plate (0.68)
025036_90_spaghetti_bolognese.jpg: plate (0.29)
025201_90_spaghetti_bolognese.jpg: carbonara (1.00)
025011_90_spaghetti_bolognese.jpg: carbonara (1.00)
025068_90_spaghetti_bolognese.jpg: plate (0.50)
022289_91_spaghetti_carbonara.jpg: carbonara (0.95)
022254_91_spaghetti_carbonara.jpg: carbonara (1.00)
022257_91_spaghetti_carbonara.jpg: carbonara (0.99)
022410_91_spaghetti_carbonara.jpg: carbonara (1.00)
022473_91_spaghetti_carbonara.jpg: car

Pizza and spaghetti carbonara are recognized almost perfectly.
Spaghetti bolognese is missing as a class - confused often with carbonara.

### Fine-tune model with food101 data

The complete set consists of 750 images for training and 250 images for validation in each of three classes.

Eight models will be trained for each constellation of:
- two epoch values: 20 and 50,
- two images sizes: 224 (standard for Yolo) and 512 (images from Hugging Face)
- two dataset lengths: 75 and 750

In [None]:
model = YOLO(BASE_MODEL)
EPOCHS_LIST   = [20, 50]
IMG_SIZE_LIST = [224,512]
FRACTIONS     = [0.1, 1]
for epochs in EPOCHS_LIST:
    for img in IMG_SIZE_LIST:
        for frac in FRACTIONS:
            name = f"food101_e{epochs}_img{img}_frac{frac}"
            print(f"\n▶️ {name}")
            t0 = time.time()
            res = model.train(
                data=str(DATASET_YAML),
                epochs=epochs,
                imgsz=img,
                batch=BATCH,
                device=DEVICE,
                project=str(PROJECT_PATH),
                name=name,
                exist_ok=True,
                fraction=frac,
                verbose=False
            )
            runtime_min = round((time.time() - t0) / 60, 3)
            (PROJECT_PATH / name / "summary.json").write_text(
                json.dumps({"runtime_min": runtime_min}, ensure_ascii=False)
            )
            print(f"⏱️ runtime: {runtime_min} min")


▶️ food101_e20_img224_frac0.1
Ultralytics 8.3.201 🚀 Python-3.11.13 torch-2.8.0+cu128 CPU (Intel Xeon Platinum 8370C CPU @ 2.80GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=../data/silver, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=0.1, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-cls.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=food101_e20_img224_frac0.1, nbs=64, nms=False, opset=None, optimize=False, optimizer=aut

### Gather statistics of model training runs

In [None]:
rows = []
def pick_col(cols, candidates):
    for c in candidates:
        if c in cols: return c
    return None
for run in sorted(PROJECT_PATH.glob("food101_e*_img*_frac*")):
    args_p     = run / "args.yaml"
    results_p  = run / "results.csv"
    summary_p  = run / "summary.json"
    best_pt_p  = run / "weights" / "best.pt"
    if not results_p.exists():
        continue
    args = {}
    if args_p.exists():
        args = yaml.safe_load(args_p.read_text())
    df = pd.read_csv(results_p)
    top1_col = pick_col(df.columns, ["metrics/accuracy_top1", "top1", "val/top1"])
    if top1_col is None:
        best_top1 = float("nan")
    else:
        best_row_idx = df[top1_col].idxmax()
        best_top1 = float(df.loc[best_row_idx, top1_col])
    runtime_min = None
    if summary_p.exists():
        runtime_min = json.loads(summary_p.read_text()).get("runtime_min")
    rows.append({
        "run_dir": str(run.resolve()),
        "epochs": args.get("epochs"),
        "img_size": args.get("imgsz") or args.get("img_size"),
        "fraction": args.get("fraction"),
        "top1_acc_best": round(best_top1, 4) if best_top1 == best_top1 else None,  # NaN guard
        "runtime_min": runtime_min,
        "best_weights": str(best_pt_p.resolve()) if best_pt_p.exists() else None,
    })
stats = pd.DataFrame(rows)
stats_path = PROJECT_PATH / "experiment_stats.csv"
stats.to_csv(stats_path, index=False)
print(f"📄 Saved stats: {stats_path}")
print(stats)

📄 Saved stats: ../runs/experiment_stats.csv
Empty DataFrame
Columns: []
Index: []


### Predict custom uploaded images

In [None]:
model = YOLO("runs/classify/food101_e20_img224_frac1/weights/best.pt")
model.predict(
    source=Path(DATASET_YAML) / "upload",
    imgsz=224,
    device="cpu",
    save=True,
    project=PROJECT_PATH,
    name="food101_predict_upload"
)