### Import

In [30]:
import shutil
import os
import tqdm as notebook_tqdm
import csv
import sys
import random
from PIL import Image
from datasets import load_dataset
from ultralytics import YOLO
from pathlib import Path
sys.path.append("../src")
from utils import prepare_dataset, resize_folder


In [23]:
import importlib,utils
importlib.reload(utils)

<module 'utils' from '/workspaces/marktguru-home-assignment/notebooks/../src/utils.py'>

### Download from hugging face

The images are put into /silver as they are already preprocessed: properly named and resized to 512 px max side.

Three classes are selected: pizza, spaghetti_bolognese, and spaghetti_carbonara.
Pizza can serve as a class that is more distinct. But both spaghetti dishes can be confused for each other.

Per class, 750 images are downloaded for training, and 250 for validation. 

It is planned to use the Yolo classification model. Due to that, the folder structure and naming are adapted accordingly, and dataset.yaml is created.

In [24]:
CLASSES = {
    "pizza": 76,
    "spaghetti_bolognese": 90,
    "spaghetti_carbonara": 91,
}
OUT_ROOT = Path("../data/silver")
MAKE_CSV = True
SPLIT_MAP = {"train": "train", "validation": "val"}
N_SPLIT = {"train": 750, "validation": 250}


In [25]:
prepare_dataset(
    CLASSES=CLASSES,
    N_SPLIT=N_SPLIT,
    OUT_ROOT=OUT_ROOT,
    MAKE_CSV=MAKE_CSV,
    SPLIT_MAP=SPLIT_MAP,
)

[train] saved counts: {'pizza': 750, 'spaghetti_bolognese': 750, 'spaghetti_carbonara': 750}
[validation] saved counts: {'pizza': 250, 'spaghetti_bolognese': 250, 'spaghetti_carbonara': 250}
dataset.yaml written to: ../data/silver/dataset.yaml
Class order: ['pizza', 'spaghetti_bolognese', 'spaghetti_carbonara']


PosixPath('../data/silver/dataset.yaml')

### Model parameters

**EPOCHS**: The number of times the entire training dataset is passed through the model during training. More epochs can help the model learn better, but too many can lead to overfitting.

**BATCH** (batch size): The number of images the model processes at once before updating its internal parameters. A larger batch size can speed up training (if you have enough GPU memory) but may require more resources.

**IMG_SIZE** (image size): The target size (usually width and height in pixels) to which all training and inference images are resized. Larger sizes can improve detection accuracy but use more memory and computation.

In [28]:
DATASET_YAML = Path("../data/silver/")  # point to your dataset.yaml
BASE_MODEL = "yolov8n-cls.pt"
EPOCHS = 20
IMG_SIZE = 224
BATCH = 16
DEVICE = "cpu"

### First run of the pre-trained model
Pizza and spaghetti carbonara are recognized almost perfectly.
Spaghetti bolognese is missing as a class - confused often with carbonara.

In [41]:
model = YOLO(BASE_MODEL)
random.seed(42)
N = 8 
subset = []
for cls in CLASSES.keys():
    cls_dir = Path(DATASET_YAML) / "val" / cls
    all_imgs = list(cls_dir.glob("*.jpg"))
    if len(all_imgs) >= N:
        subset.extend(random.sample(all_imgs, N))
    else:
        subset.extend(all_imgs)
results = model.predict(source=subset, imgsz=IMG_SIZE, device=DEVICE, save=True, verbose=False, project="../runs", name="base_model",exist_ok=True)
for r in results:
    fname = Path(r.path).name
    pred_class = r.names[r.probs.top1]
    conf = r.probs.top1conf.item()
    print(f"{fname}: {pred_class} ({conf:.2f})")



Results saved to [1m/workspaces/marktguru-home-assignment/runs/base_model[0m
002620_76_pizza.jpg: pizza (0.99)
002667_76_pizza.jpg: pizza (0.86)
002673_76_pizza.jpg: pizza (1.00)
002610_76_pizza.jpg: pizza (0.72)
002675_76_pizza.jpg: pizza (0.96)
002650_76_pizza.jpg: pizza (0.98)
002577_76_pizza.jpg: pizza (0.96)
002504_76_pizza.jpg: pizza (0.99)
025088_90_spaghetti_bolognese.jpg: crayfish (0.91)
025101_90_spaghetti_bolognese.jpg: carbonara (0.38)
025187_90_spaghetti_bolognese.jpg: carbonara (0.80)
025023_90_spaghetti_bolognese.jpg: plate (0.68)
025036_90_spaghetti_bolognese.jpg: plate (0.29)
025201_90_spaghetti_bolognese.jpg: carbonara (1.00)
025011_90_spaghetti_bolognese.jpg: carbonara (1.00)
025068_90_spaghetti_bolognese.jpg: plate (0.50)
022289_91_spaghetti_carbonara.jpg: carbonara (0.95)
022254_91_spaghetti_carbonara.jpg: carbonara (1.00)
022257_91_spaghetti_carbonara.jpg: carbonara (0.99)
022410_91_spaghetti_carbonara.jpg: carbonara (1.00)
022473_91_spaghetti_carbonara.jpg: car

### Fine-tune model with food101 data
* on 15 images
* on 75 images
* on 750 images


In [None]:
model = YOLO(BASE_MODEL)
results = model.train(
    data=str(DATASET_YAML),
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    project="../runs/classify",
    name="food101_15",
    exist_ok=True,
    fraction=0.02
)

In [None]:
model = YOLO(BASE_MODEL)
results = model.train(
    data=str(DATASET_YAML),
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    project="../runs/classify",
    name="food101_75",
    exist_ok=True,
    fraction=0.1
)

In [None]:
model = YOLO(BASE_MODEL)
results = model.train(
    data=str(DATASET_YAML),
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    project="../runs/classify",
    name="food101_750",
    exist_ok=True,
    fraction=1.0
)

In [None]:
model = YOLO(BASE_MODEL)
results = model.train(
    data=str(DATASET_YAML),
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    project="../runs/classify",
    name="food101_750",
    exist_ok=True,
    fraction=1.0
)
for cls in ["pizza", "spaghetti_bolognese", "spaghetti_carbonara"]:
    model.predict(
        source=Path(DATASET_YAML) / "val" / cls,
        imgsz=IMG_SIZE,
        device=DEVICE,
        save=True,
    )

In [23]:
model = YOLO("runs/classify/train3/weights/best.pt")
model.predict(
    source=Path(DATASET_YAML) / "upload",
    imgsz=224,
    device="cpu",   # or "0" if you have GPU
    save=True,
)



image 1/15 /workspaces/marktguru-home-assignment/notebooks/../data/silver/upload/11973-spaghetti-carbonara-ii-DDMFS-4x3-6edea51e421e4457ac0c3269f3be5157.jpg: 224x224 spaghetti_bolognese 0.98, spaghetti_carbonara 0.02, pizza 0.00, 13.1ms


image 2/15 /workspaces/marktguru-home-assignment/notebooks/../data/silver/upload/Eq_it-na_pizza-margherita_sep2005_sml.jpg: 224x224 pizza 0.93, spaghetti_bolognese 0.06, spaghetti_carbonara 0.01, 17.1ms
image 3/15 /workspaces/marktguru-home-assignment/notebooks/../data/silver/upload/Homemade-Pizza_EXPS_FT23_376_EC_120123_3.jpg: 224x224 spaghetti_bolognese 0.49, pizza 0.47, spaghetti_carbonara 0.04, 16.3ms
image 4/15 /workspaces/marktguru-home-assignment/notebooks/../data/silver/upload/Pizza-3007395.jpg: 224x224 spaghetti_bolognese 0.61, pizza 0.38, spaghetti_carbonara 0.02, 12.4ms
image 5/15 /workspaces/marktguru-home-assignment/notebooks/../data/silver/upload/Salami-pizza-hero.jpg: 224x224 pizza 0.80, spaghetti_bolognese 0.12, spaghetti_carbonara 0.07, 13.9ms
image 6/15 /workspaces/marktguru-home-assignment/notebooks/../data/silver/upload/Spaghetti-Bolognese-Chicken.jpg: 224x224 spaghetti_bolognese 1.00, pizza 0.00, spaghetti_carbonara 0.00, 12.5ms
image 7/15 /workspaces/marktguru-hom

[ultralytics.engine.results.Results object with attributes:
 
 boxes: None
 keypoints: None
 masks: None
 names: {0: 'pizza', 1: 'spaghetti_bolognese', 2: 'spaghetti_carbonara'}
 obb: None
 orig_img: array([[[210, 212, 213],
         [210, 212, 213],
         [207, 210, 214],
         ...,
         [ 97, 157, 209],
         [ 84, 151, 206],
         [ 60, 134, 188]],
 
        [[211, 211, 211],
         [210, 212, 212],
         [211, 213, 214],
         ...,
         [ 92, 156, 210],
         [ 75, 147, 201],
         [ 42, 118, 171]],
 
        [[214, 212, 211],
         [214, 212, 211],
         [214, 212, 212],
         ...,
         [ 70, 142, 196],
         [ 85, 160, 216],
         [ 63, 140, 196]],
 
        ...,
 
        [[205, 200, 201],
         [205, 200, 201],
         [207, 201, 202],
         ...,
         [194, 194, 194],
         [194, 194, 194],
         [188, 188, 188]],
 
        [[204, 201, 203],
         [205, 202, 204],
         [208, 203, 205],
         ...,
  

### Validate model

In [None]:
model = YOLO(BASE_MODEL)
results = model.train(
    data=str(DATASET_YAML),
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    verbose=False
)

val_results = model.val(data=str(DATASET_YAML), imgsz=IMG_SIZE, device=DEVICE)
print("Top-1 Accuracy:", val_results.top1)
print("Top-5 Accuracy:", val_results.top5)
print("Summary dict:", val_results.summary())

model.predict(
    source=Path(DATASET_YAML).parent / "silver/val/spaghetti_carbonara",
    imgsz=IMG_SIZE,
    device=DEVICE,
    save=True,
)

### Predict customly downloaded images

Resize 

In [21]:
src = Path("../data/upload")
dst = Path("../data/silver/upload")
resize_folder(src, dst, max_side=512)

📏 ../data/silver/upload/pizza_02.jpg
📏 ../data/silver/upload/spaghetti_carbonara_01.jpg
📏 ../data/silver/upload/spaghetti_carbonara_02.jpg
📏 ../data/silver/upload/spaghetti_bolognese_05.jpg
📏 ../data/silver/upload/pizza_05.jpg
📏 ../data/silver/upload/spaghetti_carbonara_03.jpg
📏 ../data/silver/upload/spaghetti_bolognese_03.jpeg
📏 ../data/silver/upload/pizza_01.jpeg
📏 ../data/silver/upload/spaghetti_bolognese_04.jpg
📏 ../data/silver/upload/spaghetti_carbonara_04.jpg
📏 ../data/silver/upload/spaghetti_bolognese_01.jpeg
📏 ../data/silver/upload/pizza_03.jpg
📏 ../data/silver/upload/pizza_04.jpg
📏 ../data/silver/upload/spaghetti_bolognese_02.jpg
📏 ../data/silver/upload/spaghetti_carbonara_05.jpg
