# Market Recognition Notebook
Use these cells to run the data checks, training, evaluation, and a quick single-image prediction without touching backend/frontend code.

In [15]:
# Paths and knobs
DATA_DIR = r"C:\Users\mommy\.cache\kagglehub\datasets\misrakahmed\vegetable-image-dataset\versions\1\Vegetable Images"
TRAIN_DIR = f"{DATA_DIR}/train"
OUTPUT_DIR = "./model"
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 5
LEARNING_RATE = 1e-3
FINE_TUNE = False

In [16]:
import tensorflow as tf
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    print("GPUs detected:")
    for g in gpus:
        print("-", g)
else:
    print("No GPU detected; training will run on CPU.")

No GPU detected; training will run on CPU.


In [17]:
from pathlib import Path
from collections import Counter
import os
import textwrap

train_path = Path(TRAIN_DIR)
if not train_path.exists():
    raise FileNotFoundError(f"Missing training folder: {train_path}")

class_counts = {}
for cls_dir in train_path.iterdir():
    if not cls_dir.is_dir():
        continue
    count = sum(1 for p in cls_dir.glob("**/*") if p.is_file())
    class_counts[cls_dir.name] = count

total = sum(class_counts.values())
if total == 0:
    print("No images found under", train_path)
else:
    width = max(len(name) for name in class_counts)
    print(f"Found {total} images across {len(class_counts)} classes:\n")
    for name, cnt in sorted(class_counts.items(), key=lambda kv: kv[0]):
        pct = (cnt / total) * 100
        bar = "#" * max(1, int(pct // 2))
        print(f"{name.ljust(width)} | {str(cnt).rjust(4)} | {pct:5.1f}% | {bar}")

Found 15000 images across 15 classes:

Bean         | 1000 |   6.7% | ###
Bitter_Gourd | 1000 |   6.7% | ###
Bottle_Gourd | 1000 |   6.7% | ###
Brinjal      | 1000 |   6.7% | ###
Broccoli     | 1000 |   6.7% | ###
Cabbage      | 1000 |   6.7% | ###
Capsicum     | 1000 |   6.7% | ###
Carrot       | 1000 |   6.7% | ###
Cauliflower  | 1000 |   6.7% | ###
Cucumber     | 1000 |   6.7% | ###
Papaya       | 1000 |   6.7% | ###
Potato       | 1000 |   6.7% | ###
Pumpkin      | 1000 |   6.7% | ###
Radish       | 1000 |   6.7% | ###
Tomato       | 1000 |   6.7% | ###


In [21]:
import subprocess, sys
RUN_TRAIN = True  # flip to True when ready to train
cmd = [
    sys.executable, "model/train.py",
    "--data-dir", DATA_DIR,
    "--output-dir", OUTPUT_DIR,
    "--epochs", str(EPOCHS),
    "--batch-size", str(BATCH_SIZE),
    "--img-size", str(IMG_SIZE[0]), str(IMG_SIZE[1]),
    "--learning-rate", str(LEARNING_RATE),
]
if FINE_TUNE:
    cmd.append("--fine-tune")

if RUN_TRAIN:
    print("Running:", " ".join(cmd))
    result = subprocess.run(cmd, check=False)
    print("Exit code:", result.returncode)
else:
    print("Training skipped; set RUN_TRAIN = True to start a run.")

Running: c:\Users\mommy\miniconda3\envs\marketrec\python.exe model/train.py --data-dir C:\Users\mommy\.cache\kagglehub\datasets\misrakahmed\vegetable-image-dataset\versions\1\Vegetable Images --output-dir ./model --epochs 5 --batch-size 32 --img-size 224 224 --learning-rate 0.001
Exit code: 2


## Evaluate the saved model
Loads the saved `.keras` model, reuses the same validation split from the training folder, and reports overall and per-class accuracy.

In [None]:
from pathlib import Path
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
RUN_EVAL = False  # flip to True to run evaluation
model_path = Path(OUTPUT_DIR) / "product_classifier.keras"
if RUN_EVAL:
    if not model_path.exists():
        raise FileNotFoundError(f"Model not found at {model_path}")
    model = tf.keras.models.load_model(model_path)
    datagen = ImageDataGenerator(rescale=1.0/255, validation_split=0.2)
    val_gen = datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        subset="validation",
        shuffle=False,
    )
    probs = model.predict(val_gen, verbose=1)
    preds = probs.argmax(axis=1)
    acc = float((preds == val_gen.classes).mean())
    print(f"Validation accuracy: {acc:.3f}")
    idx_to_class = {v: k for k, v in val_gen.class_indices.items()}
    per_class_correct = {}
    for idx in range(val_gen.num_classes):
        mask = val_gen.classes == idx
        if mask.sum() == 0:
            continue
        per_class_correct[idx] = float((preds[mask] == idx).mean())
    print("\nPer-class accuracy:")
    for idx, score in sorted(per_class_correct.items()):
        name = idx_to_class.get(idx, f"class_{idx}")
        print(f"- {name}: {score:.3f}")
else:
    print("Evaluation skipped; set RUN_EVAL = True to run.")

## Quick single-image test
Load the saved model and run a prediction on a single image file to sanity check outputs.

In [None]:
from tensorflow.keras.preprocessing import image
SAMPLE_PATH = "./dataset/train/banana/banana_0001.jpg"  # change to any local file
labels_path = Path(OUTPUT_DIR) / "labels.txt"
if not Path(SAMPLE_PATH).is_file():
    print("Update SAMPLE_PATH to point to a real image.")
elif not labels_path.is_file():
    print("labels.txt not found; train the model first.")
elif not model_path.exists():
    print("Saved model missing; train before testing.")
else:
    labels = labels_path.read_text(encoding="utf-8").splitlines()
    model = tf.keras.models.load_model(model_path)
    img = image.load_img(SAMPLE_PATH, target_size=IMG_SIZE)
    arr = image.img_to_array(img) / 255.0
    probs = model.predict(arr[None, ...])[0]
    idx = int(probs.argmax())
    print(f"Predicted: {labels[idx]} (p={probs[idx]:.3f})")