# Food-101 data.py Tests

This notebook checks that `src/data.py` works correctly:
 - Locates the 2 dataset folders `meta/` and `images/`
 - Validates required files exist
 - Builds `(image_path, lavel)` pairs for train/test splits

## Prerequisites:

Place the **food-101 folder** in `DATA_DIR`, where `DATA_DIR` is the name of the directory you create to store the dataset

`DATA_DIR` has **only one** `food-101` folder, structured exactly like below:

```
DATA_DIR (tbd)/
└── food-101/
    ├── meta/
    └── images/
```

In [2]:
from pathlib import Path
import sys

#check that notebook is running from repo root or notebooks folder
REPO_ROOT = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
sys.path.insert(0, str(REPO_ROOT))

from src.data import (
    get_dataset_root,
    get_meta_dir,
    get_image_dir,
    read_classes,
    class_to_index,
    validate_dataset,
    build_dataset,
)

from src.dataset import (
    dataSet,
    train_transform,
    eval_transform,
)

In [3]:
DATA_DIR = REPO_ROOT / "data" #name of folder containing dataset
dataset_root = get_dataset_root(DATA_DIR)

print("REPO_ROOT:", REPO_ROOT)
print("DATA_DIR:", DATA_DIR)
print("dataset_root:", dataset_root)


REPO_ROOT: /home/mohannadali/PersonalProjects/Food101-Classifier
DATA_DIR: /home/mohannadali/PersonalProjects/Food101-Classifier/data
dataset_root: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101


In [4]:
validate_dataset(dataset_root)
print("Dataset structure validated successfully")

Dataset structure validated successfully


In [5]:
meta_dir = get_meta_dir(dataset_root)

classes = read_classes(meta_dir)
class_dict = class_to_index(classes)

print("Number of classes:", len(classes))
print("First 5 classes and their indexes:")
for i, (class_name, idx) in enumerate(class_dict.items()):
    if i == 5:
        break
    print(f"{class_name} -> {idx}")

Number of classes: 101
First 5 classes and their indexes:
apple_pie -> 0
baby_back_ribs -> 1
baklava -> 2
beef_carpaccio -> 3
beef_tartare -> 4


In [6]:
assert len(classes) ==101, "Food-101 should include 101 classes"
assert isinstance(class_dict, dict), "Class_dict should be dict"
assert all(isinstance(k, str) for k in class_dict.keys()), "All class names should be strings"
assert all(isinstance(v, int) for v in class_dict.values()), "All indexes should be ints"

print("Checks passed")

Checks passed


In [7]:
train_data = build_dataset(dataset_root, "train")
test_data = build_dataset(dataset_root, "test")

print("Training Samples:", len(train_data))
print("Testing Samples:", len(test_data))

Training Samples: 75750
Testing Samples: 25250


In [8]:
for img_path, label in train_data[:10]:
    assert isinstance(img_path, Path), "img_path should be of type Path"
    assert isinstance(label, int), "label should be of type int"
    assert img_path.suffix == ".jpg", "image files must be of type jpg"
    assert img_path.exists(), f"Missing image file: {img_path}"
    
print("Sample data structure checks passed")

Sample data structure checks passed


In [9]:
from random import randint
print("First 5 training samples:")
for i, (img_path, label) in enumerate(train_data[:5]):
    print(f"{i}: {img_path} -> label {label}")

First 5 training samples:
0: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1005649.jpg -> label 0
1: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1014775.jpg -> label 0
2: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1026328.jpg -> label 0
3: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1028787.jpg -> label 0
4: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1043283.jpg -> label 0


In [12]:
train_dataset = dataSet(train_data, train_transform)
test_dataset = dataSet(test_data, eval_transform)

image, label = train_dataset[0]
image2, label2 = test_dataset[0]

print(image[:5,:5], label)
print(image2[:5,:5], label2)

tensor([[[0.6734, 0.3481, 0.4679,  ..., 0.8789, 0.8104, 0.6563],
         [0.6734, 0.4679, 0.5878,  ..., 1.0159, 0.9474, 0.7077],
         [0.5364, 0.4508, 0.5878,  ..., 0.9988, 0.9303, 0.7419],
         [0.5707, 0.5022, 0.6392,  ..., 0.9646, 0.8789, 0.8276],
         [0.5193, 0.5364, 0.6049,  ..., 0.9646, 0.8447, 0.8789]],

        [[0.9055, 0.5728, 0.6954,  ..., 1.0805, 0.9930, 0.8529],
         [0.9055, 0.6954, 0.8179,  ..., 1.2031, 1.1331, 0.9055],
         [0.7654, 0.6779, 0.8179,  ..., 1.2031, 1.1331, 0.9580],
         [0.8004, 0.7304, 0.8704,  ..., 1.1681, 1.0805, 1.0280],
         [0.7479, 0.7654, 0.8354,  ..., 1.1506, 1.0455, 1.0630]],

        [[1.2282, 0.8971, 1.0017,  ..., 1.0365, 0.9668, 0.8099],
         [1.2282, 1.0191, 1.1237,  ..., 1.1759, 1.1062, 0.8448],
         [1.0888, 1.0017, 1.1237,  ..., 1.1237, 1.0539, 0.8797],
         [1.1237, 1.0539, 1.1759,  ..., 1.0888, 1.0017, 0.9494],
         [1.0714, 1.0888, 1.1585,  ..., 1.0714, 0.9668, 1.0191]]]) tensor(0)
tensor([[