# Food-101 data.py Tests

This notebook checks that `src/data.py` works correctly:
 - Locates the 2 dataset folders `meta/` and `images/`
 - Validates required files exist
 - Builds `(image_path, lavel)` pairs for train/test splits

## Prerequisites:

Place the **food-101 folder** in `DATA_DIR`, where `DATA_DIR` is the name of the directory you create to store the dataset

`DATA_DIR` has **only one** `food-101` folder, structured exactly like below:

```
DATA_DIR (tbd)/
└── food-101/
    ├── meta/
    └── images/
```

In [2]:
from pathlib import Path
import sys

#check that notebook is running from repo root or notebooks folder
REPO_ROOT = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
sys.path.insert(0, str(REPO_ROOT))

from src.data import (
    get_dataset_root,
    get_meta_dir,
    get_image_dir,
    read_classes,
    class_to_index,
    validate_dataset,
    build_dataset,
)

from src.dataset import (
    dataSet,
    train_transform,
    eval_transform,
)

In [3]:
DATA_DIR = REPO_ROOT / "data" #name of folder containing dataset
dataset_root = get_dataset_root(DATA_DIR)

print("REPO_ROOT:", REPO_ROOT)
print("DATA_DIR:", DATA_DIR)
print("dataset_root:", dataset_root)


REPO_ROOT: /home/mohannadali/PersonalProjects/Food101-Classifier
DATA_DIR: /home/mohannadali/PersonalProjects/Food101-Classifier/data
dataset_root: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101


In [4]:
validate_dataset(dataset_root)
print("Dataset structure validated successfully")

Dataset structure validated successfully


In [5]:
meta_dir = get_meta_dir(dataset_root)

classes = read_classes(meta_dir)
class_dict = class_to_index(classes)

print("Number of classes:", len(classes))
print("First 5 classes and their indexes:")
for i, (class_name, idx) in enumerate(class_dict.items()):
    if i == 5:
        break
    print(f"{class_name} -> {idx}")

Number of classes: 101
First 5 classes and their indexes:
apple_pie -> 0
baby_back_ribs -> 1
baklava -> 2
beef_carpaccio -> 3
beef_tartare -> 4


In [6]:
assert len(classes) ==101, "Food-101 should include 101 classes"
assert isinstance(class_dict, dict), "Class_dict should be dict"
assert all(isinstance(k, str) for k in class_dict.keys()), "All class names should be strings"
assert all(isinstance(v, int) for v in class_dict.values()), "All indexes should be ints"

print("Checks passed")

Checks passed


In [7]:
train_data = build_dataset(dataset_root, "train")
test_data = build_dataset(dataset_root, "test")

print("Training Samples:", len(train_data))
print("Testing Samples:", len(test_data))

Training Samples: 75750
Testing Samples: 25250


In [8]:
for img_path, label in train_data[:10]:
    assert isinstance(img_path, Path), "img_path should be of type Path"
    assert isinstance(label, int), "label should be of type int"
    assert img_path.suffix == ".jpg", "image files must be of type jpg"
    assert img_path.exists(), f"Missing image file: {img_path}"
    
print("Sample data structure checks passed")

Sample data structure checks passed


In [9]:
from random import randint
print("First 5 training samples:")
for i, (img_path, label) in enumerate(train_data[:5]):
    print(f"{i}: {img_path} -> label {label}")

First 5 training samples:
0: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1005649.jpg -> label 0
1: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1014775.jpg -> label 0
2: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1026328.jpg -> label 0
3: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1028787.jpg -> label 0
4: /home/mohannadali/PersonalProjects/Food101-Classifier/data/food-101/images/apple_pie/1043283.jpg -> label 0


In [14]:
train_dataset = dataSet(train_data, train_transform)
test_dataset = dataSet(test_data, eval_transform)

image, label = train_dataset[0]
image2, label2 = test_dataset[0]

print(len(train_data)) #Ensuring len works properly

# Print and check datatypes
print(image[:5,:5], label)
print(image2[:5,:5], label2)
print(image.shape, image.dtype)
print(label.shape, label.dtype)

75750
tensor([[[ 2.1633,  2.1290,  2.0605,  ..., -0.4739, -0.4739, -0.4739],
         [ 2.1804,  2.1462,  2.0777,  ..., -0.4739, -0.4911, -0.5082],
         [ 2.1633,  2.1462,  2.0948,  ..., -0.4739, -0.5082, -0.5253],
         [ 2.0605,  2.0948,  2.1290,  ..., -0.4568, -0.5082, -0.5424],
         [ 1.7523,  1.8550,  1.9920,  ..., -0.4397, -0.4739, -0.5082]],

        [[ 2.4286,  2.3936,  2.3235,  ..., -0.9503, -0.9503, -0.9503],
         [ 2.4286,  2.4111,  2.3410,  ..., -0.9328, -0.9678, -0.9853],
         [ 2.4111,  2.3936,  2.3585,  ..., -0.9328, -0.9853, -1.0028],
         [ 2.3235,  2.3585,  2.3936,  ..., -0.9153, -0.9853, -1.0203],
         [ 2.0084,  2.1134,  2.2535,  ..., -0.8978, -0.9503, -0.9678]],

        [[ 2.5180,  2.4831,  2.4134,  ..., -1.5604, -1.5604, -1.5604],
         [ 2.5354,  2.5006,  2.4308,  ..., -1.5779, -1.5953, -1.5953],
         [ 2.5180,  2.5006,  2.4657,  ..., -1.5953, -1.6127, -1.6127],
         [ 2.4308,  2.4657,  2.5006,  ..., -1.5779, -1.6127, -1.630