# Customize Food101 Data set to be 3 classses 

In [1]:
# Chapter 4 creating our Custom dataset 

import torch, torchvision

assert int(torch.__version__.split('.')[0]) >= 1
assert int(torchvision.__version__.split('.')[0]) >= 0


import torchvision.datasets as datasets
import torchvision.transforms as transforms


import pathlib

data_dir = pathlib.Path("./Data")

In [4]:
# setting training and testing roots 
train_root = './Data/train_data'
test_root = './Data/test_data'

# Making training and testing pathes
train_dir = data_dir / 'train_data'
test_dir =  data_dir / 'test_data'

# Creating training and testing directories
data_dir.mkdir(parents=True,
               exist_ok=True)

train_dir.mkdir(parents=True,
                exist_ok = True)

test_dir.mkdir(parents=True,
               exist_ok=True)

# Creating training and testing datasets using torchvision.datasets "we are going to download Food101 dataset"
train_dataset = datasets.Food101(root = train_root,
                                 split = 'train',
                                 transform = transforms.ToTensor(),
                                 target_transform = None,
                                 download = True)

test_dataset = datasets.Food101(root = test_root,
                                split = 'test',
                                transform = transforms.ToTensor(),
                                target_transform = None,
                                download = True)


Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to ./Data/train_data\food-101.tar.gz


100%|██████████| 4996278331/4996278331 [43:52<00:00, 1897806.14it/s]  


Extracting ./Data/train_data\food-101.tar.gz to ./Data/train_data
Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to ./Data/test_data\food-101.tar.gz


100%|██████████| 4996278331/4996278331 [1:00:54<00:00, 1367340.47it/s]


Extracting ./Data/test_data\food-101.tar.gz to ./Data/test_data


In [94]:
# Getting 10% of the data for just 3 classes 

# Setup data paths
import random

target_classes = ['pizza', 'steak', 'sushi']

# creating amout to get variable for the percentage you want to get from the original data (e.g 0.1 means 10% of the original data)
amount_to_get = 0.2


# Creating a function to seperate a random amout of data 
def get_subset(data_splits = ['train', 'test'],
               target_classes = ['pizza', 'sushi', 'steak'],
               amount = 0.1,
               seed = 42
               ):
    random.seed(seed)
    label_splits = {}

    # Get labels
    for data_split in data_splits :
        image_path = data_dir / f"{data_split}_data"/ "food-101" / "images"
        print(f"[INFO] Creating image split for {data_split}...")
        label_path = data_dir / f"{data_split}_data"/ "food-101" / "meta" / f"{data_split}.txt"
        with open(label_path, 'r') as f:
            labels = [line.strip("\n") for line in f.readlines() if line.split('/')[0] in target_classes]

        # Get Random subset of target classes images ID's
        number_to_sample = round(amount * len(labels))
        print(f"[INFO] Getting random subset of {number_to_sample} images for {data_split}...")
        sampled_images = random.sample(labels, k = number_to_sample)

        # Apply full paths
        image_paths = [pathlib.Path(str(image_path / sampled_image) + '.jpg') for sampled_image in sampled_images]
        label_splits[data_split] = image_paths
    return label_splits


label_splits = get_subset(amount = amount_to_get)

[INFO] Creating image split for train...
[INFO] Getting radom subset of 450 images for train...
[INFO] Creating image split for test...
[INFO] Getting radom subset of 150 images for test...


In [95]:
label_splits['test']

[WindowsPath('Data/test_data/food-101/images/steak/367422.jpg'),
 WindowsPath('Data/test_data/food-101/images/sushi/46797.jpg'),
 WindowsPath('Data/test_data/food-101/images/steak/1082384.jpg'),
 WindowsPath('Data/test_data/food-101/images/steak/1335842.jpg'),
 WindowsPath('Data/test_data/food-101/images/pizza/3497151.jpg'),
 WindowsPath('Data/test_data/food-101/images/sushi/715227.jpg'),
 WindowsPath('Data/test_data/food-101/images/pizza/2572488.jpg'),
 WindowsPath('Data/test_data/food-101/images/steak/3100563.jpg'),
 WindowsPath('Data/test_data/food-101/images/pizza/148765.jpg'),
 WindowsPath('Data/test_data/food-101/images/steak/831681.jpg'),
 WindowsPath('Data/test_data/food-101/images/pizza/771336.jpg'),
 WindowsPath('Data/test_data/food-101/images/pizza/482858.jpg'),
 WindowsPath('Data/test_data/food-101/images/steak/697562.jpg'),
 WindowsPath('Data/test_data/food-101/images/steak/2752603.jpg'),
 WindowsPath('Data/test_data/food-101/images/steak/1982192.jpg'),
 WindowsPath('Data/

In [96]:
# Creating target directory path
target_dir_name = f"../data/pizza_steak_sushi_{str(int(amount_to_get * 100))}_perecent"
print(f"Creating directory: '{target_dir_name}'")

# Setup the directories
target_dir = pathlib.Path(target_dir_name)

# Make the Directories
target_dir.mkdir(parents = True, exist_ok = True)



Creating directory: '../data/pizza_steak_sushi_20_perecent'


In [98]:
import shutil

for image_split in label_splits.keys():
    for image_path in label_splits[str(image_split)]:
        print(image_path)
        dest_dir = target_dir / image_split / image_path.parent.stem / image_path.name
        if not dest_dir.parent.is_dir():
            dest_dir.parent.mkdir(parents = True, exist_ok = True)
        print(f"[INFO] Copying {image_path} to {dest_dir}...")
        shutil.copy2(image_path, dest_dir)

Data\train_data\food-101\images\pizza\3269634.jpg
[INFO] Copying Data\train_data\food-101\images\pizza\3269634.jpg to ..\data\pizza_steak_sushi_20_perecent\train\pizza\3269634.jpg...
Data\train_data\food-101\images\pizza\1524655.jpg
[INFO] Copying Data\train_data\food-101\images\pizza\1524655.jpg to ..\data\pizza_steak_sushi_20_perecent\train\pizza\1524655.jpg...
Data\train_data\food-101\images\steak\2825100.jpg
[INFO] Copying Data\train_data\food-101\images\steak\2825100.jpg to ..\data\pizza_steak_sushi_20_perecent\train\steak\2825100.jpg...
Data\train_data\food-101\images\steak\225990.jpg
[INFO] Copying Data\train_data\food-101\images\steak\225990.jpg to ..\data\pizza_steak_sushi_20_perecent\train\steak\225990.jpg...
Data\train_data\food-101\images\steak\1839481.jpg
[INFO] Copying Data\train_data\food-101\images\steak\1839481.jpg to ..\data\pizza_steak_sushi_20_perecent\train\steak\1839481.jpg...
Data\train_data\food-101\images\pizza\38349.jpg
[INFO] Copying Data\train_data\food-101\