In [None]:
import torch
import torchvision
from torchvision import datasets
import os
from pathlib import Path

In [None]:
# Where the raw download lives and where the five-class subset will be built.
data_dir = Path("data/")
image_path = data_dir / "pizza_hamburger_lasagna_sushi_steak"
train_path = image_path / "train"
test_path = image_path / "test"

food_categories = ["pizza", "hamburger", "lasagna", "sushi", "steak"]

# The check is only used for the status message: every mkdir below is
# idempotent, so the cell can be re-run safely whether or not the folders exist.
isExist = image_path.exists()
print("Directory already exists" if isExist else "Creating directory")

# Create <split>/<category> folders for both the train and the test split.
for split_path in (train_path, test_path):
  split_path.mkdir(parents=True, exist_ok=True)
  for category in food_categories:
    (split_path / category).mkdir(exist_ok=True)

Creating directory


In [None]:
# Build the Food101 dataset objects for both splits, rooted at data_dir.
# download=True is passed for each split, exactly as before; the train split
# is constructed first, then the test split.
train_data, test_data = (
    datasets.Food101(root=data_dir, split=split_name, download=True)
    for split_name in ("train", "test")
)

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to data/food-101.tar.gz


100%|██████████| 5.00G/5.00G [02:51<00:00, 29.1MB/s]


Extracting data/food-101.tar.gz to data


In [None]:
import random
from typing import List

# Folder under data_dir that holds the Food-101 image files.
food101_path = data_dir.joinpath("food-101", "images")

def get_images(data_splits=("train", "test"),
               target_classes=("pizza", "hamburger", "lasagna", "sushi", "steak"),
               amount=0.4,
               seed=42,
               root=None):
  """Pick a reproducible random subset of Food-101 image paths per split.

  For each split the file ``<root>/food-101/meta/<split>.txt`` is read. Every
  non-empty line has the form ``<class>/<image_id>``. Lines whose class is in
  ``target_classes`` become candidate paths
  ``<root>/food-101/images/<class>/<image_id>.jpg`` and a fraction ``amount``
  of those candidates is sampled at random.

  Args:
    data_splits: Split names to process, in order.
    target_classes: Class (folder) names to keep.
    amount: Fraction of the matching images to keep per split, in [0, 1].
    seed: Seed of the private random generator used for sampling. A private
      generator is used so the global ``random`` state is left untouched; the
      selection is the same one ``random.seed(seed)`` followed by
      ``random.sample`` would produce.
    root: Directory that contains the ``food-101`` folder. Defaults to the
      notebook-level ``data_dir``.

  Returns:
    A dict mapping each split name to the list of sampled ``Path`` objects.
    The keys "train" and "test" are always present; a split that was not
    requested maps to an empty list.

  Raises:
    ValueError: If ``amount`` is not within [0, 1].
    FileNotFoundError: If a split's meta file does not exist.
  """
  if not 0 <= amount <= 1:
    raise ValueError(f"amount must be between 0 and 1, got {amount!r}")

  root = data_dir if root is None else Path(root)
  food101_dir = root / "food-101"
  wanted_classes = set(target_classes)
  rng = random.Random(seed)

  Id_dict = {"train": [],
             "test": []}

  for data_split in data_splits:
    print(f"Creating image split for {data_split}")
    meta_file = food101_dir / "meta" / f"{data_split}.txt"

    # Collect every image of the wanted classes listed for this split.
    candidates = []
    with open(meta_file, "r", encoding="utf-8") as f:
      for line in f:
        line = line.strip()
        class_name = line.split("/")[0]
        if class_name in wanted_classes:
          candidates.append(food101_dir / "images" / f"{line}.jpg")

    # Keep only the requested fraction of the candidates.
    num_to_keep = round(len(candidates) * amount)
    Id_dict[data_split] = rng.sample(candidates, k=num_to_keep)

  return Id_dict





In [None]:
# Build the per-split selection of image paths using get_images' default arguments.
Id_dict = get_images()

Creating image split for train
Creating image split for test


In [None]:
# Display the selected image paths for each split.
Id_dict

{'train': [PosixPath('data/food-101/images/steak/2796102.jpg'),
  PosixPath('data/food-101/images/hamburger/3289634.jpg'),
  PosixPath('data/food-101/images/hamburger/1492254.jpg'),
  PosixPath('data/food-101/images/sushi/1165660.jpg'),
  PosixPath('data/food-101/images/lasagna/2855003.jpg'),
  PosixPath('data/food-101/images/lasagna/2242849.jpg'),
  PosixPath('data/food-101/images/lasagna/1801532.jpg'),
  PosixPath('data/food-101/images/hamburger/3878886.jpg'),
  PosixPath('data/food-101/images/sushi/1070104.jpg'),
  PosixPath('data/food-101/images/hamburger/3101158.jpg'),
  PosixPath('data/food-101/images/steak/3470083.jpg'),
  PosixPath('data/food-101/images/sushi/1143725.jpg'),
  PosixPath('data/food-101/images/sushi/665556.jpg'),
  PosixPath('data/food-101/images/pizza/937915.jpg'),
  PosixPath('data/food-101/images/hamburger/2726558.jpg'),
  PosixPath('data/food-101/images/steak/1846706.jpg'),
  PosixPath('data/food-101/images/pizza/2148129.jpg'),
  PosixPath('data/food-101/image

In [None]:
import os
from pathlib import Path
import shutil

# Root of the five-class subset and its per-split destination folders.
target_base_dir = Path("data/pizza_hamburger_lasagna_sushi_steak")
train_target, test_target = (
    target_base_dir / split_name for split_name in ("train", "test")
)

def copy_images(data_split=("train", "test"),
                target_path=None,
                image_paths=None):
  """Copy the selected images of one or more splits into per-class folders.

  Each image is copied to ``<destination>/<class>/<file name>``, where
  ``<class>`` is the name of the folder the source image sits in. Class
  folders are created when missing; without that, ``shutil.copy`` would write
  a regular file named after the class instead of copying into a folder.

  Args:
    data_split: A split name (e.g. ``"train"``) or an iterable of split names.
    target_path: Destination folder for the split. May only be given when a
      single split is requested. Defaults to ``target_base_dir / <split>``.
    image_paths: Mapping from split name to an iterable of image paths.
      Defaults to the notebook-level ``Id_dict``.

  Raises:
    ValueError: If ``target_path`` is given together with several splits.
    KeyError: If a requested split is missing from the mapping.
  """
  splits = [data_split] if isinstance(data_split, str) else list(data_split)
  if target_path is not None and len(splits) > 1:
    raise ValueError("target_path can only be combined with a single data split")

  paths_by_split = Id_dict if image_paths is None else image_paths

  for split in splits:
    split_dir = target_base_dir / split if target_path is None else Path(target_path)
    prepared_dirs = set()  # class folders already created during this call

    for file_path in paths_by_split[split]:
      file_path = Path(file_path)
      food_category = file_path.parts[-2]
      food_category_dir = split_dir / food_category

      if food_category_dir not in prepared_dirs:
        food_category_dir.mkdir(parents=True, exist_ok=True)
        prepared_dirs.add(food_category_dir)

      shutil.copy(file_path, food_category_dir)



# Copy each split's selected images into its own destination folder.
for split_name, destination in (("train", train_target), ("test", test_target)):
  copy_images(split_name, destination)

print("Successfully copied images to pizza_hamburger_lasagna_sushi_steak")

Successfully copied images to pizza_hamburger_lasagna_sushi_steak


In [None]:
# Base name of the archive; make_archive appends the ".zip" extension itself.
output_filename = "pizza_hamburger_lasagna_sushi_steak"

# Folder whose contents are archived.
dir_to_zip = Path("data/pizza_hamburger_lasagna_sushi_steak")

# Paths stored in the archive are relative to dir_to_zip.
shutil.make_archive(base_name=output_filename, format="zip", root_dir=dir_to_zip)

print(f"'{output_filename}.zip' has been successfully created.")

'pizza_hamburger_lasagna_sushi_steak.zip' has been successfully created.
