In [None]:
import torch
import torchvision
from torchvision import datasets
import os
from pathlib import Path

In [None]:
# The 80 food categories kept for the reduced dataset, one group per initial
# letter. Splitting on whitespace yields the names as a list, in this order.
target_classes = """
    apple_pie
    baby_back_ribs beet_salad beignets bibimbap breakfast_burrito bruschetta
    caesar_salad cannoli caprese_salad carrot_cake ceviche cheesecake
    chicken_curry chicken_quesadilla chicken_wings chocolate_cake
    chocolate_mousse churros clam_chowder club_sandwich crab_cakes
    creme_brulee croque_madame cup_cakes
    dumplings
    edamame eggs_benedict
    falafel filet_mignon fish_and_chips french_fries french_onion_soup
    french_toast fried_calamari frozen_yogurt
    garlic_bread greek_salad grilled_cheese_sandwich grilled_salmon guacamole
    hamburger huevos_rancheros hummus
    ice_cream
    lasagna lobster_bisque
    macaroni_and_cheese miso_soup mussels
    omelette onion_rings
    pad_thai paella pancakes panna_cotta peking_duck pho pizza pork_chop
    poutine prime_rib
    ramen ravioli risotto
    samosa scallops seaweed_salad shrimp_and_grits spaghetti_bolognese
    spaghetti_carbonara spring_rolls steak strawberry_shortcake sushi
    tacos takoyaki tiramisu tuna_tartare
    waffles
""".split()

In [None]:
# Folder layout for the reduced dataset:
#   data/food80_classes/{train,test}/<category>/
data_dir = Path("data/")
image_path = data_dir / "food80_classes"
train_path = image_path / "train"
test_path = image_path / "test"

# Only used for the status message; the mkdir calls below are safe either way.
isExist = image_path.exists()

if isExist:
  print("Directory already exists")
else:
  print("Creating directory")

food_categories = target_classes

# Create the train/test roots and one sub-folder per food category in each.
# parents=True creates any missing ancestor folders, and exist_ok=True makes
# the cell safe to re-run on a partially or fully created tree.
for split_path in (train_path, test_path):
  split_path.mkdir(parents=True, exist_ok=True)
  for category in food_categories:
    (split_path / category).mkdir(parents=True, exist_ok=True)

Creating directory


In [None]:
# Build the Food-101 "train" and "test" datasets (in that order) rooted at
# data_dir; download=True asks torchvision to fetch the archive if needed.
train_data, test_data = (
    datasets.Food101(root=data_dir, split=split_name, download=True)
    for split_name in ("train", "test")
)

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to data/food-101.tar.gz


100%|██████████| 5.00G/5.00G [00:24<00:00, 206MB/s]


Extracting data/food-101.tar.gz to data


In [None]:
# Sanity check: display how many class names are in target_classes.
len(target_classes)

80

In [None]:
import random
from typing import List

# Root folder of the extracted Food-101 images: <data_dir>/food-101/images
food101_path = data_dir / "food-101" / "images"

def get_images(data_splits=("train", "test"),
               target_classes=target_classes,
               amount=0.7,
               seed=42):
  """Pick a random subset of Food-101 image paths for the chosen classes.

  For every split, the meta file `<data_dir>/food-101/meta/<split>.txt` is read.
  Each non-empty line is treated as `<class_name>/<image_id>`; lines whose class
  name is in `target_classes` become `<food101_path>/<line>.jpg`. A fraction
  `amount` of those paths is then sampled without replacement.

  Args:
    data_splits: Iterable of split names to process (meta file stems).
    target_classes: Collection of class names to keep.
    amount: Fraction of the matching images to keep, between 0 and 1.
    seed: Value passed to `random.seed` before sampling, so repeated calls
      return the same selection. The module-level RNG is reseeded, exactly as
      the hard-coded seed did before.

  Returns:
    dict mapping split name to a list of `Path` objects. The keys "train" and
    "test" are always present (empty list when the split was not requested).

  Raises:
    ValueError: If `amount` is outside the range [0, 1].
    FileNotFoundError: If a split's meta file does not exist.
  """
  if not 0 <= amount <= 1:
    raise ValueError(f"amount must be between 0 and 1, got {amount}")

  random.seed(seed)

  # Set membership keeps the per-line class check O(1) instead of scanning a list.
  wanted_classes = set(target_classes)
  meta_dir = data_dir / "food-101" / "meta"

  Id_dict = {"train": [],
             "test": []}

  for data_split in data_splits:
    print(f"Creating image split for {data_split}")

    with open(meta_dir / f"{data_split}.txt", "r", encoding="utf-8") as f:
      # Collect every image path of the split that belongs to a wanted class.
      candidates = [food101_path / f"{entry}.jpg"
                    for entry in (line.strip() for line in f)
                    if entry.split("/")[0] in wanted_classes]

    # Keep only the requested fraction of the matching image paths.
    num_to_keep = round(len(candidates) * amount)
    Id_dict[data_split] = random.sample(candidates, k=num_to_keep)

  return Id_dict





In [None]:
# Build the per-split lists of sampled image paths using get_images' default arguments.
Id_dict = get_images()

Creating image split for train
Creating image split for test


In [None]:
# Show a compact summary instead of dumping every path: the number of selected
# images per split plus the first three paths as a spot check.
{split: {"num_images": len(paths), "first_paths": paths[:3]}
 for split, paths in Id_dict.items()}

{'train': [PosixPath('data/food-101/images/panna_cotta/477914.jpg'),
  PosixPath('data/food-101/images/caprese_salad/372704.jpg'),
  PosixPath('data/food-101/images/beet_salad/1590794.jpg'),
  PosixPath('data/food-101/images/risotto/3852551.jpg'),
  PosixPath('data/food-101/images/cup_cakes/1138268.jpg'),
  PosixPath('data/food-101/images/crab_cakes/2291775.jpg'),
  PosixPath('data/food-101/images/clam_chowder/2836930.jpg'),
  PosixPath('data/food-101/images/cheesecake/174793.jpg'),
  PosixPath('data/food-101/images/risotto/2292969.jpg'),
  PosixPath('data/food-101/images/cannoli/871667.jpg'),
  PosixPath('data/food-101/images/pork_chop/1419390.jpg'),
  PosixPath('data/food-101/images/risotto/3554045.jpg'),
  PosixPath('data/food-101/images/tiramisu/845342.jpg'),
  PosixPath('data/food-101/images/macaroni_and_cheese/3321771.jpg'),
  PosixPath('data/food-101/images/caesar_salad/304497.jpg'),
  PosixPath('data/food-101/images/onion_rings/307615.jpg'),
  PosixPath('data/food-101/images/ga

In [None]:
import os
from pathlib import Path
import shutil

# Destination root for the copied images, with one sub-folder per split.
target_base_dir = Path("data/food80_classes")
train_target, test_target = (
    target_base_dir.joinpath(split_name) for split_name in ("train", "test")
)

def copy_images(data_split=("train", "test"),
                target_path=None,
                id_dict=None):
  """Copy the selected images of one or more splits into per-category folders.

  Each source file is copied to `<destination>/<category>/`, where the category
  is the name of the folder that directly contains the source file.

  Args:
    data_split: A split name (e.g. "train") or an iterable of split names.
    target_path: Destination folder for the split. When None, the destination
      is `target_base_dir / <split>`. Must be None when several splits are
      given, otherwise their images would be mixed in one folder.
    id_dict: Mapping of split name to an iterable of image paths. When None,
      the notebook-level `Id_dict` is used.

  Returns:
    The number of files copied.

  Raises:
    ValueError: If `target_path` is given together with more than one split.
    KeyError: If a split name is missing from the mapping.
  """
  if id_dict is None:
    id_dict = Id_dict

  # Accept a single split name as well as a collection of names; a list cannot
  # be used as a dictionary key directly.
  splits = [data_split] if isinstance(data_split, str) else list(data_split)

  if target_path is not None and len(splits) > 1:
    raise ValueError("target_path can only be given when copying a single split")

  copied = 0
  for split in splits:
    split_root = Path(target_path) if target_path is not None else target_base_dir / split

    for file_path in id_dict[split]:
      file_path = Path(file_path)
      food_category_dir = split_root / file_path.parent.name

      # Without an existing directory shutil.copy would treat the destination
      # as a file name (or fail), so make sure the category folder is there.
      food_category_dir.mkdir(parents=True, exist_ok=True)

      shutil.copy(file_path, food_category_dir)
      copied += 1

  return copied



# Copy the train split first, then the test split, each into its own folder.
for split_name, destination in (("train", train_target), ("test", test_target)):
  copy_images(split_name, destination)

print("Successfully copied images to food80_classes")

Successfully copied images to food80_classes


In [None]:
# Renaming directory
#
# The rename is guarded so the cell can be re-run: once the folder has been
# renamed, the source no longer exists and a plain os.rename would raise.

import os

classes_dir = "data/food80_classes"
dataset_dir = "data/food80_dataset"

if os.path.isdir(classes_dir) and not os.path.exists(dataset_dir):
  os.rename(classes_dir, dataset_dir)
elif os.path.isdir(dataset_dir):
  # The destination is already there (e.g. from an earlier run); leave it as is.
  print(f"'{dataset_dir}' already exists; leaving it in place and skipping the rename")
else:
  raise FileNotFoundError(f"'{classes_dir}' does not exist; nothing to rename")

In [None]:
# Folder whose contents are packed into the archive
dir_to_zip = Path("data/food80_dataset")

# Archive base name; make_archive appends the ".zip" extension itself
output_filename = "food80_multiclass_dataset"

# Write <output_filename>.zip in the current working directory
shutil.make_archive(base_name=output_filename, format='zip', root_dir=dir_to_zip)

print(f"'{output_filename}.zip' has been successfully created.")

'food80_dataset.zip' has been successfully created.
