Manually upload binary_data.zip to the root of the working directory (not inside any folder).

In [None]:
import torch
import torchvision
from torchvision import datasets
import os
from pathlib import Path

In [None]:
# Root folder for every dataset artefact produced by this notebook.
data_dir = Path("data/")
# Root of the binary (food vs. non-food) dataset.
image_path = data_dir / "food_non-food"

train_path = image_path / "train"
test_path = image_path / "test"

# Remember whether the dataset root was present before anything is created.
isExist = image_path.exists()

if isExist:
  print("Directory already exists")
else:
  print("Creating directory")

# Create the main train and test directories. `exist_ok=True` keeps the cell
# safe to re-run and also repairs a tree that was only partially created.
for split_path in (test_path, train_path):
  split_path.mkdir(parents=True, exist_ok=True)


categories = ["food", "non-food"]

food_categories = ["pizza", "hamburger", "lasagna", "sushi", "steak"]


for category in categories:
  train_category_path = train_path / category
  test_category_path = test_path / category

  # Create the folder if it doesn't exist already
  train_category_path.mkdir(parents=True, exist_ok=True)
  test_category_path.mkdir(parents=True, exist_ok=True)

  if category == "food":
    # The "food" class additionally gets one sub-folder per food category.
    for food_category in food_categories:
      train_food_category_path = train_path / "food" / food_category
      test_food_category_path = test_path / "food" / food_category

      train_food_category_path.mkdir(parents=True, exist_ok=True)
      test_food_category_path.mkdir(parents=True, exist_ok=True)



Creating directory


In [None]:
# Sanity check: display the last category path assigned by the directory-creation loop.
train_category_path

PosixPath('data/food_non-food/train/non-food')

In [None]:
# Fetch both Food-101 splits into `data_dir`; `download=True` asks torchvision
# to download the dataset into that root.
train_data = datasets.Food101(root=data_dir, split="train", download=True)
test_data = datasets.Food101(root=data_dir, split="test", download=True)

Downloading https://data.vision.ee.ethz.ch/cvl/food-101.tar.gz to data/food-101.tar.gz


100%|██████████| 5.00G/5.00G [02:34<00:00, 32.3MB/s]


Extracting data/food-101.tar.gz to data


In [None]:
import random
from typing import List

# Folder holding the extracted Food-101 images.
food101_path = Path(data_dir, "food-101", "images")

def get_images(data_splits=["train", "test"],
               target_classes=["pizza", "hamburger", "lasagna", "sushi", "steak"],
               amount=0.3):
  """Pick a reproducible random subset of Food-101 image paths per split.

  For every split, the file `<data_dir>/food-101/meta/<split>.txt` is read
  (one `<class_name>/<image_id>` entry per line), entries whose class is in
  `target_classes` are turned into image paths under
  `<data_dir>/food-101/images/`, and a fraction `amount` of them is sampled.

  Args:
    data_splits: Iterable of split names; each must have a matching meta file.
    target_classes: Class names to keep.
    amount: Fraction (0.0 to 1.0) of the matching images to keep per split.

  Returns:
    Dict mapping split name -> list of `Path` objects. The "train" and "test"
    keys are always present (empty when that split was not requested).

  Raises:
    ValueError: If `amount` is outside the range [0, 1].
    FileNotFoundError: If a split's meta file does not exist.

  Note:
    Calls `random.seed(42)`, i.e. it reseeds the module-level RNG so that the
    same images are selected on every run.
  """
  if not 0 <= amount <= 1:
    raise ValueError(f"amount must be between 0 and 1, got {amount}")

  random.seed(42)

  food101_dir = data_dir / "food-101"
  images_dir = food101_dir / "images"
  # The default arguments are shared list objects; they are only read here.
  wanted_classes = set(target_classes)

  Id_dict = {"train": [],
             "test": []}

  for data_split in data_splits:
    print(f"Creating image split for {data_split}")
    path = food101_dir / "meta" / (data_split + ".txt")

    # Collect every image path of this split that belongs to a wanted class.
    candidates = []
    with open(path, "r") as f:
      for line in f:
        line = line.strip()
        if not line:
          continue

        class_name = line.split("/")[0]

        if class_name in wanted_classes:
          candidates.append(images_dir / f"{line}.jpg")

    # Now randomly pick the requested fraction of those image paths to keep.
    num_to_keep = round(len(candidates) * amount)
    Id_dict[data_split] = random.sample(candidates, k=num_to_keep)

  return Id_dict





In [None]:
# Build the sampled train/test image-path lists using get_images' defaults
# (the five food classes, keeping amount=0.3 of the matching images per split).
Id_dict = get_images()

Creating image split for train
Creating image split for test


In [None]:
import os
from pathlib import Path
import shutil

# Root of the binary dataset that the sampled Food-101 images are copied into.
target_base_dir = Path("data") / "food_non-food"

# Destination "food" folder of each split.
train_target, test_target = (
    target_base_dir / split_name / "food" for split_name in ("train", "test")
)

def copy_images(data_split=["train", "test"],
                target_path=None):
  """Copy the sampled images of one or more splits into per-category folders.

  Reads the image paths from the module-level `Id_dict` and copies each file
  to `<target_path>/<category>/`, where the category is the name of the
  image's parent folder.

  Args:
    data_split: A single split name (e.g. "train") or an iterable of split
      names; every name must be a key of `Id_dict`.
    target_path: Destination root (str or Path). Required.

  Raises:
    TypeError: If `target_path` is not given.
    KeyError: If a split name is not present in `Id_dict`.
  """
  if target_path is None:
    raise TypeError("copy_images() requires a target_path")
  target_path = Path(target_path)

  # A bare string is one split; the default list used to be passed straight
  # to `Id_dict[...]`, which fails because lists are unhashable.
  split_names = [data_split] if isinstance(data_split, str) else list(data_split)

  for split_name in split_names:
    for file_path in Id_dict[split_name]:
      file_path = Path(file_path)
      food_category = file_path.parts[-2]

      food_category_dir = target_path / food_category
      # Without an existing folder shutil.copy would create a *file* named
      # after the category, so make sure the directory is there first.
      food_category_dir.mkdir(parents=True, exist_ok=True)

      shutil.copy(file_path, food_category_dir)


# Copy each split's sampled images into its "food" destination folder.
for split_name, destination in (("train", train_target), ("test", test_target)):
  copy_images(split_name, destination)
print("Successfully copied images")

Successfully copied images


In [None]:
# Transport all the images from their food-category folders into the general
# 'food' folder of each split, then drop the emptied category folders so the
# final dataset only contains `food/` and `non-food/` images.

food_categories = ['hamburger', 'lasagna', 'pizza', 'sushi', 'steak']
splits = ["train", "test"]

foodnonfood_dir = Path("data/food_non-food")

for split in splits:
  split_dir = foodnonfood_dir / split
  food_dir = split_dir / "food"

  for category in food_categories:
    food_category_dir = food_dir / category

    # The folder is gone once it has been flattened; skipping it keeps the
    # cell re-runnable.
    if not food_category_dir.is_dir():
      continue

    for image_id in os.listdir(food_category_dir):
      shutil.move(os.path.join(food_category_dir, image_id), food_dir)

    # Everything was moved out, so the category folder is empty: remove it.
    if not os.listdir(food_category_dir):
      food_category_dir.rmdir()











Manually upload binary_data.zip


In [None]:
import zipfile

# Unpack the uploaded archive into the current working directory.
with zipfile.ZipFile('binary_data.zip', mode='r') as archive:
    archive.extractall()

In [None]:
import os
import random
import shutil

# Define the source and destination folders
source_folder = "binary_data/nonfood"
train_nonfood = "data/food_non-food/train/non-food"
test_nonfood = "data/food_non-food/test/non-food"

os.makedirs(train_nonfood, exist_ok=True)
os.makedirs(test_nonfood, exist_ok=True)

# Get all the image files/ids (for ex: '1234.jpg'). Only regular files are
# kept, and the listing is sorted because os.listdir order is arbitrary.
image_files = sorted(
    f for f in os.listdir(source_folder)
    if os.path.isfile(os.path.join(source_folder, f))
)

# Shuffle with a dedicated, seeded generator so the split is reproducible and
# does not depend on (or disturb) the state of the global `random` module.
random.Random(42).shuffle(image_files)


# Doing a 75-25 split, computed from the actual number of files
# (for 1500 images: 1125 train / 375 test).
train_fraction = 0.75
num_train = round(len(image_files) * train_fraction)

train_split_images = image_files[:num_train] # The first 75% of the shuffled images go to train
test_split_images = image_files[num_train:] # All the images that come after go to test

for image_id in train_split_images:
  shutil.move(os.path.join(source_folder, image_id), train_nonfood)


for image_id in test_split_images:
  shutil.move(os.path.join(source_folder, image_id), test_nonfood)


In [None]:
def walk_through_dir(dir_path):
    """
    Print a summary line for dir_path and every directory below it.

    Args:
    dir_path (str): directory to start walking from

    Returns:
    None. For each directory visited, one line is printed with:
      the number of sub-directories directly inside it
      the number of files directly inside it (reported as images)
      the path of that directory
    """
    for entry in os.walk(dir_path):
        # os.walk yields (current directory, sub-directory names, file names).
        dirpath, dirnames, filenames = entry
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

In [None]:
# Print the directory and file counts for the assembled food / non-food dataset tree.
walk_through_dir("data/food_non-food")

There are 2 directories and 0 images in 'data/food_non-food'.
There are 2 directories and 0 images in 'data/food_non-food/train'.
There are 1 directories and 1125 images in 'data/food_non-food/train/food'.
There are 0 directories and 0 images in 'data/food_non-food/train/food/.ipynb_checkpoints'.
There are 0 directories and 1125 images in 'data/food_non-food/train/non-food'.
There are 2 directories and 0 images in 'data/food_non-food/test'.
There are 1 directories and 375 images in 'data/food_non-food/test/food'.
There are 0 directories and 0 images in 'data/food_non-food/test/food/.ipynb_checkpoints'.
There are 0 directories and 375 images in 'data/food_non-food/test/non-food'.


In [None]:
# Delete the extracted Food-101 folder now that the sampled images have been copied out of it.
# NOTE(review): the download log shows the tarball was saved as data/food-101.tar.gz; this
# command does not remove it — confirm whether it should be deleted before `data` is archived.
!rm -rf data/food-101/

In [None]:
# Turn the assembled dataset into a zip file.
#
# Only `data/food_non-food` is archived. Zipping all of `data` would also pack
# anything else stored there, such as the multi-gigabyte `food-101.tar.gz`
# download. Paths inside the zip still start with `food_non-food/`, exactly as
# they did when the whole `data` folder was archived.

import shutil
shutil.make_archive("binary_dataset", 'zip', root_dir="data", base_dir="food_non-food")

'/content/binary_dataset.zip'