In [None]:
import os
from PIL import Image, ImageOps

from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [13]:
# setup folders etc
# Create one output folder per split/category pair for the resized images.
# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folders exist from an earlier run.
# os.path.join produces the same backslash paths on Windows and valid nested
# paths on other platforms.
for split in ['training', 'evaluation', 'validation']:
    for category in ['Bread', 'Dairy product', 'Dessert', 'Egg', 'Fried food', 'Meat', 'Noodles-Pasta', 'Rice', 'Seafood', 'Soup', 'Vegetable-Fruit']:
        os.makedirs(os.path.join("data", "reshaped", split, category), exist_ok=True)

In [None]:
# get list of corrupted files
#
# Walk every split/category folder under data/unprocessed and collect the path
# of each file that Pillow fails to open or verify. Nothing is deleted here:
# this cell only fills `bad_files`, which the deletion cell consumes.

bad_files = []
for split in ['training', 'evaluation', 'validation']:
    path = os.path.join("data", "unprocessed", split)
    categories = os.listdir(path)
    print(f"checking {split}")
    for category in categories:
        category_dir = os.path.join(path, category)
        for i in os.listdir(category_dir):
            image_path = os.path.join(category_dir, i)
            try:
                # The with-block closes the file handle deterministically, so
                # no handle is left open on a file that is removed later.
                with Image.open(image_path) as img:
                    img.verify()
            except Exception:
                # Any failure to open/verify marks the file as corrupted.
                bad_files.append(image_path)
# The files are only listed at this point; removal happens in a separate step.
print(f"The following files are corrupted, couldn't be opened and will be deleted: {bad_files}")

checking training
checking evaluation
checking validation
The following files had been corrupted, couldn't be opened and have now been deleted: []


In [None]:
# delete corrupted files

# Remove from disk every path collected in `bad_files`.
for corrupted_path in bad_files:
    os.remove(corrupted_path)

In [14]:
# resize with padding to square

def resize(input, output, size=(224, 224), color=(0, 0, 0)):
    """Resize one image to `size` with padding and save it.

    The image is converted to RGB, then passed to ImageOps.pad, which returns
    a resized and padded version of the image at the requested size, using
    `color` for the padding.

    Args:
        input: Path of the source image file.
        output: Path the result is saved to (passed to Image.save).
        size: Target (width, height) in pixels. Defaults to (224, 224).
        color: Padding colour as an RGB tuple. Defaults to black.
    """
    # Image.open keeps the underlying file open; the with-block closes it once
    # convert() has produced an independent in-memory copy.
    with Image.open(input) as source:
        img = source.convert("RGB")
    img = ImageOps.pad(img, size=size, color=color)
    img.save(output)

# Resize every image under data/unprocessed/<split>/<category> and write the
# result, under the same file name, to data/reshaped/<split>/<category>.
print("starting reshaping")
for split in ['training', 'evaluation', 'validation']:
    path = os.path.join("data", "unprocessed", split)
    categories = os.listdir(path)
    print(f"re-shaping {split}")
    for category in categories:
        source_dir = os.path.join(path, category)
        target_dir = os.path.join("data", "reshaped", split, category)
        # Categories are discovered from disk here, so make sure the matching
        # output folder exists even if it was not pre-created elsewhere.
        os.makedirs(target_dir, exist_ok=True)
        print(f"re-shaping {split}\\{category}")
        for i in os.listdir(source_dir):
            resize(
                input=os.path.join(source_dir, i),
                output=os.path.join(target_dir, i),
            )
            

starting reshaping
re-shaping training
re-shaping training\Bread
re-shaping training\Dairy product
re-shaping training\Dessert
re-shaping training\Egg
re-shaping training\Fried food
re-shaping training\Meat
re-shaping training\Noodles-Pasta
re-shaping training\Rice
re-shaping training\Seafood
re-shaping training\Soup
re-shaping training\Vegetable-Fruit
re-shaping evaluation
re-shaping evaluation\Bread
re-shaping evaluation\Dairy product
re-shaping evaluation\Dessert
re-shaping evaluation\Egg
re-shaping evaluation\Fried food
re-shaping evaluation\Meat
re-shaping evaluation\Noodles-Pasta
re-shaping evaluation\Rice
re-shaping evaluation\Seafood
re-shaping evaluation\Soup
re-shaping evaluation\Vegetable-Fruit
re-shaping validation
re-shaping validation\Bread
re-shaping validation\Dairy product
re-shaping validation\Dessert
re-shaping validation\Egg
re-shaping validation\Fried food
re-shaping validation\Meat
re-shaping validation\Noodles-Pasta
re-shaping validation\Rice
re-shaping validatio

In [None]:
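# images are now square: ImageOps.pad has already resized and padded them to 224 * 224.
# next step is to normalize them (TODO confirm: presumably pixel-value preprocessing, since the size is already 224 * 224)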
