# Augment the directories that contain fewer than 10 images

## Balancing the number of images in each directory

In [53]:
import os                                     # interact with the operating system
import shutil                                 # operating on file like copying, create and del directories and file
from PIL import Image                         # image processing 
from multiprocessing.pool import ThreadPool   # parallelizing the process
from tqdm import tqdm                         # for progress bar
import glob                                   # for searching for files with a specific name pattern
import math
import random
from collections import Counter

In [54]:
import torch
import numpy as np
import pandas as pd
import torchvision
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision.utils import save_image
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.axes_grid import ImageGrid

## Checking the total number of images in each directory

In [55]:
# Scan the dataset root and count the regular files inside every class directory.
processed_root = "/home/ritushwar/Nutritional-Assistance/data"
total_img = 0      # running total of files across all class directories
imgs_per_dir = []  # one entry per class directory: how many files it holds
total_dirs = 0     # number of class directories found
# `class_name` instead of `dir`, so the builtin dir() is not shadowed afterwards
for class_name in os.listdir(processed_root):
    processed_class_path = os.path.join(processed_root, class_name)
    if not os.path.isdir(processed_class_path):
        continue  # skip anything at the root level that is not a directory
    total_dirs += 1
    # Only regular files are counted; nested sub-directories are ignored.
    img = sum(
        os.path.isfile(os.path.join(processed_class_path, entry))
        for entry in os.listdir(processed_class_path)
    )
    imgs_per_dir.append(img)
    total_img += img

print("Done")

Done


In [56]:
# Report the totals gathered by the directory scan, one figure per line.
print(
    f"Total Image: {total_img}",
    f"Total Directory: {total_dirs}",
    f"Images per directory: {imgs_per_dir}",
    sep="\n",
)

Total Image: 49647
Total Directory: 4571
Images per directory: [8, 3, 20, 8, 8, 20, 6, 8, 20, 8, 3, 20, 21, 8, 8, 8, 8, 20, 8, 8, 20, 8, 20, 20, 8, 8, 8, 8, 8, 8, 8, 20, 21, 20, 8, 8, 20, 8, 20, 8, 20, 20, 8, 8, 8, 8, 6, 20, 20, 8, 6, 20, 20, 8, 8, 23, 8, 8, 20, 20, 7, 8, 12, 8, 8, 20, 8, 20, 8, 8, 7, 20, 8, 20, 8, 8, 8, 8, 20, 2, 8, 5, 5, 8, 8, 6, 8, 8, 20, 6, 8, 20, 8, 8, 20, 20, 20, 8, 3, 6, 8, 9, 20, 8, 8, 8, 8, 6, 8, 8, 8, 20, 8, 8, 20, 21, 20, 8, 8, 7, 20, 28, 21, 20, 5, 8, 8, 8, 20, 8, 6, 8, 8, 8, 8, 8, 8, 6, 8, 20, 8, 8, 8, 20, 20, 7, 2, 2, 21, 8, 8, 8, 20, 8, 2, 22, 23, 8, 21, 5, 8, 8, 8, 8, 8, 20, 8, 5, 8, 20, 20, 8, 8, 20, 8, 8, 20, 20, 8, 20, 8, 8, 8, 7, 7, 8, 8, 3, 8, 8, 2, 8, 7, 8, 8, 6, 20, 20, 8, 8, 20, 8, 7, 20, 8, 8, 8, 8, 20, 20, 8, 8, 7, 8, 8, 8, 8, 8, 15, 8, 8, 6, 8, 20, 8, 20, 8, 24, 20, 8, 6, 8, 8, 20, 8, 6, 8, 24, 2, 8, 8, 2, 6, 8, 8, 8, 8, 20, 8, 8, 8, 24, 20, 8, 8, 8, 8, 20, 21, 8, 8, 8, 8, 8, 20, 8, 8, 20, 2, 8, 8, 2, 8, 21, 8, 20, 20, 21, 20, 8, 6, 8, 23, 8,

In [57]:
# Distinct per-directory file counts that occur in the dataset.
unq_images = {*imgs_per_dir}
print(unq_images)

{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28}


## Making a dictionary of how many directories have each image count

In [58]:
# Frequency table: per-directory file count -> number of directories with it.
count_dict = Counter()
count_dict.update(imgs_per_dir)
count_dict

Counter({8: 2593,
         20: 944,
         6: 177,
         3: 167,
         21: 162,
         2: 156,
         7: 90,
         4: 66,
         22: 61,
         5: 34,
         23: 28,
         24: 26,
         11: 9,
         15: 8,
         13: 7,
         14: 6,
         12: 5,
         17: 5,
         9: 4,
         25: 4,
         27: 4,
         19: 3,
         10: 3,
         26: 3,
         28: 2,
         18: 2,
         16: 2})

In [60]:
# (name, transform) pairs used for augmentation. generate_image() applies each
# transform to a source image and uses the name as the saved file's prefix.
transformations = [
    ("rotated", transforms.RandomRotation(45)),
    ("flipped", transforms.RandomHorizontalFlip(p=1.0)),  # Always flip
    ("color_jitter", transforms.ColorJitter(brightness=0.4, contrast=0.6, saturation=0.8)),
    # pad_if_needed: pad images smaller than 224x224 instead of raising
    ("crop", transforms.RandomCrop(size=(224, 224), pad_if_needed=True)),
]

In [62]:
def generate_image(dir_path, no_of_img, target_count=10):
    """Augment a sparse class directory with transformed copies of its images.

    Randomly selects ``ceil((target_count - no_of_img) / len(transformations))``
    source files (capped at the number of files available) and, for each one,
    saves one augmented copy per entry of the module-level ``transformations``
    list. Copies are written into ``dir_path`` as
    ``<transform name><original file name>``.

    Every selected source yields *all* transformations, so the directory may
    end up with more than ``target_count`` files (e.g. 8 -> 12). When there are
    too few sources it may also stay below the target.

    Args:
        dir_path: Directory holding the images of one class.
        no_of_img: Number of images currently in ``dir_path``.
        target_count: Number of images the directory should reach.

    Returns:
        None. The function is a no-op when the directory already holds at
        least ``target_count`` images or contains no files.
    """
    num_images_to_create = target_count - no_of_img
    if num_images_to_create <= 0:
        return  # already at or above the target; nothing to augment

    # Only regular files can be opened as source images.
    all_images = [
        entry
        for entry in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, entry))
    ]
    if not all_images:
        return  # nothing to augment from

    # Each source produces len(transformations) copies. Never ask for more
    # sources than exist, because random.sample would raise ValueError.
    num_images_to_select = min(
        math.ceil(num_images_to_create / len(transformations)),
        len(all_images),
    )
    for file_name in random.sample(all_images, num_images_to_select):
        image_path = os.path.join(dir_path, file_name)
        # Context manager closes the underlying file handle once we are done.
        with Image.open(image_path) as image:
            for name, transform in transformations:
                augmented_image = transform(image)
                augmented_image.save(os.path.join(dir_path, f"{name}{file_name}"))

In [63]:
# Top up every class directory that currently holds fewer than 10 entries.
processed_root = "/home/ritushwar/Nutritional-Assistance/data"
# `class_name` instead of `dir`, so the builtin dir() is not shadowed afterwards
for class_name in os.listdir(processed_root):
    processed_class_path = os.path.join(processed_root, class_name)
    if not os.path.isdir(processed_class_path):
        continue  # skip anything at the root level that is not a directory
    num_of_img = len(os.listdir(processed_class_path))
    if num_of_img < 10:
        generate_image(processed_class_path, num_of_img)
print("Done")

Done


## Re-checking the total number of images per directory

In [64]:
# Scan the dataset root and count the regular files inside every class directory.
processed_root = "/home/ritushwar/Nutritional-Assistance/data"
total_img = 0      # running total of files across all class directories
imgs_per_dir = []  # one entry per class directory: how many files it holds
total_dirs = 0     # number of class directories found
# `class_name` instead of `dir`, so the builtin dir() is not shadowed afterwards
for class_name in os.listdir(processed_root):
    processed_class_path = os.path.join(processed_root, class_name)
    if not os.path.isdir(processed_class_path):
        continue  # skip anything at the root level that is not a directory
    total_dirs += 1
    # Only regular files are counted; nested sub-directories are ignored.
    img = sum(
        os.path.isfile(os.path.join(processed_class_path, entry))
        for entry in os.listdir(processed_class_path)
    )
    imgs_per_dir.append(img)
    total_img += img

print("Done")

Done


In [65]:
# Report the totals gathered by the directory scan, one figure per line.
print(
    f"Total Image: {total_img}",
    f"Total Directory: {total_dirs}",
    f"Images per directory: {imgs_per_dir}",
    sep="\n",
)

Total Image: 64487
Total Directory: 4571
Images per directory: [12, 11, 20, 12, 12, 20, 10, 12, 20, 12, 11, 20, 21, 12, 12, 12, 12, 20, 12, 12, 20, 12, 20, 20, 12, 12, 12, 12, 12, 12, 12, 20, 21, 20, 12, 12, 20, 12, 20, 12, 20, 20, 12, 12, 12, 12, 10, 20, 20, 12, 10, 20, 20, 12, 12, 23, 12, 12, 20, 20, 11, 12, 12, 12, 12, 20, 12, 20, 12, 12, 11, 20, 12, 20, 12, 12, 12, 12, 20, 10, 12, 13, 13, 12, 12, 10, 12, 12, 20, 10, 12, 20, 12, 12, 20, 20, 20, 12, 11, 10, 12, 13, 20, 12, 12, 12, 12, 10, 12, 12, 12, 20, 12, 12, 20, 21, 20, 12, 12, 11, 20, 28, 21, 20, 13, 12, 12, 12, 20, 12, 10, 12, 12, 12, 12, 12, 12, 10, 12, 20, 12, 12, 12, 20, 20, 11, 10, 10, 21, 12, 12, 12, 20, 12, 10, 22, 23, 12, 21, 13, 12, 12, 12, 12, 12, 20, 12, 13, 12, 20, 20, 12, 12, 20, 12, 12, 20, 20, 12, 20, 12, 12, 12, 11, 11, 12, 12, 11, 12, 12, 10, 12, 11, 12, 12, 10, 20, 20, 12, 12, 20, 12, 11, 20, 12, 12, 12, 12, 20, 20, 12, 12, 11, 12, 12, 12, 12, 12, 15, 12, 12, 10, 12, 20, 12, 20, 12, 24, 20, 12, 10, 12, 12, 20, 

In [66]:
# Distinct per-directory file counts that occur in the dataset.
unq_images = {*imgs_per_dir}
print(unq_images)

{10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28}


In [67]:
# Frequency table: per-directory file count -> number of directories with it.
count_dict = Counter()
count_dict.update(imgs_per_dir)
count_dict

Counter({12: 2664,
         20: 944,
         10: 336,
         11: 266,
         21: 162,
         22: 61,
         13: 45,
         23: 28,
         24: 26,
         15: 8,
         14: 6,
         17: 5,
         25: 4,
         27: 4,
         19: 3,
         26: 3,
         28: 2,
         18: 2,
         16: 2})

# Randomly delete images from directories that have more than 12 images

In [68]:
def del_image(dir_path, no_of_img, target_count=12):
    """Randomly delete files from a class directory down to ``target_count``.

    Removes ``no_of_img - target_count`` randomly chosen regular files from
    ``dir_path`` (capped at the number of files actually present) and prints
    the directory path when it trims something.

    Args:
        dir_path: Directory holding the images of one class.
        no_of_img: Number of images currently in ``dir_path``.
        target_count: Number of images that should remain.

    Returns:
        None. The function is a no-op when ``no_of_img`` does not exceed
        ``target_count``.
    """
    num_images_to_delete = no_of_img - target_count
    if num_images_to_delete <= 0:
        return  # nothing to trim; random.sample would reject a negative size

    print(dir_path)
    # Only regular files are candidates; os.remove cannot delete directories.
    all_images = [
        entry
        for entry in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, entry))
    ]
    # Guard against a stale count that is larger than what is on disk.
    num_images_to_delete = min(num_images_to_delete, len(all_images))
    for file_name in random.sample(all_images, num_images_to_delete):
        try:
            os.remove(os.path.join(dir_path, file_name))
        except FileNotFoundError:
            # The file vanished between listing and removal; same outcome.
            pass

In [69]:
# Trim every class directory that currently holds more than 12 entries.
processed_root = "/home/ritushwar/Nutritional-Assistance/data"
# `class_name` instead of `dir`, so the builtin dir() is not shadowed afterwards
for class_name in os.listdir(processed_root):
    processed_class_path = os.path.join(processed_root, class_name)
    if not os.path.isdir(processed_class_path):
        continue  # skip anything at the root level that is not a directory
    num_of_img = len(os.listdir(processed_class_path))
    if num_of_img > 12:
        del_image(processed_class_path, num_of_img)
print("Done")

/home/ritushwar/Nutritional-Assistance/data/dish_1550862993
/home/ritushwar/Nutritional-Assistance/data/dish_1551394781
/home/ritushwar/Nutritional-Assistance/data/dish_1551224283
/home/ritushwar/Nutritional-Assistance/data/dish_1551568285
/home/ritushwar/Nutritional-Assistance/data/dish_1550769483
/home/ritushwar/Nutritional-Assistance/data/dish_1551135590
/home/ritushwar/Nutritional-Assistance/data/dish_1551563213
/home/ritushwar/Nutritional-Assistance/data/dish_1551378955
/home/ritushwar/Nutritional-Assistance/data/dish_1551397591
/home/ritushwar/Nutritional-Assistance/data/dish_1550708556
/home/ritushwar/Nutritional-Assistance/data/dish_1551390528
/home/ritushwar/Nutritional-Assistance/data/dish_1550778401
/home/ritushwar/Nutritional-Assistance/data/dish_1550769725
/home/ritushwar/Nutritional-Assistance/data/dish_1551380856
/home/ritushwar/Nutritional-Assistance/data/dish_1551566547
/home/ritushwar/Nutritional-Assistance/data/dish_1557863104
/home/ritushwar/Nutritional-Assistance/d

## Re-checking the total number of images

In [70]:
# Scan the dataset root and count the regular files inside every class directory.
processed_root = "/home/ritushwar/Nutritional-Assistance/data"
total_img = 0      # running total of files across all class directories
imgs_per_dir = []  # one entry per class directory: how many files it holds
total_dirs = 0     # number of class directories found
# `class_name` instead of `dir`, so the builtin dir() is not shadowed afterwards
for class_name in os.listdir(processed_root):
    processed_class_path = os.path.join(processed_root, class_name)
    if not os.path.isdir(processed_class_path):
        continue  # skip anything at the root level that is not a directory
    total_dirs += 1
    # Only regular files are counted; nested sub-directories are ignored.
    img = sum(
        os.path.isfile(os.path.join(processed_class_path, entry))
        for entry in os.listdir(processed_class_path)
    )
    imgs_per_dir.append(img)
    total_img += img

print("Done")

Done


In [71]:
# Report the totals gathered by the directory scan, one figure per line.
print(
    f"Total Image: {total_img}",
    f"Total Directory: {total_dirs}",
    f"Images per directory: {imgs_per_dir}",
    sep="\n",
)

Total Image: 53914
Total Directory: 4571
Images per directory: [12, 11, 12, 12, 12, 12, 10, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 10, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 12, 11, 10, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 11, 10, 10, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 12, 12, 11, 12, 12, 10, 12, 11, 12, 12, 10, 12, 12, 12, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 12, 

In [72]:
# Frequency table: per-directory file count -> number of directories with it.
count_dict = Counter()
count_dict.update(imgs_per_dir)
count_dict

Counter({12: 3969, 10: 336, 11: 266})