In [1]:
import os
import shutil
# import cv2
# import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import numpy as np
import magic
# from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
path = "alfagift\\images"
merge_dir = "alfagift\\merge"
list = os.listdir(path)
current_path = "./"
nyemil_dir = "dataset\\nyemil\\nyemil_image"
dataset_dir = "datasets"
klik_indomaret_makanan = "dataset_makanan"
klik_indomaret_minuman = "dataset_minuman"


In [3]:
os.makedirs(merge_dir, exist_ok=True)

In [5]:

def copy_to_merge_dir() -> None:
    """
    Copies files from subdirectories of 'path' to corresponding subdirectories in 'merge_dir'.

    This function:
    - Iterates through subdirectories in 'path'.
    - Replaces underscores in filenames with spaces.
    - Copies files to corresponding subdirectories in 'merge_dir'.

    Parameters:
    None

    Returns:
    None
    """
    for i in os.listdir(path):
        curr_dir = os.listdir(os.path.join(path, i))
        src_dir = os.path.join(path, i)
        for j in curr_dir:
            r = j.replace("_", " ")
            shutil.copyfile(os.path.join(src_dir, j), os.path.join(os.path.join(merge_dir, i), r))


In [6]:
def copyf_makanan_minuman() -> None:
    """
    Copies files from specific subdirectories of 'alfagift' to corresponding subdirectories in 'merge_dir' if they do not already exist.

    This function:
    - Iterates through subdirectories in 'alfagift'.
    - Checks if the subdirectory name is in a predefined list.
    - For each file in the subdirectory, checks if the file exists in the corresponding subdirectory in 'merge_dir'.
    - Copies the file if it does not already exist in the target directory.

    Parameters:
    None

    Returns:
    None
    """
    for i in os.listdir("alfagift"):
        if i in list:
            for j in os.listdir(f"alfagift/{i}"):
                if not j in os.listdir(os.path.join(merge_dir, i)):
                    shutil.copyfile(os.path.join("alfagift", i, j), os.path.join(merge_dir, i, j))


In [7]:

def format_file(j: str) -> str:
    """
    Reformat a filename by merging the last two segments of the filename before the extension.

    Parameters:
    j (str): The filename to be reformatted.

    Returns:
    str: The reformatted filename.
    """
    j_splt = j.split()
    tmp: str = j_splt[-1]
    j_splt.pop(-1)
    j_splt[-1] += tmp
    
    j: str = ' '.join(j_splt)

    return j




def format_file_ext() -> None:
    """
    Formats and renames files within subdirectories of 'merge_dir' based on specific criteria.

    This function:
    - Iterates through subdirectories in 'merge_dir'.
    - Applies specific formatting to filenames.
    - Renames files based on their format.

    Parameters:
    None

    Returns:
    None
    """
    for i in os.listdir(merge_dir):
        curr_dir: str = os.path.join(merge_dir, i)
        for j in os.listdir(curr_dir):
            if j.endswith(".jpg"):
                pass
                r: str = j[:-4]
                r = format_file(r)
                os.rename(os.path.join(curr_dir, j), os.path.join(curr_dir, r)+".jpg")
            elif j[0] == "[":
                pass
                r = j[j.index("]")+1:]
                r = format_file(r)
                os.rename(os.path.join(curr_dir, j), os.path.join(curr_dir, r)+".jpg")
                
            else:
                pass
                r = format_file(j)
                os.rename(os.path.join(curr_dir, j), os.path.join(curr_dir, r)+".jpg")

In [8]:
def remove_special_char(x):
    x = x.replace("(","").replace(")","")
    return x+".jpg"

def format_nyemil_dataset() -> None:
    for i in os.listdir(nyemil_dir):
        r = remove_special_char(i)
        os.rename(os.path.join(nyemil_dir, i), os.path.join(nyemil_dir, r))

In [9]:
def get_file_type(filename):
    mime = magic.Magic(mime=True)
    return mime.from_file(filename)

In [10]:
def make_dataset_dir() -> None:
    os.makedirs(dataset_dir, exist_ok=True)
    shutil.copytree("alfagift/merge/makanan", "datasets/makanan")
    shutil.copytree("alfagift/merge/minuman", "datasets/minuman")


In [11]:
# copy_to_merge_dir()
# copyf_makanan_minuman()
# format_file_ext()
# format_nyemil_dataset()
make_dataset_dir()

In [12]:

def filter_image_datasets() -> None:
    """
    Filters and organizes image files within subdirectories of 'dataset_dir'.

    This function:
    - Iterates through subdirectories in 'dataset_dir'.
    - Checks if a file in the subdirectories is an image.
    - If the file is an image, it creates a subdirectory named after the file (excluding its extension) and moves the file there.
    - If the file is not an image, it removes the file.
    - Handles exceptions where files are not found during the process.

    Parameters:
    None

    Returns:
    None
    """
    for i in os.listdir(dataset_dir):
        curr_dir = os.path.join(dataset_dir, i)
        for j in os.listdir(curr_dir):
            try:
                if get_file_type(os.path.join(curr_dir, j))[:5] == "image":
                    os.makedirs(os.path.join(curr_dir, j[:-4]), exist_ok=True)
                    shutil.move(os.path.join(curr_dir, j), os.path.join(os.path.join(curr_dir, j[:-4]), j))
                else:
                    os.remove(os.path.join(curr_dir, j))
            except:
                print("file not found")

In [13]:

def copy_n_filter_indo_dataset() -> None:
    """
    Filters and copies image files from 'klik_indomaret_makanan' and 'klik_indomaret_minuman'
    to corresponding subdirectories in 'dataset_dir'.

    This function:
    - Iterates through files in 'klik_indomaret_makanan' and 'klik_indomaret_minuman'.
    - Checks if a file in the source directories matches any subdirectory name (case-insensitive) in 'dataset_dir'.
    - If the file is an image, it is copied to the appropriate subdirectory.
    - If the file is not an image, it is removed from the source directories.
    - Creates new subdirectories in 'dataset_dir' if needed and copies files accordingly.

    Parameters:
    None

    Returns:
    None
    """
    for i in os.listdir(klik_indomaret_makanan):
        for j in os.listdir(os.path.join(dataset_dir, "makanan")):
            if ''.join(j.split()).lower() in ''.join(i[:-4].split()).lower():
                if get_file_type(os.path.join(klik_indomaret_makanan, i))[:5] == "image":
                    if i in os.listdir(os.path.join(os.path.join(dataset_dir, "makanan"), j)):  
                        shutil.copyfile(os.path.join(klik_indomaret_makanan, i), os.path.join(os.path.join(os.path.join(dataset_dir, "makanan"), j), "1_"+i))
                    else:
                        shutil.copyfile(os.path.join(klik_indomaret_makanan, i), os.path.join(os.path.join(os.path.join(dataset_dir, "makanan"), j), "1_"+i))
                else:
                    print("not a image")
                    os.remove(os.path.join(klik_indomaret_makanan, i))
                break
        if not ''.join(i[:-4].split()).lower() in [''.join(x.split()).lower() for x in os.listdir(os.path.join(dataset_dir, "makanan"))]:
            if get_file_type(os.path.join(klik_indomaret_makanan, i))[:5] == "image":
                os.makedirs(os.path.join(os.path.join(dataset_dir, "makanan"), i[:-4]), exist_ok=True)
                shutil.copyfile(os.path.join(klik_indomaret_makanan, i), os.path.join(os.path.join(os.path.join(dataset_dir, "makanan"), i[:-4]), i))
            else:
                print("not a image")
                os.remove(os.path.join(klik_indomaret_makanan, i))

    for i in os.listdir(klik_indomaret_minuman):
        for j in os.listdir(os.path.join(dataset_dir, "minuman")):
            if ''.join(j.split()).lower() in ''.join(i[:-4].split()).lower():
                if get_file_type(os.path.join(klik_indomaret_minuman, i))[:5] == "image":
                    if i in os.listdir(os.path.join(os.path.join(dataset_dir, "minuman"), j)):  
                        shutil.copyfile(os.path.join(klik_indomaret_minuman, i), os.path.join(os.path.join(os.path.join(dataset_dir, "minuman"), j), "1_"+i))
                    else: 
                        shutil.copyfile(os.path.join(klik_indomaret_minuman, i), os.path.join(os.path.join(os.path.join(dataset_dir, "minuman"), j), "1_"+i))
                    
                else:
                    print("not a image")
                    os.remove(os.path.join(klik_indomaret_minuman, i))
                break
        if not ''.join(i[:-4].split()).lower() in [''.join(x.split()).lower() for x in os.listdir(os.path.join(dataset_dir, "minuman"))]:
            if get_file_type(os.path.join(klik_indomaret_minuman, i))[:5] == "image":
                os.makedirs(os.path.join(os.path.join(dataset_dir, "minuman"), i[:-4]), exist_ok=True)
                shutil.copyfile(os.path.join(klik_indomaret_minuman, i), os.path.join(os.path.join(os.path.join(dataset_dir, "minuman"), i[:-4]), i))
                
            else:
                print("not a image")
                os.remove(os.path.join(klik_indomaret_minuman, i))
        




In [14]:
def copy_n_filter_nyemil() -> None:
    """
    Filters and copies image files from the 'nyemil_dir' to corresponding subdirectories in 'dataset_dir'.
    
    This function:
    - Iterates through files in 'nyemil_dir'.
    - Checks if a file in 'nyemil_dir' matches any subdirectory name (case-insensitive) in 'dataset_dir'.
    - If the file is an image, it is copied to the appropriate subdirectory.
    - If the file is not an image, it is removed from 'nyemil_dir'.
    - Creates new subdirectories in 'dataset_dir' if needed and copies files accordingly.
    
    Parameters:
    None
    
    Returns:
    None
    """
    for i in os.listdir(nyemil_dir):
        for t in os.listdir(dataset_dir):
            for j in os.listdir(os.path.join(dataset_dir, t)):
                if ''.join(j.split()).lower() in ''.join(i[:-4].split()).lower():
                    if get_file_type(os.path.join(nyemil_dir, i))[:5] == "image":
                        if i in os.listdir(os.path.join(os.path.join(dataset_dir, t), j)):  
                            shutil.copyfile(os.path.join(nyemil_dir, i), os.path.join(os.path.join(os.path.join(dataset_dir, t), j), "2_"+i))
                        else: 
                            shutil.copyfile(os.path.join(nyemil_dir, i), os.path.join(os.path.join(os.path.join(dataset_dir, t), j), "2_"+i))
                    else:
                        print("not a image")
                        os.remove(os.path.join(nyemil_dir, i))
                    break
            if not ''.join(i[:-4].split()).lower() in [''.join(x.split()).lower() for x in os.listdir(os.path.join(dataset_dir, t))]:
                if get_file_type(os.path.join(nyemil_dir, i))[:5] == "image":
                    os.makedirs(os.path.join(os.path.join(dataset_dir, t), i[:-4]), exist_ok=True)
                    shutil.copyfile(os.path.join(nyemil_dir, i), os.path.join(os.path.join(os.path.join(dataset_dir, t), i[:-4]), i))
                else:
                    print("not a image")
                    os.remove(os.path.join(nyemil_dir, i))

In [15]:
filter_image_datasets()
copy_n_filter_indo_dataset()

In [16]:
# check total images
count_i = 0
for i in os.listdir(os.path.join(dataset_dir, "makanan")):
    if len(os.listdir(os.path.join(os.path.join(dataset_dir, "makanan"), i))) == 3:
        count_i += 1
        
print(count_i, len(os.listdir(os.path.join(dataset_dir, "makanan")))*200)

0 620800


In [17]:

datagen = ImageDataGenerator(
    rescale=1./255,  
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

datagen1 = ImageDataGenerator(
    rescale=1./255,  
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)
def generate_di(seq, src, cdir):
    """
    Generates augmented images using color transformations and saves them to the specified directory.

    Parameters:
    seq (int): The number of augmented images to generate.
    src (str): The filename of the source image.
    cdir (str): The directory where the source image is located and where the augmented images will be saved.

    Returns:
    None
    """
    img = load_img(os.path.join(cdir,src))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    
    # Generate and save augmented images
    total_generated = 0
    for batch in datagen.flow(img, batch_size=1, save_to_dir=cdir, save_prefix=src[:-4], save_format='jpg'):
        total_generated += batch.shape[0]
        if total_generated >= seq:  
            break
def generate_di_g(seq, src, cdir):
    """
    Generates augmented grayscale images using additional transformations and saves them to the specified directory.

    Parameters:
    seq (int): The number of augmented images to generate.
    src (str): The filename of the source image.
    cdir (str): The directory where the source image is located and where the augmented images will be saved.

    Returns:
    None
    """
    img = load_img(os.path.join(cdir,src), color_mode='grayscale')
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    
    # Generate and save augmented images
    total_generated = 0
    for batch in datagen1.flow(img, batch_size=1, save_to_dir=cdir, save_prefix=f"{src[:-4]} - gray", save_format='jpg'):
        total_generated += batch.shape[0]
        if total_generated >= seq:  
            break


In [22]:
# process_directory(dataset_alfagift)

3 Ayam Mie Telor Super Kuning 200G
Processing 3 Ayam Mie Telor Super Kuning 200G.jpg in datasets\makanan\3 Ayam Mie Telor Super Kuning 200G, index 0 of 1
3 Ayam Mie Telor Super Merah 200G
Processing 3 Ayam Mie Telor Super Merah 200G.jpg in datasets\makanan\3 Ayam Mie Telor Super Merah 200G, index 0 of 1
5 Days Croissant Chocolate 60G
Processing 5 Days Croissant Chocolate 60G.jpg in datasets\makanan\5 Days Croissant Chocolate 60G, index 0 of 1
5 Days Croissant Mix Berries 60G
Processing 5 Days Croissant Mix Berries 60G.jpg in datasets\makanan\5 Days Croissant Mix Berries 60G, index 0 of 1
5 Days Croissant Pandan Srikaya 60G
Processing 5 Days Croissant Pandan Srikaya 60G.jpg in datasets\makanan\5 Days Croissant Pandan Srikaya 60G, index 0 of 1
5DAYS Croissant Cheese 60g
Processing 1_5 Days Croissant Cheese 60G.jpg in datasets\makanan\5DAYS Croissant Cheese 60g, index 0 of 2
Processing 5DAYS Croissant Cheese 60g.jpg in datasets\makanan\5DAYS Croissant Cheese 60g, index 1 of 2
5DAYS Croiss

In [19]:
def process_directory(e, num):
    """
    Process the directory and augment files to ensure each subdirectory has 200 files.

    Parameters:
    e (str): The path to the main directory containing subdirectories to be processed.
    num (int): Total augment
    
    Returns:
    None
    """
    for i in os.listdir(e):
        index = 0
        dir_path_i = os.path.join(e, i)
        sub_dirs_j = os.listdir(dir_path_i)
        
        for j in sub_dirs_j:
            print(j)
            curr_dir = os.path.join(dir_path_i, j)
            files_k = os.listdir(curr_dir)
            num_files_k = len(files_k)
            remaining_augments = num - num_files_k
            max_aug = num // num_files_k

            for idx, k in enumerate(files_k):
                last = (idx == num_files_k - 1)
                y = max_aug // 2

                if last:
                    additional_aug = remaining_augments - (max_aug * num_files_k)
                    if num_files_k > 1:
                        generate_di(y, k, curr_dir)
                        generate_di_g(y + additional_aug, k, curr_dir)
                    else:
                        generate_di(y, k, curr_dir)
                        generate_di_g(y, k, curr_dir)
                    index = 0  
                else:
                    generate_di(y, k, curr_dir)
                    generate_di_g(y, k, curr_dir)
                    index += 1

                print(f"Processing {k} in {curr_dir}, index {idx} of {num_files_k}")


In [20]:
process_directory(dataset_dir, 5)

3 Ayam Mie Telor Super Kuning 200G
Processing 3 Ayam Mie Telor Super Kuning 200G.jpg in datasets\makanan\3 Ayam Mie Telor Super Kuning 200G, index 0 of 1
3 Ayam Mie Telor Super Merah 200G
Processing 3 Ayam Mie Telor Super Merah 200G.jpg in datasets\makanan\3 Ayam Mie Telor Super Merah 200G, index 0 of 1
5 Days Croissant Chocolate 60G
Processing 5 Days Croissant Chocolate 60G.jpg in datasets\makanan\5 Days Croissant Chocolate 60G, index 0 of 1
5 Days Croissant Mix Berries 60G
Processing 5 Days Croissant Mix Berries 60G.jpg in datasets\makanan\5 Days Croissant Mix Berries 60G, index 0 of 1
5 Days Croissant Pandan Srikaya 60G
Processing 5 Days Croissant Pandan Srikaya 60G.jpg in datasets\makanan\5 Days Croissant Pandan Srikaya 60G, index 0 of 1
5DAYS Croissant Cheese 60g
Processing 1_5 Days Croissant Cheese 60G.jpg in datasets\makanan\5DAYS Croissant Cheese 60g, index 0 of 2
Processing 5DAYS Croissant Cheese 60g.jpg in datasets\makanan\5DAYS Croissant Cheese 60g, index 1 of 2
5DAYS Croiss

In [24]:
# Split dataset into training and testing sets

train_dir = os.path.join(dataset_dir, 'train')
test_dir = os.path.join(dataset_dir, 'test')


os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


for i in os.listdir(dataset_dir):
    sub_dir = os.listdir(os.path.join(dataset_dir, i))
    for j in sub_dir:
        cat_dir = os.path.join(os.path.join(dataset_dir, i), j)
        
        if os.path.isdir(cat_dir):
            os.makedirs(os.path.join(train_dir, j), exist_ok=True)
            os.makedirs(os.path.join(test_dir, j), exist_ok=True)
            
            images = os.listdir(cat_dir)
            
            train_images, test_images = train_test_split(images, test_size=0.02, random_state=42)
            
            for image in train_images:
                shutil.move(os.path.join(cat_dir, image), os.path.join(train_dir, j, image))
            for image in test_images:
                shutil.move(os.path.join(cat_dir, image), os.path.join(test_dir, j, image))

ValueError: With n_samples=0, test_size=0.02 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [31]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)


test_datagen = ImageDataGenerator(rescale=1./255)


train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(200, 200),  
    batch_size=32,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(200, 200),  
    batch_size=32,
    class_mode='categorical'
)

Found 18671 images belonging to 4643 classes.
Found 4757 images belonging to 4643 classes.


In [32]:
classes_train = train_generator.class_indices
classes_test = test_generator.class_indices
print("Training samples:", train_generator.samples)
print("Testing samples:", test_generator.samples)

Training samples: 18671
Testing samples: 4757


In [33]:
print("Class Indicate", train_generator.class_indices)

Class Indicate {'3 Ayam Mie Telor Super Kuning 200G': 0, '3 Ayam Mie Telor Super Merah 200G': 1, '5 Days Croissant Chocolate 60G': 2, '5 Days Croissant Mix Berries 60G': 3, '5 Days Croissant Pandan Srikaya 60G': 4, '5DAYS Croissant Cheese 60g': 5, '5DAYS Croissant Creamy Chocolate 60g': 6, '5DAYS Croissant Sweet Mixed Berries 60g': 7, '5DAYS Croissant Sweet Pandan Srikaya 60g': 8, '7+ Cereal Bar Cocoa Chia 22g': 9, '7+ Cereal Bar Yoghurt Berry 22g': 10, '801 Kerupuk Palembang Keriting 75g': 11, 'A&W Minuman Soda Rasa Sarsaparila Kaleng 250ml': 12, 'A&W Soft Drink Sarsaparila 250mL': 13, 'A.T.B Biskuit Marie Susu 180g': 14, 'A.T.B Marie Milk 180G': 15, 'AAA Kopi Bubuk Bag 100g': 16, 'ABC Jus Apel 250ml': 17, 'ABC Jus Jambu 250ml': 18, 'ABC Jus Jeruk 250ml': 19, 'ABC Jus Leci 250ml': 20, 'ABC Jus Mangga 250ml': 21, 'ABC Jus Sirsak 250ml': 22, 'ABC Kopi Bubuk Bali 210g': 23, 'ABC Kopi Rasa Klepon Bag 5 x 25g': 24, 'ABC Kopi Susu Gula Aren 11 x 27g': 25, 'ABC Kopi Susu Instan 10 x 30g': 26