In [8]:
import os
import shutil
import splitfolders
import cv2

import pandas as pd
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt

from tensorflow.keras.utils import img_to_array, array_to_img, load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [9]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [11]:
DATA_DIR = './datasets'
LABELED_DATA_DIR = f'{DATA_DIR}/raw_data'
MODEL_DATA_DIR = f'{DATA_DIR}/model_data'

In [4]:
df = pd.read_csv(f'{DATA_DIR}/metadata.csv')
df.shape

(760, 6)

In [5]:
unique_classes = df['label'].value_counts()
filtered_df = df[df['label'].isin(unique_classes[unique_classes>=10].index)]
filtered_df.shape

(638, 6)

In [6]:
classes = filtered_df['label'].unique()
num_of_classes = len(classes)
print(f'Total Number of Classes: {num_of_classes}')

Total Number of Classes: 38


In [44]:
try:
    for label in classes:
        os.makedirs(f'{LABELED_DATA_DIR}/{label}')
except Exception as e:
    print(e)

In [49]:
for i in range(len(filtered_df)):
    shutil.copy2(f'{DATA_DIR}/images/{filtered_df.iloc[i, 0]}',
                f'{LABELED_DATA_DIR}/{filtered_df.iloc[i, 1]}')

## Augmentation

In [22]:
datagen = ImageDataGenerator(zoom_range=0.1, fill_mode='nearest', rotation_range=0.1)

In [23]:
os.makedirs('./alt')

FileExistsError: [WinError 183] Cannot create a file when that file already exists: './alt'

In [24]:
try:
    for label in os.listdir('./short_dataset'):
        os.makedirs(f'./alt/{label}')
except Exception as e:
    print(e)

In [25]:
for label in os.listdir('./short_dataset'):
    files = os.listdir(f'./short_dataset/{label}')
    for image in files:
        shutil.copy2(f'./short_dataset/{label}/{image}', f'./alt/{label}')

In [26]:
folder_dir = './short_dataset'

for label in os.listdir('./short_dataset'):
    for image in os.listdir(f'{folder_dir}/{label}'):
        img = load_img(os.path.join(f'{folder_dir}/{label}', image))
        factor = 1.5
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)
        i = 0
        for batch in datagen.flow(x, batch_size=1,
                save_to_dir=f'./alt/{label}', save_format='png'):
            i += 1
            if i > 2:
                break

In [12]:
splitfolders.ratio(LABELED_DATA_DIR, output=MODEL_DATA_DIR,
    seed=1337, ratio=(.8, .1, .1), group_prefix=None, move=False)

Copying files: 982 files [00:07, 126.60 files/s]


In [32]:
data = {
    'image': [],
    'label': []
}

In [33]:
for label in classes:
    for image in os.listdir(f'./data/alt/{label}'):
        shutil.copy2(f'./data/alt/{label}/{image}',
                     f'./data/f_images')
        data['image'].append(image)
        data['label'].append(label)

temp = pd.DataFrame(data=data)
temp.to_csv('x.csv')