In [12]:
import json
import os

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img

In [13]:
# Project-local run options; only the batch size is overridden here.
# `options.batch_size` is what the data generators below use as their batch size.
from options import Options
options = Options(batch_size=16)

In [14]:
# constants
# Flag for training-time data augmentation.
IF_DATA_AUGMENTATION = True
NUM_CLASSES = 2
IMAGE_WIDTH = IMAGE_HEIGHT = 224
# Keras `target_size` is ordered (height, width), not (width, height).
# The two are equal today, but the order matters as soon as they differ.
IMAGE_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)
# One channel, matching the color_mode="grayscale" passed to the generators.
IMAGE_CHANNELS = 1
# Channels-last model input shape: (height, width, channels).
INPUT_SHAPE = [IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS]

In [15]:
# Load the machine-specific JSON config and derive the dataset paths from it.
print("Load Config ...")
# Explicit encoding so the file is read the same way regardless of locale.
with open('./config/config_mac.json', 'r', encoding='utf-8') as config_file:
    CONFIG = json.load(config_file)
ROOT_PATH = CONFIG["ROOT_PATH"]
print(f"ROOT_PATH: {ROOT_PATH}")
# Expand a leading "~" so the config may use home-relative paths; the value is
# printed before and after expansion to make the substitution visible.
ROOT_PATH = os.path.expanduser(ROOT_PATH)
print(f"ROOT_PATH: {ROOT_PATH}")
TRAIN_DATA_DIR = os.path.join(ROOT_PATH, CONFIG["TRAIN_DATA_DIR"])
print(f"TRAIN_DATA_DIR: {TRAIN_DATA_DIR}")
# Fail fast with a clear message if the dataset location is unavailable
# (e.g. an external volume that is not mounted) instead of erroring later
# inside the data generators.
if not os.path.isdir(TRAIN_DATA_DIR):
    raise FileNotFoundError(f"TRAIN_DATA_DIR does not exist: {TRAIN_DATA_DIR}")

Load Config ...
ROOT_PATH: /Volumes/MacSDCard/DeepLearningData/semi-conductor-image-classification-first
ROOT_PATH: /Volumes/MacSDCard/DeepLearningData/semi-conductor-image-classification-first
TRAIN_DATA_DIR: /Volumes/MacSDCard/DeepLearningData/semi-conductor-image-classification-first/data/origin/train/


In [16]:
# Build the training and validation iterators over TRAIN_DATA_DIR.
# Class order fixes the label indices: good_0 -> 0, bad_1 -> 1.
classes = ["good_0", "bad_1"]

# Shared by both ImageDataGenerator instances. The train and validation
# generators must use the same split fraction and rescaling, otherwise the
# two subsets would not be complementary / comparably preprocessed.
VALIDATION_SPLIT = 0.2
RESCALE_FACTOR = 1./255
GENERATOR_SEED = 42

# Honour the IF_DATA_AUGMENTATION flag instead of augmenting unconditionally.
if IF_DATA_AUGMENTATION:
    print('Using real-time data augmentation.')
    augmentation_kwargs = dict(
        rotation_range=15,
        shear_range=0.1,
        zoom_range=0.2,
        horizontal_flip=True,
        width_shift_range=0.1,
        height_shift_range=0.1,
    )
else:
    print('Not using data augmentation.')
    augmentation_kwargs = {}

# Arguments common to both flow_from_directory calls; only `subset` differs.
flow_kwargs = dict(
    target_size=IMAGE_SIZE,
    classes=classes,
    color_mode="grayscale",
    class_mode='categorical',
    batch_size=options.batch_size,
    # NOTE(review): shuffle=True is also applied to the validation iterator.
    # If predictions are later compared against `validation_generator.classes`
    # or `.filenames`, the validation iterator should use shuffle=False.
    shuffle=True,
    seed=GENERATOR_SEED,
)

print("Training Generator...")
train_datagen = ImageDataGenerator(
    validation_split=VALIDATION_SPLIT,
    rescale=RESCALE_FACTOR,
    **augmentation_kwargs
)
train_generator = train_datagen.flow_from_directory(
    TRAIN_DATA_DIR,
    subset='training',
    **flow_kwargs
)

print("Validation Generator...")
# Validation data is only rescaled, never augmented.
valid_datagen = ImageDataGenerator(
    validation_split=VALIDATION_SPLIT,
    rescale=RESCALE_FACTOR
)
validation_generator = valid_datagen.flow_from_directory(
    TRAIN_DATA_DIR,
    subset='validation',
    **flow_kwargs
)

print("Train class_indices: ", train_generator.class_indices)
print("Val class_indices: ", validation_generator.class_indices)

# Both iterators must agree on the label encoding.
assert train_generator.class_indices == validation_generator.class_indices, \
    "train/validation generators disagree on class indices"

Using real-time data augmentation.
Training Generator...
Found 24000 images belonging to 2 classes.
Validation Generator...
Found 6000 images belonging to 2 classes.
Train class_indices:  {'good_0': 0, 'bad_1': 1}
Val class_indices:  {'good_0': 0, 'bad_1': 1}


In [20]:
# Sanity check: number of image files the 'training' subset iterator picked up.
len(train_generator.filenames)

24000

In [21]:
# Sanity check: number of image files the 'validation' subset iterator picked up.
len(validation_generator.filenames)

6000

In [27]:
# Count how many files of the first class ended up in the training subset
# (a quick check of the class balance).
print(f"classes: {classes}")
# Use validation_generator.filenames here to inspect the validation subset.
filenames = train_generator.filenames
target_class = classes[0]
# Filenames are paths relative to the data directory, so the first path
# component is the class folder. Comparing that component exactly avoids the
# false matches a bare prefix test gives when one class name is a prefix of
# another (e.g. "good" vs "good_0").
count = sum(1 for name in filenames if name.split(os.sep, 1)[0] == target_class)
print(f"count: {count}")

classes: ['good_0', 'bad_1']
count: 21600
