In [2]:
import json
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img

In [3]:
# Project-local run options. The batch size set here is read by the data
# generators further down (via options.batch_size).
from options import Options
options = Options(batch_size=16)

In [4]:
# Notebook-wide constants.
IF_DATA_AUGMENTATION = True
NUM_CLASSES = 2

# Images are square: width and height share the same edge length.
IMAGE_WIDTH = 224
IMAGE_HEIGHT = 224
IMAGE_SIZE = (IMAGE_WIDTH, IMAGE_HEIGHT)

# One channel per pixel; the model input shape is (width, height, channels).
IMAGE_CHANNELS = 1
INPUT_SHAPE = [*IMAGE_SIZE, IMAGE_CHANNELS]

In [5]:
# Environment sanity check: this notebook requires TensorFlow 2.x.
print("If in eager mode: ", tf.executing_eagerly())
print("Use tensorflow version 2.")
# Compare the full major-version component (not just the first character)
# and report the installed version when the check fails.
tf_major_version = tf.__version__.split(".")[0]
assert tf_major_version == "2", (
    f"TensorFlow 2.x is required, found {tf.__version__}"
)

# Load the path configuration and derive the train/test data directories.
print("Load config ...")
# JSON is UTF-8 by specification; pass the encoding explicitly so the
# platform's locale encoding is not used to decode non-ASCII paths.
with open('./config/config_win.json', 'r', encoding='utf-8') as config_file:
    CONFIG = json.load(config_file)
ROOT_PATH = CONFIG["ROOT_PATH"]
print(f"ROOT_PATH: {ROOT_PATH}")
# Expand a leading "~" (if any) to the user's home directory.
ROOT_PATH = os.path.expanduser(ROOT_PATH)
print(f"ROOT_PATH: {ROOT_PATH}")
TRAIN_DATA_DIR = os.path.join(ROOT_PATH, CONFIG["TRAIN_DATA_DIR"])
print(f"TRAIN_DATA_DIR: {TRAIN_DATA_DIR}")
TEST_DATA_DIR = os.path.join(ROOT_PATH, CONFIG["TEST_DATA_DIR"])
print(f"TEST_DATA_DIR: {TEST_DATA_DIR}")

If in eager mode:  True
Use tensorflow version 2.
Load config ...
ROOT_PATH: D:\DeepLearningData\semi-conductor-image-classification-first
ROOT_PATH: D:\DeepLearningData\semi-conductor-image-classification-first
TRAIN_DATA_DIR: D:\DeepLearningData\semi-conductor-image-classification-first\data\origin\train\
TEST_DATA_DIR: D:\DeepLearningData\semi-conductor-image-classification-first\data\origin\test\all_tests


In [6]:
# Build `test_df`, the table of files to run predictions on.
#   test_on_train = True  -> every file under TRAIN_DATA_DIR/<class folder>/,
#                            with columns "filename" and "label".
#   test_on_train = False -> every file under TEST_DATA_DIR, "filename" only.
test_on_train = True

print("Prepare testing data...")
if test_on_train:
    # os.listdir() returns entries in arbitrary order, so sort the class
    # folders to make the label indices reproducible. Non-directory entries
    # are skipped because listing them below would raise.
    # NOTE(review): the index follows sorted folder-name order, which is not
    # necessarily the order passed as `classes=` to the data generators --
    # confirm both mappings agree before comparing predictions to these labels.
    label_names = sorted(
        entry
        for entry in os.listdir(TRAIN_DATA_DIR)
        if os.path.isdir(os.path.join(TRAIN_DATA_DIR, entry))
    )
    filenames, labels = [], []
    for class_index, class_name in enumerate(label_names):
        for image_name in os.listdir(os.path.join(TRAIN_DATA_DIR, class_name)):
            # Path relative to TRAIN_DATA_DIR, always with a forward slash.
            filenames.append(class_name + "/" + image_name)
            labels.append(class_index)
    # The label column is stored as strings ("0", "1", ...). The previous
    # implementation produced the same values implicitly by packing the
    # str filenames and int labels into a single NumPy array.
    test_df = pd.DataFrame({
        "filename": filenames,
        "label": [str(class_index) for class_index in labels],
    })
    # Derive the sample count from the files actually found instead of
    # hard-coding it, mirroring the else-branch.
    num_samples = num_train = len(test_df)
else:
    test_filenames = os.listdir(TEST_DATA_DIR)
    test_df = pd.DataFrame({
        'filename': test_filenames
    })
    num_samples = test_df.shape[0]

Prepare testing data...


In [17]:
# Integer array of the "label" column of test_df, displayed as the cell output.
# NOTE: only the test_on_train branch gives test_df a "label" column; with
# test_on_train = False this lookup raises KeyError.
label = test_df["label"].to_numpy(dtype=int)
label

array([0, 0, 0, ..., 1, 1, 1])

In [8]:
# Class folder names under TRAIN_DATA_DIR, passed to both generators below.
classes = ["good_0", "bad_1"]

# flow_from_directory() arguments shared by the training and validation subsets.
shared_flow_args = dict(
    target_size=IMAGE_SIZE,
    classes=classes,
    color_mode="grayscale",
    class_mode='categorical',
    batch_size=options.batch_size,
    shuffle=True,
    seed=42,
)

print('Using real-time data augmentation.')
print("Training Generator...")
# Training images are rescaled to [0, 1] and randomly augmented.
augmentation_args = dict(
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
train_datagen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255,
    **augmentation_args
)
train_generator = train_datagen.flow_from_directory(
    TRAIN_DATA_DIR,
    subset='training',
    **shared_flow_args
)

print("Validation Generator...")
# Validation images are only rescaled; the same validation_split is used so
# both generators are configured with the same split fraction.
valid_datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)
validation_generator = valid_datagen.flow_from_directory(
    TRAIN_DATA_DIR,
    subset='validation',
    **shared_flow_args
)

# Show the class-name -> index mapping each generator ended up with.
print("Train class_indices: ", train_generator.class_indices)
print("Val class_indices: ", validation_generator.class_indices)

Using real-time data augmentation.
Training Generator...
Found 24000 images belonging to 2 classes.
Validation Generator...
Found 6000 images belonging to 2 classes.
Train class_indices:  {'good_0': 0, 'bad_1': 1}
Val class_indices:  {'good_0': 0, 'bad_1': 1}


In [9]:
# Number of files picked up by the training-subset generator.
len(train_generator.filenames)

24000

In [10]:
# Number of files picked up by the validation-subset generator.
len(validation_generator.filenames)

6000

In [11]:
# Count how many files of the chosen subset start with the first class name.
print(f"classes: {classes}")
# Use validation_generator.filenames here to inspect the validation subset instead.
subset_filenames = train_generator.filenames
first_class = classes[0]
count = sum(1 for name in subset_filenames if name.startswith(first_class))
print(f"count: {count}")

classes: ['good_0', 'bad_1']
count: 21600
