In [31]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import models, layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

import splitfolders

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dataclasses import dataclass
from typing import Tuple
import optuna
from optuna.integration import TFKerasPruningCallback

Prepare training data
===

In [32]:
train_src = "asl_alphabet_train"
train_dir = 'datasets/train'
val_dir = 'datasets/val'
test_dir  = 'datasets/test'
splitfolders.ratio(train_src, output="datasets",
    seed=1337, ratio=(.8, .1, .1), group_prefix=None, move=False) # 產生trian(訓練集)、val(驗證集)、test(測試集)

Copying files: 223074 files [03:09, 1175.60 files/s]


Preprocessing and Get labels
===

In [38]:
@dataclass(frozen=True)
class dataconfig:
    batch_size: int = 16
    target_size: Tuple[int, int] = (224,224)

In [39]:
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
val_datagen   = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
test_datagen  = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
train_generator = train_datagen.flow_from_directory( # 多目錄時，目錄名為標籤
        train_dir,
        target_size=dataconfig.target_size,
        batch_size=dataconfig.batch_size,
        color_mode="grayscale",
        class_mode='categorical',
        shuffle=True)

val_generator = val_datagen.flow_from_directory(
        val_dir,
        target_size=dataconfig.target_size,
        batch_size=dataconfig.batch_size,
        color_mode="grayscale",
        class_mode='categorical',
        shuffle=False)

test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=dataconfig.target_size,
        batch_size=dataconfig.batch_size,
        color_mode="grayscale",
        class_mode='categorical',
        shuffle=False)

Found 178447 images belonging to 29 classes.
Found 22296 images belonging to 29 classes.
Found 22332 images belonging to 29 classes.


In [40]:
labels = list(train_generator.class_indices.keys())
print(labels)

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']


Model
===

In [46]:
@dataclass(frozen=True)
class modelconfig:
    num_classes: int = len(labels)
    input_shape: Tuple[int,int,int] = (224,224,1)
    dropout: float = 0.1

@dataclass(frozen=True)
class traingingconfig:
    checkpoint_path: str = "model/B_best_mode1.keras"
    epochs: int = 15

In [47]:
model = models.Sequential()
model.add(layers.Input(shape=modelconfig.input_shape))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(modelconfig.dropout))

model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(modelconfig.dropout))

model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(modelconfig.dropout))

model.add(Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Dropout(modelconfig.dropout))

model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(Dropout(modelconfig.dropout*2))
model.add(layers.Dense(modelconfig.num_classes, activation='softmax'))

checkpoint_path = "model/B_best_model_V1_3.keras"
checkpoint = ModelCheckpoint(checkpoint_path,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_generator,
                    validation_data=val_generator,
                    epochs=traingingconfig.epochs,
                    callbacks=[checkpoint],
                    verbose=1)

Epoch 1/15
[1m11153/11153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.0379 - loss: 3.4853
Epoch 1: val_accuracy improved from -inf to 0.03723, saving model to model/B_best_model_V1_3.keras
[1m11153/11153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m387s[0m 34ms/step - accuracy: 0.0379 - loss: 3.4853 - val_accuracy: 0.0372 - val_loss: 3.3582
Epoch 2/15
[1m11153/11153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0378 - loss: 3.3585
Epoch 2: val_accuracy did not improve from 0.03723
[1m11153/11153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 33ms/step - accuracy: 0.0378 - loss: 3.3585 - val_accuracy: 0.0365 - val_loss: 3.3582
Epoch 3/15
[1m11153/11153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.0374 - loss: 3.3590
Epoch 3: val_accuracy did not improve from 0.03723
[1m11153/11153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 33ms/step - accuracy: 0.0374 - loss:


KeyboardInterrupt



In [48]:
scores = model.evaluate(test_generator) 
print('Test loss: ', scores[0])
print('Test accuracy: ', scores[1])

[1m1396/1396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 17ms/step - accuracy: 0.0582 - loss: 3.3363
Test loss:  3.358131170272827
Test accuracy:  0.03600214794278145


In [23]:
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
model_path = 'model/B_best_model_V1_3.keras'
model = load_model(model_path)

img_path = 'asl_alphabet_train/W/100.jpg'
img = image.load_img(img_path, color_mode='grayscale', target_size=(32, 32))  # 根据模型输入尺寸调整
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # 增加一个维度，用于批量预测
img_array = img_array / 255.0  # 归一化

# 使用模型进行预测
predictions = model.predict(img_array)
predicted_label = labels[np.argmax(predictions)]

# 输出预测结果
print(f"Predicted Label: {predicted_label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
Predicted Label: W
