In [2]:
import os

# Hide TensorFlow INFO and WARNING messages.
# This must be set BEFORE tensorflow is imported: the native logger reads the
# variable during start-up, so setting it afterwards does not silence the
# messages emitted while the library loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import cv2
import time
import tensorflow as tf
from keras import layers
from keras import models
from keras.src.activations.activations import gelu


# Path to the local image dataset (one sub-directory per person).
# The FACE_IMAGE_DIR environment variable overrides the default so that the
# notebook can run on another machine without editing this cell.
image_dir = os.environ.get(
    'FACE_IMAGE_DIR',
    r'C:\Users\ishigaki\Downloads\archive\Original Images\Original Images',
)

In [3]:
# Accumulators filled by the face-extraction cell.
processed_images = []
labels = []
label_name = []

# Build the path with os.path.join so that it resolves on every platform
# (on Windows this yields the same 'data\...' string as before).
haarcascade_path = os.path.join('data', 'haarcascade_frontalface_default.xml')
print("Using Haar Cascade path:", haarcascade_path)
if not os.path.exists(haarcascade_path):
    raise FileNotFoundError(f'Haar Cascade XML file not found at {haarcascade_path}')

face_cascade = cv2.CascadeClassifier(haarcascade_path)
# CascadeClassifier does not raise when the file cannot be parsed; it just
# stays empty. Fail here instead of deep inside the extraction loop.
if face_cascade.empty():
    raise RuntimeError(f'Failed to load Haar Cascade classifier from {haarcascade_path}')

Using Haar Cascade path: data\haarcascade_frontalface_default.xml


In [4]:
from keras.utils import to_categorical

# Start from empty accumulators so that re-running this cell neither duplicates
# samples nor fails because `labels` was rebound to an array by a previous run.
processed_images = []
label_name = []
label_indices = []

# Sorted listings make the person -> label index assignment reproducible
# (os.listdir returns entries in arbitrary order).
for person_name in sorted(os.listdir(image_dir)):
    person_dir = os.path.join(image_dir, person_name)
    if not os.path.isdir(person_dir):
        continue

    # Number only real person directories, so that label_name[label] is always
    # the name of the person the label refers to, even when image_dir also
    # contains stray files.
    label = len(label_name)
    label_name.append(person_name)

    for image_name in sorted(os.listdir(person_dir)):
        image_path = os.path.join(person_dir, image_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode
            # (non-image or corrupt files); skip them instead of crashing.
            continue

        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray_image, scaleFactor=1.1, minNeighbors=5)

        # Every detected face becomes one 128x128 grayscale sample.
        for (x, y, w, h) in faces:
            face = gray_image[y:y+h, x:x+w]
            resized_face = cv2.resize(face, (128, 128))
            processed_images.append(resized_face)
            label_indices.append(label)

if not label_indices:
    raise ValueError(f'No faces were detected under {image_dir}')

# One-hot encode; num_classes is given explicitly so that the width always
# equals the number of people, even if the last person yielded no face.
labels = to_categorical(label_indices, num_classes=len(label_name))

KeyboardInterrupt: 

In [None]:
# Number of samples per batch for both the training and the test pipeline.
BATCH_SIZE = 256
# Let tf.data choose the parallelism / prefetch buffer size at runtime.
AUTOTUNE = tf.data.AUTOTUNE

# Preprocessing
def preprocess(image, label):
    """Convert one (image, label) pair to the dtypes used by the pipeline.

    Args:
        image: image tensor; converted to float32 with
            tf.image.convert_image_dtype.
        label: label tensor; cast to int32.

    Returns:
        Tuple of (float32 image, int32 label).
    """
    float_image = tf.image.convert_image_dtype(image, tf.float32)
    int_label = tf.cast(label, tf.int32)
    return float_image, int_label

# Data augmentation
def augmentation(image, label):
    """Apply random crop and brightness augmentation to a training image.

    The image is first cropped/padded to 40x40, then a random 32x32x3 patch
    is taken, its brightness is shifted randomly (max_delta 0.5) and the
    result is clipped to the [0, 1] range. The label is passed through.

    Args:
        image: image tensor with 3 channels (required by the 32x32x3 crop).
        label: label tensor, returned unchanged.

    Returns:
        Tuple of (augmented image, label).
    """
    padded = tf.image.resize_with_crop_or_pad(image, 40, 40)
    patch = tf.image.random_crop(padded, size = [32, 32, 3])
    shifted = tf.image.random_brightness(patch, max_delta = 0.5)
    clipped = tf.clip_by_value(shifted, 0.0, 1.0)
    return clipped, label

# NOTE(review): x_train / y_train / x_test / y_test are not defined in any
# cell visible above - confirm where they come from before running this cell.

# Training pipeline: shuffle -> preprocess -> augment -> batch -> prefetch.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(1024)
    .map(preprocess, num_parallel_calls = AUTOTUNE)
    .map(augmentation, num_parallel_calls = AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# Test pipeline: same preprocessing, but no shuffling and no augmentation.
test_data = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(preprocess, num_parallel_calls = AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)



In [None]:
import tensorflow_addons as tfa

class ConvNeXt_Block(tf.keras.Layer):
    """Residual ConvNeXt block with stochastic depth.

    Main branch: 7x7 depthwise conv -> LayerNorm -> 1x1 conv expanding to
    4 * out_channels -> GELU -> 1x1 conv back to out_channels -> LayerNorm
    -> linear activation. The branch output is merged with the shortcut
    through tfa.layers.StochasticDepth.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by the block.
        factor: stochastic-depth drop probability of the main branch
            (0.0 never drops it). It is converted to the survival
            probability expected by tfa.layers.StochasticDepth.
    """

    def __init__(self, in_channels, out_channels, factor):
        super().__init__()

        # Layer 1 of the block:
        # a depthwise convolution replaces the regular convolution.
        self.layer_1 = layers.DepthwiseConv2D(kernel_size = (7, 7), strides = (1, 1), padding = 'same', use_bias = False)

        # Layer 2 of the block:
        # LayerNormalization replaces BatchNormalization.
        self.layer_2 = layers.LayerNormalization(epsilon = 1e-6)
        # Inverted bottleneck: the channel count is expanded 4x.
        self.layer_3 = layers.Conv2D(4 * out_channels, kernel_size = 1, strides = 1, padding = 'valid', use_bias = False)

        # Layer 3 of the block:
        # GELU replaces ReLU as the activation function.
        self.layer_4 = layers.Activation(tf.nn.gelu)
        self.layer_5 = layers.Conv2D(out_channels, kernel_size = 1, strides = 1, padding = 'valid', use_bias = False)

        self.layer_6 = layers.LayerNormalization(epsilon = 1e-6)
        self.layer_7 = layers.Activation('linear')

        self.shortcut = self.short_cut(in_channels, out_channels)
        # Stochastic depth randomly reduces the block to its shortcut during
        # training. StochasticDepth takes a *survival* probability, so the
        # drop probability `factor` is converted here.
        self.stochastic = tfa.layers.StochasticDepth(1.0 - factor)

    def short_cut(self, in_channels, out_channels):
        """Return the callable applied to the input on the shortcut path.

        When the channel counts differ, a 1x1 convolution projects the input
        to out_channels so that it can be added to the main branch;
        otherwise the input is passed through unchanged.
        """
        if in_channels != out_channels:
            # ln_sc is created here but is not applied anywhere in call().
            self.ln_sc = layers.LayerNormalization()
            self.conv_sc = layers.Conv2D(out_channels, kernel_size = 1, strides = 1, padding = 'same', use_bias = False)
            return self.conv_sc
        else:
            return lambda x: x

    def call(self, x):
        """Run the block on x and return shortcut + (possibly dropped) branch."""
        shortcut = self.shortcut(x)
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.layer_4(x)
        x = self.layer_5(x)
        x = self.layer_6(x)
        x = self.layer_7(x)
        # StochasticDepth expects [shortcut, residual]: the second element is
        # the one that gets randomly dropped, so the main branch goes second.
        x = self.stochastic([shortcut, x])
        return x

# Finally, the ConvNeXt model is assembled from ConvNeXt blocks. The hierarchy uses (3, 3, 9, 3) ConvNeXt blocks per stage, with a downsampling layer inserted between consecutive stages.

In [None]:
class ConvNeXt(tf.keras.Model):
    """ConvNeXt classifier built from four stages of ConvNeXt_Block layers.

    Structure: LayerNorm + 4x4/stride-4 stem convolution, then stages of
    (3, 3, 9, 3) blocks with 96, 192, 384 and 768 channels. Consecutive
    stages are separated by LayerNorm + 2x2/stride-2 convolution. The head
    is global average pooling, LayerNorm and a softmax Dense layer.

    Args:
        input_shape: shape of one input image, forwarded to the stem conv.
        output_dim: number of units of the final softmax layer.
    """

    @staticmethod
    def _make_stage(channels, factor, depth):
        """Create `depth` blocks that keep the channel count at `channels`."""
        return [ConvNeXt_Block(channels, channels, factor) for _ in range(depth)]

    def __init__(self, input_shape, output_dim):
        super().__init__()

        # Stem
        self.ln_pre = layers.LayerNormalization(epsilon = 1e-6)
        self.stem = layers.Conv2D(
            96, kernel_size = 4, strides = 4, use_bias = False, input_shape = input_shape
        )

        # Stage 1, followed by its downsampling layers
        self.stage_1 = self._make_stage(96, 0.1, 3)
        self.ln_1 = layers.LayerNormalization(epsilon = 1e-6)
        self.ds_1 = layers.Conv2D(192, kernel_size = 2, strides = 2, use_bias = False)

        # Stage 2, followed by its downsampling layers
        self.stage_2 = self._make_stage(192, 0.1, 3)
        self.ln_2 = layers.LayerNormalization(epsilon = 1e-6)
        self.ds_2 = layers.Conv2D(384, kernel_size = 2, strides = 2, use_bias = False)

        # Stage 3, followed by its downsampling layers
        self.stage_3 = self._make_stage(384, 0.2, 9)
        self.ln_3 = layers.LayerNormalization(epsilon = 1e-6)
        self.ds_3 = layers.Conv2D(768, kernel_size = 2, strides = 2, use_bias = False)

        # Stage 4
        self.stage_4 = self._make_stage(768, 0.3, 3)

        # Classification head
        self.pooling = layers.GlobalAveragePooling2D()
        self.ln_4 = layers.LayerNormalization(epsilon = 1e-6)
        self.activation = layers.Dense(output_dim, activation = 'softmax')

    def call(self, x):
        """Return the softmax class probabilities for a batch of images."""
        x = self.ln_pre(x)
        x = self.stem(x)

        # Each of the first three stages is followed by LayerNorm + downsampling.
        stages_with_downsampling = (
            (self.stage_1, self.ln_1, self.ds_1),
            (self.stage_2, self.ln_2, self.ds_2),
            (self.stage_3, self.ln_3, self.ds_3),
        )
        for blocks, norm, downsample in stages_with_downsampling:
            for block in blocks:
                x = block(x)
            x = downsample(norm(x))

        for block in self.stage_4:
            x = block(x)

        pooled = self.pooling(x)
        return self.activation(self.ln_4(pooled))


In [None]:
# Build the network for 32x32 RGB inputs with a 10-way softmax output.
# NOTE(review): both the input shape and the class count are hard-coded;
# confirm that they match the images and the one-hot label width fed by
# train_data / test_data.
model = ConvNeXt((32, 32, 3), 10)
model.build(input_shape = (None, 32, 32, 3))
model.compile(
    # AdamW is referenced through tf.keras.optimizers: the bare name `AdamW`
    # is never imported in this notebook and would raise a NameError.
    optimizer = tf.keras.optimizers.AdamW(learning_rate = 0.001, weight_decay = 0.004),
    # Categorical cross-entropy expects one-hot labels; label smoothing 0.1.
    loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing = 0.1),
    metrics = ['accuracy']
)


In [None]:
# Train on the augmented training pipeline for 200 epochs.
model.fit(x = train_data, epochs = 200)


In [None]:
# Evaluate on the test pipeline; result[1] is read as the accuracy value
# and reported as a percentage rounded to two decimals.
result = model.evaluate(test_data)
accuracy_percent = round(result[1] * 100, 2)
print('test accuracy:{}%'.format(accuracy_percent))
