In [3]:
# Diagnostic cell: ask TensorFlow which compute devices it has registered in
# this runtime, to check whether a GPU is usable before training.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

2024-01-07 12:26:19.441597: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-07 12:26:19.441714: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 7287517470496325965
 xla_global_id: -1]

In [2]:
import os
# os.chdir('/code/janken')
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt

from tensorflow.keras.applications import MobileNetV2

import logging
# Logger for this notebook/module; DEBUG level so every message is emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# getLogger() returns the same object each time the cell is re-run, so attach
# the stream handler only once; otherwise every message is printed once per
# re-run of this cell.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())

# from janken_train import plot_result

# --- Training configuration -------------------------------------------------
# Seed passed to image_dataset_from_directory for the train/validation split.
SEED = 1234
# Directory the images are loaded from (local alternative kept for reference).
# IMG_DIR = './images'
IMG_DIR = '/content/drive/MyDrive/sbq/images'
# Number of images per dataset batch.
BATCH_SIZE = 128
# Images are loaded/resized to IMAGE_SIZE x IMAGE_SIZE pixels.
IMAGE_SIZE = 224
# L1 / L2 penalties applied to the kernel of the 512-unit Dense layer.
L1_FACTOR = 0
L2_FACTOR = 1e-2
# Width of the softmax output layer.
NUM_CLASSES = 3
# Upper bound on training epochs passed to model.fit.
N_EPOCHS = 100
# Initial learning rate given to the Adam optimizer.
LR = 1e-2
# Callbacks handed to model.fit(); both watch the validation loss.
callbacks = [
    # Stop after 20 epochs without improvement and roll back to the best weights.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
    # Multiply the learning rate by 0.8 after 10 stagnant epochs, never going
    # below 1e-6. The floor must stay a float: int(1e-6) evaluates to 0, which
    # silently removed the lower bound.
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=10, verbose=1, min_lr=1e-6),
]

def check_augmantation(train_dataset, max_images=10):
    """Display the first images of one training batch for a visual sanity check.

    Args:
        train_dataset: Iterable dataset exposing ``take`` and yielding
            ``(images, labels)`` batches; each image must offer ``.numpy()``.
        max_images: Maximum number of images to draw from the batch
            (default 10, one image per row).

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    for images, _ in train_dataset.take(1):  # fetch a single batch from the dataset
        # A batch may hold fewer images than requested (small dataset or a
        # final partial batch); never index past its end.
        n_show = min(max_images, len(images))
        if n_show <= 0:
            continue

        # 12.5 inches of height per image keeps the original 10 x 125 figure
        # for a full set of 10 images.
        plt.figure(figsize=(10, 12.5 * n_show))
        for i in range(n_show):
            plt.subplot(n_show, 1, i + 1)
            # Cast to float32 for imshow — assumes pixels are already scaled
            # to the [0, 1] range expected for float images; TODO confirm.
            plt.imshow(images[i].numpy().astype("float32"))
            plt.axis("off")
        plt.show()


def prepare_dataset():
    """Load the images under IMG_DIR and build the training/validation pipelines.

    The directory is split 80/20 (seeded with SEED) into batched datasets with
    one-hot labels. The training split is augmented, then both splits are
    resized and rescaled by 1/255. One augmented batch is displayed through
    ``check_augmantation`` before returning.

    Returns:
        A ``(train_dataset, valid_dataset)`` tuple of prefetched datasets.
    """
    # Random photometric and geometric distortions; applied to the training
    # split only, before the pixel values are rescaled.
    augment_layers = tf.keras.Sequential([
        layers.RandomBrightness(factor=0.3),
        layers.RandomContrast(factor=0.2),
        layers.RandomRotation(factor=0.1, fill_mode='nearest'),
        layers.RandomTranslation(
            height_factor=0.2,
            width_factor=0.2,
            fill_mode='nearest',
            interpolation='nearest',
        ),
        layers.RandomZoom(
            height_factor=0.2,
            width_factor=0.2,
            fill_mode='nearest',
            interpolation='nearest',
        ),
    ])
    # Deterministic steps shared by both splits.
    preprocess_layers = tf.keras.Sequential([
        layers.Resizing(IMAGE_SIZE, IMAGE_SIZE),
        layers.Rescaling(1./255),
    ])

    def _augment(images, labels):
        # training=True forces the random layers to be active inside map().
        return augment_layers(images, training=True), labels

    def _preprocess(images, labels):
        return preprocess_layers(images), labels

    def prepare(ds, shuffle=False, augment=False):
        """Chain optional augmentation, preprocessing, optional shuffle, prefetch."""
        pipeline = ds
        if augment:
            pipeline = pipeline.map(_augment, num_parallel_calls=tf.data.AUTOTUNE)
        pipeline = pipeline.map(_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        if shuffle:
            # The loader already batches, so this reorders whole batches.
            pipeline = pipeline.shuffle(1000)
        return pipeline.prefetch(buffer_size=tf.data.AUTOTUNE)

    # subset='both' makes the loader return the training and validation splits together.
    train_raw, valid_raw = tf.keras.utils.image_dataset_from_directory(
        directory=IMG_DIR,
        label_mode='categorical',
        batch_size=BATCH_SIZE,
        image_size=(IMAGE_SIZE, IMAGE_SIZE),
        seed=SEED,
        validation_split=0.2,
        subset='both',
    )

    train_dataset = prepare(train_raw, shuffle=True, augment=True)
    valid_dataset = prepare(valid_raw)

    # Visual sanity check of the augmented training images.
    check_augmantation(train_dataset)
    return train_dataset, valid_dataset

def define_model():
    """Build the classifier: a frozen ImageNet MobileNetV2 plus a small dense head.

    The model takes RGB images with pixel values in ``[0, 1]`` (what the data
    pipeline's ``Rescaling(1./255)`` produces) and returns softmax
    probabilities over NUM_CLASSES classes.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    base_model = MobileNetV2(
        include_top=False,
        alpha=1.0,
        weights='imagenet',
    )
    # Keep the pretrained feature extractor fixed; only the head is trained.
    base_model.trainable = False

    return tf.keras.Sequential([
        # Explicit input so the model is built at construction time.
        tf.keras.Input(shape=(None, None, 3)),
        # The ImageNet MobileNetV2 weights expect pixels in [-1, 1]
        # (mobilenet_v2.preprocess_input), while the pipeline yields [0, 1].
        # Map [0, 1] -> [-1, 1] so the frozen backbone sees in-distribution inputs.
        layers.Rescaling(scale=2.0, offset=-1.0),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.L1L2(l1=L1_FACTOR,l2=L2_FACTOR)),
        layers.Dropout(0.4),
        layers.Dense(NUM_CLASSES, activation='softmax')
    ])

# TODO: compare with https://github.com/osmr/imgclsmob/blob/master/tensorflow_/tensorflowcv/models/mobilenet.py
def _plot_history(history):
    """Plot the per-epoch loss and accuracy curves recorded by ``model.fit``."""
    curves = history.history
    _, axes = plt.subplots(1, 2, figsize=(12, 4))
    for ax, metric in zip(axes, ('loss', 'accuracy')):
        for key, label in ((metric, 'train'), ('val_' + metric, 'validation')):
            if key in curves:  # validation curves exist only when validation data was given
                ax.plot(curves[key], label=label)
        ax.set(title=metric, xlabel='epoch', ylabel=metric)
        ax.legend()
    plt.show()


def main():
    """Train the classifier end to end, save it, then report the results.

    Builds the datasets and the model, trains with Adam and categorical
    cross-entropy using the module-level ``callbacks``, writes the trained
    model to Google Drive, logs the model summary and plots the training curves.
    """
    train_dataset, valid_dataset = prepare_dataset()

    model = define_model()

    model.compile(
        optimizer= tf.keras.optimizers.Adam(learning_rate=LR),
        loss = tf.keras.losses.CategoricalCrossentropy(),
        metrics = ['accuracy'],
    )

    history = model.fit(
        train_dataset,
        validation_data = valid_dataset,
        epochs = N_EPOCHS,
        verbose = 1,
        # class_weight=class_weight,
        callbacks = callbacks,
    )

    # Persist the trained weights first so that a failure while reporting
    # cannot throw away the training run.
    model.save('/content/drive/MyDrive/sbq/test_model_l2=1e-2.h5')

    # summary() prints and returns None, so route its lines through the logger
    # rather than logging the return value. Extra keyword arguments some Keras
    # versions pass to print_fn are ignored.
    model.summary(print_fn=lambda line, **_: logger.info(line))

    # plot_result is not defined in this notebook (its import is commented
    # out), so use the local plotting helper.
    _plot_history(history)

# Entry point: start the full training run when this code is executed directly
# (a notebook cell also runs with __name__ == '__main__').
if __name__ == '__main__':
    main()

Found 2485 files belonging to 3 classes.
Using 1988 files for training.
Using 497 files for validation.


2024-01-03 11:11:08.906300: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2024-01-03 11:11:08.906321: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:129] retrieving CUDA diagnostic information for host: 428b269d765f
2024-01-03 11:11:08.906325: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:136] hostname: 428b269d765f
2024-01-03 11:11:08.906370: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:159] libcuda reported version is: 545.23.6
2024-01-03 11:11:08.906380: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] kernel reported version is: 535.129.3
2024-01-03 11:11:08.906382: E external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:244] kernel version 535.129.3 does not match DSO version 545.23.6 -- cannot find working devices in this configuration


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
Epoch 1/100

KeyboardInterrupt: 