In [16]:
import pandas as pd
import numpy as np
import os
import cv2
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from matplotlib import pyplot as plt

Matplotlib is building the font cache; this may take a moment.


In [17]:
# Locations of the training image folder and of the image/label table (CSV)
data_dir = '/Users/sota/Documents/Nishika/日本絵画 顔分類コンペ コード/data/train'
csv_path = '/Users/sota/Documents/Nishika/日本絵画 顔分類コンペ コード/data/train.csv'

# Load the image/label table into a DataFrame
df = pd.read_csv(csv_path)

# Keep only the rows whose gender_status is not 5
keep_mask = df['gender_status'].ne(5)
df_filtered = df.loc[keep_mask]

In [20]:
# Accumulators for the image arrays and their labels
x_data = []
y_data = []

# Load every image listed in the filtered table as a 100x100 grayscale array.
# Only the 'image' and 'gender_status' columns are needed, so iterate over
# those two columns directly instead of materialising a Series per row.
for image_name, label in zip(df_filtered['image'], df_filtered['gender_status']):
    img_path = os.path.join(data_dir, image_name)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # The image could not be read: report the path and skip this row
        print(f"画像が見つからないか、読み込めませんでした: {img_path}")
        continue
    x_data.append(cv2.resize(img, (100, 100)))
    y_data.append(label)

# Convert to NumPy arrays; images get an explicit single channel axis
x_data = np.array(x_data).reshape(-1, 100, 100, 1)
y_data = np.array(y_data)

# Compute balanced class weights.
# compute_class_weight returns one weight per entry of `classes`, in that
# order, so each weight must be keyed by its label VALUE, not its position.
# Keying by position is wrong whenever the present labels are not exactly
# 0..k-1 (the filtered table excludes label 5), because the weights of the
# labels after the gap would be attached to the wrong class index.
present_classes = np.unique(y_data)
class_weights = compute_class_weight(class_weight='balanced', classes=present_classes, y=y_data)
# Start from a neutral weight for each of the 8 output indices so the dict
# has contiguous keys 0..7, then overwrite the labels that actually occur.
class_weights_dict = {label: 1.0 for label in range(8)}
class_weights_dict.update(
    {int(label): float(weight) for label, weight in zip(present_classes, class_weights)}
)

# Stratified 70/30 split into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size=0.3, random_state=42, stratify=y_data)

# Shuffle each split (images and labels are permuted together)
x_train, y_train = shuffle(x_train, y_train, random_state=42)
x_val, y_val = shuffle(x_val, y_val, random_state=42)

# One-hot encode the labels over 8 classes
y_train_categorical = to_categorical(y_train, num_classes=8)
y_val_categorical = to_categorical(y_val, num_classes=8)

In [23]:
# Model definition: three Conv -> BatchNorm -> MaxPool -> Dropout stages
# with 32, 64 and 128 filters, applied to 100x100 single-channel inputs
inputs = Input(shape=(100, 100, 1))
x = inputs
for n_filters in (32, 64, 128):
    x = Conv2D(n_filters, (3, 3), activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)
    x = Dropout(0.5)(x)

# One more normalisation + dropout pass before the classifier head
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# Flatten the feature maps and classify into 8 classes with softmax
x = Flatten()(x)
outputs = Dense(8, activation='softmax')(x)

history_model = Model(inputs=inputs, outputs=outputs)

# Compile with Adam (default settings) and categorical cross-entropy
history_model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Early stopping: watch val_loss, wait 10 epochs, restore the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [24]:
# Train the model; the returned History object is kept in `history`
history = history_model.fit(
    x_train,
    y_train_categorical,
    validation_data=(x_val, y_val_categorical),
    epochs=50,
    batch_size=64,
    shuffle=True,
    class_weight=class_weights_dict,
    callbacks=[early_stopping],
)

Epoch 1/50


2025-05-30 15:26:55.620729: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 69ms/step - accuracy: 0.2209 - loss: 2.2186 - val_accuracy: 0.2406 - val_loss: 1.9210
Epoch 2/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - accuracy: 0.3665 - loss: 1.6413 - val_accuracy: 0.2052 - val_loss: 2.1722
Epoch 3/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.3767 - loss: 1.6382 - val_accuracy: 0.3601 - val_loss: 1.6273
Epoch 4/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.4090 - loss: 1.5352 - val_accuracy: 0.1706 - val_loss: 2.1188
Epoch 5/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.4441 - loss: 1.2956 - val_accuracy: 0.2862 - val_loss: 2.0157
Epoch 6/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 36ms/step - accuracy: 0.4596 - loss: 1.3308 - val_accuracy: 0.4057 - val_loss: 1.6911
Epoch 7/50
[1m47/47[0m [32m━━━━━━━━━━━━━━━

In [None]:
# Directory holding the test images
test_data_dir = '/Users/sota/Documents/Nishika/日本絵画 顔分類コンペ コード/data/test'

# Load and preprocess the test images (same grayscale + 100x100 resize as
# the training images)
x_test = []
image_names = []

# os.listdir returns entries in arbitrary order; sort them so the rows of the
# output CSV come out in the same order on every run and every machine.
for img_name in sorted(os.listdir(test_data_dir)):
    img_path = os.path.join(test_data_dir, img_name)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # Entry could not be read as an image: report it and move on
        print(f"画像が見つからないか、読み込めませんでした: {img_path}")
        continue
    x_test.append(cv2.resize(img, (100, 100)))
    image_names.append(img_name)

# Fail early with a clear message instead of an opaque error inside predict
if not x_test:
    raise RuntimeError(f"テスト画像を1枚も読み込めませんでした: {test_data_dir}")

# Convert to a NumPy array with an explicit single channel axis
x_test = np.array(x_test).reshape(-1, 100, 100, 1)

# Predict class probabilities and take the most likely class per image
predictions = history_model.predict(x_test)
predicted_classes = np.argmax(predictions, axis=1)

# Collect file names and predicted labels in a DataFrame
results_df = pd.DataFrame({'image': image_names, 'gender_status': predicted_classes})

# Write the results to a CSV file (no index column)
results_csv_path = '/Users/sota/Documents/Nishika/日本絵画 顔分類コンペ コード/data//predictions.csv'
results_df.to_csv(results_csv_path, index=False)

print(f"予測結果が {results_csv_path} に保存されました。")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
予測結果が /Users/sota/Documents/Nishika/日本絵画 顔分類コンペ コード/data//predictions.csv に保存されました。
