ルール違反ではないので、手動で test_master.tsv を作成するのはあり。

In [2]:

import os
import pandas as pd
from PIL import Image
import zipfile
from io import BytesIO
import numpy as np
from google.colab import drive
from sklearn.preprocessing import LabelEncoder


# Mount Google Drive so the competition files are reachable from the Colab VM.
drive.mount('/content/drive')

# Single definition of the dataset folder (it was previously repeated inline).
DATA_DIR = '/content/drive/My Drive/Colab Notebooks/【練習問題】モノクロ顔画像の感情分類'

# List the folder contents as a quick sanity check.
file_list = os.listdir(DATA_DIR)
print(file_list)

# Load the training metadata table.
train_master_path = os.path.join(DATA_DIR, 'train_master.tsv')
train_master = pd.read_csv(train_master_path, sep='\t')

# Label-encode each categorical column with its own encoder. Re-fitting one
# shared encoder overwrote its classes, so only the last column could be
# decoded again. `le` still ends up bound to the 'eyes' encoder as before.
label_encoders = {}
for column in ('userid', 'pose', 'eyes'):
    le = LabelEncoder()
    train_master[column] = le.fit_transform(train_master[column])
    label_encoders[column] = le

# Mapping from expression name to integer class id.
replace_dict = {'angry': 0, 'sad': 1, 'neutral': 2, 'happy': 3}

# Use `.map` instead of `.replace`: `.replace` on an object column relies on
# implicit downcasting to produce integers, which newer pandas deprecates and
# which can leave an object-dtype column that downstream estimators reject.
expression_codes = train_master['expression'].map(replace_dict)
unmapped = train_master.loc[expression_codes.isna(), 'expression'].unique()
if len(unmapped) > 0:
    # Fail early with a clear message instead of failing later inside training.
    raise ValueError(f"Unknown expression labels in train_master: {list(unmapped)}")
train_master['expression'] = expression_codes.astype('int64')

print(train_master.head())


Mounted at /content/drive
['train.zip', 'test.zip', 'train_master.tsv', '.ipynb_checkpoints', '【練習問題】モノクロ顔画像の感情分類']
               id  userid  pose  expression  eyes
0  train_0000.jpg       6     2           0     0
1  train_0001.jpg      11     2           0     0
2  train_0002.jpg      15     1           1     1
3  train_0003.jpg      10     0           2     1
4  train_0004.jpg       0     0           3     1


In [11]:
# ベーシックなモデル
import zipfile
from PIL import Image
from io import BytesIO
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

# Load the training images together with one label column.
def load_data(label_col):
    """Read the training images listed in ``train_master`` from the train zip.

    Uses the notebook-level globals ``train_zip_path`` (path of the zip
    archive) and ``train_master`` (DataFrame with an ``id`` column plus the
    label columns).

    Args:
        label_col: Name of the ``train_master`` column to return as labels.

    Returns:
        Tuple ``(X, y)``. ``X`` is the stacked image array divided by 255.0 and
        ``y`` is the matching label array. Rows whose image is not present in
        the archive are skipped, so ``X`` and ``y`` always have equal length
        (which may be shorter than ``train_master``).
    """
    import warnings

    images = []
    labels = []
    skipped = 0

    with zipfile.ZipFile(train_zip_path, 'r') as archive:
        # A set gives O(1) membership tests; scanning the name list for every
        # row made the loop quadratic in the number of images.
        available = set(archive.namelist())

        for image_id, label in zip(train_master['id'], train_master[label_col]):
            member = 'train/' + image_id
            if member not in available:
                skipped += 1
                continue
            with archive.open(member) as img_file:
                img = Image.open(BytesIO(img_file.read()))
                # PIL takes (width, height), so the resulting array has
                # shape (120, 128) = (height, width).
                img = img.resize((128, 120))
                images.append(np.array(img))
                labels.append(label)

    if skipped:
        # Surface the mismatch instead of dropping rows silently.
        warnings.warn(
            f"{skipped} row(s) of train_master have no image in {train_zip_path} and were skipped"
        )

    X = np.array(images)
    y = np.array(labels)
    X = X / 255.0
    return X, y

# Build and train a small CNN classifier.
def build_and_train_model(X, y, num_classes, epochs):
    """Build a three-block CNN, train it on ``(X, y)`` and return it.

    Args:
        X: Image array. A 3-D array ``(N, H, W)`` is treated as single-channel
            and gets a trailing channel axis; a 4-D array is used as is.
        y: Integer class ids, one per image.
        num_classes: Number of output classes (width of the softmax layer).
        epochs: Number of training epochs.

    Returns:
        The trained Keras ``Sequential`` model.
    """
    y_categorical = to_categorical(y, num_classes=num_classes)

    # Derive the input shape from the data. The previous hard-coded
    # (128, 120, 1) was transposed relative to what load_data produces:
    # PIL's resize((128, 120)) is (width, height), giving (120, 128) arrays.
    X = np.asarray(X)
    if X.ndim == 3:
        X = X[..., None]
    input_shape = X.shape[1:]

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # validation_split holds out the last 20% of the samples for validation.
    model.fit(X, y_categorical, epochs=epochs, batch_size=32, validation_split=0.2)
    return model

# Location of the training image archive (read by load_data).
train_zip_path = '/content/drive/My Drive/Colab Notebooks/【練習問題】モノクロ顔画像の感情分類/train.zip'


def _fit_stage(banner, label_col, epochs):
    """Print ``banner``, load the data for ``label_col`` and train one CNN on it."""
    print(banner)
    features, targets = load_data(label_col)
    n_classes = len(np.unique(targets))
    fitted = build_and_train_model(features, targets, n_classes, epochs)
    return features, targets, n_classes, fitted


# Stage 1/4: classifier for the userid column.
X_userid, y_userid, num_classes_userid, model_userid = _fit_stage(
    'userid_training: (1/4)', 'userid', 7)

# Stage 2/4: classifier for the pose column.
X_pose, y_pose, num_classes_pose, model_pose = _fit_stage(
    'pose_training: (2/4)', 'pose', 10)

# Stage 3/4: classifier for the eyes column.
X_eyes, y_eyes, num_classes_eyes, model_eyes = _fit_stage(
    'eyes_training: (3/4)', 'eyes', 11)

# Stage 4/4: classifier for the expression column (the competition target).
X_expression, y_expression, num_classes_expression, model_expression = _fit_stage(
    'expression_training: (4/4)', 'expression', 63)



from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

# Obtain the predictions of one base model.
def get_base_model_predictions(model, X):
    """Return whatever ``model.predict`` produces for the inputs ``X``."""
    predictions = model.predict(X)
    return predictions

# Predictions of every base model on the training images.
# NOTE(review): each base model is scored on the same images it was trained
# on, so the meta features (and the accuracy printed below) are optimistic.
X_userid_preds = get_base_model_predictions(model_userid, X_userid)
X_pose_preds = get_base_model_predictions(model_pose, X_pose)
X_eyes_preds = get_base_model_predictions(model_eyes, X_eyes)
X_expression_preds = get_base_model_predictions(model_expression, X_expression)

# Concatenate the per-model outputs column-wise into the meta feature matrix.
X_meta = np.hstack([X_userid_preds, X_pose_preds, X_eyes_preds, X_expression_preds])

# Targets for the meta model. Take the labels that load_data returned with the
# images rather than the raw train_master column: load_data skips rows whose
# image is missing, so only y_expression is guaranteed to line up with X_meta.
expression = y_expression

# Train the meta model on an 80/20 split of the meta features.
print('meta_training:')
X_train_meta, X_test_meta, y_train_meta, y_test_meta = train_test_split(X_meta, expression, test_size=0.2, random_state=42)
meta_model = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42
)
meta_model.fit(X_train_meta, y_train_meta)

# Evaluate the meta model on the held-out 20%.
accuracy = meta_model.score(X_test_meta, y_test_meta)
print(f"Meta Model Accuracy: {accuracy}")


userid_training: (1/4)
Epoch 1/7
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 377ms/step - accuracy: 0.0565 - loss: 2.9985 - val_accuracy: 0.0317 - val_loss: 2.9543
Epoch 2/7
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 334ms/step - accuracy: 0.0589 - loss: 2.9364 - val_accuracy: 0.0476 - val_loss: 2.9196
Epoch 3/7
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 627ms/step - accuracy: 0.0937 - loss: 2.8776 - val_accuracy: 0.0635 - val_loss: 2.8779
Epoch 4/7
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 363ms/step - accuracy: 0.2940 - loss: 2.8255 - val_accuracy: 0.2698 - val_loss: 2.8317
Epoch 5/7
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 357ms/step - accuracy: 0.3840 - loss: 2.7636 - val_accuracy: 0.2540 - val_loss: 2.7826
Epoch 6/7
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 634ms/step - accuracy: 0.4021 - loss: 2.7161 - val_accuracy: 0.3333 - val_loss: 2.7374
Epoch 7/7
[1m8/8[0m 

KeyboardInterrupt: 

In [14]:
# fine-tuning
import zipfile
from PIL import Image
from io import BytesIO
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

# Load the training images together with one label column.
def load_data(label_col):
    """Read the training images listed in ``train_master`` from the train zip.

    Uses the notebook-level globals ``train_zip_path`` (path of the zip
    archive) and ``train_master`` (DataFrame with an ``id`` column plus the
    label columns).

    Args:
        label_col: Name of the ``train_master`` column to return as labels.

    Returns:
        Tuple ``(X, y)``. ``X`` is the stacked image array divided by 255.0 and
        ``y`` is the matching label array. Rows whose image is not present in
        the archive are skipped, so ``X`` and ``y`` always have equal length
        (which may be shorter than ``train_master``).
    """
    import warnings

    images = []
    labels = []
    skipped = 0

    with zipfile.ZipFile(train_zip_path, 'r') as archive:
        # A set gives O(1) membership tests; scanning the name list for every
        # row made the loop quadratic in the number of images.
        available = set(archive.namelist())

        for image_id, label in zip(train_master['id'], train_master[label_col]):
            member = 'train/' + image_id
            if member not in available:
                skipped += 1
                continue
            with archive.open(member) as img_file:
                img = Image.open(BytesIO(img_file.read()))
                # PIL takes (width, height), so the resulting array has
                # shape (120, 128) = (height, width).
                img = img.resize((128, 120))
                images.append(np.array(img))
                labels.append(label)

    if skipped:
        # Surface the mismatch instead of dropping rows silently.
        warnings.warn(
            f"{skipped} row(s) of train_master have no image in {train_zip_path} and were skipped"
        )

    X = np.array(images)
    y = np.array(labels)
    X = X / 255.0
    return X, y

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

def build_and_train_model(X, y, num_classes, epochs):
    """Build a three-block CNN, freeze all but its last two layers, and train it.

    Args:
        X: Image array. A 3-D array ``(N, H, W)`` is treated as single-channel
            and gets a trailing channel axis; a 4-D array is used as is.
        y: Integer class ids, one per image.
        num_classes: Number of output classes (width of the softmax layer).
        epochs: Number of training epochs.

    Returns:
        The trained Keras ``Sequential`` model.
    """
    y_categorical = to_categorical(y, num_classes=num_classes)

    # Derive the input shape from the data. The previous hard-coded
    # (128, 120, 1) was transposed relative to what load_data produces:
    # PIL's resize((128, 120)) is (width, height), giving (120, 128) arrays.
    X = np.asarray(X)
    if X.ndim == 3:
        X = X[..., None]
    input_shape = X.shape[1:]

    # Build the model.
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])

    # Freeze everything except the last two (Dense) layers.
    # NOTE(review): the model is created fresh right above, so the frozen
    # convolution layers keep their initial weights; no pretrained weights
    # are loaded here. Confirm this is the intended "fine-tuning" setup.
    for layer in model.layers[:-2]:
        layer.trainable = False

    # Compile with a lower learning rate, as chosen for fine-tuning.
    model.compile(optimizer=Adam(learning_rate=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train; validation_split holds out the last 20% of the samples.
    model.fit(X, y_categorical, epochs=epochs, batch_size=32, validation_split=0.2)

    return model


# Location of the training image archive (read by load_data).
train_zip_path = '/content/drive/My Drive/Colab Notebooks/【練習問題】モノクロ顔画像の感情分類/train.zip'


def _fit_stage(banner, label_col, epochs):
    """Print ``banner``, load the data for ``label_col`` and train one CNN on it."""
    print(banner)
    features, targets = load_data(label_col)
    n_classes = len(np.unique(targets))
    fitted = build_and_train_model(features, targets, n_classes, epochs)
    return features, targets, n_classes, fitted


# Stage 1/4: classifier for the userid column.
X_userid, y_userid, num_classes_userid, model_userid = _fit_stage(
    'userid_training: (1/4)', 'userid', 50)

# Stage 2/4: classifier for the pose column.
X_pose, y_pose, num_classes_pose, model_pose = _fit_stage(
    'pose_training: (2/4)', 'pose', 80)

# Stage 3/4: classifier for the eyes column.
X_eyes, y_eyes, num_classes_eyes, model_eyes = _fit_stage(
    'eyes_training: (3/4)', 'eyes', 80)

# Stage 4/4: classifier for the expression column (the competition target).
X_expression, y_expression, num_classes_expression, model_expression = _fit_stage(
    'expression_training: (4/4)', 'expression', 300)



from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

# Obtain the predictions of one base model.
def get_base_model_predictions(model, X):
    """Return whatever ``model.predict`` produces for the inputs ``X``."""
    predictions = model.predict(X)
    return predictions

# Predictions of every base model on the training images.
# NOTE(review): each base model is scored on the same images it was trained
# on, so the meta features (and the accuracy printed below) are optimistic.
X_userid_preds = get_base_model_predictions(model_userid, X_userid)
X_pose_preds = get_base_model_predictions(model_pose, X_pose)
X_eyes_preds = get_base_model_predictions(model_eyes, X_eyes)
X_expression_preds = get_base_model_predictions(model_expression, X_expression)

# Concatenate the per-model outputs column-wise into the meta feature matrix.
X_meta = np.hstack([X_userid_preds, X_pose_preds, X_eyes_preds, X_expression_preds])

# Targets for the meta model. Take the labels that load_data returned with the
# images rather than the raw train_master column: load_data skips rows whose
# image is missing, so only y_expression is guaranteed to line up with X_meta.
expression = y_expression

# Train the meta model on an 80/20 split of the meta features.
print('meta_training:')
X_train_meta, X_test_meta, y_train_meta, y_test_meta = train_test_split(X_meta, expression, test_size=0.2, random_state=42)
meta_model = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42
)
meta_model.fit(X_train_meta, y_train_meta)

# Evaluate the meta model on the held-out 20%.
accuracy = meta_model.score(X_test_meta, y_test_meta)
print(f"Meta Model Accuracy: {accuracy}")

userid_training: (1/4)
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 403ms/step - accuracy: 0.0346 - loss: 2.9919 - val_accuracy: 0.1111 - val_loss: 2.9435
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 338ms/step - accuracy: 0.2311 - loss: 2.8996 - val_accuracy: 0.1746 - val_loss: 2.8902
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 570ms/step - accuracy: 0.3096 - loss: 2.8157 - val_accuracy: 0.2063 - val_loss: 2.8375
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 442ms/step - accuracy: 0.3984 - loss: 2.7345 - val_accuracy: 0.2222 - val_loss: 2.7774
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 315ms/step - accuracy: 0.4088 - loss: 2.6711 - val_accuracy: 0.3016 - val_loss: 2.7214
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 338ms/step - accuracy: 0.5020 - loss: 2.5970 - val_accuracy: 0.3810 - val_loss: 2.6579
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [15]:
from re import X
# 提出
import zipfile
from PIL import Image
from io import BytesIO
import numpy as np
import pandas as pd

def load_test_data(test_zip_path='/content/drive/My Drive/Colab Notebooks/【練習問題】モノクロ顔画像の感情分類/test.zip'):
    """Read every image stored in the test zip archive.

    Args:
        test_zip_path: Path of the zip archive. Defaults to the test archive
            in the competition folder on Google Drive.

    Returns:
        Tuple ``(X_test, image_files)``. ``X_test`` is the stacked image array
        divided by 255.0; ``image_files`` lists the archive member names in
        the same order as the rows of ``X_test``.
    """
    images = []
    image_files = []

    with zipfile.ZipFile(test_zip_path, 'r') as z:
        for info in z.infolist():
            # Archives may contain explicit directory entries (e.g. 'test/');
            # they are not images and would make Image.open fail.
            if info.is_dir():
                continue
            with z.open(info) as img_file:
                img = Image.open(BytesIO(img_file.read()))
                # Same (width, height) resize as used for the training images.
                img = img.resize((128, 120))
                images.append(np.array(img))
            image_files.append(info.filename)

    X_test = np.array(images)
    X_test = X_test / 255.0
    return X_test, image_files

# Load the test images and their archive member names.
X_test, image_files = load_test_data()

# Predictions of every base model on the test images.
X_userid_test_preds = get_base_model_predictions(model_userid, X_test)
X_pose_test_preds = get_base_model_predictions(model_pose, X_test)
X_eyes_test_preds = get_base_model_predictions(model_eyes, X_test)
X_expression_test_preds = get_base_model_predictions(model_expression, X_test)

# Stack the test-set predictions into the meta feature matrix.
# Bug fix: the last block used X_expression_preds (predictions on the
# TRAINING images) instead of X_expression_test_preds.
X_meta_test = np.hstack([X_userid_test_preds, X_pose_test_preds, X_eyes_test_preds, X_expression_test_preds])

# Final prediction with the meta model.
y_meta_test_preds = meta_model.predict(X_meta_test)

# Invert the expression mapping (class id -> expression name).
reverse_replace_dict = {v: k for k, v in replace_dict.items()}

# Convert the predictions to a pandas Series so they can be mapped.
y_meta_test_preds_series = pd.Series(y_meta_test_preds)

# Translate class ids back to expression names.
y_meta_test_preds_replaced = y_meta_test_preds_series.map(reverse_replace_dict)

# Strip the 'test/' folder prefix from every file name.
image_files = [filename.replace('test/', '') for filename in image_files]

# Assemble the predictions in the submission format.
results_df = pd.DataFrame({
    'ファイル名': image_files,
    '予測した感情': y_meta_test_preds_replaced
})

# Save as a CSV file.
output_csv_path = '/content/sample_submit.csv'
results_df.to_csv(output_csv_path, index=False, encoding='utf-8-sig')

print(f'予測結果が{output_csv_path}に保存されました。')


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 411ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 461ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 566ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 437ms/step
予測結果が/content/sample_submit.csvに保存されました。
