In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Load the MNIST training and test tables from CSV.
train = pd.read_csv('/home/haruki/kaggle/mnist/data/train.csv')
test = pd.read_csv('/home/haruki/kaggle/mnist/data/test.csv')


def _as_image_batch(flat_rows):
    """Reshape flat rows to (-1, 28, 28, 1) float32 and divide by 255."""
    return flat_rows.reshape(-1, 28, 28, 1).astype('float32') / 255.0


# Features are every column except 'label'; the test table has no label column
# to drop, so all of its columns are used as-is.
X = _as_image_batch(train.drop(columns='label').values)
X_test = _as_image_batch(test.values)

# One-hot encode the target into 10 classes.
y = to_categorical(train['label'].values, num_classes=10)

# Hold out 20% of the labelled rows for validation (fixed seed for the split).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Data augmentation: each generated training batch gets a random rotation,
# zoom, and horizontal/vertical shift within the ranges below.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)
# NOTE: datagen.fit(...) is deliberately not called here. It only computes
# dataset statistics for featurewise_center, featurewise_std_normalization
# and zca_whitening; none of those options is enabled, so the call would just
# copy the whole training array and compute nothing. Add
# `datagen.fit(X_train)` back if one of those options is ever turned on.

# Seed the Python, NumPy and TensorFlow random number generators before any
# weights are created, so repeated runs start from the same random state.
tf.keras.utils.set_random_seed(42)

# Build the model: three convolution layers (the first two followed by 2x2
# max pooling), then a dense hidden layer and a 10-way softmax output.
model = tf.keras.models.Sequential([
    # The input shape is declared with an explicit Input layer.
    tf.keras.layers.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Compile: categorical cross-entropy matches the one-hot encoded targets
# that are passed to fit().
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks, both monitoring validation loss:
#   - early_stop: stop after 5 epochs without improvement and restore the
#     best weights seen so far.
#   - lr_schedule: halve the learning rate after 3 epochs without improvement.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
lr_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
)

# Train for up to 50 epochs on augmented batches of 64, validating on the
# held-out split after every epoch.
train_batches = datagen.flow(X_train, y_train, batch_size=64)
history = model.fit(
    train_batches,
    validation_data=(X_val, y_val),
    epochs=50,
    callbacks=[early_stop, lr_schedule],
)

# Predict class scores for the test images and keep the highest-scoring
# class index per row.
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)


Epoch 1/50


  self._warn_if_super_not_called()


[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 41ms/step - accuracy: 0.7000 - loss: 0.9043 - val_accuracy: 0.9725 - val_loss: 0.0875 - learning_rate: 0.0010
Epoch 2/50
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 42ms/step - accuracy: 0.9533 - loss: 0.1547 - val_accuracy: 0.9773 - val_loss: 0.0671 - learning_rate: 0.0010
Epoch 3/50
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 47ms/step - accuracy: 0.9657 - loss: 0.1075 - val_accuracy: 0.9825 - val_loss: 0.0588 - learning_rate: 0.0010
Epoch 4/50
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 41ms/step - accuracy: 0.9718 - loss: 0.0884 - val_accuracy: 0.9848 - val_loss: 0.0452 - learning_rate: 0.0010
Epoch 5/50
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 42ms/step - accuracy: 0.9773 - loss: 0.0720 - val_accuracy: 0.9906 - val_loss: 0.0326 - learning_rate: 0.0010
Epoch 6/50
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2

In [7]:
# Save the predictions as a CSV with a 1-based ImageId column and a Label
# column; the DataFrame index is not written.
image_ids = np.arange(len(predicted_labels)) + 1
submission = pd.DataFrame({'ImageId': image_ids, 'Label': predicted_labels})
submission.to_csv('/home/haruki/kaggle/mnist/submission.csv', index=False)
print("✅ submission.csv が出力されました！")

✅ submission.csv が出力されました！
