In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

# Load the train / test CSV files
train = pd.read_csv('/home/haruki/kaggle/mnist/data/train.csv')
test = pd.read_csv('/home/haruki/kaggle/mnist/data/test.csv')

# Preview the first five rows of each frame
for frame in (train, test):
    print(frame.head())


def _pixels_to_images(pixels):
    """Reshape flat pixel rows to (-1, 28, 28, 1) float32 arrays divided by 255."""
    images = pixels.reshape(-1, 28, 28, 1)
    return images.astype('float32') / 255


# train.csv: the 'label' column is the target, every other column is pixel data
y_train = to_categorical(train['label'].to_numpy(), num_classes=10)  # one-hot labels
X_train = _pixels_to_images(train.drop(columns='label').to_numpy())

# test.csv: every column is pixel data (no label column is dropped)
X_test = _pixels_to_images(test.to_numpy())

# Sanity-check the resulting array shapes
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")


2025-04-22 02:12:31.867000: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-22 02:12:31.868638: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-22 02:12:31.874683: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-22 02:12:31.887888: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745255551.910718   10144 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745255551.92

   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0         

In [16]:
# Build the CNN (same architecture as the previous version of this code)

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling are repeatable when the notebook is re-run from a fresh kernel.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

model = tf.keras.models.Sequential([
    # Declare the input shape with an explicit Input object; passing
    # `input_shape=` to the first layer of a Sequential model is discouraged
    # in Keras 3 and triggers a warning.
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')  # one output per class
])

# Compile the model; categorical cross-entropy matches the one-hot encoded labels
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model; validation_split=0.2 holds out part of the training data for validation
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Predict on the test set (no test labels are available here, so the
# predictions cannot be scored in this notebook)
predictions = model.predict(X_test)

# Show the predicted label for the first 10 test images
print("Predictions (first 10):")
print(np.argmax(predictions[:10], axis=1))  # argmax over the class axis gives the label


Epoch 1/5
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 17ms/step - accuracy: 0.8100 - loss: 0.5840 - val_accuracy: 0.9590 - val_loss: 0.1308
Epoch 2/5
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.9755 - loss: 0.0764 - val_accuracy: 0.9836 - val_loss: 0.0506
Epoch 3/5
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.9851 - loss: 0.0456 - val_accuracy: 0.9837 - val_loss: 0.0532
Epoch 4/5
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 21ms/step - accuracy: 0.9885 - loss: 0.0362 - val_accuracy: 0.9892 - val_loss: 0.0388
Epoch 5/5
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.9917 - loss: 0.0276 - val_accuracy: 0.9879 - val_loss: 0.0414
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step
Predictions (first 10):
[2 0 9 9 3 7 0 3 0 3]


In [17]:
import pandas as pd
import numpy as np

# Run the model over every test image
predictions = model.predict(X_test)

# Convert each row of scores into the index of its highest-scoring class
predicted_labels = predictions.argmax(axis=1)

# Assemble the submission table: 1-based ImageId plus the predicted Label
image_ids = np.arange(len(X_test)) + 1
output = pd.DataFrame({'ImageId': image_ids, 'Label': predicted_labels})

# Save as the submission file (without the DataFrame index)
output.to_csv('/home/haruki/kaggle/mnist/submission.csv', index=False)

print("提出用ファイルが作成されました！")


[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step
提出用ファイルが作成されました！
