In [6]:
#import ...

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split


In [7]:
# Daten laden
train_data = pd.read_csv("data/train.csv")
test_data = pd.read_csv("data/test.csv")
sample_submission = pd.read_csv("data/sample_submission.csv")
train_data

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Trainingsdaten aufteilen
X = train_data.iloc[:, 1:].values  # Pixelwerte
y = train_data.iloc[:, 0].values   # Labels

# Normalisierung der Pixelwerte
X = X / 255.0
X_test = test_data.values / 255.0

In [9]:
# Reshape für CNN (28x28 Bilder mit 1 Kanal)
X = X.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

# Labels in One-Hot-Encoding umwandeln
y = to_categorical(y, num_classes=10)

In [10]:
# Daten aufteilen in Training und Validierung
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# CNN-Modell erstellen
model = Sequential([
    Input(shape=(28, 28, 1)),  
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

In [11]:
# Modell kompilieren
model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])


In [14]:
# Modell trainieren
history = model.fit(X_train, y_train, 
                    validation_data=(X_val, y_val), 
                    epochs=10, 
                    batch_size=64)



Epoch 1/10
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9846 - loss: 0.0457 - val_accuracy: 0.9901 - val_loss: 0.0286
Epoch 2/10
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.9878 - loss: 0.0417 - val_accuracy: 0.9901 - val_loss: 0.0312
Epoch 3/10
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9865 - loss: 0.0430 - val_accuracy: 0.9918 - val_loss: 0.0287
Epoch 4/10
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9873 - loss: 0.0379 - val_accuracy: 0.9926 - val_loss: 0.0247
Epoch 5/10
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9892 - loss: 0.0346 - val_accuracy: 0.9921 - val_loss: 0.0256
Epoch 6/10
[1m525/525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9893 - loss: 0.0374 - val_accuracy: 0.9918 - val_loss: 0.0253
Epoch 7/10
[1m525/525[0m 

In [15]:
# Vorhersagen für den Testdatensatz
predictions = np.argmax(model.predict(X_test), axis=1)


[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [None]:
# Submission-Datei erstellen
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(predictions) + 1),
    "Label": predictions
})
submission.to_csv("submission.csv", index=False)