In [12]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [13]:
train_df = pd.read_csv("train.csv", on_bad_lines='skip')
test_df = pd.read_csv("test.csv", on_bad_lines='skip')
submission_df = pd.read_csv("sample_submission.csv")

In [14]:
# Separate features (X) and labels (y)
X_train_full = train_df.drop(train_df.columns[0], axis=1)
y_train_full = train_df[train_df.columns[0]]
X_test = test_df.values

In [15]:
#  2. Preprocess Data

# Convert labels to integer
y_train_full_numeric = pd.to_numeric(y_train_full, errors='coerce')

valid_indices = y_train_full_numeric.dropna().index

X_train_full = X_train_full.loc[valid_indices].values
y_train_full = y_train_full_numeric.dropna().values


X_test = test_df.apply(pd.to_numeric, errors='coerce').dropna().values


X_train_full = X_train_full.astype(float) / 255.0
X_test = X_test.astype(float) / 255.0


# Reshape the data for CNN input: (samples, height, width, channels)
# The MNIST digits are 28x28 grayscale images, so channel is 1.
IMG_WIDTH = 28
IMG_HEIGHT = 28
CHANNELS = 1
NUM_CLASSES = 10

X_train_full = X_train_full.reshape(-1, IMG_WIDTH, IMG_HEIGHT, CHANNELS)
X_test = X_test.reshape(-1, IMG_WIDTH, IMG_HEIGHT, CHANNELS)

# One-hot encode the labels
y_train_full = to_categorical(y_train_full, num_classes=NUM_CLASSES)

X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.1, random_state=42
)

In [16]:

print("Defining CNN model...")
model = Sequential([
    # First Convolutional Block
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(IMG_WIDTH, IMG_HEIGHT, CHANNELS)),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Second Convolutional Block
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Fully Connected Layers
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax') # Output layer for 10 classes (0-9)
])

Defining CNN model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:

print("Compiling and training model...")

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


history = model.fit(
    X_train, y_train,
    batch_size=128,
    epochs=15,
    validation_data=(X_val, y_val),
    verbose=2
)

Compiling and training model...
Epoch 1/15
296/296 - 38s - 128ms/step - accuracy: 0.8878 - loss: 0.3436 - val_accuracy: 0.9776 - val_loss: 0.0747
Epoch 2/15
296/296 - 41s - 140ms/step - accuracy: 0.9716 - loss: 0.0917 - val_accuracy: 0.9848 - val_loss: 0.0462
Epoch 3/15
296/296 - 31s - 103ms/step - accuracy: 0.9795 - loss: 0.0669 - val_accuracy: 0.9883 - val_loss: 0.0364
Epoch 4/15
296/296 - 30s - 101ms/step - accuracy: 0.9831 - loss: 0.0546 - val_accuracy: 0.9902 - val_loss: 0.0310
Epoch 5/15
296/296 - 41s - 138ms/step - accuracy: 0.9851 - loss: 0.0460 - val_accuracy: 0.9917 - val_loss: 0.0289
Epoch 6/15
296/296 - 30s - 103ms/step - accuracy: 0.9869 - loss: 0.0408 - val_accuracy: 0.9912 - val_loss: 0.0301
Epoch 7/15
296/296 - 30s - 101ms/step - accuracy: 0.9888 - loss: 0.0342 - val_accuracy: 0.9910 - val_loss: 0.0281
Epoch 8/15
296/296 - 30s - 100ms/step - accuracy: 0.9897 - loss: 0.0325 - val_accuracy: 0.9890 - val_loss: 0.0289
Epoch 9/15
296/296 - 41s - 140ms/step - accuracy: 0.9906

In [18]:
print("Making predictions on test data...")

predictions = model.predict(X_test)

predicted_labels = np.argmax(predictions, axis=1)

Making predictions on test data...
[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step


In [19]:


print("Generating submission file...")
# The ImageId column should start at 1
image_ids = np.arange(1, len(predicted_labels) + 1)

submission = pd.DataFrame({
    'ImageId': image_ids,
    'Label': predicted_labels
})

submission.to_csv('submission.csv', index=False)
print("Submission file 'submission.csv' successfully created!")

Generating submission file...
Submission file 'submission.csv' successfully created!


In [23]:
import tensorflow as tf
# Save the model as a pickle file
joblib.dump(model, 'cnn_digit_model.keras')


['cnn_digit_model.keras']