In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical

# Load the labelled training set and the unlabelled test set
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

# The training pixels are divided by 255.0 before being fed to the network,
# so the test pixels must be scaled the same way; otherwise predict() would
# see inputs on a different scale from what the model was trained on.
# The result is reshaped to (n_images, 28, 28, 1) to match the model input.
test = (test / 255.0).values.reshape(-1, 28, 28, 1)

In [42]:
# Preprocessing
# Separate the digit labels from the pixel columns
y = train['label']
X = train.drop(labels=['label'], axis=1)

# Scale the pixel values by 1/255, then reshape each row into a
# 3-dimensional image (height = 28px, width = 28px, channel = 1)
X = (X / 255.0).values.reshape(-1, 28, 28, 1)

# One-hot encode the labels into 10 classes
Y = to_categorical(y, num_classes=10)

# Hold out 20% of the labelled data as a validation split (fixed seed)
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [43]:
# Define the model: two convolution + max-pooling stages, then a dense
# classifier with a 10-way softmax output.
model = Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # input_shape= to the first layer of a Sequential model makes Keras
    # emit a UserWarning recommending this form instead.
    Input(shape=(28, 28, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax')
])

# Compile the model (categorical cross-entropy matches the one-hot labels)
model.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])

# Fit on the training split and report metrics on the held-out split.
# Alternative: fit on the full (X, Y) without validation_data to train on
# all of the labelled data.
history = model.fit(
    X_train,
    Y_train,
    batch_size=100,
    epochs=1,
    validation_data=(X_test, Y_test),
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m336/336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.8332 - loss: 0.5065 - val_accuracy: 0.9764 - val_loss: 0.0711


In [44]:
import numpy as np

# Run the model on the test images; one row of class scores per image
probabilities = model.predict(test)

# One-hot decoding: the predicted label is the index of the largest score
results = pd.Series(np.argmax(probabilities, axis=1), name="Label")

# Pair every prediction with a 1-based ImageId column
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

# Write the submission file without the DataFrame index
submission.to_csv('data/submission.csv', index=False)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step
