In [2]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from functools import partial
import math
import matplotlib.pyplot as plt

In [3]:
def scale(image, label):
    """Cast `image` to float32 and divide it by 255.0; pass `label` through untouched.

    Returns the `(image, label)` pair so the function can be used directly
    with `Dataset.map` on (feature, target) datasets.
    """
    normalized = tf.cast(image, tf.float32) / 255.0
    return normalized, label

def scale_whitout_label(image):
    """Label-free variant of `scale`: cast `image` to float32 and divide by 255.0."""
    as_float = tf.cast(image, tf.float32)
    return as_float / 255.0

def lrn_layer(x):
    """Apply `tf.nn.local_response_normalization` to `x` with its default settings.

    Kept as a named function so it can be wrapped in a `Lambda` layer.
    """
    normalized = tf.nn.local_response_normalization(x)
    return normalized

# Load data

In [4]:
# Read both Kaggle CSV files into plain NumPy arrays.
digit = pd.read_csv('/kaggle/input/my-mnist/digit.csv').to_numpy()
test = pd.read_csv('/kaggle/input/my-mnist/test.csv').to_numpy()

# The last column of digit.csv is taken as the label; all other columns are features.
labels = digit[:, -1]
digit = digit[:, :-1]

# A fixed seed makes the split (and every metric reported below) reproducible;
# stratifying keeps the class proportions the same in both splits.
train_X, test_X, train_Y, test_Y = train_test_split(
    digit, labels, test_size=0.2, random_state=42, stratify=labels
)

# Preparing data

In [5]:
# Training pipeline: scale -> cache -> shuffle -> batch -> prefetch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_X, train_Y))
    .map(scale, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    # The shuffle buffer holds the whole training split (one element per sample).
    .shuffle(len(train_X))
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [6]:
# Validation pipeline: scale -> batch -> cache -> prefetch (no shuffling).
valid_dtatset = (
    tf.data.Dataset.from_tensor_slices((test_X, test_Y))
    .map(scale, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(64)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Kaggle test pixels divided by 255.0, kept as a NumPy array.
test_scaled = test / 255.0

# Create basic neural network

In [7]:
# Fully connected baseline: three Dense -> BatchNormalization -> ReLU stages
# (600, 300 and 100 units) followed by a 10-way softmax output.
model = tf.keras.Sequential(
    [
        layer
        for units in (600, 300, 100)
        for layer in (
            tf.keras.layers.Dense(units),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Activation('relu'),
        )
    ]
    + [tf.keras.layers.Dense(10, activation="softmax")]
)

In [8]:
# Stop after 20 epochs without improvement of the monitored validation loss
# and roll the model back to its best weights.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=20,
    restore_best_weights=True,
)

In [None]:
# Integer class labels -> sparse categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Nadam(learning_rate=2e-2),
)

# Up to 100 epochs; early stopping decides the actual number.
history = model.fit(
    train_dataset,
    validation_data=valid_dtatset,
    epochs=100,
    callbacks=[early_stopping_cb],
)

In [None]:
# Loss and accuracy of the dense baseline on the validation split.
model.evaluate(x=valid_dtatset)

Standard fully connected neural network with batch normalization. Good accuracy on MNIST, but not effective for more serious computer vision challenges. Accuracy on Kaggle: 0.99532

# Preparing data for CNN

In [23]:
# Training pipeline on images reshaped to (N, 28, 28, 1):
# scale -> cache -> shuffle -> batch -> prefetch.
train_dataset_2d = (
    tf.data.Dataset.from_tensor_slices((train_X.reshape(-1, 28, 28, 1), train_Y))
    .map(scale, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    # The shuffle buffer holds the whole training split (one element per sample).
    .shuffle(len(train_X))
    .batch(64)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Validation pipeline (the held-out part of the train/validation split), no shuffling.
valid_dtatset_2d = (
    tf.data.Dataset.from_tensor_slices((test_X.reshape(-1, 28, 28, 1), test_Y))
    .map(scale, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(64)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Kaggle test images, reshaped to (N, 28, 28, 1) and scaled the same way as the training data.
test_2d = scale_whitout_label(test.reshape(-1, 28, 28, 1))


# Data augmentation

In [24]:
# Label-preserving augmentation for 28x28x1 digit images.
# A horizontal flip is deliberately NOT applied: a mirrored digit is not a
# valid sample of its class, so flipping would inject label noise.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    # Factor is a fraction of a full turn: 0.05 -> rotations of up to about +/-18 degrees.
    tf.keras.layers.RandomRotation(0.05),
])

# Use LeNet5 CNN architecture

In [None]:
# LeNet-5 style stack: tanh convolutions with average pooling, then tanh dense layers.
model_LeNet5 = tf.keras.Sequential([
    # 6 feature maps; 'same' padding keeps the spatial size.
    tf.keras.layers.Conv2D(6, (5, 5), padding='same', activation='tanh'),
    tf.keras.layers.AveragePooling2D(pool_size=2, strides=2),
    # 16 feature maps, no padding.
    tf.keras.layers.Conv2D(16, (5, 5), padding='valid', activation='tanh'),
    tf.keras.layers.AveragePooling2D(pool_size=2, strides=2),
    # 120 feature maps, no padding.
    tf.keras.layers.Conv2D(120, (5, 5), padding='valid', activation='tanh'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(84, activation='tanh'),
    tf.keras.layers.Dense(10, activation='softmax'),
])

model_LeNet5.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Nadam(),
)

# Up to 100 epochs; early stopping decides the actual number.
history = model_LeNet5.fit(
    train_dataset_2d,
    validation_data=valid_dtatset_2d,
    epochs=100,
    callbacks=[early_stopping_cb],
)

In [None]:
# Loss and accuracy of the LeNet-5 model on the validation split.
model_LeNet5.evaluate(x=valid_dtatset_2d)

Classic CNN architecture created by Yann LeCun. It shows good results, although it is a bit outdated today. Accuracy on Kaggle: 0.99342

# My CNN implementation

In [1]:
# Conv2D preset shared by the convolutions below: 3x3 kernel, "same" padding,
# ReLU activation and He-normal initialisation (each call may override these).
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, padding="same",
                        activation="relu", kernel_initializer="he_normal")
model_cnn = tf.keras.Sequential([
    # Explicit Input layer instead of passing `input_shape=` to the first Conv2D.
    tf.keras.layers.Input(shape=(28, 28, 1)),
    DefaultConv2D(filters=64, kernel_size=7),
    tf.keras.layers.MaxPool2D(),

    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    tf.keras.layers.MaxPool2D(),

    DefaultConv2D(filters=256),
    DefaultConv2D(filters=256),
    # Global average pooling already yields a flat (batch, channels) tensor,
    # so no separate Flatten layer is needed before the dense head.
    tf.keras.layers.GlobalAvgPool2D(),

    # NOTE(review): the original inline note here read "128 - 256" — presumably unit counts that were tried.
    tf.keras.layers.Dense(units=256, kernel_initializer="he_normal"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.4),
    # NOTE(review): the original inline note here read "64 - 128" — presumably unit counts that were tried.
    tf.keras.layers.Dense(units=128, kernel_initializer="he_normal"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=10, activation="softmax")
])

model_cnn.compile(optimizer=tf.keras.optimizers.Nadam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Up to 100 epochs; early stopping decides the actual number.
history = model_cnn.fit(train_dataset_2d, epochs=100,
                    validation_data=valid_dtatset_2d,
                    callbacks=[early_stopping_cb])

NameError: name 'partial' is not defined

My implementation of a CNN. Accuracy on Kaggle: 0.99875 (the best).

In [22]:
# Loss and accuracy of the custom CNN on the validation split.
model_cnn.evaluate(x=valid_dtatset_2d)

[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9693 - loss: 0.1047


[0.101453498005867, 0.9700000286102295]

# Use AlexNet

In [None]:
# AlexNet-style stack: five convolutions with max pooling and local response
# normalization, followed by two 4096-unit dense stages and a 10-way softmax.
model_alexNet = tf.keras.Sequential([
    tf.keras.layers.Conv2D(96, kernel_size=7, activation='relu', padding='valid', strides=1),
    tf.keras.layers.MaxPool2D((3,3), strides=2, padding='valid'),
    tf.keras.layers.Lambda(lrn_layer),  # LRN after the first convolution/pooling stage

    tf.keras.layers.Conv2D(256, (3,3), padding="same", activation="relu", strides=1),
    tf.keras.layers.MaxPool2D((3,3), strides=2, padding='valid'),
    tf.keras.layers.Lambda(lrn_layer),  # LRN after the second convolution/pooling stage

    tf.keras.layers.Conv2D(384, (3,3), padding="same", activation="relu", strides=1),
    tf.keras.layers.Conv2D(384, (3,3), padding="same", activation="relu", strides=1),
    tf.keras.layers.Conv2D(256, (3,3), padding="same", activation="relu", strides=1),
    tf.keras.layers.MaxPool2D((3,3), strides=2, padding='valid'),
    # Global average pooling already yields a flat (batch, channels) tensor,
    # so no separate Flatten layer is needed before the dense head.
    tf.keras.layers.GlobalAvgPool2D(),

    tf.keras.layers.Dense(units=4096, kernel_initializer="he_normal"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=4096, kernel_initializer="he_normal"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=10, activation='softmax')
])

model_alexNet.compile(optimizer=tf.keras.optimizers.Nadam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Up to 100 epochs; early stopping decides the actual number.
history = model_alexNet.fit(train_dataset_2d, epochs=100,
                    validation_data=valid_dtatset_2d,
                    callbacks=[early_stopping_cb])

Accuracy on Kaggle: 0.99771

In [None]:
# Loss and accuracy of the AlexNet-style model on the validation split.
model_alexNet.evaluate(x=valid_dtatset_2d)

# Save predictions (Kaggle submission file)

In [13]:
# Class probabilities for every Kaggle test image, from the custom CNN.
preds = model_cnn.predict(test_2d)
# Index of the highest probability in each row.
ans = preds.argmax(axis=1)

# Submission table: ImageId counts from 1, Label is the predicted class index.
image_ids = np.arange(len(ans)) + 1
df = pd.DataFrame({'ImageId': image_ids, 'Label': ans})
df.to_csv("ans.csv", index=False)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
