# [Kannada MNIST](https://www.kaggle.com/c/Kannada-MNIST/data)

In [1]:
import os
import numpy as np
import tensorflow as tf

In [2]:
# Image width and height in pixels; every CSV row carries IH * IW pixel values.
IW = 28
IH = 28
# Number of target classes, i.e. the width of the one-hot label vectors.
TP = 10
# Seed NumPy's global RNG so the mini-batch shuffling is reproducible.
seed = 0
np.random.seed(seed)


class MNISTLoader():
    """Load the Kannada-MNIST train/test CSV files into NumPy arrays.

    After construction the instance exposes:

    * ``X_train`` -- float32 images of shape ``(N, IH, IW, 1)`` scaled to [0, 1]
    * ``Y_train`` -- one-hot labels of shape ``(N, TP)``
    * ``X_test``  -- float32 images of shape ``(M, IH, IW, 1)`` scaled to [0, 1]
    * ``num_train_data`` / ``num_test_data`` -- ``N`` and ``M``
    """

    def __init__(self):
        """Check that the dataset files exist, then read train and test sets.

        Raises:
            AssertionError: if any of the four expected CSV files is missing
                under ``data/Kannada-MNIST``.
        """
        root = 'data/Kannada-MNIST'
        dig_file = os.path.join(root, 'Dig-MNIST.csv')
        sample_file = os.path.join(root, 'sample_submission.csv')
        train_file = os.path.join(root, 'train.csv')
        test_file = os.path.join(root, 'test.csv')

        # All four files are required up front, although only the train and
        # test files are actually parsed below.
        for file in [dig_file, sample_file, train_file, test_file]:
            assert os.path.exists(file), 'Please download dataset and save to "data/Kannada-MNIST/" before boot'

        self.X_train, self.Y_train = self.read_csv(train_file, type='train')
        self.X_test, _ = self.read_csv(test_file, type='test')
        self.num_train_data, self.num_test_data = self.X_train.shape[0], self.X_test.shape[0]

    @staticmethod
    def read_csv(file_path, type='train'):
        """Parse one dataset CSV file into image (and label) arrays.

        The first line is treated as a header and skipped. In every other
        line the first column is the label (used only when ``type`` is
        ``'train'``) and the remaining columns are the pixel values. Blank
        lines, such as a trailing empty line, are ignored.

        Args:
            file_path: path of the CSV file to read.
            type: ``'train'`` to also build labels; any other value returns
                ``None`` for the labels. (The name shadows the builtin but is
                kept for backward compatibility with keyword callers.)

        Returns:
            A tuple ``(X, Y)`` where ``X`` is a float32 array of shape
            ``(rows, IH, IW, 1)`` with values divided by 255, and ``Y`` is a
            one-hot array of shape ``(rows, TP)`` or ``None``.
        """
        is_train = type == 'train'
        images = []
        labels = []
        with open(file_path, 'r') as f:
            for i, line in enumerate(f):
                # Single-line progress indicator (carriage return, no newline).
                print(f'get data {i}', end='\r')
                if i == 0:
                    continue  # header row
                line = line.rstrip()
                if not line:
                    continue  # tolerate blank / trailing empty lines
                items = line.split(',')
                if is_train:
                    labels.append(int(items[0]))
                pixels = np.array(items[1:], dtype=np.float32)
                images.append(pixels.reshape((IH, IW, 1)) / 255.0)

        # The explicit reshape keeps the rank-4 layout even when no data rows
        # were found, so callers can always rely on X.shape[0].
        X = np.array(images, dtype=np.float32).reshape((-1, IH, IW, 1))
        # Row k of the identity matrix is the one-hot encoding of class k.
        Y = np.eye(TP)[np.array(labels, dtype=np.int64)] if is_train else None
        return X, Y

    def random_mini_batches(self, batch_size=64):
        """Yield the training set as randomly shuffled mini-batches.

        A fresh permutation of the sample indices is drawn from NumPy's
        global RNG each time the generator is started. The last batch is
        smaller when the data size is not a multiple of ``batch_size``.

        Args:
            batch_size: number of samples per batch.

        Yields:
            Tuples ``(X_batch, Y_batch)`` indexed from ``X_train``/``Y_train``.
        """
        data_size = len(self.X_train)
        permutation = np.random.permutation(data_size)
        for start in range(0, data_size, batch_size):
            batch_indices = permutation[start: start + batch_size]
            yield self.X_train[batch_indices], self.Y_train[batch_indices]

In [3]:
class CNN(tf.keras.Model):
    """Two conv/pool stages followed by two dense layers, ending in softmax.

    The shape comments in ``call`` assume inputs of shape
    ``[batch_size, 28, 28, 1]``; the reshape layer hard-codes the resulting
    ``7 * 7 * 64`` feature size.
    """

    def __init__(self):
        super().__init__()
        # First stage: 32 filters with a 3x3 receptive field; 'same' padding
        # keeps the spatial size, and ReLU is applied to the output.
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32, kernel_size=[3, 3], padding='same', activation=tf.nn.relu
        )
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        # Second stage: same configuration with 64 filters.
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu
        )
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        # Collapse the feature maps into one vector per sample.
        self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
        self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
        # Final layer has no activation; softmax is applied in call().
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        """Run the forward pass and return per-class probabilities."""
        pipeline = (
            self.conv1,    # -> [batch_size, 28, 28, 32]
            self.pool1,    # -> [batch_size, 14, 14, 32]
            self.conv2,    # -> [batch_size, 14, 14, 64]
            self.pool2,    # -> [batch_size, 7, 7, 64]
            self.flatten,  # -> [batch_size, 7 * 7 * 64]
            self.dense1,   # -> [batch_size, 1024]
            self.dense2,   # -> [batch_size, 10]
        )
        features = inputs
        for layer in pipeline:
            features = layer(features)
        return tf.nn.softmax(features)

In [5]:
# Training hyperparameters.
learning_rate = 1e-3  # step size handed to the optimizer
batch_size = 50       # samples per mini-batch
num_epochs = 5        # full passes over the training set
print_step = 100      # logging interval used by the training loop

In [6]:
# Instantiate the network defined by the CNN class.
model = CNN()
# Read the train/test CSV files; the constructor asserts that the dataset
# files exist under data/Kannada-MNIST.
data_loader = MNISTLoader()
# Adam optimizer configured with the learning rate chosen above.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [32]:
# Custom training loop: for every epoch, iterate over freshly shuffled
# mini-batches, compute the cross-entropy loss under a GradientTape and apply
# one optimizer update per batch.
global_step = 0  # number of optimizer updates performed so far, across epochs
for i in range(num_epochs):
    for j, (X, Y) in enumerate(data_loader.random_mini_batches(batch_size)):
        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            Y_ = model(X)
            # The model already returns softmax probabilities, so the loss is
            # used with its default (non-logit) configuration.
            loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(
                y_true=Y,
                y_pred=Y_
            ))
        # Log every `print_step` updates, counted in batches over all epochs.
        if global_step % print_step == 0:
            print(f"{i} - {j * batch_size: 5}: loss {loss.numpy()}")
        # Differentiate with respect to the trainable weights only.
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
        global_step += 1