In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation,Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

# Loading data to data-frame

In [2]:
train_folder = '/kaggle/input/digit-recognizer/train.csv'
test_folder = '/kaggle/input/digit-recognizer/test.csv'

train = pd.read_csv(train_folder)
test = pd.read_csv(test_folder)

# Dividing label

In [3]:
label = train.label
train.drop('label', axis=1, inplace=True, errors='ignore')

# Converting pandas to numpy

In [4]:
X = train.to_numpy()
y = label.to_numpy()

# Shuffling data

In [5]:
X, y = shuffle(X, y, random_state=0)

# Model creation

In [6]:
model = Sequential(
    [
        Dense(units=392, input_shape=(784,), activation='relu'),
        Dense(units=10, activation='softmax')
    ]
)


User settings:

   KMP_AFFINITY=granularity=fine,verbose,compact,1,0
   KMP_BLOCKTIME=0
   KMP_DUPLICATE_LIB_OK=True
   KMP_INIT_AT_FORK=FALSE
   KMP_SETTINGS=1

Effective settings:

   KMP_ABORT_DELAY=0
   KMP_ADAPTIVE_LOCK_PROPS='1,1024'
   KMP_ALIGN_ALLOC=64
   KMP_ALL_THREADPRIVATE=128
   KMP_ATOMIC_MODE=2
   KMP_BLOCKTIME=0
   KMP_CPUINFO_FILE: value is not defined
   KMP_DETERMINISTIC_REDUCTION=false
   KMP_DEVICE_THREAD_LIMIT=2147483647
   KMP_DISP_NUM_BUFFERS=7
   KMP_DUPLICATE_LIB_OK=true
   KMP_ENABLE_TASK_THROTTLING=true
   KMP_FORCE_REDUCTION: value is not defined
   KMP_FOREIGN_THREADS_THREADPRIVATE=true
   KMP_FORKJOIN_BARRIER='2,2'
   KMP_FORKJOIN_BARRIER_PATTERN='hyper,hyper'
   KMP_GTID_MODE=3
   KMP_HANDLE_SIGNALS=false
   KMP_HOT_TEAMS_MAX_LEVEL=1
   KMP_HOT_TEAMS_MODE=0
   KMP_INIT_AT_FORK=true
   KMP_LIBRARY=throughput
   KMP_LOCK_KIND=queuing
   KMP_MALLOC_POOL_INCR=1M
   KMP_NUM_LOCKS_IN_BLOCK=1
   KMP_PLAIN_BARRIER='2,2'
   KMP_PLAIN_BARRIER_PATTERN='hyper,hype

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 392)               307720    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                3930      
Total params: 311,650
Trainable params: 311,650
Non-trainable params: 0
_________________________________________________________________


In [8]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [9]:
model.fit(x=X, y=y, batch_size=50, validation_split=0.1, epochs=10, shuffle=True, verbose=2)

2022-01-18 18:10:21.883487: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/10
756/756 - 3s - loss: 10.0667 - accuracy: 0.8088 - val_loss: 4.0237 - val_accuracy: 0.8855
Epoch 2/10
756/756 - 3s - loss: 2.5983 - accuracy: 0.9175 - val_loss: 2.8226 - val_accuracy: 0.9140
Epoch 3/10
756/756 - 3s - loss: 1.5137 - accuracy: 0.9416 - val_loss: 2.2608 - val_accuracy: 0.9243
Epoch 4/10
756/756 - 3s - loss: 0.9444 - accuracy: 0.9561 - val_loss: 1.9556 - val_accuracy: 0.9321
Epoch 5/10
756/756 - 2s - loss: 0.6253 - accuracy: 0.9660 - val_loss: 1.8325 - val_accuracy: 0.9381
Epoch 6/10
756/756 - 3s - loss: 0.4193 - accuracy: 0.9732 - val_loss: 1.7570 - val_accuracy: 0.9388
Epoch 7/10
756/756 - 3s - loss: 0.3558 - accuracy: 0.9771 - val_loss: 1.6215 - val_accuracy: 0.9426
Epoch 8/10
756/756 - 3s - loss: 0.2715 - accuracy: 0.9802 - val_loss: 1.5813 - val_accuracy: 0.9476
Epoch 9/10
756/756 - 2s - loss: 0.2152 - accuracy: 0.9838 - val_loss: 1.5953 - val_accuracy: 0.9469
Epoch 10/10
756/756 - 2s - loss: 0.1822 - accuracy: 0.9869 - val_loss: 1.6796 - val_accuracy: 0.943

<keras.callbacks.History at 0x7fd3061de150>

# Predicting testing data

In [10]:
test = test.to_numpy()
test_prediction = model.predict(x=test, batch_size=100, verbose=0)

# Argmax 

In [11]:
test_prediction = np.argmax(test_prediction, axis=-1)

# Generating labels for submission file

In [12]:
test_label = np.arange(start=1, stop=len(test_prediction)+1, step=1)

# Generating csv file

In [13]:
output = pd.DataFrame({'ImageId': test_label, 'Label': test_prediction})
output.to_csv('submission.csv', index=False)