# Digit Recognizer (Computer Vision)

## Importing the libraries

In [94]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

In [95]:
tf.__version__

'2.14.0'

## Part 1 - Data Preprocessing

### Importing the training set

In [96]:
trainDF = pd.read_csv("../Datasets/train.csv")

In [97]:
print(trainDF.head())

   label  pixel0  pixel1  pixel2  ...  pixel780  pixel781  pixel782  pixel783
0      1       0       0       0  ...         0         0         0         0
1      0       0       0       0  ...         0         0         0         0
2      1       0       0       0  ...         0         0         0         0
3      4       0       0       0  ...         0         0         0         0
4      0       0       0       0  ...         0         0         0         0

[5 rows x 785 columns]


### Importing the testing set

In [98]:
testDF = pd.read_csv("../Datasets/test.csv")

In [99]:
print(testDF.head())

   pixel0  pixel1  pixel2  pixel3  ...  pixel780  pixel781  pixel782  pixel783
0       0       0       0       0  ...         0         0         0         0
1       0       0       0       0  ...         0         0         0         0
2       0       0       0       0  ...         0         0         0         0
3       0       0       0       0  ...         0         0         0         0
4       0       0       0       0  ...         0         0         0         0

[5 rows x 784 columns]


### Splitting the training set to features and target variables

In [100]:
x_train = trainDF.iloc[:, 1:].values
y_train = trainDF.iloc[:, 0].values

In [101]:
print(x_train)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [102]:
print(y_train)

[1 0 1 ... 7 6 9]


### Assigning testing set to a variable

In [103]:
x_test = testDF.values

In [104]:
print(x_test)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


### Normalize the pixel values to be between 0 and 1 for training set

In [105]:
x_train = x_train.astype("float32") / 255

### Normalize the pixel vaues to bet between 0 and 1 for testing set

In [106]:
x_test = x_test.astype("float32") / 255

### Reshaping the training and testing data to fit the keras model

In [107]:
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

### Converting target variable to binary class matrices

In [108]:
y_train = tf.keras.utils.to_categorical(y_train, 10)

## Part 2 - Building the CNN

### Initialize the CNN

In [109]:
cnn = tf.keras.models.Sequential()

### Step 1 - Convolution

In [110]:
cnn.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = 3, activation = "relu"))

### Step 2 - Pooling

In [111]:
cnn.add(tf.keras.layers.MaxPool2D(pool_size = 2, strides = 2))

### Adding a 2nd Convolutional layer

In [112]:
cnn.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = 3, activation = "relu"))
cnn.add(tf.keras.layers.MaxPool2D(pool_size = 2, strides = 32))

### Step 3 - Flattening

In [113]:
cnn.add(tf.keras.layers.Flatten())

### Step 4 - Full Connection

In [114]:
cnn.add(tf.keras.layers.Dense(units = 128, activation = "relu"))

### Step 5 - Output Layer

In [118]:
cnn.add(tf.keras.layers.Dense(units = 10, activation = "sigmoid"))

## Part 3 - Training the CNN

### Compiling the CNN

In [119]:
cnn.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])

### Training the CNN on the training set and evaluating it on the test set

In [120]:
cnn.fit(x = x_train, y = y_train, epochs = 25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x1e025fd6890>

## Part 4 - Predicting the test set results

In [121]:
result = cnn.predict(x_test)



In [124]:
print(result)

[[0.09903133 0.11246407 0.0991756  ... 0.10469843 0.09646314 0.10012168]
 [0.09903133 0.11246407 0.0991756  ... 0.10469843 0.09646314 0.10012168]
 [0.09903133 0.11246407 0.0991756  ... 0.10469843 0.09646314 0.10012168]
 ...
 [0.09903133 0.11246407 0.0991756  ... 0.10469843 0.09646314 0.10012168]
 [0.09903133 0.11246407 0.0991756  ... 0.10469843 0.09646314 0.10012168]
 [0.09903133 0.11246407 0.0991756  ... 0.10469843 0.09646314 0.10012168]]


In [125]:
predicted_digits = np.argmax(result, axis = 1)

In [126]:
print(predicted_digits)

[1 1 1 ... 1 1 1]


## Part 5 - Converting the result to a CSV file

In [135]:
resultDF = pd.DataFrame({"ImageId": [i + 1 for i in range(len(predicted_digits))], "Label": predicted_digits})

In [137]:
resultDF.to_csv("../Solution/result_submission.csv", index = False)