# Digit Recognizer (Computer Vision)

## Importing the libraries

In [56]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

In [57]:
tf.__version__

'2.14.0'

## Part 1 - Data Preprocessing

### Importing the training set

In [58]:
trainDF = pd.read_csv("../Datasets/train.csv")

In [59]:
print(trainDF.head())

   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0         

### Importing the testing set

In [60]:
testDF = pd.read_csv("../Datasets/test.csv")

In [61]:
print(testDF.head())

   pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0       0       0       0       0       0       0       0       0       0   
1       0       0       0       0       0       0       0       0       0   
2       0       0       0       0       0       0       0       0       0   
3       0       0       0       0       0       0       0       0       0   
4       0       0       0       0       0       0       0       0       0   

   pixel9  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0   

### Splitting the training set to features and target variables

In [62]:
x_train = trainDF.iloc[:, 1:].values
y_train = trainDF.iloc[:, 0].values

In [63]:
print(x_train)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [64]:
print(y_train)

[1 0 1 ... 7 6 9]


### Assigning testing set to a variable

In [65]:
x_test = testDF.values

In [66]:
print(x_test)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


### Normalize the pixel values to be between 0 and 1 for training set

In [67]:
x_train = x_train.astype("float32") / 255

### Normalize the pixel vaues to bet between 0 and 1 for testing set

In [68]:
x_test = x_test.astype("float32") / 255

### Reshaping the training and testing data to fit the keras model

In [69]:
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

### Converting target variable to binary class matrices

In [70]:
y_train = tf.keras.utils.to_categorical(y_train, 10)

## Part 2 - Building the CNN

### Initialize the CNN

In [71]:
cnn = tf.keras.models.Sequential()

### Step 1 - Convolution

In [72]:
cnn.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation = "relu", input_shape = (28, 28, 1)))

### Step 2 - Pooling

In [73]:
cnn.add(tf.keras.layers.MaxPool2D(pool_size = (2, 2)))

### Adding a 2nd Convolutional layer

In [74]:
cnn.add(tf.keras.layers.Conv2D(filters = 128, kernel_size = (3, 3), activation = "relu"))
cnn.add(tf.keras.layers.MaxPool2D(pool_size = (2, 2)))

### Step 3 - Flattening

In [75]:
cnn.add(tf.keras.layers.Flatten())

### Step 4 - Full Connection

In [76]:
cnn.add(tf.keras.layers.Dense(units = 256, activation = "relu"))

### Step 5 - Output Layer

In [77]:
cnn.add(tf.keras.layers.Dense(units = 10, activation = "softmax"))

## Part 3 - Training the CNN

### Compiling the CNN

In [78]:
cnn.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])

### Training the CNN on the training set and evaluating it on the test set

In [79]:
cnn.fit(x = x_train, y = y_train, epochs = 20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x28d00569050>

## Part 4 - Predicting the test set results

In [80]:
result = cnn.predict(x_test)



In [81]:
print(result)

[[2.3854924e-34 8.5900470e-30 1.0000000e+00 ... 7.1921627e-30
  6.3157531e-32 1.3610806e-34]
 [1.0000000e+00 2.6142489e-35 5.7857754e-21 ... 2.8072445e-23
  5.2546252e-27 3.6202084e-26]
 [7.1517573e-29 6.3058546e-26 3.4725335e-15 ... 7.1221931e-21
  5.0270423e-19 1.0000000e+00]
 ...
 [0.0000000e+00 2.6443358e-37 0.0000000e+00 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [1.3215131e-24 4.0484258e-20 3.9089609e-19 ... 3.5943840e-16
  2.8176786e-19 1.0000000e+00]
 [0.0000000e+00 1.2599509e-35 1.0000000e+00 ... 4.3151692e-35
  2.4284466e-32 1.0147720e-37]]


In [82]:
predicted_digits = np.argmax(result, axis = 1)

In [83]:
print(predicted_digits)

[2 0 9 ... 3 9 2]


## Part 5 - Converting the result to a CSV file

In [84]:
resultDF = pd.DataFrame({"ImageId": [i + 1 for i in range(len(predicted_digits))], "Label": predicted_digits})

In [85]:
resultDF.to_csv("../Solution/result_submission.csv", index = False)