# Digit Recognizer (Computer Vision)

## Importing the libraries

In [26]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

In [27]:
tf.__version__

'2.14.0'

## Part 1 - Data Preprocessing

### Importing the training set

In [28]:
trainDF = pd.read_csv("../Datasets/train.csv")

In [29]:
print(trainDF.head())

   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0         

### Importing the testing set

In [30]:
testDF = pd.read_csv("../Datasets/test.csv")

In [31]:
print(testDF.head())

   pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0       0       0       0       0       0       0       0       0       0   
1       0       0       0       0       0       0       0       0       0   
2       0       0       0       0       0       0       0       0       0   
3       0       0       0       0       0       0       0       0       0   
4       0       0       0       0       0       0       0       0       0   

   pixel9  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0   

### Splitting the training set to features and target variables

In [32]:
x_train = trainDF.iloc[:, 1:].values
y_train = trainDF.iloc[:, 0].values

In [33]:
print(x_train)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [34]:
print(y_train)

[1 0 1 ... 7 6 9]


### Assigning testing set to a variable

In [35]:
x_test = testDF.values

In [36]:
print(x_test)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


### Normalize the pixel values to be between 0 and 1 for training set

In [37]:
x_train = x_train.astype("float32") / 255

### Normalize the pixel vaues to bet between 0 and 1 for testing set

In [38]:
x_test = x_test.astype("float32") / 255

### Reshaping the training and testing data to fit the keras model

In [39]:
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

### Converting target variable to binary class matrices

In [40]:
y_train = tf.keras.utils.to_categorical(y_train, 10)

## Part 2 - Building the CNN

### Initialize the CNN

In [41]:
cnn = tf.keras.models.Sequential()

### Step 1 - Convolution

In [42]:
cnn.add(tf.keras.layers.Conv2D(filters = 32, kernel_size = (3, 3), activation = "relu", input_shape = (28, 28, 1)))

### Step 2 - Pooling

In [43]:
cnn.add(tf.keras.layers.MaxPool2D(pool_size = (2, 2)))

### Adding a 2nd Convolutional layer

In [44]:
cnn.add(tf.keras.layers.Conv2D(filters = 64, kernel_size = (3, 3), activation = "relu"))
cnn.add(tf.keras.layers.MaxPool2D(pool_size = (2, 2)))

### Step 3 - Flattening

In [45]:
cnn.add(tf.keras.layers.Flatten())

### Step 4 - Full Connection

In [46]:
cnn.add(tf.keras.layers.Dense(units = 128, activation = "relu"))

### Step 5 - Output Layer

In [47]:
cnn.add(tf.keras.layers.Dense(units = 10, activation = "softmax"))

## Part 3 - Training the CNN

### Compiling the CNN

In [48]:
cnn.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])

### Training the CNN on the training set and evaluating it on the test set

In [49]:
cnn.fit(x = x_train, y = y_train, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x28d0009bb90>

## Part 4 - Predicting the test set results

In [50]:
result = cnn.predict(x_test)



In [51]:
print(result)

[[2.3580191e-31 7.8622056e-26 1.0000000e+00 ... 1.0378414e-24
  2.1263722e-30 1.9214336e-24]
 [1.0000000e+00 3.3526588e-25 3.8518887e-16 ... 6.4050724e-22
  4.5144164e-20 5.3403743e-17]
 [1.5643428e-19 3.0703751e-22 3.2761606e-15 ... 2.3901703e-15
  2.6882223e-15 1.0000000e+00]
 ...
 [7.0337874e-38 1.7922491e-26 1.9617691e-31 ... 2.9223007e-25
  5.4943753e-28 1.5591481e-26]
 [4.4786788e-16 1.0112300e-19 1.4969959e-17 ... 5.4697380e-14
  2.2852263e-16 1.0000000e+00]
 [1.4402050e-35 2.1673945e-29 1.0000000e+00 ... 2.6146687e-27
  1.0017976e-24 4.0962381e-29]]


In [52]:
predicted_digits = np.argmax(result, axis = 1)

In [53]:
print(predicted_digits)

[2 0 9 ... 3 9 2]


## Part 5 - Converting the result to a CSV file

In [54]:
resultDF = pd.DataFrame({"ImageId": [i + 1 for i in range(len(predicted_digits))], "Label": predicted_digits})

In [55]:
resultDF.to_csv("../Solution/result_submission.csv", index = False)