# Reconnaissance de digits manuscrits par CNN

## Importation des Bibliothèques 

In [1]:
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm 

## Partie 1 : Base de données, Analyse et Préparation

In [2]:
# Load the test split. Each sub-folder of DataSet/testing is named after the
# integer label of the images it contains (the folder name is parsed with
# int() to build the label).
path_test = os.path.join(".", "DataSet", "testing")

Data_test = []
Label_test = []
unreadable_test = []  # paths that cv2.imread could not decode

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order reproducible from one run to the next. (os.walk would be the
# alternative for a recursive traversal.)
for folder in tqdm(sorted(os.listdir(path_test))):
    folder_path = os.path.join(path_test, folder)
    # Skip stray entries (e.g. .DS_Store, Thumbs.db) that are not label
    # folders: int(folder) would raise ValueError on them.
    if not (os.path.isdir(folder_path) and folder.isdigit()):
        continue
    for file in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, file)
        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when a file cannot be
        # read; a None entry would break the later conversion to an array.
        if image is None:
            unreadable_test.append(image_path)
            continue
        Data_test.append(image)
        Label_test.append(int(folder))

if unreadable_test:
    print(f"Skipped {len(unreadable_test)} unreadable file(s) in {path_test}")

 40%|████████████████████████████████                                                | 4/10 [00:29<00:43,  7.33s/it]


KeyboardInterrupt: 

In [None]:
# Load the training split. Each sub-folder of DataSet/training is named after
# the integer label of the images it contains (the folder name is parsed with
# int() to build the label).
path_train = os.path.join(".", "DataSet", "training")

Data_train = []
Label_train = []
unreadable_train = []  # paths that cv2.imread could not decode

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order reproducible from one run to the next. (os.walk would be the
# alternative for a recursive traversal.)
for folder in tqdm(sorted(os.listdir(path_train))):
    folder_path = os.path.join(path_train, folder)
    # Skip stray entries (e.g. .DS_Store, Thumbs.db) that are not label
    # folders: int(folder) would raise ValueError on them.
    if not (os.path.isdir(folder_path) and folder.isdigit()):
        continue
    for file in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, file)
        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when a file cannot be
        # read; a None entry would break the later conversion to an array.
        if image is None:
            unreadable_train.append(image_path)
            continue
        Data_train.append(image)
        Label_train.append(int(folder))

if unreadable_train:
    print(f"Skipped {len(unreadable_train)} unreadable file(s) in {path_train}")

In [None]:
# Number of images loaded for each split.
print(
    f"Train : {len(Data_train)} | Test : {len(Data_test)}"
)

In [None]:
# Number of labels collected for each split; one label is appended per image,
# so these counts are expected to match the image counts.
print(
    f"Train : {len(Label_train)} | Test : {len(Label_test)}"
)

## Partie 2 : Architecture CNN sur Tensorflow

In [None]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import numpy as np

In [None]:
# Stack the image lists into arrays and divide the pixel values by 255
# (cv2.imread loads 8-bit images by default, so this maps them to [0, 1]).
X_train, X_test = np.array(Data_train) / 255, np.array(Data_test) / 255
# Label arrays, one entry per image. The test labels must come from
# Label_test (the original used Label_train and the misspelled `np.arrya`).
y_train, y_test = np.array(Label_train), np.array(Label_test)

In [None]:

# Show the first 25 training images in a 5x5 grid, each titled with its label.
plt.figure(figsize=(10, 10))
for idx in range(25):
    ax = plt.subplot(5, 5, idx + 1)
    # Hide ticks and grid so only the image is visible.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(X_train[idx])
    # Labels are plain integers here, so no extra indexing is needed.
    # NOTE(review): the title is drawn in white ("w"), which is only readable
    # on a dark background — confirm the notebook theme.
    ax.set_title(str(Label_train[idx]), color="w")
plt.show()


In [None]:
# Shape of a single training image; reused below as the network input shape.
image_shape = np.shape(X_train[0])
print(image_shape)

In [None]:
# Three convolution layers (with max-pooling after the first two) followed by
# a small dense classifier.
# NOTE(review): every hidden layer uses a sigmoid activation; ReLU is the more
# common choice for CNNs — confirm this is intentional.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='sigmoid', input_shape=image_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='sigmoid'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='sigmoid'),
    # Dense layers
    layers.Flatten(),
    layers.Dense(64, activation='sigmoid'),
    # 10 output units with no activation: the model emits raw scores (logits).
    layers.Dense(10),
])

# Model summary
model.summary()

## Compilation et entraînement du modèle

In [None]:
# Integer labels + un-activated final Dense layer, hence sparse categorical
# cross-entropy computed from logits.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 10 epochs and keep the per-epoch metrics in `history`.
# NOTE(review): the test arrays are used as validation data here.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

## Evaluation du modèle

In [None]:
# Plot training and validation loss together (as the accuracy figure does),
# so that a gap between the two curves is visible.
plt.plot(history.history['loss'], label="loss")
plt.plot(history.history['val_loss'], label = "val_loss")
plt.xlabel('Epoch')
plt.ylabel("Loss")
plt.legend(loc = "upper right")
plt.title("Loss value by epoch")

plt.show()

In [None]:
# Training vs. validation accuracy for each epoch.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve], label=curve)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# The y-axis is clipped to [0.92, 1]; values below 0.92 are not visible.
plt.ylim([0.92, 1])
plt.title("Accuracy value by epoch")
plt.legend(loc='lower right')

plt.show()

# Loss and accuracy of the trained model on the test arrays.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)


In [None]:
# Network outputs for every test image. The final Dense layer has no
# activation, so these are raw scores (logits), not probabilities.
y_pred = model.predict(x=X_test)

- Accuracy
- Matrice de confusion
voir : `np.expand_dims`