In [11]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
import pickle

# Digit class labels; each label is also used as a sub-directory name below.
CATEGORIES = [str(digit) for digit in range(10)]
# Root directory of the training images (joined with a category name).
DATADIR = "C:/Users/Enzo/Dataset/mnist_png/training"
# Side length, in pixels, that every image is resized to.
IMG_SIZE = 28
# Root directory of the test images (joined with a category name).
TESTDIR = "C:/Users/Enzo/Dataset/mnist_png/testing"

def create_training_data():
    """
    Load every training image and pair it with its class index.

    For each entry of CATEGORIES, every file under DATADIR/<category> is read
    as a grayscale image and resized to IMG_SIZE x IMG_SIZE. Files that
    OpenCV cannot read or resize are reported and skipped rather than being
    silently ignored.

    Returns:
        A shuffled list of ``[image_array, class_index]`` pairs, where
        class_index is the position of the category in CATEGORIES.
    """
    training_data = []
    # enumerate gives the category's position directly, avoiding an index() scan.
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(DATADIR, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread reports an unreadable file by returning None.
            if img_array is None:
                print("Skipping unreadable image:", img_path)
                continue
            try:
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except cv2.error as e:
                print("Skipping image that could not be resized:", img_path, e)
                continue
            training_data.append([new_array, class_num])
    random.shuffle(training_data)
    return training_data

def create_test_data(images_per_category=5):
    """
    Load a small evaluation set: a few images from each test category.

    For each entry of CATEGORIES, files under TESTDIR/<category> are read in
    directory-listing order as grayscale images and resized to
    IMG_SIZE x IMG_SIZE until ``images_per_category`` images have been
    loaded for that category. Unreadable files are reported and skipped.

    Args:
        images_per_category: maximum number of images taken from each
            category directory (default 5).

    Returns:
        A shuffled list of ``[image_array, class_index]`` pairs.
    """
    test_data = []
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(TESTDIR, category)
        # Counted per category: a category with fewer readable images than the
        # quota must not reduce the number taken from the next category.
        number_of_images = 0
        for img in os.listdir(path):
            if number_of_images >= images_per_category:
                break
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread reports an unreadable file by returning None.
            if img_array is None:
                print("Skipping unreadable image:", img_path)
                continue
            try:
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except cv2.error as e:
                print("Skipping image that could not be resized:", img_path, e)
                continue
            test_data.append([new_array, class_num])
            number_of_images += 1
    random.shuffle(test_data)
    return test_data

def train_model(training_data):
    """
    Build a fresh model and fit it on the given samples.

    training_data is an iterable of (image, label) pairs. The images are
    stacked and reshaped to (-1, IMG_SIZE, IMG_SIZE, 1) and the labels are
    one-hot encoded over 10 classes before fitting for 5 epochs with a batch
    size of 32. The fitted model is returned.
    """
    pairs = [(features, label) for features, label in training_data]
    images = [pair[0] for pair in pairs]
    labels = [pair[1] for pair in pairs]

    inputs = np.array(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
    targets = tf.keras.utils.to_categorical(labels, num_classes=10)

    network = create_model()
    network.fit(inputs, targets, batch_size=32, epochs=5)
    return network

def test_model(test_data, model):
    """
    Evaluate the model on the given samples and print loss and accuracy.

    test_data is an iterable of (image, label) pairs. The images are stacked
    and reshaped to (-1, IMG_SIZE, IMG_SIZE, 1) and the labels are one-hot
    encoded over 10 classes before being passed to model.evaluate.
    """
    pairs = [(features, label) for features, label in test_data]
    images = [pair[0] for pair in pairs]
    labels = [pair[1] for pair in pairs]

    inputs = np.array(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
    targets = tf.keras.utils.to_categorical(labels, num_classes=10)

    # The first two entries of the result are printed as loss and accuracy.
    score = model.evaluate(inputs, targets, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

def create_model():
    """
    Build and compile the classifier.

    The network is one 3x3 convolution with 64 filters (ReLU), a 2x2 max
    pooling layer, a flatten layer, a 10-unit dense layer and a softmax
    activation. It is compiled with categorical cross-entropy loss, the Adam
    optimizer and the accuracy metric.
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        layers.Conv2D(
            64,
            [3, 3],
            activation='relu',
            input_shape=(IMG_SIZE, IMG_SIZE, 1),
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(10),
        layers.Activation('softmax'),
    ])
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def save_model(model):
    """
    Save the model to 'model.h5' (relative path) by calling model.save.
    """
    destination = 'model.h5'
    model.save(destination)

def load_model():
    """
    Load and return the model stored in 'model.h5' (relative path).
    """
    return tf.keras.models.load_model('model.h5')

def predict_image(image, model):
    """
    Run the model on one image and return the result of model.predict.

    The image is first reshaped to (-1, IMG_SIZE, IMG_SIZE, 1) so that it
    forms the batch passed to the model.
    """
    batch = image.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
    return model.predict(batch)

def create_test_directory(images_per_category=5):
    """
    Fill the 'temp_test' directory with a few resized images per digit.

    For each entry of CATEGORIES, files under TESTDIR/<category> are taken in
    directory-listing order (not at random), read as grayscale, resized to
    IMG_SIZE x IMG_SIZE and written as 'temp_test/<class><index>.png'
    (for example '03.png' is the fourth image of class 0). The first
    character of each file name is therefore the class index. Images that
    cannot be read, resized or written are reported and do not count toward
    the quota.

    Args:
        images_per_category: number of images written per category
            (default 5).
    """
    os.makedirs('temp_test', exist_ok=True)
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(TESTDIR, category)
        number_of_images = 0
        for img in os.listdir(path):
            if number_of_images >= images_per_category:
                break
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread reports an unreadable file by returning None.
            if img_array is None:
                print("Skipping unreadable image:", img_path)
                continue
            out_path = os.path.join(
                'temp_test', str(class_num) + str(number_of_images) + '.png')
            try:
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                written = cv2.imwrite(out_path, new_array)
            except cv2.error as e:
                print("Skipping image that could not be processed:", img_path, e)
                continue
            # cv2.imwrite returns False when the file could not be saved.
            if not written:
                print("Could not write image:", out_path)
                continue
            number_of_images += 1

def check_prediction_file(path='prediction.txt'):
    """
    Score the prediction file and print the number and share of correct lines.

    Each scored line holds two whitespace-separated fields. A line counts as
    correct when the first character of the first field equals the first
    character of the second field. Lines with fewer than two fields (for
    example blank lines) are ignored. The total is the number of scored
    lines, so the report stays right when the file does not hold exactly
    50 predictions.

    Args:
        path: file to score (default 'prediction.txt').
    """
    correct = 0
    total = 0
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Nothing to compare on a blank or truncated line.
            if len(fields) < 2:
                continue
            total += 1
            if fields[0][0] == fields[1][0]:
                correct += 1
    print(str(correct) + ' correct predictions out of ' + str(total))
    # Guard against division by zero when no line could be scored.
    percentage = correct / total * 100 if total else 0.0
    print(str(percentage) + '%')

def clean_prediction_file():
    """
    Delete 'prediction.txt' (relative path) if it exists; otherwise do nothing.
    """
    target = 'prediction.txt'
    if not os.path.exists(target):
        return
    os.remove(target)

def main():
    """
    Train the model, evaluate it, then score it on the 'temp_test' images.

    Steps: load the training and test data, train and evaluate the model,
    fill 'temp_test' with sample images, and predict each of them. One
    '<file stem> <predicted digit>' line per image is written to
    'prediction.txt', which is then scored. Images that cannot be read or
    reshaped are reported and skipped instead of being silently ignored.
    """
    training_data = create_training_data()
    test_data = create_test_data()
    print("Train Model")
    model = train_model(training_data)
    print("Test Model")
    test_model(test_data, model)
    create_test_directory()
    print("Predict Image")
    clean_prediction_file()
    # Open the output once rather than reopening it for every image.
    with open('prediction.txt', 'a') as file:
        # Sorted so the printed and written order does not depend on the OS.
        for img in sorted(os.listdir('temp_test')):
            img_path = os.path.join('temp_test', img)
            img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread reports an unreadable file by returning None.
            if img_array is None:
                print("Skipping unreadable image:", img_path)
                continue
            try:
                prediction = predict_image(img_array, model)
            except ValueError as e:
                # Raised by the reshape when the pixel count does not fit
                # IMG_SIZE x IMG_SIZE.
                print("Skipping image with unexpected shape:", img_path, e)
                continue
            digit = np.argmax(prediction)
            print(digit)
            file.write(img.split('.')[0] + " " + str(digit) + '\n')
    check_prediction_file()


# Run the pipeline only when executed as a script or notebook cell,
# not when this code is imported.
if __name__ == "__main__":
    main()

Train Model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Model
Test loss: 0.3687411844730377
Test accuracy: 0.9399999976158142
Predict Image
0
0
0
0
0
1
1
1
1
1
2
2
2
2
2
3
3
3
3
3
4
4
4
4
4
5
5
5
5
5
6
5
8
6
6
7
7
7
7
7
8
8
8
8
2
9
9
9
9
9
47 correct predictions out of 50
94.0%
