In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        continue
# /kaggle/input/digits/digits_jpeg/digits_jpeg/7/img008-00044.jpeg
# /kaggle/input/digits/digits_jpeg/digits_jpeg/7/img008-00671.jpeg
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import cv2
from sklearn.model_selection import train_test_split 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, Conv2D, MaxPooling2D
from keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Disable GPU
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


2024-08-20 05:02:21.526807: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-20 05:02:21.526909: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-20 05:02:21.658194: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Data Preparation

In [3]:
images = []
labelNum = [] # list with labels of images

print("Extracting Data from Label #'s:", end=" ")
for num in range(10):
    picList = os.listdir("/kaggle/input/digits/digits_jpeg/digits_jpeg" + '/' + str(num)) 

    for image in picList:
        curImage = cv2.imread("/kaggle/input/digits/digits_jpeg/digits_jpeg/" + '/' + str(num) + '/' + image) 

        if curImage is not None:
            curImage = cv2.resize(curImage, (32, 32)) 
            images.append(curImage)
            labelNum.append(num)
        else:
            print(f"Failed to load image: {image}")

    print(num, end = " ")

print(" ")

images = np.array(images)
labelNum = np.array(labelNum)

Extracting Data from Label #'s: 0 1 2 3 4 5 6 7 8 9  


# Splitting Data into Training/Testing/Validating

In [4]:
print(f"Original Set of Data: {images.shape} \n")


X_train, X_test, y_train, y_test = train_test_split(images, labelNum, test_size=0.2) # Train 80%, Test 20%, function ensures data is randomized
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.2) # Train 64%, Test 20%, Validation 16%
print(labelNum)
print(f"Data to Train: {X_train.shape}")
print(f"Data to Test: {X_test.shape}")
print(f"Data to Validate: {X_validation.shape}")

print(y_train.shape)
print(y_validation.shape)
print(y_test.shape)
# np.where(y_train==x) outputs an np array, first element is an array with all numbers with respective label
numOfImages = []
for x in range(10):
    numOfImages.append(len(np.where(y_train==x)[0]))
    print(f"How many [{x}] labels?: {len(np.where(y_train==x)[0])}") 

Original Set of Data: (10160, 32, 32, 3) 

[0 0 0 ... 9 9 9]
Data to Train: (6502, 32, 32, 3)
Data to Test: (2032, 32, 32, 3)
Data to Validate: (1626, 32, 32, 3)
(6502,)
(1626,)
(2032,)
How many [0] labels?: 653
How many [1] labels?: 635
How many [2] labels?: 667
How many [3] labels?: 669
How many [4] labels?: 662
How many [5] labels?: 643
How many [6] labels?: 651
How many [7] labels?: 649
How many [8] labels?: 624
How many [9] labels?: 649


# Image Preprocessing

In [5]:
def Preprocessing(image):
    image = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    image = cv2.equalizeHist(image) 
    image = image/255
    return image
    

# map each value into function -> into a list -> into an np array 
X_train = np.array(list(map(Preprocessing, X_train)))
X_test = np.array(list(map(Preprocessing, X_test)))
X_validation = np.array(list(map(Preprocessing, X_validation)))

# Add a depth of 1 for the CNN, tells that only 1 color channel per image. If RGB, put 3

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)
X_validation = X_validation.reshape(X_validation.shape[0], X_validation.shape[1], X_validation.shape[2], 1)

AugmentedData = ImageDataGenerator(width_shift_range=0.1, # %
                             height_shift_range=0.1, # %
                             zoom_range=0.2, # %
                             shear_range=0.1, # %
                             rotation_range=10) # °
AugmentedData.fit(X_train) # Generating augmented images as batches that get sent back to X_train while training

# CNN

In [6]:
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)
y_validation = to_categorical(y_validation, 10)

def CNN_Model():
    numOfFilters = 60
    filterSize1 = (5,5)
    filterSize2 = (3,3)
    poolSize = (2,2)
    numOfNode = 500

    model = Sequential() # Keras Sequential model

    model.add((Conv2D(numOfFilters, filterSize1, input_shape=(32, 32, 1), activation='relu'))) # 28x28x60
    model.add((Conv2D(numOfFilters, filterSize1, activation='relu'))) # 24x24x60
    model.add(MaxPooling2D(pool_size=poolSize)) # 12x12x60
    model.add((Conv2D(numOfFilters//2, filterSize2, activation='relu'))) # 21x21x30
    model.add((Conv2D(numOfFilters//2, filterSize2, activation='relu'))) # 18x18x30
    model.add(MaxPooling2D(pool_size=poolSize)) # 9x9x30
    model.add(Dropout(0.5)) # half the neurons are inactive, only used in training
    model.add(Flatten()) # 9x9x30 = 2430
    model.add(Dense(numOfNode, activation='relu')) 
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax')) # 10 nodes for 10 classes
    model.compile(Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

model = CNN_Model()
print(model.summary())

history = model.fit(AugmentedData.flow(X_train, y_train,
                                 batch_size = 50), # number of training samples used in one iteration
                                 steps_per_epoch = len(X_train)//50, # total number of training samples / batch size
                                 epochs = 50, # one full cycle through training dataset
                                 validation_data = (X_validation, y_validation),
                                 shuffle = 1)

test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy:.4f}')
print(f'Test loss: {test_loss:.4f}')


2024-08-20 05:02:59.685125: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


None
Epoch 1/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 174ms/step - accuracy: 0.3087 - loss: 1.8763 - val_accuracy: 0.9483 - val_loss: 0.2007
Epoch 2/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6800 - loss: 0.7576 - val_accuracy: 0.9490 - val_loss: 0.2004
Epoch 3/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 169ms/step - accuracy: 0.7998 - loss: 0.6194 - val_accuracy: 0.9680 - val_loss: 0.1026
Epoch 4/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.7600 - loss: 0.6779 - val_accuracy: 0.9680 - val_loss: 0.1037
Epoch 5/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 167ms/step - accuracy: 0.8826 - loss: 0.3743 - val_accuracy: 0.9779 - val_loss: 0.0750
Epoch 6/50
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.9600 - loss: 0.1482 - val_accuracy: 0.9754 - val_loss: 0.0815
Epoch 7/50
[