### Importing the Libraries

In [1]:
import os,json
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
from glob import glob
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras import backend as K
from sklearn.model_selection import KFold

In [2]:
# Record the Keras and TensorFlow versions this notebook was executed with.
for _version in (tf.keras.__version__, tf.__version__):
    print(_version)

2.7.0
2.7.0


### Processing the Data

In [10]:
# Dataset locations.
#   path  -> directory walked to build the training DataFrame (`df`)
#   path2 -> directory walked to build the test DataFrame (`df2`)
# Each location can be overridden through an environment variable so the
# notebook can run on a machine other than the original author's. When the
# variable is unset, the original absolute path is used unchanged.
path = os.environ.get(
    "DIGITS_TRAIN_DIR",
    "C:/Users/shrey/OneDrive - Nanyang Technological University/Desktop/Dataset/trainingSet/trainingSet/FinalSet/",
)
path2 = os.environ.get(
    "DIGITS_TEST_DIR",
    "C:/Users/shrey/OneDrive - Nanyang Technological University/Desktop/Dataset/testSet/final_test/",
)

In [11]:
img_width, img_height = 28,28

# The ten digit classes in a fixed order. Pinning the categories guarantees
# that the integer code of a label is always the digit itself, even when some
# digit has no images in this directory (codes derived from the data alone
# would shift in that case and could disagree with the test set).
digit_labels = [str(d) for d in range(10)]

# Build (absolute file path, label) records for every JPEG below `path`.
data = []
skipped = 0
for root, _dirs, files in os.walk(path, topdown=True):
    for name in files:
        # Select images by their real extension (case-insensitive) rather than
        # by a substring match anywhere in the file name.
        if os.path.splitext(name)[1].lower() != '.jpeg':
            continue
        # The first character of the file name is used as the class label.
        class_name = name[0]
        if class_name not in digit_labels:
            # Not one of the ten digit classes: leave it out instead of
            # silently creating an extra class.
            skipped += 1
            continue
        filename = os.path.abspath(os.path.join(root, name))
        data.append((filename, class_name))

if skipped:
    print(f"Skipped {skipped} file(s) whose name does not start with a digit")

df = pd.DataFrame(data, columns=['filename', 'class_name'])
df['class_name'] = pd.Categorical(df['class_name'], categories=digit_labels)
df['class'] = df['class_name'].cat.codes

# Shuffle the rows with a fixed seed so a fresh "Restart & Run All" reproduces
# the same ordering.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
print(df.head())
print(len(df))
df.dtypes

                                            filename class_name  class
0  C:\Users\shrey\OneDrive - Nanyang Technologica...          7      7
1  C:\Users\shrey\OneDrive - Nanyang Technologica...          3      3
2  C:\Users\shrey\OneDrive - Nanyang Technologica...          7      7
3  C:\Users\shrey\OneDrive - Nanyang Technologica...          3      3
4  C:\Users\shrey\OneDrive - Nanyang Technologica...          7      7
60000


filename        object
class_name    category
class             int8
dtype: object

In [12]:
df.shape

(60000, 3)

In [14]:
# Build the DataFrame for the test set (the directory in `path2`).

# The ten digit classes in a fixed order. Pinning the categories guarantees
# that the integer code of a label is always the digit itself, so the codes
# cannot drift from the ones used for the training data when a digit is absent.
digit_labels = [str(d) for d in range(10)]

# Position of the label character inside a test file name.
# NOTE(review): presumably every test file name has a fixed 10-character
# prefix followed by the digit - confirm against the files on disk.
label_index = 10

data2 = []
skipped = 0
for root, _dirs, files in os.walk(path2, topdown=True):
    for name in files:
        # Select images by their real extension (case-insensitive) rather than
        # by a substring match anywhere in the file name.
        if os.path.splitext(name)[1].lower() != '.jpeg':
            continue
        # A name that is too short, or whose label character is not a digit,
        # is skipped: indexing it would raise IndexError or add a bogus class.
        if len(name) <= label_index or name[label_index] not in digit_labels:
            skipped += 1
            continue
        filename = os.path.abspath(os.path.join(root, name))
        class_name = name[label_index]
        data2.append((filename, class_name))

if skipped:
    print(f"Skipped {skipped} file(s) without a digit label at index {label_index}")

df2 = pd.DataFrame(data2, columns=['filename', 'class_name'])
df2['class_name'] = pd.Categorical(df2['class_name'], categories=digit_labels)
df2['class'] = df2['class_name'].cat.codes

# Shuffle with a fixed seed so reruns produce the same row order.
df2 = df2.sample(frac=1, random_state=42).reset_index(drop=True)

print(df2.head())
print(len(df2))
df2.dtypes

                                            filename class_name  class
0  C:\Users\shrey\OneDrive - Nanyang Technologica...          4      4
1  C:\Users\shrey\OneDrive - Nanyang Technologica...          3      3
2  C:\Users\shrey\OneDrive - Nanyang Technologica...          8      8
3  C:\Users\shrey\OneDrive - Nanyang Technologica...          2      2
4  C:\Users\shrey\OneDrive - Nanyang Technologica...          9      9
10000


filename        object
class_name    category
class             int8
dtype: object

In [15]:
df2.shape

(10000, 3)

### Make the pipeline for loading and resizing the images

#### Image Resizing and Decoding

In [16]:
img_width, img_height = 28,28
num_class = 10
batch_size = 32

def _parse_function(filename,label):
    image_string = tf.io.read_file(filename)
    image_decoded = tf.image.decode_jpeg(image_string,channels=1)
    image_resize = tf.image.resize(image_decoded,[img_width,img_height])
    image_resized = tf.ensure_shape(image_resize,shape=(img_width,img_height,1))
    label = tf.one_hot(label,num_class)
    return image_resized,label

In [18]:
# Training input pipeline: file paths and integer class codes from `df`.
train_paths = tf.cast(df['filename'].values, tf.string)
train_codes = tf.cast(df['class'].values, tf.int32)

# Stages, in order: decode/resize each file, drop elements that raise an
# error, shuffle with a 30-element buffer, repeat the data 10 times, then
# emit batches of exactly 16 (the incomplete final batch is discarded).
# NOTE(review): the batch size is hard-coded to 16 here, while a
# `batch_size = 32` constant is defined next to `_parse_function` -
# confirm which value is intended.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_paths, train_codes))
    .map(_parse_function)
    .apply(tf.data.experimental.ignore_errors())
    .shuffle(30)
    .repeat(10)
    .batch(16, drop_remainder=True)
)

train_dataset

<BatchDataset shapes: ((16, 28, 28, 1), (16, 10)), types: (tf.float32, tf.float32)>

In [19]:
# Validation input pipeline: file paths and integer class codes from `df2`.
valid_paths = tf.cast(df2['filename'].values, tf.string)
valid_codes = tf.cast(df2['class'].values, tf.int32)

# Stages, in order: decode/resize each file, drop elements that raise an
# error, then batch by 16. The final batch may be smaller, which is why the
# batch dimension is reported as None.
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((valid_paths, valid_codes))
    .map(_parse_function)
    .apply(tf.data.experimental.ignore_errors())
    .batch(16)
)

valid_dataset

<BatchDataset shapes: ((None, 28, 28, 1), (None, 10)), types: (tf.float32, tf.float32)>

### Creating the model