# Import modules

In [1]:
import os, shutil
from os import listdir
from os.path import isfile, join, isdir

from keras import applications
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import SGD, Adam

Using TensorFlow backend.


# Configuring Dataset

## Dataset classes

In [2]:
classes = {}
base_dir = 'data/documents/'
classes_dirs = [f for f in listdir(base_dir) if isdir(join(base_dir, f))]
print('Classes Found:')
for image_class in classes_dirs:
    classes[image_class] = {}
    print(image_class)

Classes Found:
CDI
CNH
C_NASCIMENTO
PASSAPORTE
RG
TITULO_ELEITORIAL


## Create Train and Validation folders for each class

In [3]:
train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)

validation_dir = os.path.join(base_dir, 'validation')
os.mkdir(validation_dir)

for image_class in classes_dirs:
    
    classes[image_class]['train_dir'] = os.path.join(train_dir, image_class)
    os.mkdir(classes[image_class]['train_dir'])

    classes[image_class]['validation_dir'] = os.path.join(validation_dir, image_class)
    os.mkdir(classes[image_class]['validation_dir'])

## Get images file name for each class

In [4]:
for image_class in classes_dirs:
    images_path = base_dir + '{}/'.format(image_class)
    classes[image_class]['images'] = [f for f in listdir(images_path) if isfile(join(images_path, f))]

## Set Validation and Train size 

In [5]:
train_size = 0.7
validation_size = 1 - train_size
downsample_factor = 1

for image_classe, class_data in classes.items():
    class_data['len_train'] = int(len(class_data['images']) * train_size * downsample_factor)
    class_data['len_validation'] = int(len(class_data['images']) * validation_size * downsample_factor)

## Move files inside class folders to their respective folder inside train and validation folder 

In [6]:
for image_class, class_data in classes.items():
    original_dir = base_dir + image_class + '/'
    images = class_data['images']
    len_train = class_data['len_train']
    len_validation = class_data['len_validation']
    for file in images[:len_train]:        
        src = os.path.join(original_dir, file)
        dst = os.path.join(class_data['train_dir'], file)
        shutil.copyfile(src, dst)
    
    for file in images[len_train:len_train + len_validation]:
        src = os.path.join(original_dir, file)
        dst = os.path.join(class_data['validation_dir'], file)
        shutil.copyfile(src, dst)

In [7]:
for image_class, class_data in classes.items():
    print('\ntotal training {} images:'.format(image_class), len(os.listdir(class_data['train_dir'])))
    print('total validation {} images:'.format(image_class), len(os.listdir(class_data['validation_dir'])))


total training CDI images: 12
total validation CDI images: 5

total training CNH images: 12
total validation CNH images: 5

total training C_NASCIMENTO images: 7
total validation C_NASCIMENTO images: 3

total training PASSAPORTE images: 12
total validation PASSAPORTE images: 5

total training RG images: 12
total validation RG images: 5

total training TITULO_ELEITORIAL images: 12
total validation TITULO_ELEITORIAL images: 5


# Resnet

## Parameters

In [8]:
image_height,image_width = 128,128 
num_classes = len(classes_dirs)

## Model 

In [9]:
base_model = applications.resnet50.ResNet50(weights= None, include_top=False, input_shape= (image_height,image_width,1))

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
predictions = Dense(num_classes, activation= 'softmax')(x)
model = Model(inputs = base_model.input, outputs = predictions)



In [10]:
adam = Adam(lr=0.0001)
model.compile(optimizer= adam, loss='categorical_crossentropy', metrics=['accuracy'])

## Training

In [11]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(image_height, image_width),
    class_mode='categorical',
    color_mode='grayscale')

validation_generator = train_datagen.flow_from_directory(
    validation_dir,
    target_size=(image_height, image_width),
    class_mode='categorical',
    color_mode='grayscale')

Found 67 images belonging to 6 classes.
Found 28 images belonging to 6 classes.


In [12]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128, 128, 1)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 134, 134, 1)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 64, 64, 64)   3200        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 64, 64, 64)   256         conv1[0][0]                      
____________________________________________________________________________________________

In [13]:
model.fit_generator(train_generator, 
                    steps_per_epoch=train_generator.samples, 
                    epochs = 10, 
                    validation_data=validation_generator, 
                    validation_steps=validation_generator.samples,
                    verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
 8/67 [==>...........................] - ETA: 6:26 - loss: 0.7389 - accuracy: 0.9467

KeyboardInterrupt: 