In [28]:
import numpy as np
import pickle
import os
from sklearn.model_selection import train_test_split
from collections import namedtuple

import keras
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from keras.optimizers import Adam
from keras.utils import to_categorical

In [29]:
# Immutable bundle of the notebook-wide hyper-parameters.
Config = namedtuple('Config', 'batch_size image_shape num_classes epochs')

# Configuration currently in effect; classes below read it as the source of their defaults.
active_config = Config(
    batch_size=32,
    image_shape=(28, 28, 1),
    num_classes=4,
    epochs=10,
)

In [59]:
class Dataset():
    """Loads the pickled images/labels from ``DATA_DIR_NAME`` and splits them.

    After construction the instance exposes:
      * ``images`` / ``labels``: everything read from the training pickles,
      * ``training_images`` / ``training_labels`` and
        ``validation_images`` / ``validation_labels``: the random split,
      * ``training_set`` / ``validation_set``: ``[images, labels]`` pairs,
      * ``test_set``: the contents of the test image pickle (no labels).

    All arrays are converted to ``float32`` by ``load_pickle``.
    """
    DATA_DIR_NAME = 'data'
    TRAINING_IMG_FILENAME = 'train_image.pkl'
    TRAINING_LABEL_FILENAME = 'train_label.pkl'
    TESTING_IMG_FILENAME = 'test_image.pkl'

    def __init__(self, validation_size=0.2, random_state=None):
        """Load the data from disk and split it.

        Args:
            validation_size: forwarded to ``train_test_split`` as ``test_size``.
            random_state: optional seed forwarded to ``train_test_split`` so the
                split can be reproduced; ``None`` keeps the unseeded behaviour.
        """
        self.validation_size = validation_size
        self.random_state = random_state
        self.build()

    @property
    def training_set_size(self):
        """Number of samples in the training split."""
        return len(self.training_images)

    @property
    def validation_set_size(self):
        """Number of samples in the validation split."""
        return len(self.validation_images)

    @property
    def test_set_size(self):
        """Number of samples in the test set."""
        return len(self.test_set)

    def _data_path(self, filename):
        """Return the path of ``filename`` inside the data directory."""
        return os.path.join(self.DATA_DIR_NAME, filename)

    def build(self):
        """Read the three pickles and populate every data attribute."""
        self.images = self.load_pickle(self._data_path(self.TRAINING_IMG_FILENAME))
        self.labels = self.load_pickle(self._data_path(self.TRAINING_LABEL_FILENAME))

        (self.training_images, self.validation_images,
         self.training_labels, self.validation_labels) = train_test_split(
            self.images, self.labels,
            test_size=self.validation_size,
            random_state=self.random_state)

        self.training_set = self.build_set(self.training_images, self.training_labels)
        self.validation_set = self.build_set(self.validation_images, self.validation_labels)
        self.test_set = self.load_pickle(self._data_path(self.TESTING_IMG_FILENAME))

    def build_set(self, images, labels):
        """Pair images with their labels as a two-element list ``[images, labels]``."""
        return [images, labels]

    def load_pickle(self, filename, mode='rb'):
        """Unpickle ``filename`` and return its content as a ``float32`` numpy array.

        NOTE: ``pickle.load`` can execute arbitrary code while loading; only use
        this on trusted files.
        """
        # The context manager guarantees the handle is closed even if unpickling fails.
        with open(filename, mode) as pickle_file:
            data = pickle.load(pickle_file)
        return np.array(data, dtype='float32')

In [60]:
#TODO

# Sanity check: load the data from disk, then print
#   1. the shapes of the training / validation image arrays,
#   2. the shapes of the training / validation label arrays,
#   3. the number of samples in the training, validation and test sets.
dataset = Dataset()
print(dataset.training_set[0].shape, dataset.validation_set[0].shape)
print(dataset.training_set[1].shape, dataset.validation_set[1].shape)
print(dataset.training_set_size, dataset.validation_set_size, dataset.test_set_size)

(6400, 784) (1600, 784)
(6400,) (1600,)
6400 1600 2000


In [32]:
class DatasetProvider():
    """Serves endless random mini-batches of preprocessed images and one-hot labels.

    Wraps its own ``Dataset`` instance and exposes generators plus step counts
    that can be handed to a Keras training loop.
    """

    def __init__(self, augment=False, validation_size=0.2, batch_size=None, num_classes=None):
        """
        Args:
            augment: when true, ``preprocess_images`` rescales pixel values by 1/255.
            validation_size: forwarded to ``Dataset``.
            batch_size: samples per batch; falls back to ``active_config.batch_size``.
            num_classes: width of the one-hot labels; falls back to
                ``active_config.num_classes``.
        """
        self.dataset = Dataset(validation_size)
        self.augment = augment
        self.batch_size = batch_size or active_config.batch_size
        self.num_classes = num_classes or active_config.num_classes

    @property
    def training_steps(self):
        """Number of full batches that fit into the training split."""
        # Use the dataset owned by this provider, not a notebook-level global.
        return self.dataset.training_set_size // self.batch_size

    @property
    def validation_steps(self):
        """Number of full batches that fit into the validation split."""
        # Validation batches are drawn from the validation split, so its size
        # (not the test set's) determines the step count.
        return self.dataset.validation_set_size // self.batch_size

    def training_set(self):
        """Endless generator of ``(images, one_hot_labels)`` training batches."""
        yield from self.generate_batch(self.dataset.training_set)

    def validation_set(self):
        """Endless generator of ``(images, one_hot_labels)`` validation batches."""
        yield from self.generate_batch(self.dataset.validation_set)

    def generate_batch(self, img_label_set):
        """Yield preprocessed random batches from an ``[images, labels]`` pair forever.

        Indices are drawn with ``np.random.choice`` (sampling with replacement by
        default), so a batch may contain the same sample more than once.
        """
        images, labels = img_label_set[0], img_label_set[1]
        num_samples = len(images)  # loop-invariant, computed once
        while True:
            random_idx = np.random.choice(num_samples, size=self.batch_size)
            yield self.preprocess_batch([images[random_idx], labels[random_idx]])

    def preprocess_batch(self, img_label_batch):
        """Return ``(preprocessed_images, one_hot_labels)`` for an ``[images, labels]`` batch."""
        return self.preprocess_images(img_label_batch[0]), self.preprocess_labels(img_label_batch[1])

    def preprocess_images(self, imgs):
        """Optionally rescale the images and reshape them to ``(n, 28, 28, 1)``."""
        #TODO add augment code
        # NOTE(review): rescaling to [0, 1] only happens when `augment` is set, so the
        # default pipeline feeds unscaled pixel values to the model - confirm this is intended.
        if self.augment:
            imgs = imgs/255.
        return np.reshape(imgs, (imgs.shape[0], 28, 28, 1))

    def preprocess_labels(self, labels):
        """Remap the raw label ids 2, 3, 6 to 1, 2, 3 and one-hot encode them.

        Works on a copy so the array passed in by the caller is left untouched.
        """
        remapped = np.array(labels)  # np.array copies by default
        remapped[remapped == 2] = 1
        remapped[remapped == 3] = 2
        remapped[remapped == 6] = 3
        return to_categorical(remapped, num_classes=self.num_classes)

In [33]:
# Smoke test: draw a single batch from the training generator and display the
# shapes of its image array and its one-hot label array.
dataset_provider = DatasetProvider()
batch = next(dataset_provider.training_set())
batch[0].shape, batch[1].shape

((32, 28, 28, 1), (32, 4))

In [34]:
class ImageClassifierModel():
    """Builds and compiles the Keras classifier; the result is stored in ``self.model``."""

    def __init__(self, learning_rate=0.001, print_summary=False, image_shape=None, num_classes=None):
        """
        Args:
            learning_rate: learning rate handed to the Adam optimizer.
            print_summary: when true, print the layer summary while building.
            image_shape: input shape; falls back to ``active_config.image_shape``.
            num_classes: number of softmax outputs; falls back to
                ``active_config.num_classes``.
        """
        self.image_shape = image_shape or active_config.image_shape
        self.num_classes = num_classes or active_config.num_classes
        # Passed positionally: the keyword was renamed from `lr` to `learning_rate`
        # across Keras releases, while the learning rate stayed the first argument.
        self.optimizer = Adam(learning_rate)
        self.print_summary = print_summary
        self.model = self.build()

    def build(self):
        """Assemble and compile the network.

        Architecture: one 3x3 convolution with 32 filters (ReLU), flattened into
        two ReLU dense layers (1024 and 512 units) and a softmax output with
        ``num_classes`` units. Compiled with categorical cross-entropy and the
        accuracy metric.

        Returns:
            The compiled Keras ``Model``.
        """
        input_layer = Input(shape=self.image_shape)
        x = Conv2D(filters=32, kernel_size=(3,3), activation='relu')(input_layer)
        x = Flatten()(x)
        x = Dense(units=1024, activation='relu')(x)
        x = Dense(units=512, activation='relu')(x)
        output_layer = Dense(units=self.num_classes, activation='softmax')(x)

        model = Model(inputs=input_layer, outputs=output_layer)

        if self.print_summary:
            # summary() prints by itself and returns None; wrapping it in print()
            # would emit a stray "None" line.
            model.summary()

        model.compile(optimizer=self.optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

        return model

In [37]:
class Training():
    """Wires a ``DatasetProvider`` to an ``ImageClassifierModel`` and runs the training."""

    def __init__(self, augment=False, learning_rate=0.001, print_summary=False, epochs=None):
        """
        Args:
            augment: forwarded to ``DatasetProvider``.
            learning_rate: forwarded to ``ImageClassifierModel``.
            print_summary: forwarded to ``ImageClassifierModel``.
            epochs: number of training epochs; falls back to ``active_config.epochs``.
        """
        self.dataset_provider = DatasetProvider(augment)
        self.model = ImageClassifierModel(learning_rate, print_summary).model
        self.epochs = epochs or active_config.epochs

    def callbacks(self):
        """Return the list of training callbacks (currently empty)."""
        #TODO Define Callbacks
        callbacks = []
        return callbacks

    def run(self):
        """Train the model on the provider's generators and store the result in ``self.history``."""
        # Older Keras releases only accept generators through `fit_generator`; newer
        # ones deprecate or drop it in favour of `fit`. Prefer the legacy entry point
        # when it exists so behaviour on the original environment is unchanged.
        fit = getattr(self.model, 'fit_generator', None)
        if fit is None:
            fit = self.model.fit

        self.history = fit(self.dataset_provider.training_set(),
                           steps_per_epoch=self.dataset_provider.training_steps,
                           epochs=self.epochs,
                           validation_data=self.dataset_provider.validation_set(),
                           validation_steps=self.dataset_provider.validation_steps)

    def visualize(self):
        """Placeholder for the visualisations; currently does nothing.

        A body consisting only of comments is a syntax error, so this docstring
        (plus ``pass``) keeps the class definition valid until it is implemented.
        """
        #TODO
        # 1. Visualize augmented images
        # 2. Visualize model history
        pass

In [38]:
# Build the training pipeline (printing the model summary while the model is
# constructed) and start training.
train = Training(print_summary=True)
train.run()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
flatten_7 (Flatten)          (None, 21632)             0         
_________________________________________________________________
dense_19 (Dense)             (None, 1024)              22152192  
_________________________________________________________________
dense_20 (Dense)             (None, 512)               524800    
_________________________________________________________________
dense_21 (Dense)             (None, 4)                 2052      
Total params: 22,679,364
Trainable params: 22,679,364
Non-trainable params: 0
________________________________________________________________