In [6]:
import os

from socketserver import ThreadingMixIn
import numpy as np
import pandas as pd
import sys

from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Dense, Dropout, Conv2D, MaxPooling2D,
                                     Flatten)
from tensorflow.keras.utils import to_categorical

In [2]:
# User-defined. Later to be used in a conf file

# Directory holding the cleaned .npy arrays, relative to this notebook.
CLEANED_DATA_DIR = os.path.join('..', 'data_cleaning')  # Change at will
TRAINING_INSTANCES_FILE = 'training_samples_X.npy'
# The label file used to repeat the instances filename, which made the labels
# array a second copy of the image data.
# NOTE(review): the '_Y' name mirrors the '_X' naming of the instances file --
# confirm it matches the file actually written by the data-cleaning step.
TRAINING_LABEL_FILE = 'training_samples_Y.npy'

In [None]:
# System constants

RANDOM_SEED = 42      # shared seed for the data splits and the dropout layers
DEPTH_RGB = 3         # last dimension of the model input for colour images
DEPTH_GREYSCALE = 1   # last dimension of the model input for greyscale images

# Full paths of the cleaned arrays, built from the user-defined settings.
TRAINING_SAMPLES = os.path.join(CLEANED_DATA_DIR, TRAINING_INSTANCES_FILE)
TRAINING_LABELS = os.path.join(CLEANED_DATA_DIR, TRAINING_LABEL_FILE)

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code -- only load files from a trusted source.
X_train_np = np.load(
    TRAINING_SAMPLES,
    allow_pickle=True,
)
Y_train_np = np.load(
    TRAINING_LABELS,
    allow_pickle=True,
)

In [7]:
## In which we bring the training data to 0..1
# Dividing by 255.0 maps the samples into 0..1
# (assumes 8-bit pixel values -- TODO confirm against the cleaning step).
cleaned_X = X_train_np / 255.0
# One-hot encode the labels for the categorical cross-entropy loss.
cleaned_Y = to_categorical(Y_train_np)

In [None]:
# Share of the full data set (in percent) assigned to each subset.
TRAIN_P = 50
VALI_P = 20
TEST_P = 30


def split_data(cleaned_X, cleaned_Y):
    """Split samples and labels into validation, test and training subsets.

    The test subset is carved out first; the remainder is then divided
    into training and validation data. Both splits use RANDOM_SEED.

    Args:
        cleaned_X: samples, passed to train_test_split.
        cleaned_Y: labels aligned with ``cleaned_X``.

    Returns:
        Tuple ``(X_val, X_test, X_train, Y_val, Y_test, Y_train)`` -- note
        the order: validation, test, training.
    """
    test_fraction = TEST_P/100
    # The second split only sees the non-test remainder, so the validation
    # share has to be expressed relative to TRAIN_P + VALI_P.
    validation_fraction = VALI_P/(TRAIN_P + VALI_P)

    X_rest, X_test, Y_rest, Y_test = train_test_split(
        cleaned_X,
        cleaned_Y,
        test_size=test_fraction,
        random_state=RANDOM_SEED,
    )
    X_train, X_val, Y_train, Y_val = train_test_split(
        X_rest,
        Y_rest,
        test_size=validation_fraction,
        random_state=RANDOM_SEED,
    )
    return X_val, X_test, X_train, Y_val, Y_test, Y_train


X_val, X_test, X_train, Y_val, Y_test, Y_train = split_data(cleaned_X, cleaned_Y)

In [9]:
# `RGB` is not defined anywhere; the channel-count constant is DEPTH_RGB.
DEPTH = DEPTH_RGB
BATCH_SIZE = 1000  # 35 iterations for 35000 training instances
# Per-stage convolution settings: index 0 is the first conv layer, index 1 is
# shared by layers two/three and index 2 by layers four/five.
FILTERS = [64, 128, 256]
KERNEL_SIZE = [7, 3, 3]
STRIDES = [(1, 1), (1, 1), (1, 1)]
PADDING = 'same'  # 'valid' or 'same'. No 'mirror'
DILATION_RATE = [1, 1, 1]
KERNEL_INITIALIZER = 'glorot_uniform'  # Default
INPUT_SHAPE = (256, 256, DEPTH)  # rows, cols, channels of one input image
POOL_SIZE = (2, 2)


def _conv_layer(stage, name, **extra):
    """Build one ReLU Conv2D layer from the settings stored at index `stage`."""
    return keras.layers.Conv2D(FILTERS[stage], KERNEL_SIZE[stage],
                               activation='relu', strides=STRIDES[stage],
                               padding=PADDING,
                               dilation_rate=DILATION_RATE[stage],
                               kernel_initializer=KERNEL_INITIALIZER,
                               name=name, **extra)


def build_model():
    """Assemble the (uncompiled) convolutional classifier.

    Layout: one conv layer, then two pairs of conv layers, each group
    followed by max pooling; the result is flattened and fed through two
    dense layers with dropout into a 10-way softmax output.

    Returns:
        A ``keras.Sequential`` model that still has to be compiled.
    """
    model = keras.Sequential(
        [
            # Only the first layer declares the input shape.
            _conv_layer(0, 'c_layer_one', input_shape=INPUT_SHAPE),
            keras.layers.MaxPooling2D(pool_size=POOL_SIZE, padding=PADDING),
            _conv_layer(1, 'c_layer_two'),
            _conv_layer(1, 'c_layer_three'),
            keras.layers.MaxPooling2D(pool_size=POOL_SIZE, padding=PADDING),
            _conv_layer(2, 'c_layer_four'),
            _conv_layer(2, 'c_layer_five'),
            keras.layers.MaxPooling2D(pool_size=POOL_SIZE, padding=PADDING),
            # End Convolution/Pooling, go to Dense
            # NOTE(review): with a 256x256 input, 'same' padding, unit strides
            # and three 2x2 poolings the flattened vector has
            # 32*32*256 = 262,144 entries, so h_layer_one alone holds about
            # 33.5M weights -- far above the 2,000,000-parameter limit
            # mentioned where the model is summarized.
            keras.layers.Flatten(),
            keras.layers.Dense(128, activation='relu', name='h_layer_one'),
            keras.layers.Dropout(0.2, seed=RANDOM_SEED),
            keras.layers.Dense(64, activation='relu', name='h_layer_two'),
            keras.layers.Dropout(0.2, seed=RANDOM_SEED),
            keras.layers.Dense(10, activation='softmax', name='output_layer'),
        ]
    )
    return model

In [10]:
def compile_model(model):
    """Configure `model` for training and hand it back.

    Args:
        model: object exposing a Keras-style ``compile`` method.

    Returns:
        The same ``model`` object, compiled in place.
    """
    compile_settings = {
        'optimizer': 'rmsprop',  # Default
        'loss': 'categorical_crossentropy',  # Cross entropy from class
        'metrics': ['accuracy'],
    }
    model.compile(**compile_settings)
    return model

In [11]:
# Values for the Keras `verbose` argument.
PROGRESS_BAR = 1
ONE_LINE_PER = 2


def train_model(model, X_train, Y_train, X_val, Y_val,
                batch_size=128, epochs=12, verbose=ONE_LINE_PER):
    """Fit `model` on the training data while monitoring the validation data.

    Args:
        model: compiled model exposing a Keras-style ``fit`` method.
        X_train: training samples.
        Y_train: training labels.
        X_val: validation samples.
        Y_val: validation labels.
        batch_size: samples per gradient update (default 128, the value
            that used to be hard-coded).
        epochs: number of passes over the training data (default 12).
        verbose: Keras verbosity level, e.g. PROGRESS_BAR or ONE_LINE_PER
            (default ONE_LINE_PER).

    Returns:
        Tuple ``(model, history)`` where ``history`` is whatever
        ``model.fit`` returned.
    """
    history = model.fit(
        x=X_train,
        y=Y_train,
        validation_data=(X_val, Y_val),
        batch_size=batch_size,
        epochs=epochs,
        verbose=verbose,
    )
    return model, history

In [12]:
def eval_model(model, X_test, Y_test):
    """Evaluate `model` on the held-out test data.

    Args:
        model: trained model exposing a Keras-style ``evaluate`` method.
        X_test: test samples.
        Y_test: test labels.

    Returns:
        Tuple ``(test_loss, test_accuracy)``.
    """
    evaluate_kwargs = dict(x=X_test, y=Y_test, batch_size=128, verbose=1)
    # Unpacking requires evaluate() to yield exactly two values
    # (presumably the loss followed by the single compiled metric).
    loss_value, accuracy_value = model.evaluate(**evaluate_kwargs)
    return loss_value, accuracy_value

In [13]:
# Summarize the model to avoid > 2000000 parameters
# Builds the network once and prints its per-layer table so the parameter
# counts can be checked against that limit before any training is started.
model = build_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 c_layer_one (Conv2D)        (None, 256, 256, 64)      9472      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 64)     0         
 )                                                               
                                                                 
 c_layer_two (Conv2D)        (None, 128, 128, 128)     73856     
                                                                 
 c_layer_three (Conv2D)      (None, 128, 128, 128)     147584    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 64, 64, 128)      0         
 2D)                                                             
                                                                 
 c_layer_four (Conv2D)       (None, 64, 64, 256)       2

In [14]:
## You may use this space (and add additional cells for exploration)

# Compile, train and evaluate the model built in the summary cell.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'X_train' is not defined" -- the cell that calls
# split_data() has to be executed before this one.
model = compile_model(model)
model, history = train_model(model, X_train, Y_train, X_val, Y_val)
test_loss, test_accuracy = eval_model(model, X_test, Y_test)


# Report the held-out metrics with four decimals.
print(f"Test loss {test_loss:.4f}, Test accuracy: {test_accuracy:.4f}")

NameError: name 'X_train' is not defined