In [18]:
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical

import os
import datetime

%load_ext tensorboard

import matplotlib.pyplot as plt
from skimage import color, exposure

from sklearn.metrics import accuracy_score

from hyperopt import hp, STATUS_OK, tpe, Trials, fmin

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [19]:
cd '/content/drive/My Drive/Colab Notebooks/Matrix/dw_matrix-/matrix_tree'

/content/drive/My Drive/Colab Notebooks/Matrix/dw_matrix-/matrix_tree


In [0]:
# Load the pickled train/test splits; each one is indexed by 'features' and 'labels'.
# NOTE(review): read_pickle executes pickle payloads - only use files from a trusted source.
train, test = pd.read_pickle('data/train.p'), pd.read_pickle('data/test.p')

X_train, X_test = train['features'], test['features']
y_train, y_test = train['labels'], test['labels']

In [0]:
# One-hot encode integer labels. The ndim guards make the cell safe to re-run:
# labels that are already 2-D are left untouched.
#
# Both splits are encoded with the same class count. Letting to_categorical infer
# it per split (max label + 1) would give mismatched widths if one split happened
# not to contain the highest class id.
label_class_count = max(
    y.shape[1] if y.ndim > 1 else int(y.max()) + 1
    for y in (y_train, y_test)
)
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=label_class_count)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=label_class_count)

In [0]:
# Per-sample input shape (everything after the leading sample axis) and the
# width of the label matrix, which is used as the number of output units.
input_shape, num_classes = X_train.shape[1:], y_train.shape[1]

In [0]:
def train_model(model, X_train, y_train, params_fit=None):
  """Compile `model`, fit it on the given data and return the same model object.

  The model is compiled with categorical cross-entropy, the Adam optimizer and
  an accuracy metric. Every call logs to a new timestamped TensorBoard
  directory under 'logs'.

  Args:
    model: object exposing Keras-style `compile` and `fit` methods.
    X_train: training inputs, passed to `model.fit`.
    y_train: training targets, passed to `model.fit`.
    params_fit: optional dict overriding fit settings. Recognised keys:
      'batch_size' (default 128), 'epochs' (default 5), 'verbose' (default 1)
      and 'validation_data' (default `(X_train, y_train)`).

  Returns:
    The fitted `model`.
  """
  # A None default avoids sharing a single mutable dict between calls.
  fit_options = {} if params_fit is None else params_fit

  model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

  # One run directory per call, named after the current time.
  logdir = os.path.join('logs', datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
  tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

  model.fit(X_train,
            y_train,
            batch_size=fit_options.get('batch_size', 128),
            epochs=fit_options.get('epochs', 5),
            verbose=fit_options.get('verbose', 1),
            # NOTE: without an explicit 'validation_data' the model is validated
            # on the training data itself, so the reported val_* metrics are not
            # a held-out estimate.
            validation_data=fit_options.get('validation_data', (X_train, y_train)),
            callbacks=[tensorboard_callback]
            )

  return model

def predict(model_trained, X_test, y_test, scoring=accuracy_score):
  """Score a trained model against labels given as a 2-D score/one-hot matrix.

  Both `y_test` and the model's predictions are reduced to class indices with
  an argmax over axis 1 before being handed to `scoring`.

  Args:
    model_trained: object whose `predict(X_test)` returns per-class scores.
    X_test: inputs forwarded to `model_trained.predict`.
    y_test: 2-D label array; the argmax of each row is taken as the true class.
    scoring: callable invoked as `scoring(true_classes, predicted_classes)`.

  Returns:
    Whatever `scoring` returns.
  """
  expected_classes = np.argmax(y_test, axis=1)
  predicted_classes = np.argmax(model_trained.predict(X_test), axis=1)
  return scoring(expected_classes, predicted_classes)

In [0]:
def get_cnn_v5(input_shape, num_classes):
  """Build the (uncompiled) v5 CNN.

  Architecture: three blocks of three 3x3 ReLU convolutions with 32, 64 and 128
  filters, each block closed by max pooling and Dropout(0.3); then two
  Dense(1024) ReLU layers, each followed by Dropout(0.3); finally a softmax
  layer with `num_classes` units.

  Args:
    input_shape: shape given to the first convolution as `input_shape`.
    num_classes: number of units in the output layer.

  Returns:
    A `Sequential` model.
  """
  def conv(filters, **extra):
    # Every convolution shares the 3x3 kernel and ReLU activation.
    return Conv2D(filters=filters, kernel_size=(3,3), activation='relu', **extra)

  def close_block():
    # Each convolutional block ends with pooling followed by dropout.
    return [MaxPool2D(), Dropout(0.3)]

  def dense_stage():
    return [Dense(1024, activation='relu'), Dropout(0.3)]

  layers = [
    # Block 1: the first convolution keeps the default padding.
    conv(32, input_shape=input_shape),
    conv(32, padding='same'),
    conv(32, padding='same'),
    *close_block(),

    # Block 2: the last convolution keeps the default padding.
    conv(64, padding='same'),
    conv(64, padding='same'),
    conv(64),
    *close_block(),

    # Block 3: the last convolution keeps the default padding.
    conv(128, padding='same'),
    conv(128, padding='same'),
    conv(128),
    *close_block(),

    Flatten(),

    *dense_stage(),
    *dense_stage(),

    Dense(num_classes, activation='softmax'),
  ]
  return Sequential(layers)

In [49]:
# Build the v5 network, train it with train_model's default fit settings,
# then display the score predict() computes on the test split.
model = get_cnn_v5(input_shape=input_shape, num_classes=num_classes)
model_trained = train_model(model=model, X_train=X_train, y_train=y_train)
predict(model_trained=model_trained, X_test=X_test, y_test=y_test)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


0.9616780045351474

In [50]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is the
# accuracy metric configured in train_model.
test_metrics = model_trained.evaluate(X_test, y_test)
test_metrics[1]



0.9616780281066895

In [0]:
def get_model(params):
  """Build the (uncompiled) CNN with dropout rates taken from `params`.

  Same layout as the fixed-dropout network: three blocks of three 3x3 ReLU
  convolutions (32, 64, 128 filters) each closed by max pooling and dropout,
  followed by two Dense(1024) ReLU layers with dropout and a softmax output.
  Reads the notebook-level `input_shape` and `num_classes`.

  Args:
    params: mapping that must contain the keys 'dropout_cnn_block_one',
      'dropout_cnn_block_two', 'dropout_cnn_block_tree',
      'dropout_dense_block_one' and 'dropout_dense_block_two'.

  Returns:
    A `Sequential` model.
  """
  def conv(filters, **extra):
    # Every convolution shares the 3x3 kernel and ReLU activation.
    return Conv2D(filters=filters, kernel_size=(3,3), activation='relu', **extra)

  def padded_conv(filters):
    return conv(filters, padding='same')

  layers = [
    # Block 1: only the very first convolution keeps the default padding.
    conv(32, input_shape=input_shape),
    padded_conv(32),
    padded_conv(32),
    MaxPool2D(),
    Dropout(params['dropout_cnn_block_one']),

    # Block 2
    padded_conv(64),
    padded_conv(64),
    padded_conv(64),
    MaxPool2D(),
    Dropout(params['dropout_cnn_block_two']),

    # Block 3 (the key is spelled 'tree' to match the search space definition)
    padded_conv(128),
    padded_conv(128),
    padded_conv(128),
    MaxPool2D(),
    Dropout(params['dropout_cnn_block_tree']),

    Flatten(),

    Dense(1024, activation='relu'),
    Dropout(params['dropout_dense_block_one']),

    Dense(1024, activation='relu'),
    Dropout(params['dropout_dense_block_two']),

    Dense(num_classes, activation='softmax'),
  ]
  return Sequential(layers)

In [0]:
def func_obj(params):
  """Hyperopt objective: train one candidate network and report its accuracy.

  Builds a model from `params` via `get_model`, fits it for 5 epochs on the
  notebook-level `X_train`/`y_train`, evaluates it on `X_test`/`y_test` and
  prints the parameters together with the accuracy.

  Args:
    params: sampled hyperparameters; 'batch_size' is optional (default 128)
      and is cast to int, the remaining keys are consumed by `get_model`.

  Returns:
    Dict with 'loss' (negated accuracy, so a lower loss means a more accurate
    model), 'status' (STATUS_OK) and 'model' (the fitted model).
  """
  # NOTE(review): candidates are scored on the test split, so the selected
  # hyperparameters are tuned against the same data used for final reporting.
  candidate = get_model(params)
  candidate.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

  fit_settings = dict(
      batch_size=int(params.get('batch_size', 128)),
      epochs=5,
      verbose=0,
  )
  candidate.fit(X_train, y_train, **fit_settings)

  # evaluate() yields the loss first; index 1 is the accuracy metric.
  accuracy = candidate.evaluate(X_test, y_test, verbose=0)[1]
  print(params, 'accuracy={}'.format(accuracy))

  return {'loss': -accuracy, 'status': STATUS_OK, 'model': candidate}

In [46]:
# Search space: batch size in steps of 10 between 100 and 200, plus one dropout
# rate per convolutional / dense block. quniform yields floats; func_obj casts
# batch_size to int before fitting.
space = {
    'batch_size': hp.quniform('batch_size', 100, 200, 10),
    'dropout_cnn_block_one': hp.uniform('dropout_cnn_block_one', 0.3, 0.5),
    'dropout_cnn_block_two': hp.uniform('dropout_cnn_block_two', 0.3, 0.5),
    'dropout_cnn_block_tree': hp.uniform('dropout_cnn_block_tree', 0.3, 0.5),

    'dropout_dense_block_one': hp.uniform('dropout_dense_block_one', 0.3, 0.7),
    'dropout_dense_block_two': hp.uniform('dropout_dense_block_two', 0.3, 0.7)
}

# Keep a handle on the Trials object so the per-trial results returned by
# func_obj stay inspectable after the search finishes.
trials = Trials()

# Arguments are passed by keyword: in hyperopt releases whose fmin signature
# places `timeout` and `loss_threshold` before `trials`, a fifth positional
# argument would be bound to `timeout` instead of `trials`.
best = fmin(
    fn=func_obj,
    space=space,
    algo=tpe.suggest,
    max_evals=30,
    trials=trials,
)

{'batch_size': 200.0, 'dropout_cnn_block_one': 0.3036663417052082, 'dropout_cnn_block_tree': 0.3311630030728248, 'dropout_cnn_block_two': 0.322642546910468, 'dropout_dense_block_one': 0.4491911603443769, 'dropout_dense_block_two': 0.3613292665554358}
accuracy=0.9519274234771729
{'batch_size': 200.0, 'dropout_cnn_block_one': 0.3911672615234459, 'dropout_cnn_block_tree': 0.43714286235718336, 'dropout_cnn_block_two': 0.4059341431412958, 'dropout_dense_block_one': 0.6341260149400714, 'dropout_dense_block_two': 0.3387617762448081}
accuracy=0.9292517304420471
{'batch_size': 120.0, 'dropout_cnn_block_one': 0.44608647448138933, 'dropout_cnn_block_tree': 0.44294404755901473, 'dropout_cnn_block_two': 0.40693491748056176, 'dropout_dense_block_one': 0.5485936824234372, 'dropout_dense_block_two': 0.41515793256112205}
accuracy=0.9337868690490723
{'batch_size': 120.0, 'dropout_cnn_block_one': 0.43828371047720627, 'dropout_cnn_block_tree': 0.4832936106645897, 'dropout_cnn_block_two': 0.456717646131417

In [57]:
# Second run of the hyperparameter search with the same space definition:
# batch size in steps of 10 between 100 and 200, plus one dropout rate per
# convolutional / dense block. quniform yields floats; func_obj casts
# batch_size to int before fitting.
space = {
    'batch_size': hp.quniform('batch_size', 100, 200, 10),
    'dropout_cnn_block_one': hp.uniform('dropout_cnn_block_one', 0.3, 0.5),
    'dropout_cnn_block_two': hp.uniform('dropout_cnn_block_two', 0.3, 0.5),
    'dropout_cnn_block_tree': hp.uniform('dropout_cnn_block_tree', 0.3, 0.5),

    'dropout_dense_block_one': hp.uniform('dropout_dense_block_one', 0.3, 0.7),
    'dropout_dense_block_two': hp.uniform('dropout_dense_block_two', 0.3, 0.7)
}

# Keep a handle on the Trials object so the per-trial results returned by
# func_obj stay inspectable after the search finishes.
trials = Trials()

# Arguments are passed by keyword: in hyperopt releases whose fmin signature
# places `timeout` and `loss_threshold` before `trials`, a fifth positional
# argument would be bound to `timeout` instead of `trials`.
best = fmin(
    fn=func_obj,
    space=space,
    algo=tpe.suggest,
    max_evals=30,
    trials=trials,
)

{'batch_size': 140.0, 'dropout_cnn_block_one': 0.38306425692769397, 'dropout_cnn_block_tree': 0.35902224765096513, 'dropout_cnn_block_two': 0.3097515179405164, 'dropout_dense_block_one': 0.43183593122773695, 'dropout_dense_block_two': 0.3782647180359416}
accuracy=0.9671201705932617
{'batch_size': 100.0, 'dropout_cnn_block_one': 0.40284678334897545, 'dropout_cnn_block_tree': 0.47262121085433884, 'dropout_cnn_block_two': 0.3391935123355201, 'dropout_dense_block_one': 0.34617386500963876, 'dropout_dense_block_two': 0.4734785302262725}
accuracy=0.9519274234771729
{'batch_size': 100.0, 'dropout_cnn_block_one': 0.35127401991692303, 'dropout_cnn_block_tree': 0.34586536324366396, 'dropout_cnn_block_two': 0.42324503975984484, 'dropout_dense_block_one': 0.41706251690733015, 'dropout_dense_block_two': 0.6808494415250831}
accuracy=0.9124716520309448
{'batch_size': 100.0, 'dropout_cnn_block_one': 0.35412305783800263, 'dropout_cnn_block_tree': 0.3026291062841846, 'dropout_cnn_block_two': 0.340772691