[View in Colaboratory](https://colab.research.google.com/github/Chewbaccamaster/hyperopt_codes/blob/master/hyperopt_codes.ipynb)

In [0]:
# Install the packages needed for google-drive-ocamlfuse; the PPA added below
# is where the google-drive-ocamlfuse package is fetched from.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# Standard output of the next two commands is discarded to keep the cell quiet.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate the Colab session, then obtain the application-default
# credentials whose client id/secret are handed to google-drive-ocamlfuse.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless invocation: only the output line containing "URL" is shown
# (presumably the authorisation link to open in a browser -- confirm).
# NOTE(review): the client secret is interpolated into a shell command line.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt for the verification code without echoing it in the notebook output.
vcode = getpass.getpass()
# Second headless invocation: the verification code is piped in on stdin.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
# Create the mount point (-p: no error if it already exists) and mount
# Google Drive on it; later cells read the project from ./drive.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
# A __future__ import has to come before every other statement when this code
# is executed as a plain Python module, so it leads the cell.
from __future__ import absolute_import

import os
import sys

import numpy as np
import pandas as pd
from sklearn import model_selection

# Work from the project folder on the mounted Drive so that the relative
# dataset path and the scores file resolve inside it.
os.chdir("/content/drive/Hamming")
sys.path.append("Hamming")

In [0]:
# Install the hyper-parameter search library. hyperopt itself is not pinned.
# NOTE(review): networkx is pinned to 1.11, presumably for compatibility with
# the hyperopt release in use -- confirm before upgrading either package.
!pip install hyperopt
!pip install networkx==1.11


In [0]:
TRAIN_PATH = './dataset_files/hamming_small.txt'  # "/content/drive/Hamming/dataset_files/hamming.txt"

COLUMN_NAMES = ['plainword', 'codeword',
                'id_error', 'bin_error', 'defective_codeword']

# Columns that hold bit strings. make_features indexes these values character
# by character, so they must stay text: without an explicit dtype pandas may
# parse a word such as "0011" as the integer 11 and drop the leading zeros.
_BIT_STRING_COLUMNS = ['plainword', 'codeword', 'bin_error', 'defective_codeword']


def load_data(path=None):
    """Read the ';'-separated dataset into a DataFrame.

    Parameters
    ----------
    path : str or None
        File to read. When None (the default) the module-level TRAIN_PATH is
        used, looked up at call time.

    Returns
    -------
    pandas.DataFrame
        One column per entry of COLUMN_NAMES; the bit-string columns are read
        as text, the remaining column is left to pandas' type inference.
    """
    source = TRAIN_PATH if path is None else path
    text_columns = {name: str for name in _BIT_STRING_COLUMNS}
    return pd.read_csv(source, sep=';', names=COLUMN_NAMES, dtype=text_columns)

In [0]:
def make_features(frame=None):
    """Expand the bit-string columns into one integer column per bit.

    For every character position j of the 'codeword', 'defective_codeword'
    and 'plainword' columns a new column 'cod_<j>', 'def_<j>' and 'pln_<j>'
    is added (in that order, which later label-based column slices rely on).
    The number of positions is taken from the first row of each column.

    Parameters
    ----------
    frame : pandas.DataFrame or None
        Frame to extend in place. When None (the default) the notebook-level
        ``data`` frame is used, which is what a bare ``make_features()`` call
        has always done.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was extended.
    """
    if frame is None:
        frame = data  # notebook-level DataFrame created by the loading cell

    column_plan = (
        ('codeword', 'cod_'),
        ('defective_codeword', 'def_'),
        ('plainword', 'pln_'),
    )
    for source_column, prefix in column_plan:
        words = frame[source_column]
        width = len(words.iloc[0])
        for position in range(width):
            # Bind the position as a default so each lambda keeps its own index.
            frame[prefix + str(position)] = words.apply(
                lambda word, pos=position: int(word[pos]))
    return frame

In [0]:
def split_data(test_size, random_state=None):
    """Split the notebook-level ``data`` frame into train and test arrays.

    Inputs are the columns 'def_0'..'def_30' and targets are the columns
    'cod_0'..'cod_30', both selected by label slice.

    Parameters
    ----------
    test_size :
        Passed through to ``train_test_split``.
    random_state :
        Passed through to ``train_test_split``. The default None keeps the
        previous unseeded behaviour; pass an int for a reproducible split.

    Returns
    -------
    tuple of numpy.ndarray
        (train_data, test_data, train_labels, test_labels).
    """
    inputs = data.loc[:, 'def_0':'def_30']
    targets = data.loc[:, 'cod_0':'cod_30']  # 'mask_0':'pln_25'
    parts = model_selection.train_test_split(
        inputs, targets, test_size=test_size, random_state=random_state)
    train_data, test_data, train_labels, test_labels = parts
    return (np.array(train_data), np.array(test_data),
            np.array(train_labels), np.array(test_labels))

In [0]:
%%time
# Load the raw table, add the per-bit feature columns (make_features extends
# the global `data` frame in place, hence the second shape print), then hold
# out 30% of the rows for evaluation.
data = load_data()
print(data.shape)
make_features()
print(data.shape)
train_data, test_data, train_labels, test_labels = split_data(test_size=0.3)

In [0]:
def probs_to_labels(predicted_probs):
    """Turn probabilities into hard 0/1 labels.

    A value strictly greater than 0.5 becomes 1, anything else becomes 0.
    Returns a new list with one label per input element.
    """
    labels = []
    for prob in predicted_probs:
        if prob > 0.5:
            labels.append(1)
        else:
            labels.append(0)
    return labels
def count_errors(y, y_pred):
    """Count the positions at which two label sequences differ.

    The comparison covers the whole length of the inputs rather than a fixed
    31 positions, so it works for any word length; for 31-element inputs the
    result is the same as before.

    Parameters
    ----------
    y, y_pred : sequences of equal length
        Expected and predicted labels.

    Returns
    -------
    int
        Number of positions where the two sequences disagree.

    Raises
    ------
    ValueError
        If the sequences have different lengths.
    """
    if len(y) != len(y_pred):
        raise ValueError(
            'length mismatch: %d labels vs %d predictions' % (len(y), len(y_pred)))
    return sum(1 for label_bit, result_bit in zip(y, y_pred)
               if label_bit != result_bit)

In [0]:
# errorStats = {'0': 0}
# for i in range(0, y_pred.shape[0]):
#   resultArray = probs_to_labels(y_pred[i])
#   errorNum = count_errors(test_labels[i], resultArray)
#   if errorStats.get(str(errorNum)) == None:
#     errorStats[str(errorNum)] = 0
#   errorStats[str(errorNum)] += 1
# print(errorStats)
def binary_accuracy(y, y_pred):
    """Fraction of rows whose thresholded prediction matches the label row exactly.

    Each row of ``y_pred`` is thresholded at 0.5 (strictly greater counts as 1)
    and compared with the corresponding row of ``y``; a row scores only when
    every position agrees.

    Parameters
    ----------
    y : numpy.ndarray
        Label rows; ``y.shape[0]`` is used as the denominator.
    y_pred : iterable of rows
        Predicted probabilities, one row per label row.

    Returns
    -------
    float
        Number of exactly matching rows divided by ``y.shape[0]``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` has no rows.
    """
    exact_rows = sum(
        int(np.array_equal(label_row, np.asarray(prob_row) > 0.5))
        for label_row, prob_row in zip(y, y_pred)
    )
    # Force true division: int / int floor-divides on Python 2, which would
    # collapse every accuracy below 100% to 0.
    return float(exact_rows) / y.shape[0]


In [0]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import roc_auc_score
import sys

# Short aliases read as globals by the objective function f_nn.
X = train_data
y = train_labels
X_val = test_data
y_val = test_labels
activations = ['relu', 'tanh', 'softmax', 'elu']

# Keras' Dropout takes the fraction of units to DROP, so "no dropout" is a
# rate of 0. The previous value of 1 asks for every unit to be dropped.
# To search over dropout, replace a fixed rate with e.g.
# hp.choice('dropout1', [.25, 0]).
NO_DROPOUT = 0.0

# hyperopt search space. The first three hidden layers are always present;
# 'choice' optionally adds a fourth, or a fourth and a fifth, hidden layer.
# The hp.choice labels are kept as-is: they are the keys hyperopt reports in
# the result of fmin.
space = {'choice': hp.choice('num_layers',
                    [ {'layers':'three', },
                    {'layers':'four',
                      'units4': hp.choice('units4', [128, 256, 512, 1024]),
                      'activation4': hp.choice('activation4', activations),
                      'dropout4': NO_DROPOUT},
                    { 'layers': 'five',
                      'units4_5': hp.choice('units4_5', [128, 256, 512, 1024]),
                      'activation4_5': hp.choice('activation4_5', activations),
                      'dropout4_5': NO_DROPOUT,
                      'units5': hp.choice('units5', [128, 256, 512, 1024]),
                      'activation5': hp.choice('activation5', activations),
                      'dropout5': NO_DROPOUT
                     }
                    ]),

            'units1': hp.choice('units1', [128, 256, 512, 1024]),
            'units2': hp.choice('units2', [128, 256, 512, 1024]),
            'units3': hp.choice('units3', [128, 256, 512, 1024]),

            'dropout1': NO_DROPOUT,
            'dropout2': NO_DROPOUT,
            'dropout3': NO_DROPOUT,

            'activation1': hp.choice('activation1', activations),
            'activation2': hp.choice('activation2', activations),
            'activation3': hp.choice('activation3', activations),

            'batch_size' : hp.choice('batch_size', [32, 64, 128]),

            'nb_epochs' :  hp.choice('nb_epochs', [5, 7, 10]),
            'optimizer': hp.choice('optimizer', ['adadelta','adam','rmsprop']),
            #'activation': hp.choice('activation', ['sigmoid', 'tanh']),
            'last_activation': hp.choice('last_activation', ['sigmoid', 'hard_sigmoid'])
        }

In [0]:
# Collects one (accuracy, parameter-dict) tuple per configuration evaluated by f_nn.
scores = []
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import Adadelta, Adam, rmsprop
import json


In [0]:
def _add_dense_stage(model, units, activation, dropout, **dense_kwargs):
    """Append one Dense -> Activation -> Dropout stage to ``model``."""
    model.add(Dense(units=int(units), **dense_kwargs))
    model.add(Activation(activation))
    model.add(Dropout(dropout))


def f_nn(params):
    """hyperopt objective: build, train and score one network configuration.

    The network has three hidden stages, plus one or two more when
    ``params['choice']['layers']`` is 'four' or 'five', followed by an output
    layer as wide as the label rows. It is trained on the notebook-level
    ``X``/``y`` and scored with ``binary_accuracy`` on ``X_val``/``y_val``.

    Side effects: the parameters plus an 'accuracy' entry are appended to
    'scores.json' (followed by ',\\n') and to the global ``scores`` list.
    ``params`` itself is not modified.

    Parameters
    ----------
    params : dict
        One sample drawn from the search space.

    Returns
    -------
    dict
        ``{'loss': -accuracy, 'status': STATUS_OK}``; the loss is negated so
        that hyperopt's minimisation maximises the accuracy.
    """
    print ('Params testing: ', params)
    model = Sequential()

    # The first stage fixes the input width; the later ones spell out the initializer.
    _add_dense_stage(model, params['units1'], params['activation1'],
                     params['dropout1'], input_dim = X.shape[1])
    for stage in ('2', '3'):
        _add_dense_stage(model, params['units' + stage],
                         params['activation' + stage],
                         params['dropout' + stage],
                         kernel_initializer = "glorot_uniform")

    # Optional deeper stages; their settings live in the nested 'choice' dict.
    extra = params['choice']
    if extra['layers'] == 'four':
        extra_stages = ('4',)
    elif extra['layers'] == 'five':
        extra_stages = ('4_5', '5')
    else:
        extra_stages = ()
    for stage in extra_stages:
        _add_dense_stage(model, extra['units' + stage],
                         extra['activation' + stage],
                         extra['dropout' + stage],
                         kernel_initializer = "glorot_uniform")

    # One output unit per label column (31 for the cod_0..cod_30 targets).
    model.add(Dense(y.shape[1]))
    model.add(Activation(params['last_activation']))
    model.compile(loss='binary_crossentropy', optimizer=params['optimizer'])

    model.fit(X, y, epochs=params['nb_epochs'], batch_size=int(params['batch_size']), verbose = 0)

    predictions = model.predict(X_val)
    acc = binary_accuracy(y_val, predictions)
    print('ACCURACY:', acc)

    # Record a copy so the dict handed in by hyperopt stays untouched, and
    # store the score under the correctly spelled key only.
    record = dict(params)
    record['accuracy'] = acc
    with open('scores.json', 'a') as score_file:
        json.dump(record, score_file, indent = 4)
        score_file.write(',\n')
    sys.stdout.flush()
    scores.append((acc, record))
    return {'loss': -acc, 'status': STATUS_OK}

In [0]:
trials = Trials()

# Start a fresh log: open the JSON array that f_nn appends its records to.
with open('scores.json', 'w') as score_file:
    score_file.write('[\n')

best = fmin(f_nn, space, algo=tpe.suggest, max_evals=350, trials=trials)
print('best: ')
print(best)

# Every record in the log is followed by ',\n'. Closing the array right after
# the last one would leave a trailing comma, which is not valid JSON, so that
# comma is dropped before the closing bracket is written.
with open('scores.json') as score_file:
    logged = score_file.read().rstrip()
if logged.endswith(','):
    logged = logged[:-1]
with open('scores.json', 'w') as score_file:
    score_file.write(logged + '\n]\n')

In [0]:
# Pick the best run by accuracy only. Comparing the whole (accuracy, params)
# tuples falls through to comparing the dicts whenever two runs tie on
# accuracy, which raises TypeError on Python 3.
print(max(scores, key=lambda entry: entry[0]))