In [1]:
import time
import argparse
import pprint
import numpy as np 
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from model.MDBN import MDBN
from parameters import HYPERPARAMS, OPTIMIZER, DATASET,TRAINING
from preprocess import *

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix,precision_recall_curve,auc
from sklearn.preprocessing import normalize as nz
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier


In [2]:
# Search space explored by hyperopt; one entry per tunable hyperparameter.
# Bounds are read from the OPTIMIZER settings object ('min' / 'max' per entry).
# The 'optimizer' and 'optimizer_param' dimensions are currently disabled:
#   'optimizer': hp.choice('optimizer', OPTIMIZER.optimizer),
#   'optimizer_param': hp.uniform('optimizer_param', OPTIMIZER.optimizer_param['min'], OPTIMIZER.optimizer_param['max']),
fspace = {
    'learning_rate': hp.uniform(
        'learning_rate',
        OPTIMIZER.learning_rate['min'],
        OPTIMIZER.learning_rate['max'],
    ),
}

# Integer-valued dimensions: drawn with quniform (q=5) and cast to int.
for _int_dim in ('nb_hiden_node1', 'nb_hiden_node2', 'epochs', 'batch_size'):
    _bounds = getattr(OPTIMIZER, _int_dim)
    fspace[_int_dim] = scope.int(
        hp.quniform(_int_dim, _bounds['min'], _bounds['max'], q=5)
    )

fspace['C'] = hp.uniform('C', OPTIMIZER.C['min'], OPTIMIZER.C['max'])

# Show the expression graph built for one of the integer dimensions.
print(fspace["nb_hiden_node1"])


0 int
1   float
2     hyperopt_param
3       Literal{nb_hiden_node1}
4       quniform
5         Literal{10}
6         Literal{18}
7        q =
8         Literal{5}


In [3]:
# Binarized data
def binarized_features(data):
    """Encode every entry of a 2-D matrix as a 3-bit pattern for DBN training.

    Each value is replaced by a fixed triple of 0/1 flags
    (0 -> 001, 1 -> 010, 2 -> 011, 3 -> 100), and the triples of one row are
    concatenated left to right.

    @Input:  data : 2-D array (the "Hint" matrix of preferences) whose entries
             are all in {0, 1, 2, 3}; any other value raises KeyError.
    @Output: float array of shape (rows, 3 * columns) holding the binarized
             data to be trained by the model.
    """
    bit_patterns = {
        0: [0, 0, 1],
        1: [0, 1, 0],
        2: [0, 1, 1],
        3: [1, 0, 0],
    }
    n_rows, n_cols = data.shape
    encoded = np.zeros((n_rows, 3 * n_cols))
    for row_idx in range(n_rows):
        # One 3-bit group per original column, flattened into a single row.
        groups = [bit_patterns[value] for value in data[row_idx]]
        encoded[row_idx] = np.hstack(groups)
    return encoded

In [22]:
# parse arguments
#parser = argparse.ArgumentParser()
#parser.add_argument("-m", "--max_evals", required=True, help="Maximum number of evaluations during hyperparameters search")
#args = parser.parse_args()
#max_evals = int(args.max_evals)

# Shared state for the search: evaluation counter, per-evaluation history, and
# the seed used for the train/test split and for fine-tuning.
current_eval = 1
train_history = []
SEED = 12345

# Load the raw feature matrix and binarize it for the DBN.
dbn_feats_sg = np.load("./output/data/phili2017/Hint_sg.npy")
data = binarized_features(dbn_feats_sg) #np.load(DATASET.path_data2[DATASET.LS]["data"])
target = np.load("./output/data/phili2017/labels/label_sg.npy")#np.load(DATASET.path_data2[DATASET.LS]["label"])
# Turn a flat label vector into a single column. The previous check compared
# the shape tuple itself with 2, which is always unequal, so labels that were
# already 2-D received an extra axis as well.
if target.ndim == 1:
    target = np.expand_dims(target,1)
print("shape data ",np.shape(data))
print("shape target ",np.shape(target))
# Hold out 20% of the samples for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(data, target,test_size=0.2,random_state=SEED)
# n = number of training samples, m = number of input features
# (m is read as a global by function_to_minimize).
n = np.shape(X_train)[0]
m = np.shape(X_train)[1]
print(n,m)
print (data[:9,:])
print(target[:9,:])

shape data  (507, 6)
shape target  (507, 1)
405 6
[[0. 1. 0. 0. 1. 1.]
 [1. 0. 0. 0. 1. 1.]
 [0. 1. 1. 0. 1. 1.]
 [0. 1. 1. 0. 0. 1.]
 [0. 0. 1. 0. 1. 1.]
 [1. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 1. 0.]
 [0. 1. 1. 0. 1. 0.]
 [1. 0. 0. 0. 1. 1.]]
[[1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]]


In [16]:
# Define the function to minimize (trains the model using the specified hyperparameters).
def function_to_minimize(hyperparams,
        nb_hiden_node1=HYPERPARAMS.nb_hiden_node1, nb_hiden_node2=HYPERPARAMS.nb_hiden_node2, 
        learning_rate=HYPERPARAMS.learning_rate,
        batch_size=HYPERPARAMS.batch_size,
        epochs=HYPERPARAMS.epochs,
        C=HYPERPARAMS.C):
    """Objective for hyperopt: train an MDBN and report its negated test score.

    Reads the globals X_train, y_train, X_test, y_test, m, SEED and max_evals,
    appends one record to the global train_history, and increments the global
    current_eval after each successful evaluation.

    Parameters:
        hyperparams: dict of sampled values. Any of the keys 'learning_rate',
            'batch_size', 'nb_hiden_node1', 'nb_hiden_node2', 'C' and 'epochs'
            overrides the keyword default of the same name.
        nb_hiden_node1, nb_hiden_node2: sizes of the two hidden layers.
        learning_rate, batch_size, epochs, C: forwarded to the MDBN constructor.

    Returns:
        dict with 'loss' (the negated value returned by dbn.evaluate),
        'time' (training time in whole seconds) and 'status' (STATUS_OK).

    Raises:
        Exception: anything raised while building, training or evaluating the
            model is re-raised after train_history has been written to
            train_history.npy.
    """
    global current_eval
    global max_evals

    # Sampled values take precedence; otherwise fall back to the keyword
    # defaults, so the hidden-layer sizes are always bound.
    learning_rate = hyperparams.get('learning_rate', learning_rate)
    batch_size = hyperparams.get('batch_size', batch_size)
    nhid_param1 = hyperparams.get('nb_hiden_node1', nb_hiden_node1)
    nhid_param2 = hyperparams.get('nb_hiden_node2', nb_hiden_node2)
    C = hyperparams.get('C', C)
    epochs = hyperparams.get('epochs', epochs)

    print( "#################################")
    print( "       Evaluation {} of {}".format(current_eval, max_evals))
    print( "#################################")
    try:
        dbn = MDBN(input_data=X_train, label=y_train, input_size=m, hidden_layer_sizes=[nhid_param1, nhid_param2],batch_size=batch_size, learning_rate=learning_rate,epochs=epochs, C=C)
        # The timer starts after model construction, so 'time' covers
        # training, fine-tuning and evaluation only.
        start_time = time.time()
        print ("Training the Hybrid Deep Belief Net model \n................")
        # Unsupervised training of the RBM layers
        dbn.train(X_train)
        # Fine-tuning with the SVM on the training labels
        dbn.finetune(X_train,y_train.ravel(),"finetuning Train data",rng=SEED,bytraining=True)
        # Score on the held-out split; the metric itself is defined by MDBN.evaluate.
        aucc = dbn.evaluate(X_test,y_test.ravel(),"finetuning data",verbose=False)
        training_time = int(round(time.time() - start_time))
        current_eval += 1
        train_history.append({'accuracy':aucc, 'learning_rate':learning_rate, 'nb_hiden_node1':nhid_param1, 'nb_hiden_node2':nhid_param2, 'batch_size':batch_size,'epochs':epochs,'C':C,
                                   'time':training_time})
    except Exception as e:
        # An exception occurred during training: save the history collected so
        # far, then propagate the error so the search stops with a traceback
        # instead of falling through to the return with unbound results.
        print( "#################################")
        print( "Exception during training: {}".format(str(e)))
        print( "Saving train history in train_history.npy")
        np.save("train_history.npy", train_history)
        raise
    # hyperopt minimizes 'loss', so the score is negated.
    return {'loss': -aucc, 'time': training_time, 'status': STATUS_OK}

In [17]:
# Launch the hyperparameter search.
# `trials` records every evaluation; `max_evals` is also read by
# function_to_minimize for its progress banner.
trials = Trials()
max_evals = 5
best_trial = fmin(
    fn=function_to_minimize,
    space=fspace,
    algo=tpe.suggest,
    max_evals=max_evals,
    trials=trials,
)

#################################                    
       Evaluation 1 of 5                             
#################################                    
Training the Hybrid Deep Belief Net model            
................
Training with SVM                                    
Finetuning with SVM                                  
[LibSVM]                                             
Time fitting the data:                               
0.017590999603271484                                 
#################################                                               
       Evaluation 2 of 5                                                        
#################################                                               
Training the Hybrid Deep Belief Net model                                       
................
Training with SVM                                                               
Finetuning with SVM                                                        

In [23]:
# Get some additional information and print the best parameters.
# fmin returns only the winning hyperparameter values; the score and training
# time are stored in the result dict of the corresponding trial. Take them
# from hyperopt's own record of the best trial instead of re-matching every
# trial by exact float equality, which leaves the fields unset if nothing matches.
best_result = trials.best_trial['result']
best_trial['aucc'] = -best_result['loss'] * 100  # loss is the negated score; shown as a percentage
best_trial['time'] = best_result['time']
print( "#################################")
print( "      Best parameters found")
print( "#################################")
pprint.pprint(best_trial)
print( "#################################")

#################################
      Best parameters found
#################################
{'C': 0.8890180945734224,
 'aucc': 95.54719016793321,
 'batch_size': 10.0,
 'epochs': 560.0,
 'learning_rate': 0.8695228235014989,
 'nb_hiden_node1': 15.0,
 'nb_hiden_node2': 15.0,
 'time': 115}
#################################
