In [1]:
import numpy as np

import keras
from keras.layers import *
from keras.models import Sequential
from keras.utils import np_utils

from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
        
def create_model(x_train, y_train, x_test, y_test):
    """Hyperas model template: build, train and score one CNN configuration.

    The double-brace expressions below are hyperas search-space templates, so
    this function is meant to be handed to ``optim.minimize`` and not called
    directly. Keep every template on a single line and never put one inside a
    comment or docstring.

    Args:
        x_train: training images; ``x_train.shape[1:]`` is used as the input
            shape of the first convolution.
        y_train: binary training labels (the network ends in one sigmoid unit).
        x_test: accepted for the hyperas signature; not used here.
        y_test: accepted for the hyperas signature; not used here.

    Result:
        A dict for hyperopt with ``loss`` (the negated best per-epoch macro F1
        on the validation split), ``status`` and the trained ``model``.
    """

    # Measures the validation F1 at the end of every epoch.
    class Metrics(keras.callbacks.Callback):
        """Collect the macro-averaged validation F1 after each epoch."""

        def on_train_begin(self, logs=None):
            self.val_f1s = []

        def on_epoch_end(self, epoch, logs=None):
            # NOTE(review): relies on Keras populating `self.validation_data`
            # when `validation_split` is used - confirm for the installed
            # Keras version.
            val_inputs = self.validation_data[0]
            val_targets = self.validation_data[1]
            # Single sigmoid output: class 1 when the probability exceeds 0.5.
            probabilities = self.model.predict(val_inputs)
            predicted = (probabilities > 0.5).astype('int32').ravel()
            expected = np.asarray(val_targets).ravel()
            # `f1_score` comes from sklearn.metrics, imported in a later cell.
            self.val_f1s.append(f1_score(expected, predicted, average='macro'))

    # NOTE(review): every dropout rate is sampled over the whole (0, 1) range;
    # rates close to 1 discard almost all activations - consider narrowing it.
    model = Sequential()
    model.add(Conv2D({{choice([32,64,128,256, 512])}}, (3, 3), input_shape=x_train.shape[1:], strides=(1, 1), padding="valid"))
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2,2)))
    model.add(Dropout({{uniform(0, 1)}}))

    model.add(Conv2D({{choice([32,64,128,256, 512])}}, (3, 3), strides=(1, 1), padding="valid"))
    model.add(Activation('relu'))
    model.add(MaxPooling2D((2,2)))
    model.add(Dropout({{uniform(0, 1)}}))

    if {{choice(['two', 'three'])}} == 'three': #if three add a third conv layer
        model.add(Conv2D({{choice([32,64,128,256, 512])}}, (3, 3), strides=(1, 1), padding="valid"))
        model.add(Activation('relu'))
        model.add(MaxPooling2D((2,2)))
        model.add(Dropout({{uniform(0, 1)}}))

    # Alternative explored in another notebook: GlobalAveragePooling2D here.
    model.add(Flatten())

    model.add(Dense({{choice([64,128,256, 512,1024])}}))
    model.add(Activation('relu'))
    model.add(Dropout({{uniform(0, 1)}}))

    if {{choice(['one', 'two'])}} == 'two': #if two add a second dense layer on flatten
        model.add(Dense({{choice([64,128,256, 512,1024])}}))
        model.add(Activation('relu'))
        model.add(Dropout({{uniform(0, 1)}}))

    model.add(Dense(1, activation='sigmoid'))
    model.summary()

    model.compile(loss='binary_crossentropy', optimizer='Adam')
    metrics = Metrics()
    model.fit(x_train, y_train,
              batch_size={{choice([64, 128,256])}},
              epochs={{choice([60,200])}},
              verbose=0,
              validation_split=0.1, callbacks=[metrics])

    # Score the trial by the best per-epoch validation F1. The returned model
    # holds the weights of the LAST epoch, which may not be the best one.
    best_val_f1 = np.amax(metrics.val_f1s)
    print('Best validation metric of epoch:', best_val_f1)
    # hyperopt minimises the loss, hence the sign flip.
    return {'loss': -best_val_f1, 'status': STATUS_OK, 'model': model}

Using TensorFlow backend.


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score,f1_score, confusion_matrix, mean_absolute_error,mean_squared_error, median_absolute_error
from sklearn.preprocessing import normalize
import time
import matplotlib.pyplot as plt

def plot_df(df):
    """Show ``df`` with IPython's rich display, falling back to ``print``.

    Args:
        df: the object to show (a pandas DataFrame in this notebook).
    """
    try:
        from IPython.display import display
        display(df)
    except Exception:
        # Best-effort fallback (e.g. IPython missing). `Exception` instead of
        # a bare except so KeyboardInterrupt / SystemExit are not swallowed.
        print(df)
    
def plot_confusion_matrix(cm, target_names, title='Confusion matrix (f1-score)',cmap=None, normalize=True):
    """Draw a confusion matrix as an annotated heat-map.

    Args:
        cm: square confusion matrix (rows = true label, columns = predicted).
        target_names: tick labels for both axes, or None for no labels.
        title: figure title.
        cmap: matplotlib colormap; defaults to 'Blues'.
        normalize: when True each row is scaled to sum to 1 before drawing
            and the cells are annotated with 4 decimals; when False the raw
            values are drawn and annotated.

    The accuracy / misclassification shown under the x axis are computed from
    ``cm`` exactly as passed in (trace divided by total).
    """
    cm = np.asarray(cm)
    total = float(np.sum(cm))
    accuracy = np.trace(cm) / total if total else float('nan')
    misclass = 1 - accuracy
    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        row_sums = cm.sum(axis=1, keepdims=True).astype('float')
        # Rows without any sample stay all-zero instead of becoming NaN.
        row_sums[row_sums == 0] = 1.0
        cm = cm.astype('float') / row_sums
        color_max = 1.0
    else:
        # Raw counts: stretch the colour scale to the largest cell so that
        # every count above 1 is not painted with the same saturated colour.
        color_max = max(1.0, float(cm.max()))

    # Draw AFTER normalising so colours and cell labels show the same values.
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap, vmin=0.0, vmax=color_max)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text for contrast.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in np.ndindex(cm.shape[0], cm.shape[1]):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()
    
def calculate_metrics(y_true,y_pred,plot=True, title=""):
    """Compute binary classification metrics and optionally display them.

    Args:
        y_true: ground-truth labels, restricted to the classes 0 and 1.
        y_pred: predicted labels.
        plot: when truthy, show the metrics table and the row-normalised
            (L1) confusion matrix.
        title: title forwarded to the confusion-matrix plot.

    The result is a dict with per-class precision / recall / F1 ("... raw"),
    the weighted, macro and micro F1, and the un-normalised confusion matrix
    under "Confusion Matrix".
    """
    class_ids = [0, 1]
    class_names = ["False Positive", "Confirmed"]

    dic_return = {
        "Precision raw": precision_score(y_true, y_pred, average=None, labels=class_ids),
        "Recall raw": recall_score(y_true, y_pred, average=None, labels=class_ids),
        "F1 raw": f1_score(y_true, y_pred, average=None, labels=class_ids),
    }
    for averaging in ("weighted", "macro", "micro"):
        dic_return["F1 " + averaging] = f1_score(y_true, y_pred, average=averaging, labels=class_ids)

    confusion = confusion_matrix(y_true, y_pred, labels=class_ids)
    if plot:
        # The table is built before the matrix is stored, so it only holds
        # the score columns (one row per class).
        summary = pd.DataFrame(dic_return)
        summary.index = class_names
        plot_df(summary)
        plot_confusion_matrix(normalize(confusion, axis=1, norm='l1'), class_names, title)
    dic_return["Confusion Matrix"] = confusion
    return dic_return

In [3]:
def data():
    """
    Data providing function:

    This function is separated from create_model() so that hyperopt
    won't reload data for each evaluation run.

    Loads three per-sample image channels, stacks them depth-wise into one
    multi-channel image per sample, and splits samples and labels with the
    "Set" column of koi_sets.csv. A label is 1 when 'NExScI Disposition' is
    "CONFIRMED" and 0 otherwise.

    Gives back (x_train, y_train, x_val, y_val); the *_val arrays hold the
    rows marked "Test".

    NOTE(review): hyperas appears to inline this body into its generated
    script, so keep the local names x_train / y_train / x_val / y_val and a
    single trailing return statement - confirm before restructuring.
    """
    channel_1 = np.load('/work/work_teamEXOPLANET/MTF_margarita/no_invertidos/MTF_u15_d15.npy')  
    channel_2 = np.load('/work/work_teamEXOPLANET/MTF_margarita/invertidos/MTF_u15_d15.npy')  
    channel_3 = np.load('/work/work_teamEXOPLANET/MTF_gabo/npys/time_channel_30.npy')  

    # One depth-stacked image per sample, in file order.
    x_all = np.asarray([
        np.dstack(sample_channels)
        for sample_channels in zip(channel_1, channel_2, channel_3)
    ])

    fold ="../../KOI_Data/"
    df_sets = pd.read_csv(fold+"/koi_sets.csv") 
    mask_train = (df_sets["Set"] == "Train").values
    mask_test = (df_sets["Set"] == "Test").values

    # presumably koi_metadata.csv is row-aligned with koi_sets.csv and with
    # the .npy arrays - the boolean masks are applied to all three. TODO confirm
    df_meta = pd.read_csv(fold+"/koi_metadata.csv")
    df_meta_train = df_meta[mask_train]
    df_meta_test = df_meta[mask_test]
    y_train= (df_meta_train['NExScI Disposition'].values=="CONFIRMED")*1
    y_val = (df_meta_test['NExScI Disposition'].values=="CONFIRMED")*1

    x_train = x_all[mask_train]
    x_val = x_all[mask_test]
    
    return x_train, y_train, x_val, y_val

In [None]:
# Run the hyperas search (TPE, 60 evaluations) over the create_model template.
# `notebook_name` must match this notebook's file name (without .ipynb).
best_run, best_model = optim.minimize(model=create_model,
                                      data=data,
                                      algo=tpe.suggest,
                                      max_evals=60,
                                      trials=Trials(),
                                      notebook_name='Hyperkeras_f1fxed')

# x_val / y_val do not exist at notebook level at this point, so load them
# here; otherwise this cell raises NameError on a fresh kernel.
_, _, x_val, y_val = data()

print("Evalutation of best performing model:")
# The model is compiled without extra metrics, so this prints the loss only.
print(best_model.evaluate(x_val, y_val))
print("Best performing model chosen hyper-parameters:")
print(best_run)

>>> Imports:
#coding=utf-8

try:
    import numpy as np
except:
    pass

try:
    import keras
except:
    pass

try:
    from keras.layers import *
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.utils import np_utils
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    from keras import backend as K
except:
    pass

try:
    import pandas as pd
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    from sklearn.model_selection import KFold
except:
    pass

try:
    from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, f1_score, confusion_matrix, mean_absolute_error, mean_squared_error, median_absolute_error
except:
    pass

try:
    from sklearn.preprocessing import normalize
except:
    pass

try:
    impor




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Model: "sequential_1"                               
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 256)       7168      
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 256)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 256)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 14, 256)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 256)       590080    
_________________________________________________________________
activation_2 (Activation)    (None, 1

  'precision', 'predicted', average, warn_for)



In [None]:
# Show the hyper-parameter selection returned by optim.minimize.
best_run

In [None]:
# Print the layer-by-layer summary of the model returned by optim.minimize.
best_model.summary()

In [None]:
x_train, y_train, x_val, y_val = data()

# Single sigmoid output: class 1 when the predicted probability exceeds 0.5
# (same values and shape as Sequential.predict_classes for this model).
predichos = (best_model.predict(x_val) > 0.5).astype('int32')
# Pass the title by keyword: the third positional parameter is `plot`, so a
# positional string was consumed as the plot flag and the figure had no title.
calculate_metrics(y_val, predichos, title='[VAL] Confusion Matrix 50 epochs CNN + reverse LC')