In [None]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/aml4

In [None]:
# %rm -rf logs
# %mkdir logs

In [None]:
import glob
import os
import sys
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import KFold
from keras.utils import np_utils
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import fbeta_score, make_scorer

# scoring
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.metrics import auc, plot_precision_recall_curve
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,roc_auc_score,auc, classification_report
import matplotlib.pyplot as plt
from matplotlib import style
from collections import defaultdict
# configure
# sets matplotlib to inline and displays graphs below the corresponding cell.
# %matplotlib inline
style.use('fivethirtyeight')

# Load the TensorBoard notebook extension
%load_ext tensorboard

In [None]:
from snapshots.snapshot_ens import *
from baseline.baseline import Baseline
from improve_snapshot.elrs import EarlyLRFindStopping

In [None]:
# Schema of the results table: one row is appended per evaluated model
# (see the "Cross Validation" column for the outer-fold number).
column_names = [
    "Dataset Name",
    "Algorithm Name",
    "Cross Validation",
    "Hyper-Parameters Values",
    "Accuracy",
    "TPR",
    "FPR",
    "Precision",
    "AUC",
    "AUC-PR",
    "Train Time",
    "Inference Time",
]
# Starts empty; rows are added by the evaluation step.
EVAL_DF = pd.DataFrame(columns=column_names)

In [None]:
def model_fit_with_grid_search_nested_cv(model_name, parameters,fit_params, X, y, df_name, folds = 10, score = 'accuracy', verbose = 0):
    """Nested cross-validation: grid search on an inner 3-fold split, evaluation on outer folds.

    For every outer fold a ``GridSearchCV`` is fitted on the training part, the refitted
    best estimator is passed to ``evalute_model_performance`` together with the held-out
    part, and the grid-search scores are printed.

    Args:
        model_name: Label forwarded to ``evalute_model_performance``.
        parameters: Parameter grid handed to ``GridSearchCV``.
        fit_params: Keras callbacks forwarded to ``fit`` through ``callbacks=``.
        X: Feature table (pandas object or array); rows are selected by position.
        y: Targets (pandas object or array); rows are selected by position.
        df_name: Dataset label forwarded to ``evalute_model_performance``.
        folds: Number of outer folds.
        score: ``scoring`` argument of ``GridSearchCV``.
        verbose: ``verbose`` argument of ``GridSearchCV``.
    """
    cv_outer = KFold(n_splits=folds, shuffle=True, random_state=1)
    # Same seeded splitter for every outer fold, so it is built once.
    cv_inner = KFold(n_splits=3, shuffle=True, random_state=1)
    # Baseline is given (n_features, n_classes - 1).
    # NOTE(review): the second value is presumably the output width -- confirm against Baseline.
    bl = Baseline(X.shape[1], len(np.unique(y)) - 1)

    def _take_rows(data, positions):
        # KFold yields positional indices; plain `series[positions]` would look them up
        # as labels and pick the wrong rows (or fail) on a non-default index.
        return data.iloc[positions] if hasattr(data, "iloc") else data[positions]

    for i, (train_ix, test_ix) in enumerate(cv_outer.split(X)):
        start = time.time()
        X_train, X_test = _take_rows(X, train_ix), _take_rows(X, test_ix)
        y_train, y_test = _take_rows(y, train_ix), _take_rows(y, test_ix)
        model = KerasClassifier(build_fn=bl.get_model)
        search = GridSearchCV(model, parameters, scoring=score, cv=cv_inner, refit=True, verbose=verbose)
        result = search.fit(X_train, y_train, callbacks=fit_params)
        train_time = time.time() - start
        best_model = result.best_estimator_
        evalute_model_performance(best_model, result.best_params_, model_name, X_test, y_test, df_name, i, train_time)

        print("Best: %f using %s" % (result.best_score_, result.best_params_))
        means = result.cv_results_['mean_test_score']
        stds = result.cv_results_['std_test_score']
        params = result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
def load_ensemble(folder, x_in, x_out, keep_last=2):
    """Build a model that averages the outputs of several saved snapshot models.

    Args:
        folder: Directory searched for ``weights_cycle_*.h5`` files.
        x_in: Number of input features; also passed to ``Baseline``.
        x_out: Second argument passed to ``Baseline``.
        keep_last: Keep only this many of the last paths (in sorted order);
            ``None`` keeps every file found.

    Returns:
        A ``tf.keras.Model`` with a single input whose output is the element-wise
        mean of the member models' outputs.

    Raises:
        FileNotFoundError: If no weight file matches in ``folder``.
    """
    print('Loading ensemble...')
    # NOTE(review): ordering is lexicographic, so "cycle_10" sorts before "cycle_2";
    # confirm the file naming if more than nine cycles are ever saved.
    paths = sorted(glob.glob(os.path.join(folder, 'weights_cycle_*.h5')))
    print('Found:', ', '.join(paths))
    if keep_last is not None:
        paths = paths[-keep_last:]
    print('Loading:', ', '.join(paths))
    if not paths:
        raise FileNotFoundError("No 'weights_cycle_*.h5' files found in %r" % (folder,))

    # Every member is applied to the same input tensor. Combining the members' own
    # `.output` tensors under a new, unrelated Input leaves the graph disconnected.
    ens_input = tf.keras.Input(shape=(x_in,))
    member_outputs = []
    for path in paths:
        member = Baseline(x_in, x_out).get_model()
        member.load_weights(path)
        member_outputs.append(member(ens_input))

    if len(member_outputs) == 1:
        # The Average layer needs at least two inputs; a single member is its own mean.
        averaged = member_outputs[0]
    else:
        averaged = tf.keras.layers.Average()(member_outputs)
    return tf.keras.Model(inputs=ens_input, outputs=averaged)

In [None]:
def predict(model,X,y):
    """Return true labels next to 0/1 predictions obtained by thresholding at 0.5.

    Args:
        model: Object exposing ``predict(X)``.
        X: Inputs forwarded unchanged to ``model.predict``.
        y: True labels (pandas object, array or list); flattened to one dimension.

    Returns:
        DataFrame with columns ``TrueClass`` and ``Predicted`` (``int32``).
    """
    # Single inference pass; the result is reused for thresholding.
    yhat = np.asarray(model.predict(X))
    labels = (yhat > 0.5).astype("int32")
    if labels.ndim == 2 and labels.shape[1] == 1:
        # Column vector -> flat vector so it fits a single DataFrame column.
        labels = labels.ravel()
    return pd.DataFrame(
        {'TrueClass': np.asarray(y).ravel(), 'Predicted': labels},
        columns=['TrueClass', 'Predicted'],
    )

In [None]:
def plot_ROC(model,model_name,df_name, model_fpr, model_tpr, naive_fpr, naive_tpr):
    """Draw the model's ROC curve together with the naive reference curve and show it.

    ``model`` and ``df_name`` are accepted but not used by the plotting code.
    """
    curves = (
        (naive_fpr, naive_tpr, {'linestyle': '--', 'label': 'Naive'}),
        (model_fpr, model_tpr, {'marker': '.', 'label': model_name}),
    )
    for fpr, tpr, line_style in curves:
        plt.plot(fpr, tpr, **line_style)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    # plt.savefig(f'logs/{df_name.lower()}_{model_name.lower()}.png')
    plt.show()

In [None]:
def evalute_model_performance(best_model, parameters, model_name, X_test, y_test, df_name, i, train_time):
  """Score a fitted model on held-out data, print a report and add a row to ``EVAL_DF``.

  Args:
    best_model: Fitted estimator exposing ``predict`` and ``predict_proba``.
    parameters: Hyper-parameters stored in the "Hyper-Parameters Values" column.
    model_name: Algorithm label used in the printout, the plot legend and the row.
    X_test: Held-out features.
    y_test: Held-out labels.
    df_name: Dataset label stored in the row.
    i: Zero-based outer-fold index; stored as ``i + 1``.
    train_time: Training time in seconds, stored unchanged.

  Side effects:
    Prints the metrics, tries to show a ROC plot and rebinds the global ``EVAL_DF``.
  """
  global EVAL_DF

  start = time.time()
  df_result = predict(best_model, X_test, y_test)
  inf_time = time.time() - start
  y_true, y_pred = df_result.TrueClass, df_result.Predicted

  # Column 1 of predict_proba is used as the positive-class score.
  # NOTE(review): assumes a binary problem with labels 0/1 -- confirm for new datasets.
  probs = best_model.predict_proba(X_test)[:, 1]
  naive_probs = [0 for _ in range(len(y_test))]

  class_report = classification_report(y_true, y_pred, output_dict= True)
  accuracy = class_report['accuracy']
  recall = class_report['macro avg']['recall']
  precision = class_report['macro avg']['precision']
  f1 = class_report['macro avg']['f1-score']
  f2 = fbeta_score(y_true, y_pred, beta = 2, average='macro')

  # The precision-recall curve needs continuous scores; hard 0/1 predictions collapse
  # it to a single operating point and distort the area under it.
  pr_precision, pr_recall, _ = precision_recall_curve(y_true, probs)
  auc_pr = auc(pr_recall, pr_precision)

  # TPR / FPR at the 0.5 decision threshold, taken from the confusion matrix.
  # (Averaging the coordinates of the ROC curve does not give a rate.)
  tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
  tpr = tp / (tp + fn) if (tp + fn) else 0.0
  fpr = fp / (fp + tn) if (fp + tn) else 0.0

  naive_auc = roc_auc_score(y_test, naive_probs)
  model_auc = roc_auc_score(y_test, probs)

  print('No Skill: ROC AUC=%.3f' % (naive_auc))
  print(model_name,': ROC AUC=%.3f' % (model_auc))

  naive_fpr, naive_tpr, _ = roc_curve(y_test, naive_probs)
  model_fpr, model_tpr, _ = roc_curve(y_test, probs)

  print('')
  print('Performance Report: ')

  print('Accuracy: %1.3f' % accuracy) #a
  print('TPR: %1.3f' % tpr) #b
  print('FPR: %1.3f' % fpr) #c
  print('Precision: %1.3f' % precision) #d
  print('AUC: %1.3f' % model_auc) #e
  print('AUC-PR: %1.3f' % auc_pr) #f
  print('Recall: %1.3f' % recall)
  print('F1: %1.3f' % f1)
  print('F2: %1.3f' % f2)

  # plot_confusion_matrix(df_result, model_name)

  # Plotting is best-effort: a failure must not lose the metrics, but the reason is shown.
  try:
    plot_ROC(best_model, model_name,df_name, model_fpr, model_tpr, naive_fpr, naive_tpr)
  except Exception as exc:
    print('Could not print ROC AUC curve.', repr(exc))

  new_row = {"Dataset Name":df_name, "Algorithm Name":model_name, "Cross Validation":i+1, "Hyper-Parameters Values":parameters, "Accuracy":accuracy, "TPR":tpr, "FPR":fpr, "Precision":precision, "AUC":model_auc, "AUC-PR":auc_pr, "Train Time":train_time, "Inference Time":inf_time}

  # DataFrame.append was removed in pandas 2.0; pd.concat works on old and new versions.
  row_df = pd.DataFrame([new_row])
  EVAL_DF = row_df if EVAL_DF.empty else pd.concat([EVAL_DF, row_df], ignore_index=True)

In [None]:
# Separate TensorBoard directory per run. `pd.datetime` was removed in pandas 2.0, so
# the timestamp comes from `pd.Timestamp`; the date is included so that runs started at
# the same time of day on different days do not share a directory.
log_dir = "logs/fit/" + pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")

tensor_board = TensorBoard(
    log_dir=log_dir, histogram_freq=0, write_graph=True,
    write_images=True, update_freq='epoch', profile_batch=2,
    embeddings_freq=0, embeddings_metadata=None
)

# define snapshot callback
snp = Snapshot('snapshots', nb_epochs=6, verbose=1, nb_cycles=2)

# (epoch to start, learning rate) pairs, in increasing epoch order.
LR_SCHEDULE = [
    (3, 0.05),
    (6, 0.01),
    (9, 0.005),
    (12, 0.001),
]

def lr_schedule(epoch, lr):
    """Return the learning rate scheduled for ``epoch``, or ``lr`` if none is scheduled.

    Only epochs that exactly match an entry of ``LR_SCHEDULE`` change the rate;
    any other epoch keeps the rate that was passed in.
    """
    first_epoch, last_epoch = LR_SCHEDULE[0][0], LR_SCHEDULE[-1][0]
    if not (first_epoch <= epoch <= last_epoch):
        return lr
    # First entry whose start epoch equals `epoch`, else the current rate.
    return next((rate for start_epoch, rate in LR_SCHEDULE if epoch == start_epoch), lr)

# Callback driven by `lr_schedule`, constructed with a patience of 5.
elrs = EarlyLRFindStopping(schedule=lr_schedule, patience=5)

# Order matters: later code slices this list from the end to pick callback subsets.
callbacks = [tensor_board, snp, elrs]

=====================================

In [None]:
# Maps dataset name (file name without extension) -> (features, target).
datasets_dict = defaultdict(list)
# Only CSV files are read, and sorting makes the processing order (and `ds[0]`, which
# names the results file later on) independent of the directory listing order.
ds = sorted(name for name in os.listdir('datasets') if name.lower().endswith('.csv'))
for df_name in ds:
    # make dataset: the last column is the target, everything before it is a feature
    df = pd.read_csv(os.path.join('datasets', df_name))
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    datasets_dict[os.path.splitext(df_name)[0]] = (X, y)

In [None]:
# Placeholders for the module-level settings; `init_model_params` rebinds the first two.
models_param_grid = {}
fit_params = {}
# Missing keys start out as an empty list.
best_models_dict = defaultdict(list)

In [None]:
def init_model_params(): 
    """Set the global hyper-parameter grid and the per-algorithm callback lists.

    Rebinds ``models_param_grid`` (epochs and batch sizes to search) and ``fit_params``
    (three slices of the global ``callbacks`` list, one per algorithm, in the order
    baseline, snapshot ensemble, ELRS).
    """
    global models_param_grid, fit_params

    models_param_grid = dict(epochs=[5, 10], batch_size=[128, 256])

    # `callbacks` is [tensor_board, snp, elrs]; each algorithm adds one more callback.
    fit_params = [
        callbacks[:-2],  # baseline: TensorBoard only
        callbacks[:-1],  # snapshots-ensemble: + Snapshot
        callbacks[:],    # ELRS: + EarlyLRFindStopping
    ]

In [None]:
def run_training_main_loop(X, y, df_name):
    """Run the nested-CV experiment for every algorithm on one dataset.

    Args:
        X: Feature table of the dataset.
        y: Target column of the dataset.
        df_name: Dataset label used in the printed banners and in the results.

    Re-initialises the global parameter grid and callback lists, then calls
    ``model_fit_with_grid_search_nested_cv`` once per algorithm with the matching
    callback list.
    """
    init_model_params()
    models_to_run = ["BASELINE", "SNAPSHOTS", "ELRS"]
    print('='*20,df_name,18*'=','Shapes:', X.shape[1], len(np.unique(y)))
    # `fit_params` holds one callback list per algorithm, in the same order.
    for model_name, model_callbacks in zip(models_to_run, fit_params):
        # sys.stdout = open(f'logs/{df_name.lower()}_{model_name.lower()}_stdout.txt', 'w')
        print('='*20,df_name,18*'=','Shapes:',X.shape[1], len(np.unique(y)))
        print('-'*10,model_name,10*'-')

        model_fit_with_grid_search_nested_cv(
            model_name,
            models_param_grid,
            model_callbacks,
            X,
            y,
            df_name,
            folds = 10,
            score = 'accuracy',
            verbose = 0
        )
        # plot_nested_cv(model_name, nested_scores, outer_scores)
        print('-'*30)
    print(80*'=')
    print(3*'\n') 
# runs main loop
# Plain loop rather than a comprehension: the calls are made for their side effects
# only, and a comprehension would leave a list of None values as the cell output.
for df_name, (X, y) in datasets_dict.items():
    run_training_main_loop(X, y, df_name)

In [None]:
# Persist the collected results; the file is named after the first entry of `ds`.
results_path = f"EVAL_{ds[0]}"
EVAL_DF.to_csv(results_path)
# Show up to the first 100 rows as the cell output.
EVAL_DF.head(100)

# Statistical tests

In [None]:
def get_best_AUC(algo_name):
  """Return, for one algorithm, the row with its highest AUC on each dataset.

  Args:
    algo_name: Value of the "Algorithm Name" column to select.

  Returns:
    Rows of ``EVAL_DF`` (at most one per dataset and AUC value), sorted by
    "Dataset Name" so results of different algorithms line up dataset by dataset.
  """
  algo_rows = EVAL_DF[EVAL_DF['Algorithm Name'] == algo_name]
  # Maximum per dataset: grouping by algorithm alone keeps a single row overall,
  # which gives a paired test nothing to compare across datasets.
  best_per_dataset = algo_rows.groupby('Dataset Name')['AUC'].transform('max')
  best_rows = algo_rows[algo_rows['AUC'] == best_per_dataset]
  return (
      best_rows
      .drop_duplicates(subset=['Dataset Name','Algorithm Name','AUC'], keep="last")
      .sort_values('Dataset Name')
  )

In [None]:
from scipy import stats

# One AUC series per algorithm, as returned by get_best_AUC.
res = [get_best_AUC(algo_name)['AUC'] for algo_name in ('BASELINE', 'SNAPSHOTS', 'ELRS')]

# Friedman test across the three algorithms' AUC samples.
stats.friedmanchisquare(*res)

In [None]:
# Mean AUC for every (dataset, algorithm) pair.
temp = EVAL_DF.groupby(['Dataset Name', 'Algorithm Name'])['AUC'].mean().reset_index()
# Flatten to a single list: all BASELINE means, then SNAPSHOTS, then ELRS.
temp = [
    auc_value
    for algo in ('BASELINE', 'SNAPSHOTS', 'ELRS')
    for auc_value in temp.loc[temp['Algorithm Name'] == algo, 'AUC'].tolist()
]

# Single-column frame consumed by the post-hoc test cells.
df = pd.DataFrame(temp, columns=['AUC'])
df

In [None]:
# Label the rows of `df`, which lists the BASELINE means first, then SNAPSHOTS, then ELRS.
# The block sizes come from the results table instead of a fixed 20, so the labels
# still fit when a different number of datasets has been evaluated.
algo_counts = EVAL_DF.groupby('Algorithm Name')['Dataset Name'].nunique()
BASELINE = ['BASELINE'] * int(algo_counts.get('BASELINE', 0))
SNAPSHOTS = ['SNAPSHOTS'] * int(algo_counts.get('SNAPSHOTS', 0))
ELRS = ['ELRS'] * int(algo_counts.get('ELRS', 0))
Algos = BASELINE + SNAPSHOTS + ELRS
df['Algorithm'] = Algos

In [None]:
!pip install scikit_posthocs
import scikit_posthocs

In [None]:
# Pairwise t-tests between the algorithm groups on AUC, with Holm-adjusted p-values.
scikit_posthocs.posthoc_ttest(
    df,
    val_col='AUC',
    group_col='Algorithm',
    p_adjust='holm',
)