In [1]:
# Mount Google Drive inside the Colab runtime; every data and model path used
# later in this notebook lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
# Add the project folder on Drive to the module search path.
# Presumably this is where neural_network.py and ensembling_methods.py
# (imported in the next cell) are stored -- TODO confirm.
sys.path.append('/content/drive/MyDrive/lit/lit_lit/')

In [3]:
import matplotlib.pyplot as plt
#from matplotlib.mlab import griddata
from scipy.interpolate import griddata

from mpl_toolkits.mplot3d import Axes3D
import tensorflow as tf
from neural_network import *
from ensembling_methods import *
# from figure_grid import *

# Use autograd to get easy manifold tangents; could also do this in Tensorflow but less conveniently.
import autograd.numpy as np
from autograd.scipy.special import expit
from autograd import elementwise_grad

In [4]:
from __future__ import print_function
import six
import time
import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split

def l1_loss(x):
  """Return the L1 penalty of ``x``: the sum of the absolute values of its entries."""
  magnitudes = tf.abs(x)
  return tf.reduce_sum(magnitudes)

def l2_loss(x):
  """Return the L2 penalty of ``x`` as computed by ``tf.nn.l2_loss``.

  Note that ``tf.nn.l2_loss`` is documented as half the sum of squared entries.
  """
  penalty = tf.nn.l2_loss(x)
  return penalty

class cachedproperty(object):
  """Descriptor that evaluates a method once per instance and caches the result.

  Simplified version of https://github.com/pydanny/cached-property
  """
  def __init__(self, function):
    self.function = function
    # Expose the wrapped function's docstring on the descriptor itself.
    self.__doc__ = getattr(function, '__doc__')

  def __get__(self, instance, klass):
    # Accessed on the class rather than an instance: return the descriptor.
    if instance is None:
      return self
    # Store the result in the instance dict under the function's own name.
    # Because this descriptor defines no __set__, the instance attribute takes
    # precedence on later lookups and the function is not called again.
    result = self.function(instance)
    instance.__dict__[self.function.__name__] = result
    return result

def isint(x):
  """Return True when ``x`` is a Python ``int`` or any NumPy integer scalar.

  ``np.integer`` is the common base class of every NumPy integer type, so
  ``np.int32`` and ``np.int64`` are still accepted, and so are the other
  widths and the unsigned types (``np.int16``, ``np.uint8``, ...).
  """
  return isinstance(x, (int, np.integer))

def onehot(Y, K=None):
  """Encode the labels in ``Y`` as rows of 0/1 class indicators.

  Args:
    Y: iterable of labels.
    K: the classes, one output column each. ``None`` uses the sorted unique
      values of ``Y``; an integer (per ``isint``) means ``0 .. K-1``; anything
      else is iterated as the explicit list of classes.

  Returns:
    Integer array with one row per label and one column per class.
  """
  if K is None:
    classes = np.unique(Y)
  elif isint(K):
    classes = list(range(K))
  else:
    classes = K
  indicator_rows = [[label == cls for cls in classes] for label in Y]
  return np.array(indicator_rows).astype(int)

def minibatch_indexes(lenX, batch_size=256, num_epochs=50, **kw):
  """Yield ``(step, epoch, slice)`` triples covering the data in minibatches.

  Args:
    lenX: number of examples.
    batch_size: examples per batch; the last slice of an epoch may extend
      past ``lenX``.
    num_epochs: number of passes over the data.
    **kw: accepted and ignored, so callers can forward shared keyword sets.

  Yields:
    The global batch counter, the epoch number, and the slice selecting that
    batch's rows.
  """
  batches_per_epoch = int(np.ceil(lenX / batch_size))
  step = 0
  for epoch in range(num_epochs):
    for batch in range(batches_per_epoch):
      yield step, epoch, slice(batch * batch_size, (batch + 1) * batch_size)
      step += 1

def train_feed(idx, models, **kw):
  """Build one feed dictionary for a batch, shared by all ``models``.

  For every model, ``m.is_train`` is fed ``True``. Then each keyword argument
  (and each entry of the optional ``feed_dict`` keyword) is considered: a
  string key naming an attribute of the model is replaced by that attribute,
  any other key is used as-is. The entry is fed ``val[idx]`` only when the
  resolved key has the same type as ``m.X`` and its leading dimension is
  ``None``; everything else is silently skipped.
  """
  sources = [kw, kw.get('feed_dict', {})]
  feed = {}
  for m in models:
    feed[m.is_train] = True
    for source in sources:
      for key, val in six.iteritems(source):
        if isinstance(key, str) and hasattr(m, key):
          target = getattr(m, key)
        else:
          target = key
        # Only objects of the same type as m.X with an unspecified leading
        # dimension receive a slice of the data.
        if type(target) == type(m.X) and len(target.shape) >= 1 and target.shape[0] is None:
          feed[target] = val[idx]
  return feed

def train_batches(models, X, y, **kw):
  """Yield ``(step, epoch, feed_dict)`` for each minibatch of ``X`` / ``y``.

  The keyword arguments are forwarded both to ``minibatch_indexes`` (batching
  options) and to ``train_feed`` (extra values to feed).
  """
  for step, epoch, batch_slice in minibatch_indexes(len(X), **kw):
    feed = train_feed(batch_slice, models, X=X, y=y, **kw)
    yield step, epoch, feed

def reinitialize_variables(sess):
  """Return an op that initializes only the not-yet-initialized global variables.

  Each global variable is probed by evaluating it in ``sess``; those that
  raise ``FailedPreconditionError`` are collected and passed to
  ``variables_initializer``.
  """
  def _is_uninitialized(var):
    try:
      sess.run(var)
    except tf.errors.FailedPreconditionError:
      return True
    return False

  pending = [var for var in tf.compat.v1.global_variables() if _is_uninitialized(var)]
  return tf.compat.v1.variables_initializer(pending)

def minimize(sess, loss_fn, batches, operations={}, learning_rate=0.001, print_every=None, var_list=None, **kw):
  """Run Adam on ``loss_fn`` over ``batches``, optionally logging progress.

  Args:
    sess: session used to run the ops.
    loss_fn: tensor to minimize.
    batches: iterable of ``(step, epoch, feed_dict)`` triples.
    operations: mapping of name -> extra op evaluated with every training
      step and reported in the log line (the default dict is never mutated).
    learning_rate: Adam learning rate.
    print_every: log when the step number is a multiple of this; falsy
      disables logging.
    var_list: variables to optimize, forwarded to ``optimizer.minimize``.
    **kw: accepted and ignored.
  """
  optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)
  train_op = optimizer.minimize(loss_fn, var_list=var_list)
  # Sorted names give the extra ops a stable position in the results list.
  op_keys = sorted(operations.keys())
  ops = [train_op]
  ops.extend(operations[k] for k in op_keys)
  started = time.time()
  # Initialize whatever is still uninitialized before the first step.
  sess.run(reinitialize_variables(sess))
  for i, epoch, batch in batches:
    results = sess.run(ops, feed_dict=batch)
    if not print_every or i % print_every != 0:
      continue
    pieces = ['Batch {}, epoch {}, time {:.1f}s'.format(i, epoch, time.time() - started)]
    # results[0] belongs to train_op; the named ops follow in op_keys order.
    for k, value in zip(op_keys, results[1:]):
      pieces.append(', {} {:.4f}'.format(k, value))
    print(''.join(pieces))

def tt_split(X, y, test_size=0.2):
  """Split ``X`` and ``y`` into train/test parts, stratified on ``y``.

  Returns whatever ``train_test_split`` returns for the two arrays.
  """
  split = train_test_split(X, y, test_size=test_size, stratify=y)
  return split

def elemwise_sq_cos_sim(v, w, eps=1e-8):
  """Row-wise squared cosine similarity between two 2-D arrays.

  For each row i, returns ``(v_i . w_i)**2 / (|v_i|^2 * |w_i|^2 + eps)``;
  ``eps`` keeps the division finite for zero rows. Both inputs must be 2-D.
  """
  assert(len(v.shape) == 2)
  assert(len(w.shape) == 2)
  dot_vw = np.sum(v*w, axis=1)
  sq_norm_v = np.sum(v*v, axis=1)
  sq_norm_w = np.sum(w*w, axis=1)
  return dot_vw**2 / (sq_norm_v * sq_norm_w + eps)

def yules_q_statistic(e1, e2, y_test):
  """Yule's Q statistic computed from two sets ``e1`` and ``e2``.

  Args:
    e1, e2: sets of items (presumably the test indexes each classifier got
      wrong -- verify against the caller).
    y_test: only its length is used, as the total number of items.

  Returns:
    ``(n11*n00 - n01*n10) / (n11*n00 + n01*n10)`` as a float, or the
    integer 0 when the numerator is zero (which also avoids dividing by a
    zero denominator).
  """
  total = len(y_test)
  in_both = len(e1.intersection(e2))
  only_first = len(e1.difference(e2))
  only_second = len(e2.difference(e1))
  in_neither = total - len(e1.union(e2))
  assert(in_both + only_first + only_second + in_neither == total)
  concordant = in_neither * in_both
  discordant = only_first * only_second
  numer = concordant - discordant
  if numer == 0:
    return 0
  return numer / float(concordant + discordant)

def disagreement_measure(e1, e2, y_test):
  """Fraction of items that belong to exactly one of the sets ``e1`` and ``e2``.

  ``y_test`` contributes only its length, used as the denominator.
  """
  total = len(y_test)
  only_first = e1.difference(e2)
  only_second = e2.difference(e1)
  return (len(only_first) + len(only_second)) / total

def scoring_fun(y_pred, y_true):
  """Score predictions against the true labels; currently plain accuracy.

  The previous body branched on the shapes of ``y_true`` and ``y_pred`` and
  computed a ``preds`` vector that was never used, yet every branch returned
  ``accuracy_fun(y_pred, y_true)``. The dead code is removed; the result is
  unchanged for all inputs ``accuracy_fun`` accepts.

  Args:
    y_pred: predicted scores/probabilities, as accepted by ``accuracy_fun``.
    y_true: true labels, as accepted by ``accuracy_fun``.

  Returns:
    The value of ``accuracy_fun(y_pred, y_true)``.
  """
  # An ROC-AUC score for 1-D labels was sketched here but was commented out;
  # accuracy is the score in every case.
  return accuracy_fun(y_pred, y_true)

def accuracy_fun(y_pred, y_true):
  """Fraction of examples whose predicted class equals the true class.

  * ``y_true`` not 1-D: both arrays are reduced with ``argmax`` over axis 1.
  * ``y_true`` 1-D, ``y_pred`` 1-D: ``y_pred`` is thresholded at 0.5.
  * ``y_true`` 1-D, ``y_pred`` not 1-D: the predicted class is the argmax
    over axis 1.
  """
  if len(y_true.shape) != 1:
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))
  if len(y_pred.shape) == 1:
    predicted = (y_pred > 0.5).astype(int)
  else:
    predicted = np.argmax(y_pred, axis=1)
  return np.mean(y_true == predicted)

def error_masks(y_pred, y_true):
  """Return a 0/1 integer vector with 1 where the prediction is wrong.

  Predicted and true classes are derived by the same rules as in
  ``accuracy_fun``: argmax over axis 1 for 2-D inputs, a 0.5 threshold for a
  1-D ``y_pred`` paired with 1-D labels.
  """
  if len(y_true.shape) != 1:
    mismatches = np.argmax(y_true, axis=1) != np.argmax(y_pred, axis=1)
    return mismatches.astype(int)
  if len(y_pred.shape) == 1:
    predicted = (y_pred > 0.5).astype(int)
  else:
    predicted = np.argmax(y_pred, axis=1)
  return (predicted != y_true).astype(int)



## Initialize network

In [5]:
class Net(NeuralNetwork):
  """Fully connected network with 24 inputs, two 256-unit softplus layers and 2 outputs.

  NOTE(review): later cells define another class named ``Net`` inside their
  loops, which replaces this one once they run.
  """
  @property
  def x_shape(self):
    return [None, 24]

  @property
  def y_shape(self):
    return [None, 2]

  def rebuild_model(self, X, **_):
    """Build the layers on input ``X`` and return them in order."""
    dense = tf.compat.v1.layers.dense
    hidden1 = dense(X, 256, name=self.name+'/L1', activation=tf.nn.softplus)
    hidden2 = dense(hidden1, 256, name=self.name+'/L2', activation=tf.nn.softplus)
    # The last layer has no activation.
    output = dense(hidden2, 2, name=self.name+'/L3', activation=None)
    return [hidden1, hidden2, output]


In [6]:
# Record the TensorFlow version in use; the code in this notebook goes through
# the tf.compat.v1 API.
print(tf.__version__)

2.8.0


In [7]:
from glob import glob
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import numpy as np
import sklearn.metrics as metrics
from sklearn import metrics
from sklearn.metrics import fbeta_score, make_scorer
import sklearn
from numpy import mean

# Setup pass over the processed splits: loads every split file and defines a
# `Net` class sized for it, but trains and evaluates nothing.
# NOTE(review): this repeats the first half of the training cell that follows.
# After it finishes, `Net`, `ss`, `X_train`, `y_shape`, ... keep the values from
# the LAST file visited; the DESlib cells further down use `Net` and `ss`
# without defining them, so they depend on this leftover state.
dict_results = {}
dict_sd_pc = {}
for files in glob('//content/drive/MyDrive/lit/lit_lit/reserve/processed_8/*'):
    print(files)
    # Created per directory but never filled in this cell.
    pc = []
    sd_pc = []
    for items in glob("{}/*.*".format(files)):
        print(items)
        # Full path with everything from the first "." onwards removed.
        item_name = items.split(".")[0]
        print(item_name)
        # Tag built from the 5th and 6th "_"-separated pieces of the FULL path.
        # NOTE(review): which pieces these are depends on the underscores in the
        # directory and file names -- confirm against the actual layout.
        ss = item_name.split("_")[4] + item_name.split("_")[5]
        print(ss)
        hh = item_name.split("_")[4]
        # allow_pickle=True unpickles Python objects; only use on trusted files.
        data = np.load(items, allow_pickle=True)
        input_list = data.tolist()  # stored object; indexed by 'FrameStack' below
        # Positions 0-5 of 'FrameStack' are bound to the train/test/validation
        # arrays in this order.
        X_train = input_list['FrameStack'][0]
        X_test = input_list['FrameStack'][1]
        y_train = input_list['FrameStack'][2]
        y_test = input_list['FrameStack'][3]
        X_val = input_list['FrameStack'][4]
        y_val = input_list['FrameStack'][5]

        # Number of output units: largest training label plus one
        # (assumes integer class labels starting at 0 -- TODO confirm).
        y_shape = max(y_train)+1
        # The methods below look up `X_train` and `y_shape` in the notebook
        # namespace when they are CALLED, not when the class is defined; inside
        # `y_shape(self)` the name `y_shape` is the variable above, not the
        # property of the same name.
        class Net(NeuralNetwork): 
          @property
          def x_shape(self): return [None, X_train.shape[1]]
          @property
          def y_shape(self): return [None, y_shape]
          def rebuild_model(self, X, **_):
            # Two 256-unit softplus layers followed by a y_shape-unit layer
            # without activation.
            L0 = X
            L1 = tf.compat.v1.layers.dense(L0, 256, name=self.name+'/L1', activation=tf.nn.softplus)
            L2 = tf.compat.v1.layers.dense(L1, 256, name=self.name+'/L2', activation=tf.nn.softplus)
            L3 = tf.compat.v1.layers.dense(L2, y_shape , name=self.name+'/L3', activation=None)
            return [L1, L2, L3]



In [None]:
from glob import glob
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import numpy as np
import sklearn.metrics as metrics
from sklearn import metrics
from sklearn.metrics import fbeta_score, make_scorer
import sklearn
from numpy import mean

# Train LIT ensembles on every split of every dataset directory, save the
# models, and report the mean / standard deviation of the ensemble accuracy
# across the splits of each directory.
#
# Fixes relative to the previous version of this cell:
#   * the ensemble evaluation was indented inside the per-model save loop, so
#     it ran once per model and appended the same accuracy many times;
#   * the reported "standard deviation" was the mean of running standard
#     deviations over that inflated list; it is now the standard deviation of
#     the per-split accuracies (the same quantity the DESlib cells report);
#   * a `np.std(...)` call whose result was discarded has been removed.
dict_results = {}
dict_sd_pc = {}
for files in glob('//content/drive/MyDrive/lit/lit_lit/reserve/processed_8/*'):
    print(files)
    pc = []  # one ensemble accuracy per split file of this directory
    for items in glob("{}/*.*".format(files)):
        print(items)
        item_name = items.split(".")[0]
        print(item_name)
        # Tag used in the saved-model file names, built from the 5th and 6th
        # "_"-separated pieces of the full path.
        # NOTE(review): depends on the underscores in the directory and file
        # names -- confirm it is unique per split.
        name_parts = item_name.split("_")
        ss = name_parts[4] + name_parts[5]
        print(ss)
        hh = name_parts[4]
        # allow_pickle=True unpickles Python objects; only use on trusted files.
        data = np.load(items, allow_pickle=True)
        input_list = data.tolist()
        X_train = input_list['FrameStack'][0]
        X_test = input_list['FrameStack'][1]
        y_train = input_list['FrameStack'][2]
        y_test = input_list['FrameStack'][3]
        X_val = input_list['FrameStack'][4]
        y_val = input_list['FrameStack'][5]

        # Number of output units: largest training label plus one
        # (assumes integer class labels starting at 0 -- TODO confirm).
        y_shape = max(y_train)+1
        # The methods read `X_train` and `y_shape` from the notebook namespace
        # at call time; inside `y_shape(self)` the name `y_shape` refers to the
        # variable above, not to the property.
        class Net(NeuralNetwork):
          @property
          def x_shape(self): return [None, X_train.shape[1]]
          @property
          def y_shape(self): return [None, y_shape]
          def rebuild_model(self, X, **_):
            L0 = X
            L1 = tf.compat.v1.layers.dense(L0, 256, name=self.name+'/L1', activation=tf.nn.softplus)
            L2 = tf.compat.v1.layers.dense(L1, 256, name=self.name+'/L2', activation=tf.nn.softplus)
            L3 = tf.compat.v1.layers.dense(L2, y_shape , name=self.name+'/L3', activation=None)
            return [L1, L2, L3]

        tf.compat.v1.reset_default_graph()

        print("train restarts")
        # NOTE(review): these models are trained but not used anywhere in this
        # cell; drop the call if the baseline is not needed.
        random_restart_models = train_restart_models(Net, 100, X_train, y_train, num_epochs=100, print_every=50)

        print("train input-space LIT")
        input_space_models = train_diverse_models(Net, 100, X_train, y_train, num_epochs=100, print_every=50)
        for i, model in enumerate(input_space_models):
          print("dasff",type(model))
          model.save("/content/drive/MyDrive/lit/lit_lit/input_space_model"+str(i)+"_"+ss+".pkl")

        # Evaluate the whole ensemble ONCE per split, after all models are saved.
        # NOTE(review): column 1 plus a 0.5 threshold assumes binary labels even
        # though y_shape is derived from the data -- confirm.
        all_model_probs = np.array([m.predict_proba(X_test)[:,1] for m in input_space_models])
        average_probs = all_model_probs.mean(axis=0) # average predicted probability
        variance_probs = all_model_probs.var(axis=0) # variance = measure of uncertainty
        label_predictions = (average_probs >= 0.5).astype(int) # threshold average to get label prediction
        accuracy = np.mean(label_predictions == y_test)
        print(items)
        print(accuracy)
        pc.append(accuracy)

    # Summaries over the splits of this directory (nan if it had no files).
    dict_results["{}".format(files)] = '{:.4f}'.format(mean(pc))
    dict_sd_pc["{}".format(files)] = '{:.4f}'.format(np.std(pc, dtype=np.float64))
print(dict_results)
print(dict_results.values())
print(dict_sd_pc.values())
# auc = sklearn.metrics.roc_auc_score(average_probs, y_test)




In [8]:
pip install deslib

Collecting deslib
  Downloading DESlib-0.3.5-py3-none-any.whl (158 kB)
[?25l[K     |██                              | 10 kB 17.0 MB/s eta 0:00:01[K     |████▏                           | 20 kB 22.9 MB/s eta 0:00:01[K     |██████▏                         | 30 kB 23.2 MB/s eta 0:00:01[K     |████████▎                       | 40 kB 19.0 MB/s eta 0:00:01[K     |██████████▎                     | 51 kB 9.2 MB/s eta 0:00:01[K     |████████████▍                   | 61 kB 10.7 MB/s eta 0:00:01[K     |██████████████▍                 | 71 kB 10.2 MB/s eta 0:00:01[K     |████████████████▌               | 81 kB 10.5 MB/s eta 0:00:01[K     |██████████████████▌             | 92 kB 11.6 MB/s eta 0:00:01[K     |████████████████████▋           | 102 kB 10.2 MB/s eta 0:00:01[K     |██████████████████████▊         | 112 kB 10.2 MB/s eta 0:00:01[K     |████████████████████████▊       | 122 kB 10.2 MB/s eta 0:00:01[K     |██████████████████████████▉     | 133 kB 10.2 MB/s eta 0:00

In [9]:
import deslib
from deslib.dcs import OLA, MLA
from deslib.des import METADES, KNORAU, DESMI, DESP
from deslib.des.knora_e import KNORAE
from deslib.static import Oracle
from sklearn.metrics import accuracy_score
import glob
import numpy as np
from numpy import mean
import pickle
from glob import glob
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import sklearn.metrics as metrics
from sklearn import metrics
from sklearn.metrics import fbeta_score, make_scorer
import sklearn
from numpy import mean

#dynamic_selection_methods
# Evaluate a DESlib dynamic-selection method on top of the saved LIT models,
# once per split, and summarise the accuracy per dataset directory.
#
# Fixes relative to the previous version of this cell:
#   * `models = list_cls.append(aaa)` is always None (list.append returns
#     None), so DESlib received pool_classifiers=None instead of the loaded
#     models;
#   * the ensemble was fitted and scored inside the per-model loop and the pool
#     kept growing across splits; it is now rebuilt per split and evaluated once;
#   * `ss` was never assigned here (stale value from another cell); it is now
#     derived from the split path exactly as the training cell does.
dict_final_results = {}
dict_standard_deviation = {}
for files in glob('/content/drive/MyDrive/lit/lit_lit/reserve/processed_8/*'):
    print(files)
    knorae_results = []  # one accuracy per split (name kept; the method below is KNORA-U)
    for items in glob("{}/*.*".format(files)):
        print(items)
        # allow_pickle=True unpickles Python objects; only use on trusted files.
        data = np.load(items, allow_pickle=True)
        input_list = data.tolist()
        X_train = input_list['FrameStack'][0]
        X_test = input_list['FrameStack'][1]
        y_train = input_list['FrameStack'][2]
        y_test = input_list['FrameStack'][3]
        X_dsel = input_list['FrameStack'][4]
        y_dsel = input_list['FrameStack'][5]

        # Same tag the training cell used when saving the models of this split.
        name_parts = items.split(".")[0].split("_")
        ss = name_parts[4] + name_parts[5]
        # `Net` comes from an earlier cell and reads `X_train` / `y_shape` from
        # the notebook namespace, so keep `y_shape` in step with this split.
        y_shape = max(y_train)+1

        # Pool of previously trained models for this split.
        # NOTE(review): only the first 2 saved models are loaded.
        pool_classifiers = []
        for i in range(2):
          model = Net()
          loaded = model.load("/content/drive/MyDrive/lit/lit_lit/input_space_model"+str(i)+"_"+ss+".pkl")
          # NOTE(review): it is not visible here whether `load` returns the
          # model or fills `model` in place; both conventions are handled.
          pool_classifiers.append(model if loaded is None else loaded)

        x_merge = np.append(X_train, X_dsel, axis=0)
        y_merge = np.append(y_train, y_dsel, axis=0)

        # Swap in KNORAE, MLA, OLA, METADES, DESMI or DESP to try another method.
        method = KNORAU
        # NOTE(review): DESlib expects fitted scikit-learn style classifiers;
        # confirm that `Net` instances satisfy that interface.
        ensemble = method(pool_classifiers)
        ensemble.fit(x_merge, y_merge)
        yh = ensemble.predict(X_test)
        acc = accuracy_score(y_test, yh)
        print(acc)
        knorae_results.append(acc)

    dict_final_results["{}".format(files)] = '{:.4f}'.format(mean(knorae_results))
    print(dict_final_results)
    dict_standard_deviation["{}".format(files)] = '{:.4f}'.format(np.std(knorae_results, dtype=np.float64))

print(dict_final_results.values())
print(dict_standard_deviation.values())




In [395]:
#single best
import deslib
from deslib.dcs import OLA, MLA
from deslib.des import METADES, KNORAU, DESMI, DESP
from deslib.des.knora_e import KNORAE
from deslib.static import Oracle
from sklearn.metrics import accuracy_score
import glob
import numpy as np
from numpy import mean
import pickle
from glob import glob
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import sklearn.metrics as metrics
from sklearn import metrics
from sklearn.metrics import fbeta_score, make_scorer
import sklearn
from numpy import mean
from deslib.static.single_best import SingleBest

#dynamic_selection_methods
# Evaluate DESlib's SingleBest on top of the saved LIT models, once per split,
# and summarise the accuracy per dataset directory.
#
# Fixes relative to the previous version of this cell:
#   * `models = list_cls.append(aaa)` is always None (list.append returns
#     None), so SingleBest received pool_classifiers=None instead of the loaded
#     models;
#   * fitting/scoring happened inside the per-model loop and the pool kept
#     growing across splits; it is now rebuilt per split and evaluated once;
#   * `ss` was never assigned here (stale value from another cell);
#   * `dict_final_results` is no longer reset, so the dynamic-selection
#     results of the previous cell survive;
#   * a `predict` call whose result was discarded and the unused merged arrays
#     have been removed.
dict_sb_results = {}
dict_standard_deviation = {}

for files in glob('/content/drive/MyDrive/lit/lit_lit/reserve/processed_8/*'):
    print(files)
    single_best_results = []  # one accuracy per split
    for items in glob("{}/*.*".format(files)):
        print(items)
        # allow_pickle=True unpickles Python objects; only use on trusted files.
        data = np.load(items, allow_pickle=True)
        input_list = data.tolist()
        X_train = input_list['FrameStack'][0]
        X_test = input_list['FrameStack'][1]
        y_train = input_list['FrameStack'][2]
        y_test = input_list['FrameStack'][3]
        X_dsel = input_list['FrameStack'][4]
        y_dsel = input_list['FrameStack'][5]

        # Same tag the training cell used when saving the models of this split.
        name_parts = items.split(".")[0].split("_")
        ss = name_parts[4] + name_parts[5]
        # `Net` comes from an earlier cell and reads `X_train` / `y_shape` from
        # the notebook namespace, so keep `y_shape` in step with this split.
        y_shape = max(y_train)+1

        # Pool of previously trained models for this split.
        # NOTE(review): only the first 2 saved models are loaded.
        pool_classifiers = []
        for i in range(2):
          model = Net()
          loaded = model.load("/content/drive/MyDrive/lit/lit_lit/input_space_model"+str(i)+"_"+ss+".pkl")
          # NOTE(review): it is not visible here whether `load` returns the
          # model or fills `model` in place; both conventions are handled.
          pool_classifiers.append(model if loaded is None else loaded)

        # NOTE(review): DESlib expects fitted scikit-learn style classifiers;
        # confirm that `Net` instances satisfy that interface.
        sb_bdt = SingleBest(pool_classifiers = pool_classifiers, scoring = None, random_state = None, n_jobs = -1)
        # The best classifier is chosen on the selection split, scored on test.
        sb_bdt.fit(X_dsel, y_dsel)
        acc = sb_bdt.score(X_test, y_test, sample_weight=None)
        single_best_results.append(acc)

    dict_sb_results["{}".format(files)] = '{:.4f}'.format(mean(single_best_results))
    print(dict_sb_results)
    dict_standard_deviation["{}".format(files)] = '{:.4f}'.format(np.std(single_best_results, dtype=np.float64))
    print(dict_standard_deviation)

print(dict_sb_results.values())
print(dict_standard_deviation.values())




In [396]:
#oracle
import deslib
from deslib.dcs import OLA, MLA
from deslib.des import METADES, KNORAU, DESMI, DESP
from deslib.des.knora_e import KNORAE
from deslib.static import Oracle
from sklearn.metrics import accuracy_score
import glob
import numpy as np
from numpy import mean
import pickle
from glob import glob
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import sklearn.metrics as metrics
from sklearn import metrics
from sklearn.metrics import fbeta_score, make_scorer
import sklearn
from numpy import mean
from deslib.static import Oracle

# Evaluate DESlib's Oracle (upper bound) on top of the saved LIT models, once
# per split, and summarise the accuracy per dataset directory.
#
# Fixes relative to the previous version of this cell:
#   * `models = list_cls.append(aaa)` is always None (list.append returns
#     None), so Oracle received pool_classifiers=None instead of the loaded
#     models;
#   * fitting/scoring happened inside the per-model loop and the pool kept
#     growing across splits; it is now rebuilt per split and evaluated once;
#   * `ss` was never assigned here (stale value from another cell);
#   * results were written into `dict_sb_results`, wiping the single-best
#     results, while `dict_final_results_oracle` stayed empty; they now go to
#     `dict_final_results_oracle`, and `dict_final_results` / `dict_sb_results`
#     are no longer reset.
dict_final_results_oracle = {}
dict_standard_deviation = {}

for files in glob('/content/drive/MyDrive/lit/lit_lit/reserve/processed_8/*'):
    print(files)
    oracle_results = []  # one accuracy per split

    for items in glob("{}/*.*".format(files)):
        print(items)
        # allow_pickle=True unpickles Python objects; only use on trusted files.
        data = np.load(items, allow_pickle=True)
        input_list = data.tolist()
        X_train = input_list['FrameStack'][0]
        X_test = input_list['FrameStack'][1]
        y_train = input_list['FrameStack'][2]
        y_test = input_list['FrameStack'][3]
        X_dsel = input_list['FrameStack'][4]
        y_dsel = input_list['FrameStack'][5]

        # Same tag the training cell used when saving the models of this split.
        name_parts = items.split(".")[0].split("_")
        ss = name_parts[4] + name_parts[5]
        # `Net` comes from an earlier cell and reads `X_train` / `y_shape` from
        # the notebook namespace, so keep `y_shape` in step with this split.
        y_shape = max(y_train)+1

        # Pool of previously trained models for this split.
        # NOTE(review): only the first 2 saved models are loaded.
        pool_classifiers = []
        for i in range(2):
          model = Net()
          loaded = model.load("/content/drive/MyDrive/lit/lit_lit/input_space_model"+str(i)+"_"+ss+".pkl")
          # NOTE(review): it is not visible here whether `load` returns the
          # model or fills `model` in place; both conventions are handled.
          pool_classifiers.append(model if loaded is None else loaded)

        # NOTE(review): DESlib expects fitted scikit-learn style classifiers;
        # confirm that `Net` instances satisfy that interface.
        oracleeeee = Oracle(pool_classifiers=pool_classifiers, random_state=None, n_jobs=-1)
        oracleeeee.fit(X_dsel, y_dsel)
        # Oracle.predict needs the true labels: it checks whether any pool
        # member classifies each sample correctly.
        y_pred = oracleeeee.predict(X_test, y_test)
        accuracyyy = accuracy_score(y_test, y_pred)
        print(accuracyyy)
        oracle_results.append(accuracyyy)

    dict_final_results_oracle["{}".format(files)] = '{:.4f}'.format(mean(oracle_results))
    print(dict_final_results_oracle)
    dict_standard_deviation["{}".format(files)] = '{:.4f}'.format(np.std(oracle_results, dtype=np.float64))
    print(dict_standard_deviation)

print(dict_final_results_oracle.values())
print(dict_standard_deviation.values())


