In [1]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans2
from dgp_aepmcm.gp_network import DGPNetwork
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score
from sklearn.metrics import roc_auc_score
# Fix the seed of NumPy's global random number generator so that repeated
# runs of the notebook draw the same NumPy random numbers.
np.random.seed(5)



In [2]:
# Load the breast-cancer dataset shipped with scikit-learn.
data = load_breast_cancer()
x = data['data']
# Targets as an (n, 1) column vector. Any label below 0.5 (i.e. a 0 label)
# is remapped to -1 so that the classes are encoded as -1 / +1.
y = np.where(data['target'] < 0.5, -1, data['target'])[:, None]
# Hold out 33% of the samples for testing; the split is seeded.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=5,
)



In [3]:
# ---- DGP model configuration ----
M = 50                    # number of inducing points
D = x_train.shape[1]      # dimension of the problem (number of input features)

# ---- Training configuration ----
max_epochs = 1000         # maximum number of training epochs
learning_rate = 0.01
# NOTE(review): the next three values are not referenced by any other cell
# visible here -- confirm whether they were meant to be passed to the model.
minibatch_size = 100
n_samples_each_point = 10
n_samples = 20

# Inducing point locations: the M centroids that k-means returns for the
# training inputs (initial centroids are picked among the data points).
Z = kmeans2(data=x_train, k=M, minit='points')[0]



In [6]:
# Instantiate and train DGP-AEPMCM with L=3 GP layers: 3 GP nodes in each of
# the first two layers and a single node in the last one.
model_aepmcm = DGPNetwork(x_train, y_train, inducing_points=Z, show_debug_info=False)

model_aepmcm.add_input_layer()
# add_gp_layer always assumes a mean function for the prior
# p(u) = N(u | m(x), Kzz) with m(x) = X W.
# For this example the mean function is disabled by setting W to 0.
# Each GP layer is followed by a noise layer; the pairs below are
# (input dimension, number of GP nodes) for the three layers.
for gp_in_dim, gp_out_dim in ((D, 3), (3, 3), (3, 1)):
    model_aepmcm.add_gp_layer(M, gp_out_dim, W=np.zeros((gp_in_dim, gp_out_dim)))
    model_aepmcm.add_noise_layer()
model_aepmcm.add_output_layer_binary_classification(use_norm_cdf=True)

model_aepmcm.train_via_adam(max_epochs=max_epochs, learning_rate=learning_rate)



Instructions for updating:
Use tf.cast instead.


In [7]:
# Run the trained model on the held-out inputs. The returned pair is unpacked
# into `labels` and `probs` (presumably hard class labels and predicted class
# probabilities -- confirm against DGPNetwork.predict).
labels, probs = model_aepmcm.predict(x_test)


In [10]:
# Score the test-set predictions: accuracy and recall use the predicted
# labels, AUC uses the predicted probabilities, and the test log-likelihood
# is computed by the model itself.
acc_dgp = accuracy_score(y_test, labels)
recall_dgp = recall_score(y_test, labels)
auc_dgp = roc_auc_score(y_test, probs)
ll = model_aepmcm.calculate_log_likelihood(x_test, y_test)

# Report one metric per line.
print(
    f"Accuracy: {acc_dgp}",
    f"Recall: {recall_dgp}",
    f"Auc: {auc_dgp}",
    f"Test log-likelihood: {ll}",
    sep="\n",
)

Accuracy: 0.973404255319149
Recall: 1.0
Auc: 0.9915548931942374
Test log-likelihood: -0.09835905581712723
