In [7]:
import pwd
from IPython import get_ipython

# Enable automatic reloading of edited project modules (mode 2 = reload all
# modules before executing each cell).
# `InteractiveShell.magic` is deprecated; `run_line_magic(name, args)` is the
# supported programmatic equivalent of `%name args`.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  get_ipython().magic('load_ext autoreload')
  get_ipython().magic('autoreload 2')


In [8]:
from jax import random
import jax.numpy as jnp
from scipy.io import arff

from src.dbopt.FCNN import FCNN
from src.dbopt.DB_sampler import DecisionBoundarySampler

In [9]:
# Fixed seed so every random draw derived from `key` is reproducible.
seed = 24
key = random.PRNGKey(seed)

# Report which backend JAX is running on. `jax.default_backend()` is the public
# API for this; `jax.lib.xla_bridge` is an internal module that newer JAX
# releases deprecate.
import jax
print(jax.default_backend())

gpu


### Importing the data

In [10]:
# `loadarff` returns a (records, metadata) pair; only the records are used.
arff_records, arff_meta = arff.loadarff('data/column_2C_weka.arff')
data = arff_records

def row_void_to_array(entry):
    """Collect the first six fields of `entry` into a 1-D JAX array.

    Args:
        entry: an indexable record; positions 0 through 5 are read.

    Returns:
        A `jnp` array holding the six values in order.
    """
    features = []
    for position in range(6):
        features.append(entry[position])
    return jnp.array(features)

# Derive a dedicated subkey for shuffling; `key` itself is replaced by the split.
key, ds_key = random.split(key)

# Features: first six fields of every record. Labels: 0 when field 6 is
# b'Normal', 1 otherwise.
x = jnp.array([row_void_to_array(row) for row in data])
y = jnp.array([0 if row[6] == b'Normal' else 1 for row in data])

# Put the label in column 0, the features in the remaining columns, then
# shuffle the rows.
labelled_rows = jnp.concatenate((y[:, None], x), axis=1)
dataset = random.permutation(ds_key, labelled_rows, axis=0)

# The first `train_fraction` of the shuffled rows (rounded up) is the training
# set; the remainder is the test set.
train_fraction = 8/10
num_training_examples = int(jnp.ceil(dataset.shape[0]*train_fraction))
train_dataset, test_dataset = (
    dataset[:num_training_examples, :],
    dataset[num_training_examples:, :],
)

print("dataset shape : ", dataset.shape)
print(
    "proportion of positives in the dataset : ",
    dataset[:, 0].sum()/dataset.shape[0],
)
print("training set shape : ", train_dataset.shape)
print("test dataset shape : ", test_dataset.shape)

dataset shape :  (310, 7)
proportion of positives in the dataset :  0.67741936
training set shape :  (248, 7)
test dataset shape :  (62, 7)


### Fitting the network

In [11]:
# Draw the three subkeys this cell needs, in the same order as they are
# consumed: dummy input, parameter initialisation, training.
key, init_x_key = random.split(key)
key, init_key = random.split(key)
key, train_key = random.split(key)

# Fully connected network; the list gives the number of neurons per layer.
model = FCNN(num_neurons_per_layer=[100, 100, 100, 2])

# Initialise the parameters from a random 6-element input vector.
x_init = random.uniform(init_x_key, (6,))
params = model.init(init_key, x_init)

# Train, logging every 10 epochs and evaluating on the held-out test set;
# `params` is rebound to the value returned by `train`.
params = model.train(
    train_key,
    params,
    train_dataset,
    300,
    lr=0.0001,
    logs_frequency=10,
    test_set=test_dataset,
)

epoch 0, loss = 7.2193379402160645, training accuracy = [0.43548387], test accuracy = [0.37096775]
epoch 10, loss = 1.2778167724609375, training accuracy = [0.7096774], test accuracy = [0.6935484]
epoch 20, loss = 0.47287869453430176, training accuracy = [0.8104839], test accuracy = [0.7419355]
epoch 30, loss = 0.3217698633670807, training accuracy = [0.85483867], test accuracy = [0.80645156]
epoch 40, loss = 0.25981631875038147, training accuracy = [0.875], test accuracy = [0.82258064]
epoch 50, loss = 0.3233289122581482, training accuracy = [0.87096775], test accuracy = [0.79032254]
epoch 60, loss = 0.1480976790189743, training accuracy = [0.84677416], test accuracy = [0.7741935]
epoch 70, loss = 0.23900464177131653, training accuracy = [0.86693543], test accuracy = [0.7741935]
epoch 80, loss = 0.20400534570217133, training accuracy = [0.87903225], test accuracy = [0.82258064]
epoch 90, loss = 0.20499001443386078, training accuracy = [0.8830645], test accuracy = [0.79032254]
epoch 10

### Try the sampler

In [12]:
# Column 0 of `dataset` holds the label; the remaining columns are features.
features = dataset[:, 1:]
input_dim = features.shape[1]

# Global minimum and maximum over all feature values, handed to the sampler
# through its `min` / `max` keyword arguments.
# These are deliberately NOT named `min` / `max`: binding those names at
# notebook level would shadow the Python built-ins in every later cell.
feature_min = jnp.min(features)
feature_max = jnp.max(features)
sampler = DecisionBoundarySampler(n_points=1000,
                                  input_dim=input_dim,
                                  min=feature_min, max=feature_max)

# Print the mean sampler loss over the current points before and after
# running `sample`, to see the effect of the sampling step.
print(jnp.mean(sampler._loss(sampler.get_points(), params, model)))
sampling = sampler.sample(params, model, threshold=0.01)
print(jnp.mean(sampler._loss(sampler.get_points(), params, model)))

0.92074597
2.2708673e-05
