In [None]:
import unc_classification_tab_data.models.bootstrapped_classifier as bc
import unc_classification_tab_data.models.nn_ensemble as nn
import unc_classification_tab_data.models.vae as vae

import numpy as np
import pandas as pd
import sklearn.utils
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import matplotlib.colors
import matplotlib.pyplot as plt

def generate_cluster(mean, diag_cov, size, frac_positive, seed=42):
    """Sample a labelled, isotropic Gaussian cluster.

    Parameters
    ----------
    mean : array-like of shape (d,)
        Centre of the cluster.
    diag_cov : float
        Variance applied to every feature; all covariances are zero.
    size : int
        Number of points to draw.
    frac_positive : float
        Probability that a point is given label 1 (label 0 otherwise).
    seed : int or None, default 42
        Seed of the private generator used for the draws. With the default,
        the output is the same as when the global RNG was seeded with 42,
        but NumPy's global random state is no longer modified. Pass ``None``
        to draw from the global ``np.random`` state instead.

    Returns
    -------
    numpy.ndarray of shape (size, d + 1, 1)
        Along axis 1: the ``d`` features followed by the label.

    Notes
    -----
    Calls that share ``seed``, ``size`` and ``diag_cov`` consume the same
    random stream, so their points differ only by the shift in ``mean``.
    Use distinct seeds when independent clusters are required.
    """
    mean = np.asarray(mean)
    # A private RandomState keeps the draws reproducible without reseeding
    # the process-wide generator behind the caller's back.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Isotropic covariance sized from the mean rather than hard-coded to 2-D.
    cov = diag_cov * np.eye(mean.shape[0])
    points = rng.multivariate_normal(mean=mean, cov=cov, size=size)
    points = np.expand_dims(points, 2)

    labels = rng.choice([0, 1], size=(points.shape[0], 1, 1),
                        p=[1 - frac_positive, frac_positive])
    # Append the label as one extra entry along the feature axis.
    return np.concatenate([points, labels], axis=1)

def plot_single_grid(X, y, xx, yy, grid, alpha=0.1, s=50, title='', uncertainty=True,
                     colorbar=True, vmin=None, vmax=None, point_cmap=None):
    """Draw a filled contour map of ``grid`` with the data points on top.

    A new pyplot figure is created on every call; nothing is returned.

    Parameters
    ----------
    X : array indexed as ``X[:, 0]`` and ``X[:, 1]``
        Coordinates of the scatter points.
    y : array-like
        Per-point values used to colour the scatter points.
    xx, yy : 2-D arrays
        Mesh coordinates handed to ``plt.contourf``.
    grid : array
        Values to contour; reshaped to ``xx.shape`` before plotting.
    alpha, s : float
        Opacity and marker size of the scatter points.
    title : str
        Figure title.
    uncertainty : bool
        ``True`` selects the ``Purples`` colormap for the contours,
        ``False`` selects ``RdBu``.
    colorbar : bool
        Whether to add a colorbar (the figure is made wider when it is).
    vmin, vmax : float or None
        Forwarded to ``plt.contourf``.
    point_cmap : matplotlib colormap or None
        Colormap for the scatter points. ``None`` uses a red/blue two-colour
        map.
    """
    if point_cmap is None:
        # The original body read a global `cm_bright` that is not defined in
        # the visible notebook, which raises NameError on a fresh kernel.
        # NOTE(review): red/blue is an assumed reconstruction of that map -
        # confirm the intended colours.
        point_cmap = matplotlib.colors.ListedColormap(['#FF0000', '#0000FF'])

    # Leave extra width for the colorbar when one is drawn.
    plt.figure(figsize=(6, 5) if colorbar else (5, 5))

    surface_cmap = plt.cm.Purples if uncertainty else plt.cm.RdBu
    contours = plt.contourf(xx, yy, grid.reshape(xx.shape), cmap=surface_cmap,
                            levels=20, vmin=vmin, vmax=vmax)
    if colorbar:
        plt.colorbar(contours)

    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=point_cmap,
                edgecolors='k', alpha=alpha, s=s)
    plt.axis('off')
    plt.title(title)
    
def entropy(pk, axis):
    """Shannon entropy, in bits, of the distributions laid out along ``axis``.

    Parameters
    ----------
    pk : array-like
        Probabilities. Entries that are exactly 0 contribute 0 to the sum
        (the limit of ``p * log2(p)`` as ``p -> 0``) instead of producing NaN.
    axis : int
        Axis over which the probabilities of one distribution are summed.

    Returns
    -------
    numpy.ndarray or scalar
        ``-sum(pk * log2(pk))`` along ``axis``.
    """
    pk = np.asarray(pk)
    if not np.issubdtype(pk.dtype, np.floating):
        pk = pk.astype(float)

    # Evaluate log2 only where pk is non-zero; masked positions keep the 0
    # they were initialised with, so 0 * log2(0) becomes 0 rather than
    # 0 * -inf = nan. Negative or NaN inputs still propagate as NaN.
    log_pk = np.zeros_like(pk)
    np.log2(pk, out=log_pk, where=pk != 0)
    return -np.sum(pk * log_pk, axis=axis)

## Data generation

In [None]:
# Synthetic two-class problem: one Gaussian cluster per class.
positive_size = 100
negative_size = 100

pos_mean = np.array([2, 2])
neg_mean = np.array([-1, -1])

# All-positive and all-negative clusters with the same variance (4).
pos_cluster = generate_cluster(pos_mean, 4, positive_size, frac_positive=1.0)
neg_cluster = generate_cluster(neg_mean, 4, negative_size, frac_positive=0.0)
concatenated = np.vstack((pos_cluster, neg_cluster))

columns = ['f1', 'f2']
label = 'label'

# Drop the trailing singleton axis so each row is (f1, f2, label), then
# shuffle the rows with a fixed random_state.
df_model = sklearn.utils.shuffle(
    pd.DataFrame(concatenated[..., 0], columns=[*columns, label]),
    random_state=1,
)

X_train = df_model[columns].values
y_train = df_model[label].values

# Regular evaluation grid covering [-12, 12) in both directions.
step_size = .1
x_min, x_max = -12, 12
y_min, y_max = -12, 12

xx, yy = np.meshgrid(
    np.arange(x_min, x_max, step_size),
    np.arange(y_min, y_max, step_size),
)

# One (x, y) row per grid node, in the flattened order of the mesh.
X_grid = np.column_stack((xx.ravel(), yy.ravel()))


In [None]:
# Bootstrapped logistic regression.
# NOTE(review): the global seed is presumably what makes the bootstrap
# resampling reproducible - confirm against BootstrappedClassifier.
np.random.seed(42)
lr_model = bc.BootstrappedClassifier(
    LogisticRegression(solver='lbfgs'),
    n_bootstraps=5,
    bootstrap_size=100,
)
lr_model.fit(X_train, y_train)

# Predicted probabilities on the grid and their entropy as the uncertainty.
grid_probas = lr_model.predict_proba(X_grid)
grid_uncertainties = entropy(grid_probas, axis=1)

# First figure: column 1 of the predicted probabilities; second: entropy.
plot_single_grid(X_train, y_train, xx, yy, grid_probas[:, 1],
                 alpha=0.5, uncertainty=False, colorbar=True)
plot_single_grid(X_train, y_train, xx, yy, grid_uncertainties,
                 alpha=0.5, uncertainty=True, colorbar=True)

In [None]:
# Model configuration handed to NNEnsemble.
model_params = dict(
    hidden_sizes=[5, 5],
    dropout_rate=0.0,
    input_size=2,
    batch_norm=False,
)

# Optimisation settings handed to NNEnsemble.train.
training_params = dict(
    batch_size=8,
    n_epochs=20,
    early_stopping=False,
)

nn_model = nn.NNEnsemble(n_models=5, model_params=model_params)
# NOTE(review): the training data is passed twice; the second pair is
# presumably the validation set - confirm against NNEnsemble.train.
nn_model.train(X_train, y_train, X_train, y_train, training_params)


In [None]:
# Same grid evaluation as for the logistic regression, now for the NN ensemble.
grid_probas = nn_model.predict_proba(X_grid)
grid_uncertainties = entropy(grid_probas, axis=1)

# First figure: column 1 of the predicted probabilities; second: entropy.
plot_single_grid(X_train, y_train, xx, yy, grid_probas[:, 1],
                 alpha=0.5, uncertainty=False, colorbar=True)
plot_single_grid(X_train, y_train, xx, yy, grid_uncertainties,
                 alpha=0.5, uncertainty=True, colorbar=True)