In [7]:
import numpy as np
import cmdstanpy as stan
import os

from gproc.generative import sample_at_x
from gproc.plotting import contour_2d

## Bayesian Logistic Regression

In [8]:
N = 2500 # Data size
D = 2 # Data dimension
SEED = 0 # Fixed seed so that Restart & Run All redraws the same inputs

# Seed NumPy's legacy global generator, which np.random.uniform below draws from.
# NOTE(review): this only pins sample_at_x's randomness if it also draws from
# the global np.random state - confirm against gproc.generative.
np.random.seed(SEED)

# N x D matrix of inputs drawn uniformly from [-1, 1)
x = np.random.uniform(-1, 1, size=(N, D))

# Simulate responses at the inputs with a kernel variance of 3.
# NOTE(review): presumably y = labels, prob_y = their probabilities and
# f = latent function values - confirm against sample_at_x.
y, prob_y, f = sample_at_x(x, kernel_params = {'variance': 3.})

In [9]:
# Write a function to organise the data into stan friendly format

def stanvert(X, Y):
    """Package features and responses into the data dictionary given to Stan.

    Responses equal to -1 are recoded as 0 and the response vector is then
    cast to integers. Y itself is never modified; X is passed through as is.

    Keyword arguments:
    X - matrix of features (rows are samples, columns are dimensions)
    Y - vector of responses, one entry per row of X

    Returns:
    dict with keys 'x' (X as passed in), 'y' (integer response array),
    'd' (number of columns of X) and 'n' (number of rows of X)
    """

    # Work on a float copy so the caller's Y is left untouched
    stanY = np.array(Y, dtype=float)

    # Set the -1's to 0's for stan
    stanY[stanY == -1] = 0

    # Prepare the data for stan; x = covariates, y = response, d = dimensions, n = sample size
    sampler_data = {'x': X, 'y': stanY.astype(int), 'd': X.shape[1], 'n': X.shape[0]}

    return sampler_data

# Preview the Stan data dictionary built from the simulated x and y
stanvert(x, y)

{'x': array([[ 0.17060021, -0.55571104],
        [-0.23435436, -0.71816728],
        [ 0.37393786, -0.58722191],
        ...,
        [-0.61153513, -0.52415122],
        [-0.39148422, -0.55518158],
        [ 0.87708986, -0.44331879]]),
 'y': array([0, 1, 0, ..., 1, 1, 0]),
 'd': 2,
 'n': 2500}

In [13]:
# Build the path for the Bayesian Logistic Regression Stan model:
# the name 'BLR' joined onto the directory returned by stan.cmdstan_path()
# NOTE(review): the joined path carries no '.stan' extension - confirm how it is consumed
stan_file = os.path.join(stan.cmdstan_path(), 'BLR')
