In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import linear_model
import torch
from scipy.special import softmax 
from sklearn.preprocessing import StandardScaler

import pyro
import pyro.distributions as dist
from pyro.contrib.autoguide import AutoDiagonalNormal, AutoMultivariateNormal
from pyro.infer import MCMC, NUTS, HMC, SVI, Trace_ELBO
from pyro.optim import Adam, ClippedAdam
from pyro.infer import Predictive
from sklearn.model_selection import train_test_split

from models import model__prior_mu_studentT as model 
from collections import Counter


### Data Processing

In [2]:
scaler = StandardScaler()
df = pd.read_pickle('../pickle/df.pkl')

In [3]:
df_sampled = df.sample(1400,random_state=47)

In [4]:
df_features = df_sampled.iloc[:,:-1].copy()
df_target = df_sampled.iloc[:,-1].copy()

In [5]:
df_features = scaler.fit_transform(df_features)

In [6]:
df_train_features, df_test_features,df_train_target, df_test_target =  train_test_split(df_features,df_target,stratify=df_target,random_state=47, test_size=1/7)

df_train_features, df_val_features,df_train_target, df_val_target =  train_test_split(df_train_features,df_train_target,stratify=df_train_target,random_state=47, test_size=1/6)

In [7]:
df_train_target = df_train_target.to_numpy()
df_test_target = df_test_target.to_numpy()
df_val_target = df_val_target.to_numpy()

In [8]:
D = df_train_features.shape[1]
N_train = df_train_features.shape[0]
N_test = df_test_features.shape[0]
N_val = df_val_features.shape[0]
n_cat = 11 
degF=4
tau=1

### Ancestral Sampling 

In [9]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [10]:
# sample coefficients (beta)
beta = np.random.normal(0,1,size=D)
print("beta:", beta)

beta_array = np.zeros((n_cat,D))

for i in range(n_cat):
    
    beta_array[i,:] = np.random.normal(0,1,size=D)
    
# sample observations (y's)
y = np.zeros((N_train,n_cat))
for n in range(N_train):
    
    probs = np.zeros(n_cat)
    for i in range(n_cat):
        probs[i] = np.array([(np.dot(beta_array[i,:], df_train_features[n,:]))])
        
    p =  softmax(probs)
    y[n,:] = np.random.multinomial(1, p)  #binomial with one trial is equivalent to bernoulli
    #y[n] = int(p > 0.5) # alternative version without observation noise
    #print(" p=, y[n]="  (n, p, y[n,:]))
    #print('n, p and y ', n, p, y[n,:])

beta: [ 0.40304529  0.59561818 -0.23585745 -0.24577339  0.91754147 -0.04280335
 -0.22780267 -0.52247453  0.71254979 -1.00743579 -0.12312639 -0.28973902
  0.02018443 -0.93218132]


In [11]:
label_array = np.array([]) 
for i in y:
    label_array = np.append(label_array,np.argmax(i))

In [12]:
counts = Counter(label_array)

In [13]:
print("Accuracy:", 1.0*np.sum(label_array == df_train_target.flatten()) / len(df_train_target))

Accuracy: 0.088


In [14]:
X_train = torch.tensor(df_train_features).float()
y_train = torch.tensor(df_train_target.flatten()).float()

#### Final Model training 

In [15]:
degF=4
tau=1

In [16]:
# Define guide function
guide = AutoMultivariateNormal(model)

# Reset parameter values
pyro.clear_param_store()

# Define the number of optimization steps
n_steps = 15000

learning_rates = [0.0001,0.001,0.01]
# Setup the optimizer
acc_val_lr = []
for lr in learning_rates:

    adam_params = {"lr": lr}
    optimizer = ClippedAdam(adam_params)

    # Setup the inference algorithm
    elbo = Trace_ELBO(num_particles=1)
    svi = SVI(model, guide, optimizer, loss=elbo)

    # Do gradient steps
    for step in range(n_steps):
        elbo = svi.step(X_train, n_cat, degF, tau, y_train)
        if step % 1000 == 0:
            print("[%d] ELBO: %.1f" % (step, elbo))

            #ef model(X, n_cat, degF, tau, obs=None):
    predictive = Predictive(model, guide=guide, num_samples=2000,
                    return_sites=("alpha", "beta"))
    samples = predictive(X_train, n_cat, degF, tau, y_train)
    
    
    samples_alpha = samples["alpha"].detach().squeeze()
    samples_beta = samples["beta"].detach().squeeze()
    
    
    mean_betas = samples_beta.mean(axis=0)
    mean_betas = mean_betas.T
    mean_alpha = samples_alpha.mean(axis=0)
    mean_alpha = mean_alpha.T
    
    
    y_val_pred = np.zeros((N_val,n_cat))
    
    for n in range(N_val):

        probs = np.zeros(n_cat)
        for i in range(n_cat):
            probs[i] = np.array([mean_alpha[i]+(np.dot(mean_betas[i,:], df_val_features[n,:]))])

        p =  softmax(probs)
        y_val_pred[n,:] = np.argmax(p)  #binomial with one trial is equivalent to bernoulli
        
    y_val_pred = y_val_pred[:,0]
            
    acc = 1.0*np.sum(y_val_pred == df_val_target.flatten()) / len(df_val_target)
    print({lr:acc})
    acc_val_lr.append({lr:acc})

[0] ELBO: 4055.2
[1000] ELBO: 3613.6
[2000] ELBO: 3057.9
[3000] ELBO: 2740.5
[4000] ELBO: 2516.3
[5000] ELBO: 2354.8
[6000] ELBO: 2205.5
[7000] ELBO: 2043.2
[8000] ELBO: 2009.9
[9000] ELBO: 1948.5
[10000] ELBO: 1902.0
[11000] ELBO: 1835.1
[12000] ELBO: 1801.0
[13000] ELBO: 1780.0
[14000] ELBO: 1778.6


  mean_alpha = mean_alpha.T


{0.0001: 0.475}
[0] ELBO: 1751.2
[1000] ELBO: 1685.5
[2000] ELBO: 1655.3
[3000] ELBO: 1656.5
[4000] ELBO: 1639.5
[5000] ELBO: 1638.5
[6000] ELBO: 1636.6
[7000] ELBO: 1631.2
[8000] ELBO: 1628.0
[9000] ELBO: 1619.9
[10000] ELBO: 1627.8
[11000] ELBO: 1623.5
[12000] ELBO: 1634.9
[13000] ELBO: 1635.3
[14000] ELBO: 1621.5
{0.001: 0.475}
[0] ELBO: 1628.5
[1000] ELBO: 1711.9
[2000] ELBO: 1659.0
[3000] ELBO: 1701.7
[4000] ELBO: 1746.1
[5000] ELBO: 1716.7
[6000] ELBO: 1742.4
[7000] ELBO: 1695.1
[8000] ELBO: 1712.8
[9000] ELBO: 1680.6
[10000] ELBO: 1702.2
[11000] ELBO: 1702.8
[12000] ELBO: 1719.5
[13000] ELBO: 1685.1
[14000] ELBO: 1695.6
{0.01: 0.475}


In [17]:
for p in acc_val_lr:

    lr = list(p.keys())[0]
    acc = list(p.values())[0]

    print(f'The accuracy value for learning rate {lr} is {acc}')

The accuracy value for learning rate 0.0001 is 0.475
The accuracy value for learning rate 0.001 is 0.475
The accuracy value for learning rate 0.01 is 0.475
