In [21]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import linear_model
import torch
from scipy.special import softmax 
from sklearn.preprocessing import StandardScaler

import pyro
import pyro.distributions as dist
from pyro.contrib.autoguide import AutoDiagonalNormal, AutoMultivariateNormal
from pyro.infer import MCMC, NUTS, HMC, SVI, Trace_ELBO
from pyro.optim import Adam, ClippedAdam
from pyro.infer import Predictive
from sklearn.model_selection import train_test_split

### Data Processing

In [22]:
scaler = StandardScaler()

In [23]:
#Use your own local path please!
path = "./Data/"
file_name = ["train.csv", "test.csv", "submission.csv"]
df_train = pd.read_csv(path + file_name[0])
df_test = pd.read_csv(path + file_name[1])
df_submission = pd.read_csv(path + file_name[2])

In [24]:
df_train['key'] = df_train['key'].fillna(df_train['key'].mean())

In [25]:
df_train['instrumentalness'] = df_train['instrumentalness'].fillna(df_train['instrumentalness'].mean())

In [26]:
df_train['Popularity'] = df_train['Popularity'].fillna(df_train['Popularity'].mean())

In [27]:
df = df_train.drop(['Artist Name','Track Name'],axis=1).copy()

In [28]:
df_sampled = df.sample(1400,random_state=47)

In [29]:
df_features = df_sampled.iloc[:,:-1].copy()
df_target = df_sampled.iloc[:,-1].copy()

In [30]:
df_features = scaler.fit_transform(df_features)

In [31]:
df_train_features, df_test_features,df_train_target, df_test_target =  train_test_split(df_features,df_target,stratify=df_target,random_state=47, test_size=1/7)

df_train_features, df_val_features,df_train_target, df_val_target =  train_test_split(df_train_features,df_train_target,stratify=df_train_target,random_state=47, test_size=1/6)

In [32]:
df_train_target = df_train_target.to_numpy()
df_test_target = df_test_target.to_numpy()
df_val_target = df_val_target.to_numpy()

In [33]:
D = df_train_features.shape[1]
N_train = df_train_features.shape[0]
N_test = df_test_features.shape[0]
N_val = df_val_features.shape[0]
n_cat = 11 
degF=4
tau=1

### Ancestral Sampling 

In [34]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [35]:
# sample coefficients (beta)
beta = np.random.normal(0,1,size=D)
print("beta:", beta)

beta_array = np.zeros((n_cat,D))

for i in range(n_cat):
    
    beta_array[i,:] = np.random.normal(0,1,size=D)
    
# sample observations (y's)
y = np.zeros((N_train,n_cat))
for n in range(N_train):
    
    probs = np.zeros(n_cat)
    for i in range(n_cat):
        probs[i] = np.array([(np.dot(beta_array[i,:], df_train_features[n,:]))])
        
    p =  softmax(probs)
    y[n,:] = np.random.multinomial(1, p)  #binomial with one trial is equivalent to bernoulli
    #y[n] = int(p > 0.5) # alternative version without observation noise
    #print(" p=, y[n]="  (n, p, y[n,:]))
    #print('n, p and y ', n, p, y[n,:])

beta: [-0.00367606 -0.02760378  1.60360266 -1.100991   -0.840607    0.12969186
 -0.72335742  0.41421535  0.21514235 -3.32248393  0.73452844  1.15828269
 -0.13558812  0.30848633]


In [36]:
label_array = np.array([]) 
for i in y:
    label_array = np.append(label_array,np.argmax(i))


In [37]:
from collections import Counter

counts = Counter(label_array)
counts

Counter({10.0: 44,
         6.0: 118,
         9.0: 109,
         8.0: 157,
         1.0: 82,
         2.0: 129,
         3.0: 109,
         5.0: 79,
         7.0: 112,
         4.0: 45,
         0.0: 16})

In [38]:
print("Accuracy:", 1.0*np.sum(label_array == df_train_target.flatten()) / len(df_train_target))

Accuracy: 0.109


### Model implementation with Pyro

In [39]:
def model(X, n_cat, degF, tau, obs=None):
    
    input_dim = X.shape[1]
    
    
    mu_beta = pyro.sample("mu_beta", dist.StudentT(df=torch.ones(n_cat)*degF, 
                                                   loc=torch.zeros(n_cat), 
                                                   scale=torch.ones(n_cat)).to_event()) # Prior for the bias mean      
    sigma_beta  = pyro.sample("sigma_beta",  dist.HalfCauchy(tau*torch.ones(n_cat)).to_event()) # Prior for the bias standard deviation
    
    beta  = pyro.sample("beta", dist.Normal(mu_beta*torch.ones(n_cat), 
                                            sigma_beta*torch.ones(input_dim, n_cat)).to_event()) # Priors for the regression coefficents

    alpha = pyro.sample("alpha", dist.Normal(torch.zeros(1, n_cat), 
                                             5.*torch.ones(1, n_cat)).to_event())  # Prior for the bias/intercept
    
    
    with pyro.plate("data"):
        y = pyro.sample("y", dist.Categorical(logits=alpha + X.matmul(beta)), obs=obs)
        
    return y

In [40]:
X_train = torch.tensor(df_train_features).float()
y_train = torch.tensor(df_train_target.flatten()).float()

#### Final Model training 

In [41]:
degF=4
tau=1

In [42]:
# Define guide function
guide = AutoMultivariateNormal(model)

# Reset parameter values
pyro.clear_param_store()

# Define the number of optimization steps
n_steps = 15000

learning_rates = [0.0001,0.001,0.01]
# Setup the optimizer
acc_val_lr = []
for lr in learning_rates:

    adam_params = {"lr": lr}
    optimizer = ClippedAdam(adam_params)

    # Setup the inference algorithm
    elbo = Trace_ELBO(num_particles=1)
    svi = SVI(model, guide, optimizer, loss=elbo)

    # Do gradient steps
    for step in range(n_steps):
        elbo = svi.step(X_train, n_cat, degF, tau, y_train)
        if step % 1000 == 0:
            print("[%d] ELBO: %.1f" % (step, elbo))

            #ef model(X, n_cat, degF, tau, obs=None):
    predictive = Predictive(model, guide=guide, num_samples=2000,
                    return_sites=("alpha", "beta"))
    samples = predictive(X_train, n_cat, degF, tau, y_train)
    
    
    samples_alpha = samples["alpha"].detach().squeeze()
    samples_beta = samples["beta"].detach().squeeze()
    
    
    mean_betas = samples_beta.mean(axis=0)
    mean_betas = mean_betas.T
    mean_alpha = samples_alpha.mean(axis=0)
    mean_alpha = mean_alpha.T
    
    
    y_val_pred = np.zeros((N_val,n_cat))
    
    for n in range(N_val):

        probs = np.zeros(n_cat)
        for i in range(n_cat):
            probs[i] = np.array([mean_alpha[i]+(np.dot(mean_betas[i,:], df_val_features[n,:]))])

        p =  softmax(probs)
        y_val_pred[n,:] = np.argmax(p)  #binomial with one trial is equivalent to bernoulli
        
    y_val_pred = y_val_pred[:,0]
            
    acc = 1.0*np.sum(y_val_pred == df_val_target.flatten()) / len(df_val_target)
    print({lr:acc})
    acc_val_lr.append({lr:acc})

[0] ELBO: 5604.9
[1000] ELBO: 5038.4
[2000] ELBO: 4545.6
[3000] ELBO: 3970.2
[4000] ELBO: 3543.0
[5000] ELBO: 3171.4
[6000] ELBO: 2884.2
[7000] ELBO: 2806.8
[8000] ELBO: 2558.8
[9000] ELBO: 2453.0
[10000] ELBO: 2352.1
[11000] ELBO: 2182.8
[12000] ELBO: 2112.9
[13000] ELBO: 2044.0
[14000] ELBO: 1967.9


  mean_alpha = mean_alpha.T


{0.0001: 0.455}
[0] ELBO: 1935.6
[1000] ELBO: 1689.2
[2000] ELBO: 1665.3
[3000] ELBO: 1662.1
[4000] ELBO: 1651.4
[5000] ELBO: 1646.5
[6000] ELBO: 1639.0
[7000] ELBO: 1637.3
[8000] ELBO: 1643.8
[9000] ELBO: 1640.3
[10000] ELBO: 1636.1
[11000] ELBO: 1627.8
[12000] ELBO: 1623.4
[13000] ELBO: 1634.8
[14000] ELBO: 1627.4
{0.001: 0.46}
[0] ELBO: 1625.9
[1000] ELBO: 1668.3
[2000] ELBO: 1690.8
[3000] ELBO: 1693.9
[4000] ELBO: 1691.2
[5000] ELBO: 1679.1
[6000] ELBO: 1712.0
[7000] ELBO: 1724.5
[8000] ELBO: 1698.3
[9000] ELBO: 1690.2
[10000] ELBO: 1697.3
[11000] ELBO: 1682.0
[12000] ELBO: 1696.1
[13000] ELBO: 1707.1
[14000] ELBO: 1740.3
{0.01: 0.445}


In [43]:
for p in acc_val_lr:

    lr = list(p.keys())[0]
    acc = list(p.values())[0]

    print(f'The accuracy value for learning rate {lr} is {acc}')

The accuracy value for learning rate 0.0001 is 0.455
The accuracy value for learning rate 0.001 is 0.46
The accuracy value for learning rate 0.01 is 0.445
