<a href="https://colab.research.google.com/github/OscarBedford/MLCourse_Weekly_Exercises/blob/main/Exercise_13_10_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Exercise 10: analogous to task 5, but implementing a Bayesian hierarchical regression (with a
continuous outcome variable) to predict subject age (z-scored).

In [None]:
%%capture
!pip install nilearn
!pip install pymc

In [None]:
%%capture
import numpy as np
from nilearn import datasets
from nilearn.input_data import NiftiLabelsMasker
from nilearn.image import index_img
from sklearn.preprocessing import StandardScaler
import nibabel as nib

NOTE: for this exercise I used PyMC version 5 instead of PyMC3 because the model ran better on the former package (i.e., with fewer errors).

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pymc as pm
import scipy.stats as stats
import arviz as az
from pymc import Bernoulli, Model, HalfCauchy
from pymc import invlogit, sample, summary
from numpy import mean
from numpy import std
from sklearn.linear_model import LogisticRegression
from sklearn.utils import resample
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.cross_decomposition import PLSRegression as PLSR
from sklearn.cross_decomposition import CCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression as LR

In [None]:
%%capture
# Fetch the OASIS VBM dataset for 100 subjects (gray-matter maps plus the
# accompanying per-subject variables in `ext_vars`).
brain_data = datasets.fetch_oasis_vbm(n_subjects=100)

# Fetch the Craddock 2012 atlas and pick volume 10 of its 'scorr_mean' image.
# We set the index to 10 for these exercises in order to have 106 ROIs, which
# is close to the 100 ROIs used in Exercises 1-5.
crad = datasets.fetch_atlas_craddock_2012()
atlas_nii = index_img(crad['scorr_mean'], 10)

# Average each gray-matter map within every atlas region; the masker also
# standardizes the extracted signals (standardize=True).
masker = NiftiLabelsMasker(labels_img=atlas_nii, standardize=True)
input_variables = masker.fit_transform(brain_data.gray_matter_maps)

# Build the (n_subjects, 1) age column with NumPy before scaling. Indexing
# `age[:, None]` directly only works when `ext_vars` is a NumPy record array;
# it fails if `ext_vars` is a pandas DataFrame (as returned by recent nilearn
# releases), because pandas Series do not support multi-dimensional indexing.
age_column = np.asarray(brain_data.ext_vars.age, dtype=float).reshape(-1, 1)

# Subject age in standard units (z-scored), flattened back to one dimension.
output_variable = StandardScaler().fit_transform(age_column)[:, 0]

In [None]:
# Keep only the first 100 region columns of "input_variables" (the atlas
# yields 106 ROIs), so that 100 region predictors remain for the model.
# Slicing to a fixed width instead of dropping "the last 6" keeps this cell
# idempotent: running it again does not remove any further columns.
input_variables = input_variables[:, :100]

In [None]:
# Extract the per-subject "delay" column from the variables that ship with
# the dataset (brain_data.ext_vars) and print it to inspect its contents.
delays = brain_data.ext_vars.delay
print(delays)


[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan]


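NOTE: the delay column consists entirely of NaNs for these subjects. Its median is therefore NaN as well, and every comparison against NaN evaluates to False, so the median split below cannot assign anyone to a "low" or "high" delay group. We will still set up the model as indicated, but the following results cannot really be trusted.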

In [None]:
# Median of the delay values: the cut-off that divides subjects into low and
# high delay. np.nanmedian ignores missing (NaN) entries, whereas np.median
# returns NaN as soon as a single entry is missing. If every entry is NaN the
# result is still NaN (and NumPy emits a RuntimeWarning).
delay_median = np.nanmedian(delays)

In [None]:
# Binary sex indicator taken from the dataset's "mf" column:
# entries equal to 'M' become 0, every other entry becomes 1.
male_female = (~np.isin(brain_data.ext_vars.mf, 'M')).astype(int)

In [None]:
# Bayesian hierarchical regression of z-scored age on the region-wise inputs.
# Everything (model definition, sampling, summary) runs inside the model
# context so PyMC knows which model to sample from.
with pm.Model() as model:
    # Number of region predictors, taken from the design matrix so that the
    # slope and hyperprior vectors always match its number of columns.
    n_regions = input_variables.shape[1]

    # Binary per-subject variable with a fair-coin prior, one entry per delay value
    delay = pm.Bernoulli('delay', p=0.5, shape=len(delays))
    # NOTE(review): `low` and `high` are not used anywhere below. Comparisons
    # against NaN are always False, so both selections are empty whenever
    # `delay_median` is NaN.
    low = delay[delays < delay_median]
    high = delay[delays >= delay_median]

    # We define sex as a categorical distribution with 2 categories (male, female)
    # with unequal probabilities, observed through the binarized sex vector
    sex = pm.Categorical('sex', p=np.array([0.36, 0.64]), observed=male_female)

    # HalfCauchy hyperprior for the scale (standard deviation) of each
    # lower-level region slope; one hyperparameter per region
    halfcauchly_hyperprior = pm.HalfCauchy('halfcauchly_hyperprior', beta=1, shape=n_regions)

    # We define the intercept term as Gaussian with prior (mu=0, sigma=1)
    intercept = pm.Normal('intercept', mu=0, sigma=1)

    # Bottom-level region slopes (betas): Normal(mu=0, sigma=halfcauchly_hyperprior).
    # These are latent parameters to be estimated, so they must NOT be given
    # `observed=`; passing the input data as observations would fix the slopes
    # to the data and leave nothing to infer for them.
    beta_coefficients = pm.Normal('beta_coefficients', mu=0, sigma=halfcauchly_hyperprior, shape=n_regions)

    # Linear predictor: one value per subject. pm.math.dot builds the
    # matrix-vector product symbolically (data matrix times slope vector).
    # NOTE(review): `sex` and `delay` enter as plain additive 0/1 offsets with
    # an implicit coefficient of 1 - confirm this is the intended model.
    linear_model = intercept + pm.math.dot(input_variables, beta_coefficients) + sex + delay

    # Likelihood: observed (z-scored) age is Normal around the linear predictor with sigma=1
    likelihood = pm.Normal('likelihood', mu=linear_model, sigma=1, observed=output_variable)

    # We sample from the posterior distribution using 500 MCMC draws per chain
    trace = pm.sample(500, return_inferencedata=True)

    # We summarize the MCMC results with PyMC's summary helper, using 90% HDIs.
    # NOTE: the name `stats` is kept because the next cell displays it, but it
    # rebinds the `scipy.stats` alias imported at the top of the notebook.
    stats = pm.summary(trace, hdi_prob=0.9)

  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


In [None]:
# We're ready to inspect our last summary table!
# Display `stats` as an interactive Colab data table, including its index
# column and showing up to 201 rows per page.
# NOTE: `google.colab` is only importable when running on Google Colab.
from google.colab import data_table
data_table.DataTable(stats, include_index=True, num_rows_per_page=201)

Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
delay[0],1.000,0.000,1.000,1.000,0.000,0.000,1000.0,1000.0,
delay[1],0.000,0.000,0.000,0.000,0.000,0.000,1000.0,1000.0,
delay[2],1.000,0.000,1.000,1.000,0.000,0.000,1000.0,1000.0,
delay[3],0.000,0.000,0.000,0.000,0.000,0.000,1000.0,1000.0,
delay[4],0.000,0.000,0.000,0.000,0.000,0.000,1000.0,1000.0,
...,...,...,...,...,...,...,...,...,...
halfcauchly_hyperprior[95],1.008,0.077,0.888,1.126,0.002,0.001,1777.0,690.0,1.01
halfcauchly_hyperprior[96],1.006,0.074,0.888,1.122,0.001,0.001,2925.0,715.0,1.00
halfcauchly_hyperprior[97],1.008,0.075,0.887,1.131,0.002,0.001,2141.0,407.0,1.00
halfcauchly_hyperprior[98],1.011,0.076,0.891,1.136,0.002,0.001,2040.0,662.0,1.00
