In [4]:
%load_ext autoreload
%autoreload 2

import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import boto3
from tqdm.notebook import tqdm
plt.style.use('rossidata')
from sklearn.model_selection import KFold,train_test_split,LeaveOneOut


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
from sklearn.datasets import load_boston

In [6]:
# Load the Boston data as a (features, target) pair.
X, y = load_boston(return_X_y=True)

# Hold back 10% of the rows as a final holdout set; the fixed random_state
# keeps the split identical across re-runs.
(X_dataset, X_holdout,
 y_dataset, y_holdout) = train_test_split(X, y, random_state=42, test_size=0.1)


In [None]:
def create_poly_modelspec(k=1,features=('cut2mut',),response='hdr_pct'):
    '''
    Convenience function:
    Create a polynomial modelspec string of the form
    ``response ~ term + term + ...``.

    Every feature contributes itself followed by one
    ``np.power(feature,j)`` term for each degree j in 2..k.

    Parameters
    ----------
    k : int
        Highest polynomial degree. Values below 2 yield linear terms only.
    features : str or iterable of str
        Name(s) of the predictor columns. A single string is treated as
        one feature name.
    response : str
        Name placed on the left-hand side of ``~``.

    Returns
    -------
    str
        The formula, with terms separated by exactly `` + `` and no
        leading or trailing whitespace around the term list.
    '''
    # A bare string would otherwise be iterated character by character.
    if isinstance(features,str):
        features=[features]

    terms=[]
    for feature in features:
        terms.append(feature)
        terms.extend('np.power({},{})'.format(feature,j) for j in range(2,k+1))

    return '{} ~ {}'.format(response,' + '.join(terms))

def model_factory(x, y):
    '''
    Build a PyMC3 GLM for ``hdr_pct`` from the given predictors and response.

    Parameters
    ----------
    x : pandas.DataFrame
        Predictor table. It is copied, so the caller's frame is not modified.
        Must contain the columns named in the formula below
        (``cut2mut``, ``donor_arm_left``, ``donor_arm_right``).
    y : array-like
        Response values, stored in the copy as column ``hdr_pct``; must be
        assignable as a column of ``x`` (same number of rows).

    Returns
    -------
    The ``pm.Model`` in which the GLM was defined.

    Notes
    -----
    Relies on ``pm`` being available in the notebook namespace; no import
    of it is visible in this section of the notebook.
    '''
    # Use the argument, not the notebook-level `X`: each call has to build the
    # model on the rows it was handed (training rows vs. holdout rows).
    all_data=x.copy()
    all_data['hdr_pct']=y
    formula=create_poly_modelspec(k=1,features=['cut2mut','donor_arm_left','donor_arm_right'])
    with pm.Model() as model:

        # The GLM uses a Student-T family.
        # NOTE(review): confirm that `scale` is an argument this family consumes.
        family = pm.glm.families.StudentT(scale=3)

        # Creating the model requires a formula and data (and optionally a family)
        pm.GLM.from_formula(formula, data = all_data,family=family)

    return model


# One seed for every sampling call below, so that re-running the notebook
# reproduces the same draws.
SAMPLING_SEED = 42

# Perform the training to get the trace
with model_factory(X_dataset, y_dataset) as model:
    train_trace = pm.sample(draws=2000, chains = 2, tune = 1000, random_seed=SAMPLING_SEED)
    pm.traceplot(train_trace)
    pm.plot_posterior(train_trace)
    ppc = pm.sample_posterior_predictive(train_trace, random_seed=SAMPLING_SEED)
    train_predictions=ppc['y']
    # One column of predictions is expected per training observation.
    assert np.shape(train_predictions)[1]==len(y_dataset), (
        'expected {} training prediction columns, got shape {}'.format(
            len(y_dataset), np.shape(train_predictions)))

# Rebuild the model around the holdout rows and reuse the training trace to
# draw posterior predictive samples for them.
with model_factory(X_holdout, y_holdout) as test_model:
    ppc = pm.sample_posterior_predictive(train_trace, random_seed=SAMPLING_SEED)
    holdout_predictions=ppc['y']
    # One column of predictions is expected per holdout observation.
    assert np.shape(holdout_predictions)[1]==len(y_holdout), (
        'expected {} holdout prediction columns, got shape {}'.format(
            len(y_holdout), np.shape(holdout_predictions)))