In [21]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
from pyro import clear_param_store
import pyro.contrib.gp as gp
from pyro.nn import PyroSample
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS, Predictive,HMC
import torch
import arviz as az

### Importing Data

In [22]:
# Read the training and test spreadsheets (paths are relative to the notebook).
train_path = r"../data/chem_train.xlsx"
test_path = r"../data/chem_test.xlsx"
train, test = (pd.read_excel(path) for path in (train_path, test_path))

Warmup:   0%|▏                                        | 42/10000 [00:35,  1.20it/s, step size=1.22e-02, acc. prob=0.776]


### Preparing data for the Gaussian process for formaldehyde (`formaldehyd`)

In [23]:
# Seed torch's global RNG (Pyro samples through it) so that a fresh
# "Restart & Run All" reproduces the same MCMC draws and predictive samples.
SEED = 42
torch.manual_seed(SEED)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Drop rows where the formaldehyd measurement is missing.
# `subset` is passed as a list because older pandas releases only accept a sequence here.
train_formaldehyd = train.dropna(subset=["formaldehyd"])

# Convert the corrected_week column (input) and the formaldehyd column (target)
# to float32 tensors and move them to `device`.
X_form = torch.tensor(train_formaldehyd.corrected_week.values).float().to(device)
y_form = torch.tensor(train_formaldehyd.formaldehyd.values).float().to(device)

### Defining model and priors 

In [24]:
# Reset Pyro's global parameter store before building a new model.
clear_param_store()
# RBF kernel over the one-dimensional input.
rbf = gp.kernels.RBF(input_dim=1)

# All prior scales are created as float32 tensors on `device` so the sampled
# hyper-parameters share dtype and device with X_form / y_form.
# (Series.mean() yields a float64 NumPy scalar and torch.tensor defaults to the CPU.)

# Kernel variance: half-normal prior whose scale is the mean of the formaldehyd column.
rbf.variance = PyroSample(
    dist.HalfNormal(torch.tensor(train.formaldehyd.mean(), dtype=torch.float32, device=device))
)
# Kernel lengthscale: half-normal prior with scale 10.
# NOTE(review): the original comment explaining this choice was left unfinished
# ("... due to formaldehyd being a") — TODO document the rationale.
rbf.lengthscale = PyroSample(
    dist.HalfNormal(torch.tensor(10., dtype=torch.float32, device=device))
)

gpr = gp.models.GPRegression(X_form, y_form, rbf).to(device)
# Observation noise: half-normal prior with scale 10.
gpr.noise = PyroSample(
    dist.HalfNormal(torch.tensor(10., dtype=torch.float32, device=device))
)

### Training the MCMC sampler and saving the model

In [25]:
# Sample the formaldehyd GP hyper-parameters with NUTS:
# 8000 warm-up steps followed by 2000 retained draws on a single chain.
nuts_kernel = NUTS(gpr.model)
mcmc = MCMC(
    nuts_kernel,
    num_samples=2000,
    warmup_steps=8000,
    num_chains=1,
)
mcmc.run()

# Serialise the GPRegression object to disk.
torch.save(gpr, "../models/form_seasonality");

Sample: 100%|██████████████████████████████████████| 10000/10000 [03:15, 51.20it/s, step size=7.38e-01, acc. prob=0.909]


### Creating the ArviZ dataset and saving it

In [26]:

# Take 500 draws from the formaldehyd chain and feed them to Predictive on X_form.
posterior_samples = mcmc.get_samples(num_samples=500)
posterior_predictor = Predictive(gpr, posterior_samples)
posterior_predictive = posterior_predictor(X_form)

# Predictive without posterior samples (500 draws); stored as the prior group below.
prior_predictor = Predictive(gpr, num_samples=500)
prior = prior_predictor(X_form)

# Bundle the chain, prior and posterior-predictive draws into one ArviZ object.
pyro_data = az.from_pyro(mcmc, prior=prior, posterior_predictive=posterior_predictive)

# Kept as the last expression so the value returned by to_json is shown as the cell output.
az.to_json(pyro_data, "../Arviz_stats/mcmc_form_seasonality.json")



'../Arviz_stats/mcmc_form_seasonality.json'

### Preparing data for the Gaussian process for acetaldehyde (`acetald`)

In [27]:
# Keep only the rows that have an acetald measurement.
train_acetald = train.dropna(subset=["acetald"])

# float32 tensors on `device`: corrected_week column as input, acetald column as target.
X_acetald = torch.tensor(
    train_acetald["corrected_week"].values, dtype=torch.float32, device=device
)
y_acetald = torch.tensor(
    train_acetald["acetald"].values, dtype=torch.float32, device=device
)

### Defining model and priors 

In [28]:
# Reset Pyro's global parameter store before building the acetald model.
clear_param_store()
# RBF kernel over the one-dimensional input.
rbf_bc = gp.kernels.RBF(input_dim=1)

# All prior scales are created as float32 tensors on `device` so the sampled
# hyper-parameters share dtype and device with X_acetald / y_acetald.
# (Series.mean() yields a float64 NumPy scalar and torch.tensor defaults to the CPU.)

# Kernel variance: half-normal prior whose scale is the mean of the acetald column.
rbf_bc.variance = PyroSample(
    dist.HalfNormal(torch.tensor(train.acetald.mean(), dtype=torch.float32, device=device))
)
# Kernel lengthscale: half-normal prior with scale 10.
rbf_bc.lengthscale = PyroSample(
    dist.HalfNormal(torch.tensor(10., dtype=torch.float32, device=device))
)

gpr_bc = gp.models.GPRegression(X_acetald, y_acetald, rbf_bc).to(device)
# Observation noise: half-normal prior with scale 10.
gpr_bc.noise = PyroSample(
    dist.HalfNormal(torch.tensor(10., dtype=torch.float32, device=device))
)


### Training the MCMC sampler and saving the model

In [29]:
# Sample the acetald GP hyper-parameters with NUTS:
# 8000 warm-up steps followed by 2000 retained draws on a single chain.
nuts_kernel_bc = NUTS(gpr_bc.model)
mcmc_bc = MCMC(
    nuts_kernel_bc,
    num_samples=2000,
    warmup_steps=8000,
    num_chains=1,
)
mcmc_bc.run()

# Serialise the GPRegression object to disk.
torch.save(gpr_bc, "../models/acetald_seasonality");

Sample: 100%|██████████████████████████████████████| 10000/10000 [03:17, 50.71it/s, step size=7.49e-01, acc. prob=0.913]


### Making the ArviZ dataset for acetaldehyde (`acetald`)

In [30]:
# Take 500 draws from the acetald chain and feed them to Predictive on X_acetald.
posterior_samples_bc = mcmc_bc.get_samples(num_samples=500)
posterior_predictor_bc = Predictive(gpr_bc, posterior_samples_bc)
posterior_predictive_bc = posterior_predictor_bc(X_acetald)

# Predictive without posterior samples (500 draws); stored as the prior group below.
prior_predictor_bc = Predictive(gpr_bc, num_samples=500)
prior_bc = prior_predictor_bc(X_acetald)

# Bundle the chain, prior and posterior-predictive draws into one ArviZ object.
pyro_data_bc = az.from_pyro(
    mcmc_bc, prior=prior_bc, posterior_predictive=posterior_predictive_bc
)

# Kept as the last expression so the value returned by to_json is shown as the cell output.
az.to_json(pyro_data_bc, "../Arviz_stats/mcmc_acetald_seasonality.json")



'../Arviz_stats/mcmc_acetald_seasonality.json'

### Preparing data for the Gaussian process for acetone

In [31]:
# Keep only the rows that have an acetone measurement.
train_acetone = train.dropna(subset=["acetone"])

# float32 tensors on `device`: corrected_week column as input, acetone column as target.
X_acetone = torch.tensor(
    train_acetone["corrected_week"].values, dtype=torch.float32, device=device
)
y_acetone = torch.tensor(
    train_acetone["acetone"].values, dtype=torch.float32, device=device
)

### Defining model and priors for acetone

In [32]:
# Reset Pyro's global parameter store before building the acetone model.
clear_param_store()
# RBF kernel over the one-dimensional input.
rbf_ac = gp.kernels.RBF(input_dim=1)

# All prior scales are created as float32 tensors on `device` so the sampled
# hyper-parameters share dtype and device with X_acetone / y_acetone.
# (Series.mean() yields a float64 NumPy scalar and torch.tensor defaults to the CPU.)

# Kernel variance: half-normal prior whose scale is the mean of the acetone column.
rbf_ac.variance = PyroSample(
    dist.HalfNormal(torch.tensor(train.acetone.mean(), dtype=torch.float32, device=device))
)
# Kernel lengthscale: half-normal prior with scale 10.
rbf_ac.lengthscale = PyroSample(
    dist.HalfNormal(torch.tensor(10., dtype=torch.float32, device=device))
)

gpr_ac = gp.models.GPRegression(X_acetone, y_acetone, rbf_ac).to(device)
# Observation noise: half-normal prior with scale 10.
gpr_ac.noise = PyroSample(
    dist.HalfNormal(torch.tensor(10., dtype=torch.float32, device=device))
)

Sample: 100%|██████████████████████████████████████| 10000/10000 [03:41, 45.18it/s, step size=7.16e-01, acc. prob=0.888]


### Training the MCMC sampler and saving the Gaussian process model for acetone


In [None]:
# Sample the acetone GP hyper-parameters with NUTS:
# 8000 warm-up steps followed by 2000 retained draws on a single chain.
nuts_kernel_ac = NUTS(gpr_ac.model)
mcmc_ac = MCMC(
    nuts_kernel_ac,
    num_samples=2000,
    warmup_steps=8000,
    num_chains=1,
)
mcmc_ac.run()

# Serialise the GPRegression object to disk.
torch.save(gpr_ac, "../models/acetone_seasonality");

### Making the ArviZ dataset (parameter statistics) for the acetone MCMC sampler and saving it to a JSON file

In [34]:
# Take 500 draws from the acetone chain.
posterior_samples_ac = mcmc_ac.get_samples(500)
# Posterior predictive must come from the acetone model (gpr_ac).
# The original passed gpr_bc, i.e. the acetald model, so acetone hyper-parameter
# draws were pushed through a GP conditioned on the acetald data.
posterior_predictive_ac = Predictive(gpr_ac, posterior_samples_ac)(X_acetone)
# Predictive without posterior samples (500 draws); stored as the prior group below.
prior_ac = Predictive(gpr_ac, num_samples=500)(X_acetone)

# Bundle the chain, prior and posterior-predictive draws into one ArviZ object.
pyro_data_ac = az.from_pyro(mcmc_ac,
    prior=prior_ac,
    posterior_predictive=posterior_predictive_ac,
)
# Kept as the last expression so the value returned by to_json is shown as the cell output.
az.to_json(pyro_data_ac, "../Arviz_stats/mcmc_acetone_seasonality.json")



'../Arviz_stats/mcmc_acetone_seasonality.json'