In [40]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
from pyro import clear_param_store
import pyro.contrib.gp as gp
from pyro.nn import PyroSample
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS, Predictive,HMC
import torch
import kennard_stone as ks
import arviz as az
from sklearn.metrics import r2_score

In [41]:
train=pd.read_excel(r"../data/chem_train.xlsx")
test=pd.read_excel(r"../data/chem_test.xlsx")


In [42]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [43]:
train_no2=train.dropna(subset="no2")
X_no2=torch.tensor(train_no2.corrected_week.values).float().to(device)
y_no2=torch.tensor(train_no2.no2.values).float().to(device)
X_no2_test=torch.tensor(test.corrected_week.values).float().to(device)
y_no2_test=torch.tensor(test.no2.values).float().to(device)

In [44]:
clear_param_store()
rbf = gp.kernels.RBF(input_dim=1)


rbf.variance = PyroSample(dist.HalfNormal(torch.tensor(train.no2.mean())))

rbf.lengthscale = PyroSample(dist.HalfNormal(torch.tensor(10.)))

gpr = gp.models.GPRegression(X_no2,y_no2, rbf).to(device)
gpr.noise = PyroSample(dist.HalfNormal(torch.tensor(20.)))

In [45]:
nuts_kernel = NUTS(gpr.model)
mcmc = MCMC(nuts_kernel,warmup_steps=8000, num_samples=4000,num_chains=1)
mcmc.run()
torch.save(gpr, "../models/no2_seasonality");

Sample: 100%|██████████████████████████████████████| 12000/12000 [03:54, 51.25it/s, step size=7.48e-01, acc. prob=0.923]


In [46]:
posterior_samples = mcmc.get_samples(500)
posterior_predictive= Predictive(gpr, posterior_samples)(X_no2)
prior = Predictive(gpr, num_samples=500)(X_no2)

pyro_data = az.from_pyro(mcmc,
    prior=prior,
    posterior_predictive=posterior_predictive,

)
az.to_json(pyro_data, "../Arviz_stats/mcmc_no2_seasonality.json")



'../Arviz_stats/mcmc_no2_seasonality.json'

In [47]:
train_nox

Unnamed: 0.2,Unnamed: 0.1,cpnr,Unnamed: 0,maalnr,constrution_year,Area,floor_level,powerplant,road,motorway,...,gastove,fireplace,visit,no2,nox,formaldehyd,acetald,acetone,sample_time,corrected_week
0,0,419,1030.0,2.0,1968.0,136.0,0.0,0.0,11552.2,0.0,...,12.0,36.0,2,7.0,49.1,29.701935,2.751132,1.850476,71,44.000000
1,1,123,291.0,1.0,1980.0,142.0,0.0,0.0,15368.2,0.0,...,0.0,25.0,1,6.9,8.5,12.600000,11.700000,10.100000,83,50.857143
2,2,123,292.0,2.0,1980.0,142.0,0.0,0.0,15368.2,0.0,...,20.0,30.0,2,6.3,4.8,28.672236,10.675329,15.106293,70,29.571429
3,3,123,293.0,3.0,1980.0,142.0,0.0,0.0,15368.2,0.0,...,20.0,30.0,3,6.3,20.7,24.177584,11.346237,5.677348,76,47.285714
4,4,244,593.0,1.0,1965.0,121.8,0.0,195.3,19826.0,3023.1,...,0.0,0.0,1,8.7,19.7,23.691806,3.526763,3.829447,74,42.571429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1125,0,274,666.0,3.0,1962.0,75.0,1.0,7.1,15685.2,560.9,...,300.0,0.0,3,10.9,18.5,22.992311,2.211332,5.277120,71,-17.585714
1126,0,427,1052.0,2.0,1959.7,99.1,0.0,121.6,27399.0,1828.9,...,0.0,15.0,2,13.6,65.4,22.928374,1.834360,1.809277,91,-8.157143
1127,0,119,284.0,1.0,1890.0,205.0,0.0,0.0,2756.2,0.0,...,0.0,240.0,1,5.7,22.2,9.300000,8.300000,8.900000,72,-4.014286
1128,0,119,286.0,3.0,1890.0,205.0,0.0,0.0,2756.2,0.0,...,0.0,245.0,3,5.3,12.2,11.530052,8.254482,5.024362,88,-4.728571


In [48]:
train_nox=train.dropna(subset="nox")
X_nox=torch.tensor(train_nox.corrected_week.values).float().to(device)
y_nox=torch.tensor(train_nox.nox.values).float().to(device)
X_nox_test=torch.tensor(test.corrected_week.values).float().to(device)
y_nox_test=torch.tensor(test.nox.values).float().to(device)

In [31]:
clear_param_store()
rbf_bc = gp.kernels.RBF(input_dim=1)


rbf_bc.variance = PyroSample(dist.HalfNormal(torch.tensor(train.nox.mean())))

rbf_bc.lengthscale = PyroSample(dist.HalfNormal(torch.tensor(10.)))

gpr_bc = gp.models.GPRegression(X_nox,y_nox, rbf_bc).to(device)
gpr_bc.noise = PyroSample(dist.HalfNormal(torch.tensor(40.)))

In [32]:
nuts_kernel_bc = NUTS(gpr_bc.model)


mcmc_bc = MCMC(nuts_kernel_bc,warmup_steps=8000, num_samples=2000,num_chains=1)

mcmc_bc.run()
torch.save(gpr_bc, "../models/nox_seasonality");

Sample: 100%|██████████████████████████████████████| 10000/10000 [02:52, 57.90it/s, step size=8.51e-01, acc. prob=0.899]


In [33]:
posterior_samples_bc = mcmc_bc.get_samples(500)
posterior_predictive_bc = Predictive(gpr_bc, posterior_samples_bc)(X_nox)
prior_bc = Predictive(gpr_bc, num_samples=500)(X_nox)

pyro_data_bc = az.from_pyro(mcmc_bc,
    prior=prior_bc,
    posterior_predictive=posterior_predictive_bc,

)
az.to_json(pyro_data_bc, "../Arviz_stats/mcmc_nox_seasonality.json")



'../Arviz_stats/mcmc_nox_seasonality.json'