In [3]:
import numpy as np
import pandas as pd

from jax import numpy as jnp
from jax import ops, random
from jax.scipy.special import expit

import numpyro
from numpyro import distributions as dist
from numpyro.distributions import constraints
from numpyro.infer import MCMC, NUTS, Predictive
from sklearn.model_selection import train_test_split
from icecream import ic
from sklearn.metrics import mean_absolute_error


import random as rd
import time
def m_mape(y_true, y_predict):
    n = len(y_true)
    At = np.array(y_true) + 1
    Ft = np.array(y_predict) + 1

    res = ((100/n)*(np.sum(np.abs((Ft-At)/At))))
    return res


https://num.pyro.ai/en/latest/tutorials/bayesian_imputation.html?highlight=imputation
https://num.pyro.ai/en/latest/utilities.html?highlight=Predictive%20regression#numpyro.infer.util.Predictive
https://num.pyro.ai/en/latest/tutorials/bayesian_regression.html?highlight=Predictive%20regression#Bayesian-Regression-Using-NumPyro (Posterior predictive)

In [23]:
dataset = pd.read_csv("../data/X_station_day.csv")
dataset.head()

Unnamed: 0,station_id,year,month,day,latitude,longitude,altitude,wind_direction,wind_speed,temperature,humidity,dew_point,precipitations,ground_truth
0,14066001,2016,1,1,49.334,-0.431,2,146.5,3.91375,280.33374,88.59167,278.5146,0.2,3.4
1,14066001,2016,1,2,49.334,-0.431,2,205.625,8.04125,282.93668,82.3,279.9975,3.4,11.7
2,14066001,2016,1,3,49.334,-0.431,2,195.25,5.430417,281.10165,86.604164,278.9975,11.7,0.6
3,14066001,2016,1,4,49.334,-0.431,2,212.66667,6.715417,281.055,80.645836,277.90082,0.6,0.4
4,14066001,2016,1,5,49.334,-0.431,2,205.04167,5.957083,281.25583,82.75,278.48416,0.4,3.0


In [24]:
columns = dataset.columns.tolist()
for i in range(len(columns)):
    for j in range(i, len(columns)):
        if i != j:
            spearman_correlation = dataset[columns[i]].corr(dataset[columns[j]], method='spearman')
            pearson_correlation = dataset[columns[i]].corr(dataset[columns[j]], method='pearson')
            if max(np.abs(spearman_correlation), np.abs(pearson_correlation)) > 0.4 :
                print('Correlation between ', columns[i], 'and', columns[j])
                print('Pearson\'s correlation: %.3f' % spearman_correlation)
                print('Spearman\'s correlation: %.3f' % pearson_correlation)

Correlation between  temperature and humidity
Pearson's correlation: -0.355
Spearman's correlation: -0.401
Correlation between  temperature and dew_point
Pearson's correlation: 0.935
Spearman's correlation: 0.924
Correlation between  humidity and precipitations
Pearson's correlation: 0.426
Spearman's correlation: 0.257


# General functions

In [4]:
def get_normalization_infos(*x_s, columns):
    normalization_infos = pd.DataFrame(data=[[1000 for _ in range(len(columns))],[0 for _ in range(len(columns))]],
                                       index=["min","max"],
                                       columns=columns)
    for x in x_s :
        for col in columns:
            min_value = min(normalization_infos[col]["min"], x[col].min())
            max_value = max(normalization_infos[col]["max"], x[col].max())
            normalization_infos[col] = [min_value, max_value]

    normalization_infos.loc["spread"] = normalization_infos.apply(lambda c : c["max"] - c["min"], axis=0)

    return normalization_infos


def normalize(x : pd.DataFrame, normalization_infos : pd.DataFrame):
    for col in x.columns:
        x[col] = (x[col] - normalization_infos[col]["min"])/normalization_infos[col]["spread"]
    return x

def de_normalize(x : pd.DataFrame, normalization_infos : pd.DataFrame):
    for col in x.columns:
        x[col] = x[col] * normalization_infos[col]["spread"] + normalization_infos[col]["min"]
    return x


def create_nans(dataset, columns, ratio_nan):
    for col in columns:
        random_vec = np.random.random(dataset[col].shape) < 1 - ratio_nan
        dataset[col] = dataset[col].where(random_vec, other=np.nan)
    return dataset

In [5]:
def model_ground_truth(
        latitude, longitude, altitude, wind_direction, wind_speed, temperature, humidity, dew_point, precipitations, mu=None, sigma=None, ground_truth=None,
        nan_columns = None
):
    lat, long, alt, w_d, w_s, temp, hum, d_pt, prec = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    default = numpyro.sample("default", dist.Normal(0.0, 0.2))
    if latitude is not None:
        bayes_latitude = numpyro.sample('bayes_latitude', dist.Normal(0, 1))
        lat = bayes_latitude * latitude
    if longitude is not None:
        bayes_longitude = numpyro.sample('bayes_longitude', dist.Normal(0, 1))
        long = bayes_longitude * longitude
    if altitude is not None:
        bayes_altitude = numpyro.sample('bayes_altitude', dist.Normal(0, 1))
        alt = bayes_altitude * altitude

    if 'wind_direction' in nan_columns:
        wd_mu = numpyro.sample("wd_mu", dist.Normal(mu['wind_direction'], 0.2))
        wd_sigma = numpyro.sample("wd_sigma", dist.Normal(sigma['wind_direction'], 0.2))

        wind_direction_nanidx = np.array(np.isnan(wind_direction).astype(int)).nonzero()[0]
        wd_impute = numpyro.sample("wd_impute", dist.Normal(wd_mu, wd_sigma)
                                            .expand((len(wind_direction_nanidx),))
                                            .mask(False))

        wind_direction = ops.index_update(wind_direction, wind_direction_nanidx, wd_impute)

        numpyro.sample("latent_wind_direction", dist.Normal(mu['wind_direction'], sigma['wind_direction']), obs=wind_direction)
        bayes_wind_direction = numpyro.sample("bayes_wind_direction", dist.Normal(0, 1))
        w_d = bayes_wind_direction * wind_direction
    else:
        if wind_direction is not None:
            bayes_wind_direction = numpyro.sample('bayes_wind_direction', dist.Normal(0, 1))
            w_d = bayes_wind_direction * wind_direction

    if 'wind_speed' in nan_columns:
        ws_mu = numpyro.sample("ws_mu", dist.Normal(mu['wind_speed'], 0.2))
        ws_sigma = numpyro.sample("ws_sigma", dist.Normal(sigma['wind_speed'], 0.2))

        wind_speed_nanidx = np.array(np.isnan(wind_speed).astype(int)).nonzero()[0]
        ws_impute = numpyro.sample("ws_impute", dist.Normal(ws_mu, ws_sigma)
                                   .expand((len(wind_speed_nanidx),))
                                   .mask(False))

        wind_speed = ops.index_update(wind_speed, wind_speed_nanidx, ws_impute)

        numpyro.sample("latent_wind_speed", dist.Normal(mu['wind_speed'], sigma['wind_speed']), obs=wind_speed)
        bayes_wind_speed = numpyro.sample("bayes_wind_speed", dist.Normal(0, 1))
        w_s = bayes_wind_speed * wind_speed
    else:
        if wind_speed is not None:
            bayes_wind_speed = numpyro.sample('bayes_wind_speed', dist.Normal(0, 1))
            w_s = bayes_wind_speed * wind_speed

    if 'temperature' in nan_columns:
        temp_mu = numpyro.sample("temp_mu", dist.Normal(mu['temperature'], 0.2))
        temp_sigma = numpyro.sample("temp_sigma", dist.Normal(sigma['temperature'], 0.2))

        temperature_nanidx = np.array(np.isnan(temperature).astype(int)).nonzero()[0]
        temperature_impute = numpyro.sample("temperature_impute", dist.Normal(temp_mu, temp_sigma)
                                         .expand((len(temperature_nanidx),))
                                         .mask(False))

        temperature = ops.index_update(temperature, temperature_nanidx, temperature_impute)

        numpyro.sample("latent_temperature", dist.Normal(mu['temperature'], sigma['temperature']), obs=temperature)
        bayes_temperature = numpyro.sample("bayes_temperature", dist.Normal(0, 1))
        temp = bayes_temperature * temperature
    else:
        if temperature is not None:
            bayes_temperature = numpyro.sample('bayes_temperature', dist.Normal(0, 1))
            temp = bayes_temperature * temperature

    if 'humidity' in nan_columns:
        hum_mu = numpyro.sample("hum_mu", dist.Normal(mu['humidity'], 0.2))
        hum_sigma = numpyro.sample("hum_sigma", dist.Normal(sigma['humidity'], 0.2))

        humidity_nanidx = np.array(np.isnan(humidity).astype(int)).nonzero()[0]
        humidity_impute = numpyro.sample("humidity_impute", dist.Normal(hum_mu, hum_sigma)
                                          .expand((len(humidity_nanidx),))
                                          .mask(False))

        humidity = ops.index_update(humidity, humidity_nanidx, humidity_impute)

        numpyro.sample("latent_humidity", dist.Normal(mu['humidity'], sigma['humidity']), obs=humidity)
        bayes_humidity = numpyro.sample('bayes_humidity', dist.Normal(0, 1))
        hum = bayes_humidity * humidity
    else:
        if humidity is not None:
            bayes_humidity = numpyro.sample('bayes_humidity', dist.Normal(0, 1))
            hum = bayes_humidity * humidity

    if 'dew_point' in nan_columns:
        dew_point_mu = numpyro.sample("dew_point_mu", dist.Normal(mu['dew_point'], 0.2))
        dew_point_sigma = numpyro.sample("dew_point_sigma", dist.Normal(sigma['dew_point'], 0.2))

        dew_point_nanidx = np.array(np.isnan(dew_point).astype(int)).nonzero()[0]
        dew_point_impute = numpyro.sample("dew_point_impute", dist.Normal(dew_point_mu, dew_point_sigma)
                                               .expand((len(dew_point_nanidx),))

                                               .mask(False))

        dew_point = ops.index_update(dew_point, dew_point_nanidx, dew_point_impute)

        numpyro.sample("latent_dew_point", dist.Normal(mu['dew_point'], sigma['dew_point']), obs=dew_point)
        bayes_dew_point = numpyro.sample('bayes_dew_point', dist.Normal(0, 1))
        d_pt = bayes_dew_point * dew_point
    else:
        if dew_point is not None:
            bayes_dew_point = numpyro.sample('bayes_dew_point', dist.Normal(0, 1))
            d_pt = bayes_dew_point * dew_point
    if 'precipitations' in nan_columns:
        precipitations_mu = numpyro.sample("precipitations_mu", dist.Normal(mu['precipitations'], 0.2))
        precipitations_sigma = numpyro.sample("precipitations_sigma", dist.Normal(sigma['precipitations'], 0.2))

        precipitations_nanidx = np.array(np.isnan(precipitations).astype(int)).nonzero()[0]
        precipitations_impute = numpyro.sample("precipitations_impute", dist.Normal(precipitations_mu, precipitations_sigma)
                                            .expand((len(precipitations_nanidx),))
                                            .mask(False))

        precipitations = ops.index_update(precipitations, precipitations_nanidx, precipitations_impute)

        numpyro.sample("latent_precipitations", dist.Normal(mu['precipitations'], sigma['precipitations']), obs=precipitations)
        bayes_precipitations = numpyro.sample('bayes_precipitations', dist.Normal(0, 1))
        prec = bayes_precipitations * precipitations
    else:
        if precipitations is not None:
            bayes_precipitations = numpyro.sample('bayes_precipitations', dist.Normal(0, 1))
            prec = bayes_precipitations * precipitations

    sigma_model = numpyro.sample("sigma", dist.Exponential(1.0))
    mu_model = default + lat + long + alt + w_d + w_s + temp + hum + d_pt + prec
    # print("sigma", sigma_model, "mu", mu_model)
    numpyro.sample("ground_truth", dist.Normal(mu_model, sigma_model), obs=ground_truth)

# Data fully provided

In [27]:
def get_data(normalization = True):
    dataset = pd.read_csv("../data/X_station_day.csv")
    del dataset['station_id']
    ground_truth = dataset.ground_truth.values
    del dataset['ground_truth']

    columns = dataset.columns.tolist()
    print(columns)

    if normalization:
        normalisation_infos = get_normalization_infos(dataset, columns=columns)
        dataset = normalize(dataset, normalisation_infos)

    data = dict(
        latitude = dataset.latitude.values,
        longitude = dataset.longitude.values,
        altitude = dataset.altitude.values,
        wind_direction = dataset.wind_direction.values,
        wind_speed = dataset.wind_speed.values,
        temperature = dataset.temperature.values,
        humidity = dataset.humidity.values,
        dew_point = dataset.dew_point.values,
        precipitations = dataset.precipitations.values,
    )

    mu_col = dict()
    sigma_col = dict()

    for column in column_to_impute:
        mu_col[column] = dataset[column].mean()
        sigma_col[column] = dataset[column].std()

    print(mu_col)
    print(sigma_col)

    return data, ground_truth, mu_col, sigma_col

### Without normalisation

In [28]:
data, ground_truth, mu_col, sigma_col = get_data(normalization=False)

mcmc = MCMC(NUTS(model_ground_truth), num_warmup=1000, num_samples=1000)
mcmc.run(random.PRNGKey(0), **data, ground_truth=ground_truth, nan_columns = [], mu=mu_col, sigma=sigma_col)
# Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
mcmc.print_summary()

['year', 'month', 'day', 'latitude', 'longitude', 'altitude', 'wind_direction', 'wind_speed', 'temperature', 'humidity', 'dew_point', 'precipitations']


NameError: name 'column_to_impute' is not defined

### With normalisation

In [None]:
data, ground_truth, mu_col, sigma_col = get_data(normalization=True)

mcmc = MCMC(NUTS(model_ground_truth), num_warmup=1000, num_samples=1000)
mcmc.run(random.PRNGKey(0), **data, ground_truth=ground_truth, nan_columns = [], mu=mu_col, sigma=sigma_col)
# Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
mcmc.print_summary()

In [None]:
from numpyro.infer import Predictive

posterior = mcmc.get_samples()
ground_truth_pred = Predictive(model_ground_truth, posterior)(random.PRNGKey(1), **data,  nan_columns = [])["ground_truth"]
ground_truth_pred = ground_truth_pred.mean(axis=0)

print(len(ground_truth_pred))
print(ground_truth_pred)
print(ground_truth)

In [None]:
from sklearn.metrics import mean_absolute_error

def m_mape(y_true, y_predict):
    n = len(y_true)
    At = np.array(y_true) + 1
    Ft = np.array(y_predict) + 1

    res = ((100/n)*(np.sum(np.abs((Ft-At)/At))))
    return res

print("MAPE : ", m_mape(ground_truth, ground_truth_pred))
print("Mean Absolute Error : ", mean_absolute_error(ground_truth, ground_truth_pred))

# Imputation part


In [None]:
dataset = pd.read_csv("../data/X_station_day.csv")
del dataset['station_id']
ground_truth = dataset.ground_truth.values
del dataset['ground_truth']

columns = dataset.columns.tolist()
print(columns)

normalisation_infos = get_normalization_infos(dataset, columns=columns)
dataset = normalize(dataset, normalisation_infos)

# column_to_impute = ['wind_direction', 'temperature', 'humidity', 'dew_point', 'precipitations']
column_to_impute = ['wind_direction'] # , 'wind_speed']
dataset = create_nans(dataset, column_to_impute, 0.01)

print(dataset['precipitations'].isna().sum())

data = dict(
    latitude = dataset.latitude.values,
    longitude = dataset.longitude.values,
    altitude = dataset.altitude.values,
    wind_direction = dataset.wind_direction.values,
    wind_speed = dataset.wind_speed.values,
    temperature = dataset.temperature.values,
    humidity = dataset.humidity.values,
    dew_point = dataset.dew_point.values,
    precipitations = dataset.precipitations.values,
)

mu_col = dict()
sigma_col = dict()

for column in column_to_impute:
    mu_col[column] = dataset[column].mean()
    sigma_col[column] = dataset[column].std()

print(mu_col)
print(sigma_col)


In [None]:
# WITH NaNS (ratio 0.4)
mcmc = MCMC(NUTS(model_ground_truth), num_warmup=1000, num_samples=1000)
mcmc.run(random.PRNGKey(0), **data, ground_truth=ground_truth, nan_columns = column_to_impute, mu=mu_col, sigma=sigma_col)
# Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
mcmc.print_summary()

In [None]:
from numpyro.infer import Predictive

posterior = mcmc.get_samples()
ground_truth_pred = Predictive(model_ground_truth, posterior)(random.PRNGKey(1), **data,  nan_columns = [])["ground_truth"]
ground_truth_pred = ground_truth_pred.mean(axis=0)
# print("Accuracy:", (survived_pred == survived).sum() / survived.shape[0])

print(len(ground_truth_pred))
print(ground_truth_pred)
print(ground_truth)

In [None]:
from sklearn.metrics import mean_absolute_error

def m_mape(y_true, y_predict):
    n = len(y_true)
    At = np.array(y_true) + 1
    Ft = np.array(y_predict) + 1

    res = ((100/n)*(np.sum(np.abs((Ft-At)/At))))
    return res

print("MAPE : ", m_mape(ground_truth, ground_truth_pred))
print("Mean Absolute Error : ", mean_absolute_error(ground_truth, ground_truth_pred))

## Imputation by inference

In [9]:
def model_inference_wind_direction(
        latitude, longitude, altitude, wind_speed, temperature, humidity, dew_point, precipitations, mu=None, sigma=None, wind_direction=None,
        nan_columns = None, prec_nan=False
):
    lat, long, alt, w_s, temp, hum, d_pt, prec = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    default = numpyro.sample("default", dist.Normal(0.0, 0.2))
    if latitude is not None:
        bayes_latitude = numpyro.sample('bayes_latitude', dist.Normal(0, 1))
        lat = bayes_latitude * latitude
    if longitude is not None:
        bayes_longitude = numpyro.sample('bayes_longitude', dist.Normal(0, 1))
        long = bayes_longitude * longitude
    if altitude is not None:
        bayes_altitude = numpyro.sample('bayes_altitude', dist.Normal(0, 1))
        alt = bayes_altitude * altitude
    if wind_speed is not None:
            bayes_wind_speed = numpyro.sample('bayes_wind_speed', dist.Normal(0, 1))
            w_s = bayes_wind_speed * wind_speed
    if temperature is not None:
            bayes_temperature = numpyro.sample('bayes_temperature', dist.Normal(0, 1))
            temp = bayes_temperature * temperature
    if humidity is not None:
            bayes_humidity = numpyro.sample('bayes_humidity', dist.Normal(0, 1))
            hum = bayes_humidity * humidity
    if dew_point is not None:
            bayes_dew_point = numpyro.sample('bayes_dew_point', dist.Normal(0, 1))
            d_pt = bayes_dew_point * dew_point
    if precipitations is not None:
            bayes_precipitations = numpyro.sample('bayes_precipitations', dist.Normal(0, 1))
            prec = bayes_precipitations * precipitations

    sigma_model = numpyro.sample("sigma", dist.Exponential(1.0))
    if not prec_nan:
        mu_model = default + lat + long + alt + w_s + temp + hum + d_pt + prec
    else:
        mu_model = default + lat + long + alt + w_s + temp + hum + d_pt
    # print("sigma", sigma_model, "mu", mu_model)
    numpyro.sample("wind_direction", dist.Normal(mu_model, sigma_model), obs=wind_direction)

def model_inference_precipitation(
        latitude, longitude, altitude, wind_direction, wind_speed, temperature, humidity, dew_point, mu=None, sigma=None, precipitations=None,
        nan_columns = None
):
    lat, long, alt, w_d, w_s, temp, hum, d_pt = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    default = numpyro.sample("default", dist.Normal(0.0, 0.2))
    if latitude is not None:
        bayes_latitude = numpyro.sample('bayes_latitude', dist.Normal(0, 1))
        lat = bayes_latitude * latitude
    if longitude is not None:
        bayes_longitude = numpyro.sample('bayes_longitude', dist.Normal(0, 1))
        long = bayes_longitude * longitude
    if altitude is not None:
        bayes_altitude = numpyro.sample('bayes_altitude', dist.Normal(0, 1))
        alt = bayes_altitude * altitude
    if wind_direction is not None:
        bayes_wind_direction = numpyro.sample('bayes_wind_direction', dist.Normal(0, 1))
        w_d = bayes_wind_direction* wind_direction
    if wind_speed is not None:
        bayes_wind_speed = numpyro.sample('bayes_wind_speed', dist.Normal(0, 1))
        w_s = bayes_wind_speed * wind_speed
    if temperature is not None:
        bayes_temperature = numpyro.sample('bayes_temperature', dist.Normal(0, 1))
        temp = bayes_temperature * temperature
    if humidity is not None:
        bayes_humidity = numpyro.sample('bayes_humidity', dist.Normal(0, 1))
        hum = bayes_humidity * humidity
    if dew_point is not None:
        bayes_dew_point = numpyro.sample('bayes_dew_point', dist.Normal(0, 1))
        d_pt = bayes_dew_point * dew_point

    sigma_model = numpyro.sample("sigma", dist.Exponential(1.0))
    mu_model = default + lat + long + alt + w_d + w_s + temp + hum + d_pt
    # print("sigma", sigma_model, "mu", mu_model)
    numpyro.sample("precipitations", dist.Normal(mu_model, sigma_model), obs=precipitations)

#### Get data and simulate some NaNs
#### Separate the data with NaNs from the others and create the data to use to fit the wind_direction inference model

In [10]:
def get_data_imputation_wd(ratio = 0.01):
    dataset = pd.read_csv("../data/X_station_day.csv")
    del dataset['station_id']

    columns = ['latitude', 'longitude', 'altitude', 'wind_direction', 'wind_speed', 'temperature', 'humidity', 'dew_point', 'precipitations']
    print(columns)

    normalisation_infos = get_normalization_infos(dataset, columns=columns)
    dataset.loc[:, columns] = normalize(dataset.loc[:, columns], normalisation_infos)

    nanidx = rd.sample(range(0, dataset.shape[0]), int(ratio*dataset.shape[0]))

    true_wind_direction = dataset.iloc[nanidx]['wind_direction']

    dataset.loc[nanidx, 'wind_direction'] = np.nan * np.ones(shape=(int(ratio*dataset.shape[0]),1))

    print(dataset['wind_direction'].isna().sum())

    return dataset, nanidx, true_wind_direction

def prepare_data_imputation_wd(dataset, nanidx):
    # nanidx = np.array(np.isnan(dataset['wind_direction']).astype(int)).nonzero()[0]
    dataset_to_impute = dataset.iloc[nanidx]

    dataset = dataset.drop(index=nanidx)

    print(dataset['wind_direction'].isna().sum())

    wind_direction = dataset.wind_direction.values

    data = dict(
        latitude = dataset.latitude.values,
        longitude = dataset.longitude.values,
        altitude = dataset.altitude.values,
        wind_speed = dataset.wind_speed.values,
        temperature = dataset.temperature.values,
        humidity = dataset.humidity.values,
        dew_point = dataset.dew_point.values,
        precipitations = dataset.precipitations.values,
    )

    data_to_impute = dict(
        latitude = dataset_to_impute.latitude.values,
        longitude = dataset_to_impute.longitude.values,
        altitude = dataset_to_impute.altitude.values,
        wind_speed = dataset_to_impute.wind_speed.values,
        temperature = dataset_to_impute.temperature.values,
        humidity = dataset_to_impute.humidity.values,
        dew_point = dataset_to_impute.dew_point.values,
        precipitations = dataset_to_impute.precipitations.values,
    )

    return dataset, dataset_to_impute, data, data_to_impute, wind_direction

def get_data_imputation_prec(ratio = 0.01, dataset=None):
    if dataset is None:
        dataset = pd.read_csv("../data/X_station_day.csv")
        del dataset['station_id']

        columns = ['latitude', 'longitude', 'altitude', 'wind_direction', 'wind_speed', 'temperature', 'humidity', 'dew_point', 'precipitations']
        print(columns)

        normalisation_infos = get_normalization_infos(dataset, columns=columns)
        dataset.loc[:, columns] = normalize(dataset.loc[:, columns], normalisation_infos)

    nanidx = rd.sample(range(0, dataset.shape[0]), int(ratio*dataset.shape[0]))

    true_precipitations= dataset.iloc[nanidx]['precipitations']

    dataset.loc[nanidx, 'precipitations'] = np.nan * np.ones(shape=(int(ratio*dataset.shape[0]),1))

    print(dataset['precipitations'].isna().sum())

    return dataset, nanidx, true_precipitations

def prepare_data_imputation_prec(dataset, nanidx):
    # nanidx = np.array(np.isnan(dataset['wind_direction']).astype(int)).nonzero()[0]
    dataset_to_impute = dataset.iloc[nanidx]

    dataset = dataset.drop(index=nanidx)

    print(dataset['precipitations'].isna().sum())


    precipitations = dataset.precipitations.values

    data = dict(
        latitude = dataset.latitude.values,
        longitude = dataset.longitude.values,
        altitude = dataset.altitude.values,
        wind_direction = dataset.wind_direction.values,
        wind_speed = dataset.wind_speed.values,
        temperature = dataset.temperature.values,
        humidity = dataset.humidity.values,
        dew_point = dataset.dew_point.values,
    )

    data_to_impute = dict(
        latitude = dataset_to_impute.latitude.values,
        longitude = dataset_to_impute.longitude.values,
        altitude = dataset_to_impute.altitude.values,
        wind_direction = dataset_to_impute.wind_direction.values,
        wind_speed = dataset_to_impute.wind_speed.values,
        temperature = dataset_to_impute.temperature.values,
        humidity = dataset_to_impute.humidity.values,
        dew_point = dataset_to_impute.dew_point.values,
    )

    return dataset, dataset_to_impute, data, data_to_impute, precipitations

#### Fit the inference model and predict the NaNs values using MCMC (wind_direction)

In [None]:
ratios = [0.01, 0.1, 0.25, 0.5, 0.75, 0.85]

time_imputation = []
mape_imputation = []
mae_imputation = []
time_prediction = []
mape_prediction = []
mae_prediction = []

for ratio in ratios:
    print('ratio', ratio)
    dataset, nanidx, true_wind_direction = get_data_imputation_wd(ratio)
    dataset, dataset_to_impute, data, data_to_impute, wind_direction = prepare_data_imputation_wd(dataset, nanidx)

    # IMPUTATION

    start_time = time.time()
    mcmc_imputation = MCMC(NUTS(model_inference_wind_direction), num_warmup=1000, num_samples=1000)
    mcmc_imputation.run(random.PRNGKey(0), **data, wind_direction=wind_direction, nan_columns = [], mu=dict(), sigma=dict())
    # Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
    mcmc_imputation.print_summary()

    posterior_imputation = mcmc_imputation.get_samples()
    imputed_wind_direction = Predictive(model_inference_wind_direction, posterior_imputation)(random.PRNGKey(1), **data_to_impute,  nan_columns = [])["wind_direction"]
    imputed_wind_direction = imputed_wind_direction.mean(axis=0)
    time_imputation.append(time.time() - start_time)

    print("MAPE : ", m_mape(true_wind_direction, imputed_wind_direction))
    mape_imputation.append(m_mape(true_wind_direction, imputed_wind_direction))
    print("Mean Absolute Error : ", mean_absolute_error(true_wind_direction, imputed_wind_direction))
    mae_imputation.append(mean_absolute_error(true_wind_direction, imputed_wind_direction))


    # PREPARE DATA FOR PREDICTION

    dataset_to_impute['wind_direction'] = imputed_wind_direction
    dataset = pd.concat([dataset, dataset_to_impute], axis=0)
    ground_truth = dataset.ground_truth.values
    del dataset['ground_truth']

    x_train, x_test, y_train, y_test = train_test_split(dataset, ground_truth, test_size=0.33)

    # print(dataset['wind_direction'].isna().sum())

    data_train = dict(
        latitude = x_train.latitude.values,
        longitude = x_train.longitude.values,
        altitude = x_train.altitude.values,
        wind_direction = x_train.wind_direction.values,
        wind_speed = x_train.wind_speed.values,
        temperature = x_train.temperature.values,
        humidity = x_train.humidity.values,
        dew_point = x_train.dew_point.values,
        precipitations = x_train.precipitations.values,
    )

    data_test = dict(
        latitude = x_test.latitude.values,
        longitude = x_test.longitude.values,
        altitude = x_test.altitude.values,
        wind_direction = x_test.wind_direction.values,
        wind_speed = x_test.wind_speed.values,
        temperature = x_test.temperature.values,
        humidity = x_test.humidity.values,
        dew_point = x_test.dew_point.values,
        precipitations = x_test.precipitations.values,
    )

    mcmc_prediction = MCMC(NUTS(model_ground_truth), num_warmup=1000, num_samples=1000)
    mcmc_prediction.run(random.PRNGKey(0), **data_train, ground_truth=y_train, nan_columns = [], mu=dict(), sigma=dict())
    # Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
    mcmc_prediction.print_summary()

    posterior_prediction = mcmc_prediction.get_samples()
    ground_truth_pred = Predictive(model_ground_truth, posterior_prediction)(random.PRNGKey(1), **data_test,  nan_columns = [])["ground_truth"]
    ground_truth_pred = ground_truth_pred.mean(axis=0)

    print("MAPE : ", m_mape(y_test, ground_truth_pred))
    mape_prediction.append(m_mape(y_test, ground_truth_pred))
    print("Mean Absolute Error : ", mean_absolute_error(y_test, ground_truth_pred))
    mae_prediction.append(mean_absolute_error(y_test, ground_truth_pred))

In [None]:
print("time_imputation =", time_imputation)
print('mape_imputation = ', mape_imputation)
print('mae_imputation = ', mae_imputation)
print('time_prediction = ', time_prediction )
print('mape_prediction = ', mape_prediction)
print('mae_prediction = ', mae_prediction)

### Results for imputation on wind_direction (just train)

ratios = [0.01, 0.1, 0.2, 0.3, 0.5, 0.7]

time_imputation = [498.4595172405243, 535.9812779426575, 382.225394487381, 467.5977392196655, 207.57443261146545, 131.66128158569336]
mape_imputation =  [10.661021432629596, 10.66094987616098, 10.534959470297153, 10.601560210130817, 10.578586794722735, 10.578794819419919]
mae_imputation =  [0.15565755190667555, 0.15548198558043402, 0.15398647534098273, 0.15473225996057607, 0.15471727357235873, 0.15473207181873366]
time_prediction =  []
mape_prediction =  [115.09920384958139, 115.10145605712758, 115.22304792014107, 115.20514023106551, 115.29881327654412, 115.46439420994176]
mae_prediction =  [2.411045576079181, 2.411469252298278, 2.4130132029841653, 2.4120047693693074, 2.4130731608806273, 2.4157148375452886]

### Results for imputation on wind_direction (train and test)

ratios = [0.01, 0.1, 0.25, 0.5, 0.75, 0.85]
time_imputation = [760.7768249511719, 833.6295111179352, 331.03947353363037, 210.5356183052063, 100.60632395744324, 69.34340333938599]
mape_imputation =  [10.655264626915809, 10.499994185083757, 10.681917154662534, 10.638655587883045, 10.596149833552735, 10.596493616639869]
mae_imputation =  [0.15591639626159046, 0.153218699400214, 0.15589513935226562, 0.15519977132031357, 0.1550552133342025, 0.1549220600102269]
time_prediction =  []
mape_prediction =  [115.4349735877578, 116.48780259488963, 114.29392445601097, 115.2284456187796, 116.77062778921001, 115.70286894467233]
mae_prediction =  [2.4142372593398025, 2.4023796948936806, 2.4387429669468874, 2.409663312415465, 2.4045725457652907, 2.4021950186983516]


#### Fit the inference model and predict the NaNs values using MCMC (precipitation)

In [None]:
ratios = [0.01, 0.1, 0.25, 0.5, 0.75, 0.85]

time_imputation = []
mape_imputation = []
mae_imputation = []
time_prediction = []
mape_prediction = []
mae_prediction = []

for ratio in ratios:
    print('ratio', ratio)
    dataset, nanidx, true_precipitations = get_data_imputation_prec(ratio)
    dataset, dataset_to_impute, data, data_to_impute, precipitations = prepare_data_imputation_prec(dataset, nanidx)

    # IMPUTATION

    start_time = time.time()
    mcmc_imputation = MCMC(NUTS(model_inference_precipitation), num_warmup=1000, num_samples=1000)
    mcmc_imputation.run(random.PRNGKey(0), **data, precipitations=precipitations, nan_columns = [], mu=dict(), sigma=dict())
    # Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
    mcmc_imputation.print_summary()

    posterior_imputation = mcmc_imputation.get_samples()
    imputed_precipitation = Predictive(model_inference_precipitation, posterior_imputation)(random.PRNGKey(1), **data_to_impute,  nan_columns = [])["precipitations"]
    imputed_precipitation = imputed_precipitation.mean(axis=0)
    time_imputation.append(time.time() - start_time)

    print("MAPE : ", m_mape(true_precipitations, imputed_precipitation))
    mape_imputation.append(m_mape(true_precipitations, imputed_precipitation))
    print("Mean Absolute Error : ", mean_absolute_error(true_precipitations, imputed_precipitation))
    mae_imputation.append(mean_absolute_error(true_precipitations, imputed_precipitation))


    # PREPARE DATA FOR PREDICTION

    dataset_to_impute['precipitations'] = imputed_precipitation
    dataset = pd.concat([dataset, dataset_to_impute], axis=0)
    ground_truth = dataset.ground_truth.values
    del dataset['ground_truth']

    x_train, x_test, y_train, y_test = train_test_split(dataset, ground_truth, test_size=0.33)

    # print(dataset['wind_direction'].isna().sum())

    data_train = dict(
        latitude = x_train.latitude.values,
        longitude = x_train.longitude.values,
        altitude = x_train.altitude.values,
        wind_direction = x_train.wind_direction.values,
        wind_speed = x_train.wind_speed.values,
        temperature = x_train.temperature.values,
        humidity = x_train.humidity.values,
        dew_point = x_train.dew_point.values,
        precipitations = x_train.precipitations.values,
    )

    data_test = dict(
        latitude = x_test.latitude.values,
        longitude = x_test.longitude.values,
        altitude = x_test.altitude.values,
        wind_direction = x_test.wind_direction.values,
        wind_speed = x_test.wind_speed.values,
        temperature = x_test.temperature.values,
        humidity = x_test.humidity.values,
        dew_point = x_test.dew_point.values,
        precipitations = x_test.precipitations.values,
    )

    mcmc_prediction = MCMC(NUTS(model_ground_truth), num_warmup=1000, num_samples=1000)
    mcmc_prediction.run(random.PRNGKey(0), **data_train, ground_truth=y_train, nan_columns = [], mu=dict(), sigma=dict())
    # Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
    mcmc_prediction.print_summary()

    posterior_prediction = mcmc_prediction.get_samples()
    ground_truth_pred = Predictive(model_ground_truth, posterior_prediction)(random.PRNGKey(1), **data_test,  nan_columns = [])["ground_truth"]
    ground_truth_pred = ground_truth_pred.mean(axis=0)

    print("MAPE : ", m_mape(y_test, ground_truth_pred))
    mape_prediction.append(m_mape(y_test, ground_truth_pred))
    print("Mean Absolute Error : ", mean_absolute_error(y_test, ground_truth_pred))
    mae_prediction.append(mean_absolute_error(y_test, ground_truth_pred))

In [None]:
print("time_imputation =", time_imputation)
print('mape_imputation = ', mape_imputation)
print('mae_imputation = ', mae_imputation)
print('time_prediction = ', time_prediction )
print('mape_prediction = ', mape_prediction)
print('mae_prediction = ', mae_prediction)

### Results for imputation on precipitations (just train)


ratios = [0.01, 0.1, 0.25, 0.5, 0.75, 0.85]

time_imputation = [334.5759816169739, 220.93195962905884, 223.3536810874939, 218.35092449188232, 75.44321203231812, 58.480687856674194]
mape_imputation =  [2.69251744495105, 2.6656074188353105, 2.7072722875026463, 2.6965653750125296, 2.7166022716786093, 2.7161747234117986]
mae_imputation =  [0.02927843191081118, 0.02888885154555619, 0.029249887627131644, 0.02910631590598334, 0.02934373638911998, 0.02933613892276303]
time_prediction =  []
mape_prediction =  [115.14346397586787, 115.5460603302976, 116.49281661244719, 117.93400404409374, 119.23363228520006, 119.83836222691068]
mae_prediction =  [2.4117583647778793, 2.4172633556710967, 2.430496724987021, 2.4493258274049, 2.466964335067219, 2.4742455842166406]

### Results for imputation on precipitations (train and test)

time_imputation = [345.27573561668396, 278.56666231155396, 304.60888600349426, 165.8144268989563, 97.52951622009277, 48.52631092071533]
mape_imputation =  [2.579436331117188, 2.7146133812855884, 2.701725408799627, 2.6964253859506884, 2.7054057318343734, 2.70623604950047]
mae_imputation =  [0.027640181257082227, 0.029349886855223765, 0.029184162608646508, 0.029177849803427757, 0.029270931863838903, 0.029232264090179393]
time_prediction =  []
mape_prediction =  [114.88637682769046, 116.65396876746904, 118.48552655302022, 118.95122414676707, 119.73960195911792, 119.28144026832175]
mae_prediction =  [2.409019031646397, 2.4180447872986544, 2.422017444721502, 2.4446329493926515, 2.464761624828655, 2.4760137519946217]

### Imputation on precipitations AND wind_direction

In [7]:
def get_data_imputation_wd_prec(ratio = 0.01):
    dataset = pd.read_csv("../data/X_station_day.csv")
    del dataset['station_id']

    columns = ['latitude', 'longitude', 'altitude', 'wind_direction', 'wind_speed', 'temperature', 'humidity', 'dew_point', 'precipitations']
    print(columns)

    normalisation_infos = get_normalization_infos(dataset, columns=columns)
    dataset.loc[:, columns] = normalize(dataset.loc[:, columns], normalisation_infos)

    nanidx_wd = rd.sample(range(0, dataset.shape[0]), int(ratio*dataset.shape[0]))
    nanidx_prec = rd.sample(range(0, dataset.shape[0]), int(ratio*dataset.shape[0]))

    # print(nanidx_wd)
    # print(nanidx_prec)

    true_wind_direction = dataset.iloc[nanidx_wd]['wind_direction']
    true_precipitations = dataset.iloc[nanidx_prec]['precipitations']

    dataset.loc[nanidx_wd, 'wind_direction'] = np.nan * np.ones(shape=(int(ratio*dataset.shape[0]),1))
    dataset.loc[nanidx_prec, 'precipitations'] = np.nan * np.ones(shape=(int(ratio*dataset.shape[0]),1))

    # print(dataset['wind_direction'].isna().sum())
    # print(dataset['precipitations'].isna().sum())

    return dataset, nanidx_wd, nanidx_prec, true_wind_direction, true_precipitations



In [13]:
ratios = [0.75, 0.85] #[0.01, 0.1, 0.25, 0.5] # ,

mape_prediction = []
mae_prediction = []

for ratio in ratios:
    print('ratio', ratio)
    dataset, nanidx_wd, nanidx_prec, true_wind_direction, true_precipitations = get_data_imputation_wd_prec(ratio)

    print(len(nanidx_wd))

    print(dataset['wind_direction'].isna().sum())
    print(dataset['precipitations'].isna().sum())

    # Here the dataset contains the data with nan in columns wind_direction and precipitaions
    _, _, data, data_to_impute, wind_direction = prepare_data_imputation_wd(dataset, nanidx_wd)

# IMPUTATION WIND_DIRECTION

    mcmc_imputation = MCMC(NUTS(model_inference_wind_direction), num_warmup=1000, num_samples=1000)
    mcmc_imputation.run(random.PRNGKey(0), **data, wind_direction=wind_direction, nan_columns = [], mu=dict(), sigma=dict(), prec_nan=True)
    # Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
    mcmc_imputation.print_summary()

    posterior_imputation = mcmc_imputation.get_samples()
    imputed_wind_direction = Predictive(model_inference_wind_direction, posterior_imputation)(random.PRNGKey(1), **data_to_impute,  nan_columns = [], prec_nan=True)["wind_direction"]
    imputed_wind_direction = imputed_wind_direction.mean(axis=0)

    # here we replace the nans in wind_direction by the predicted values
    # dataset_to_impute['wind_direction'] = imputed_wind_direction
    # dataset = pd.concat([dataset, dataset_to_impute], axis=0)


    dataset.loc[nanidx_wd, 'wind_direction'] = imputed_wind_direction

    # IMPUTATION PRECIPITATIONS

    dataset, dataset_to_impute, data, data_to_impute, precipitations = prepare_data_imputation_prec(dataset, nanidx_prec)

    mcmc_imputation = MCMC(NUTS(model_inference_precipitation), num_warmup=1000, num_samples=1000)
    mcmc_imputation.run(random.PRNGKey(0), **data, precipitations=precipitations, nan_columns = [], mu=dict(), sigma=dict())
    # Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
    mcmc_imputation.print_summary()

    posterior_imputation = mcmc_imputation.get_samples()
    imputed_precipitation = Predictive(model_inference_precipitation, posterior_imputation)(random.PRNGKey(1), **data_to_impute,  nan_columns = [])["precipitations"]
    imputed_precipitation = imputed_precipitation.mean(axis=0)

    # PREPARE DATA FOR PREDICTION

    dataset_to_impute['precipitations'] = imputed_precipitation
    dataset = pd.concat([dataset, dataset_to_impute], axis=0)
    ground_truth = dataset.ground_truth.values
    del dataset['ground_truth']

    print(dataset['wind_direction'].isna().sum())
    print(dataset['precipitations'].isna().sum())

    x_train, x_test, y_train, y_test = train_test_split(dataset, ground_truth, test_size=0.33)

    # print(dataset['wind_direction'].isna().sum())

    data_train = dict(
        latitude = x_train.latitude.values,
        longitude = x_train.longitude.values,
        altitude = x_train.altitude.values,
        wind_direction = x_train.wind_direction.values,
        wind_speed = x_train.wind_speed.values,
        temperature = x_train.temperature.values,
        humidity = x_train.humidity.values,
        dew_point = x_train.dew_point.values,
        precipitations = x_train.precipitations.values,
    )

    data_test = dict(
        latitude = x_test.latitude.values,
        longitude = x_test.longitude.values,
        altitude = x_test.altitude.values,
        wind_direction = x_test.wind_direction.values,
        wind_speed = x_test.wind_speed.values,
        temperature = x_test.temperature.values,
        humidity = x_test.humidity.values,
        dew_point = x_test.dew_point.values,
        precipitations = x_test.precipitations.values,
    )

    mcmc_prediction = MCMC(NUTS(model_ground_truth), num_warmup=1000, num_samples=1000)
    mcmc_prediction.run(random.PRNGKey(0), **data_train, ground_truth=y_train, nan_columns = [], mu=dict(), sigma=dict())
    # Print the statistics of posterior samples collected during running this MCMC instance. (documentation)
    mcmc_prediction.print_summary()

    posterior_prediction = mcmc_prediction.get_samples()
    ground_truth_pred = Predictive(model_ground_truth, posterior_prediction)(random.PRNGKey(1), **data_test,  nan_columns = [])["ground_truth"]
    ground_truth_pred = ground_truth_pred.mean(axis=0)

    print("MAPE : ", m_mape(y_test, ground_truth_pred))
    mape_prediction.append(m_mape(y_test, ground_truth_pred))
    print("Mean Absolute Error : ", mean_absolute_error(y_test, ground_truth_pred))
    mae_prediction.append(mean_absolute_error(y_test, ground_truth_pred))

ratio 0.75
['latitude', 'longitude', 'altitude', 'wind_direction', 'wind_speed', 'temperature', 'humidity', 'dew_point', 'precipitations']
61590
61590
61590
0


sample: 100%|██████████| 2000/2000 [02:54<00:00, 11.47it/s, 511 steps of size 1.01e-02. acc. prob=0.93] 



                            mean       std    median      5.0%     95.0%     n_eff     r_hat
        bayes_altitude      0.04      0.01      0.04      0.03      0.05    915.17      1.00
       bayes_dew_point      4.42      0.17      4.42      4.15      4.68    225.93      1.00
        bayes_humidity     -1.23      0.06     -1.23     -1.32     -1.13    234.89      1.00
        bayes_latitude      0.01      0.01      0.01     -0.00      0.02    832.19      1.00
       bayes_longitude      0.01      0.01      0.01      0.00      0.02    718.80      1.01
  bayes_precipitations     -0.05      0.99     -0.06     -1.76      1.44    651.88      1.00
     bayes_temperature     -3.44      0.14     -3.45     -3.66     -3.21    226.85      1.00
      bayes_wind_speed      0.47      0.02      0.47      0.45      0.50    704.33      1.00
               default      0.53      0.02      0.53      0.49      0.57    293.30      1.00
                 sigma      0.19      0.00      0.19      0.19      0

sample: 100%|██████████| 2000/2000 [02:00<00:00, 16.58it/s, 255 steps of size 8.72e-03. acc. prob=0.94]



                            mean       std    median      5.0%     95.0%     n_eff     r_hat
        bayes_altitude      0.01      0.00      0.01      0.00      0.01   1398.46      1.00
       bayes_dew_point     -0.49      0.04     -0.49     -0.57     -0.43    125.41      1.00
        bayes_humidity      0.30      0.02      0.30      0.27      0.32    127.44      1.00
        bayes_latitude     -0.01      0.00     -0.01     -0.01     -0.01   1287.27      1.00
       bayes_longitude      0.01      0.00      0.01      0.01      0.01   1180.47      1.00
     bayes_temperature      0.44      0.04      0.44      0.39      0.50    125.11      1.00
  bayes_wind_direction      0.02      0.00      0.02      0.01      0.02    575.96      1.00
      bayes_wind_speed      0.19      0.00      0.19      0.18      0.19    561.28      1.00
               default     -0.17      0.01     -0.17     -0.18     -0.16    146.48      1.00
                 sigma      0.05      0.00      0.05      0.05      0

sample: 100%|██████████| 2000/2000 [03:27<00:00,  9.62it/s, 63 steps of size 3.44e-02. acc. prob=0.94] 



                            mean       std    median      5.0%     95.0%     n_eff     r_hat
        bayes_altitude      0.52      0.08      0.52      0.39      0.65   1060.42      1.00
       bayes_dew_point      0.61      0.64      0.60     -0.43      1.65    407.20      1.00
        bayes_humidity      0.75      0.23      0.73      0.34      1.09    398.50      1.00
        bayes_latitude      0.11      0.08      0.11     -0.02      0.24   1182.87      1.00
       bayes_longitude     -0.22      0.07     -0.22     -0.33     -0.09    837.53      1.00
  bayes_precipitations     10.40      0.56     10.42      9.52     11.31    916.65      1.00
     bayes_temperature     -1.08      0.53     -1.07     -1.88     -0.18    406.27      1.00
  bayes_wind_direction      0.98      0.18      0.98      0.68      1.26    760.09      1.00
      bayes_wind_speed      5.15      0.23      5.16      4.76      5.52    611.98      1.00
               default     -0.12      0.14     -0.12     -0.35      0

sample: 100%|██████████| 2000/2000 [01:39<00:00, 20.18it/s, 511 steps of size 7.81e-03. acc. prob=0.96] 



                            mean       std    median      5.0%     95.0%     n_eff     r_hat
        bayes_altitude      0.04      0.01      0.04      0.03      0.05   1059.76      1.00
       bayes_dew_point      3.92      0.22      3.92      3.51      4.23    329.16      1.01
        bayes_humidity     -1.03      0.08     -1.04     -1.16     -0.89    327.82      1.01
        bayes_latitude      0.02      0.01      0.02      0.00      0.03   1258.09      1.00
       bayes_longitude      0.03      0.01      0.03      0.01      0.04    930.76      1.00
  bayes_precipitations     -0.02      0.98     -0.03     -1.61      1.55   1040.26      1.00
     bayes_temperature     -3.02      0.19     -3.02     -3.28     -2.66    327.99      1.01
      bayes_wind_speed      0.44      0.02      0.44      0.40      0.47   1039.59      1.00
               default      0.46      0.03      0.46      0.40      0.50    371.50      1.01
                 sigma      0.19      0.00      0.19      0.19      0

sample: 100%|██████████| 2000/2000 [01:29<00:00, 22.26it/s, 255 steps of size 8.09e-03. acc. prob=0.94] 



                            mean       std    median      5.0%     95.0%     n_eff     r_hat
        bayes_altitude      0.01      0.00      0.01      0.00      0.01    926.95      1.00
       bayes_dew_point     -0.45      0.06     -0.45     -0.53     -0.35    307.64      1.00
        bayes_humidity      0.27      0.02      0.27      0.23      0.30    302.68      1.00
        bayes_latitude     -0.01      0.00     -0.01     -0.01     -0.01   1032.38      1.00
       bayes_longitude      0.01      0.00      0.01      0.00      0.01   1068.69      1.00
     bayes_temperature      0.40      0.05      0.40      0.31      0.47    306.81      1.00
  bayes_wind_direction      0.02      0.01      0.02      0.01      0.03    568.12      1.00
      bayes_wind_speed      0.18      0.01      0.18      0.17      0.19    612.72      1.00
               default     -0.16      0.01     -0.16     -0.17     -0.15    322.31      1.00
                 sigma      0.05      0.00      0.05      0.05      0

sample: 100%|██████████| 2000/2000 [04:18<00:00,  7.74it/s, 127 steps of size 3.39e-02. acc. prob=0.94]



                            mean       std    median      5.0%     95.0%     n_eff     r_hat
        bayes_altitude      0.50      0.08      0.50      0.37      0.63    973.05      1.00
       bayes_dew_point      0.64      0.66      0.65     -0.42      1.67    343.60      1.00
        bayes_humidity      0.88      0.25      0.87      0.46      1.25    330.68      1.00
        bayes_latitude     -0.02      0.08     -0.02     -0.16      0.10   1041.08      1.00
       bayes_longitude     -0.16      0.07     -0.16     -0.27     -0.03    787.50      1.00
  bayes_precipitations      9.53      0.65      9.54      8.54     10.64    845.12      1.00
     bayes_temperature     -1.03      0.54     -1.04     -1.85     -0.11    348.47      1.00
  bayes_wind_direction      0.68      0.22      0.68      0.30      1.02    805.39      1.00
      bayes_wind_speed      5.31      0.25      5.32      4.91      5.73    395.92      1.00
               default     -0.10      0.14     -0.10     -0.32      0

In [36]:
    len(imputed_wind_direction)

41060

In [14]:
print('mape_prediction = ', mape_prediction)
print('mae_prediction = ', mae_prediction)

mape_prediction =  [121.0717496822495, 120.95708945598557]
mae_prediction =  [2.4638451731754847, 2.478234124218667]
