In [17]:
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az

# a
# Fit a Bayesian linear regression of Price on Speed and log(HardDrive).
# Response and predictors are standardized (zero mean, unit standard deviation,
# using NumPy's default population std) before fitting; the means and standard
# deviations are kept so later steps can map values to and from the
# standardized scale.

# Fixed seed so that re-running the cell reproduces the same posterior draws.
RANDOM_SEED = 42

df = pd.read_csv('/content/drive/MyDrive/Prices.csv')  # the file was uploaded to Drive

price = df['Price'].values
speed = df['Speed'].values
# NOTE(review): np.log assumes every HardDrive value is strictly positive - confirm.
log_hd = np.log(df['HardDrive'].values)

price_mean = price.mean()
price_std = price.std()
price_s = (price - price_mean) / price_std

speed_mean = speed.mean()
speed_std = speed.std()
speed_s = (speed - speed_mean) / speed_std

log_hd_mean = log_hd.mean()
log_hd_std = log_hd.std()
log_hd_s = (log_hd - log_hd_mean) / log_hd_std

# Design matrix: column 0 is standardized speed, column 1 is standardized
# log(hard drive), so betas[0] and betas[1] follow the same order.
X_data = np.stack([speed_s, log_hd_s], axis=1)

with pm.Model() as model_lab11:
    # Priors on the standardized scale.
    alpha = pm.Normal('alpha', mu=0, sigma=10)
    betas = pm.Normal('betas', mu=0, sigma=10, shape=2)
    sigma = pm.HalfCauchy('sigma', 5)

    # Linear predictor for the standardized price.
    mu = alpha + pm.math.dot(X_data, betas)

    # Likelihood of the observed standardized prices.
    y_pred = pm.Normal('y_pred', mu=mu, sigma=sigma, observed=price_s)

    idata = pm.sample(
        2000,
        tune=2000,
        random_seed=RANDOM_SEED,
        return_inferencedata=True,
        progressbar=False,
    )


# b
# Posterior mean and 95% HDI bounds of the two regression coefficients.
summary = az.summary(
    idata,
    var_names=['betas'],
    hdi_prob=0.95,
)
reported_columns = ['mean', 'hdi_2.5%', 'hdi_97.5%']
print(summary[reported_columns])


# c
# A predictor is reported as useful when its 95% HDI lies strictly on one
# side of zero, i.e. zero is not strictly inside the interval.

hdi_bounds = ['hdi_2.5%', 'hdi_97.5%']
b0_hdi = summary.loc['betas[0]', hdi_bounds].values
b1_hdi = summary.loc['betas[1]', hdi_bounds].values

# Evaluate each interval test once and reuse it for both printed lines.
b0_contains_zero = b0_hdi[0] < 0 < b0_hdi[1]
b1_contains_zero = b1_hdi[0] < 0 < b1_hdi[1]

print(f"1. Frecventa (Speed): Interval {b0_hdi}. Contine 0? {'DA' if b0_contains_zero else 'NU'}.")
print(f"   -> Predictor {'inutil' if b0_contains_zero else 'UTIL'}.")

print(f"2. HardDrive (Log):   Interval {b1_hdi}. Contine 0? {'DA' if b1_contains_zero else 'NU'}.")
print(f"   -> Predictor {'inutil' if b1_contains_zero else 'UTIL'}.")


# d
# Posterior distribution of the expected price for a new computer with
# Speed = 33 and HardDrive = 540, reported as a 90% HDI on the original
# price scale.
new_speed = 33
new_hd = 540
new_log_hd = np.log(new_hd)

# Put the new predictors on the same standardized scale used to fit the model.
val_speed_s = (new_speed - speed_mean) / speed_std
val_log_hd_s = (new_log_hd - log_hd_mean) / log_hd_std

# Flatten (chain, draw) into one "samples" dimension. The dimension names are
# passed as a tuple rather than a set, because a set has no defined order and
# the stacking order would then depend on set iteration order.
post = idata.posterior.stack(samples=("chain", "draw"))
alpha_samples = post['alpha'].values

beta0_samples = post['betas'].sel(betas_dim_0=0).values
beta1_samples = post['betas'].sel(betas_dim_0=1).values

# One value of the standardized linear predictor per posterior sample.
mu_s_dist = alpha_samples + beta0_samples * val_speed_s + beta1_samples * val_log_hd_s

# Undo the standardization of the response to get prices in original units.
mu_final_dist = mu_s_dist * price_std + price_mean

hdi_mu = az.hdi(mu_final_dist, hdi_prob=0.90)
print(f"\nMu (pret mediu) interval HDI 90%: [{hdi_mu[0]:.2f}, {hdi_mu[1]:.2f}]")


# e
# Posterior predictive distribution for the same new computer: each posterior
# sample of the mean is combined with Gaussian observation noise drawn using
# the matching posterior sample of sigma.
sigma_samples = post['sigma'].values

# A locally seeded generator makes the simulated prices, and therefore the
# printed interval, reproducible across runs without touching NumPy's global
# random state.
rng = np.random.default_rng(42)
y_s_dist = rng.normal(loc=mu_s_dist, scale=sigma_samples)

# Map the simulated standardized prices back to the original price scale.
y_final_dist = y_s_dist * price_std + price_mean

hdi_y = az.hdi(y_final_dist, hdi_prob=0.90)
print(f"Predictie interval HDI 90%: [{hdi_y[0]:.2f}, {hdi_y[1]:.2f}]")

           mean  hdi_2.5%  hdi_97.5%
betas[0]  0.148     0.060      0.228
betas[1]  0.384     0.292      0.464
1. Frecventa (Speed): Interval [0.06  0.228]. Contine 0? NU.
   -> Predictor UTIL.
2. HardDrive (Log):   Interval [0.292 0.464]. Contine 0? NU.
   -> Predictor UTIL.

Mu (pret mediu) interval HDI 90%: [2252.52, 2384.54]
Predictie interval HDI 90%: [1428.30, 3089.62]
