Subpunctul 1.

In [14]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az

data = pd.read_csv('Prices.csv')

y = data['Price'].values
x1 = data['Speed'].values
x2 = np.log(data['HardDrive'].values)


with pm.Model() as model:

    alpha = pm.Normal('alpha', mu=0, sigma=10)
    beta1 = pm.Normal('beta1', mu=0, sigma=10)
    beta2 = pm.Normal('beta2', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=10)
    
    mu = alpha + beta1 * x1 + beta2 * x2
    
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)

    trace = pm.sample(2000, tune=1000, return_inferencedata=False, random_seed=42)
#az.plot_posterior(trace, var_names=["alpha", "sigma"])

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta1, beta2, sigma]


KeyboardInterrupt: 

Subpunctul 2:

In [2]:


hdi_beta1 = az.hdi(trace['beta1'], hdi_prob=0.95)
hdi_beta2 = az.hdi(trace['beta2'], hdi_prob=0.95)

print(f"95% HDI pentru β1: {hdi_beta1}")
print(f"95% HDI pentru β2: {hdi_beta2}")


95% HDI pentru β1: [14.26506869 16.86433837]
95% HDI pentru β2: [212.65275376 237.09261483]


Subpunctul 3:

Aceste intervale pentru β1 si β2 nu conțin valoarea 0, ceea ce sugerează că frecvența procesorului și mărimea hard diskului sunt predictori semnificativi pentru prețul de vânzare al unui PC.

Subpunctul 4:

In [None]:
x1_new = 33
x2_new = np.log(540)

#valuarea medie μ pentru noile date
mu_new = trace['alpha'] + trace['beta1'] * x1_new + trace['beta2'] * x2_new

#5000 de extrageri
mu_simulated = np.random.choice(mu_new, size=5000)

#intervalul HDI de 90% pentru prețul estimat
hdi_mu = az.hdi(mu_simulated, hdi_prob=0.90)
print(f"Intervalul HDI de 90% pentru pretul estimat: {hdi_mu}")

Intervalul HDI de 90% pentru pretul estimat: [1934.15935016 2000.47891783]


Subpunctul 5:

In [10]:

y_simulated = np.random.normal(mu_new[0:5000], np.random.choice(trace['sigma'], size=5000))

#calcularea HDI
hdi_y = az.hdi(y_simulated, hdi_prob=0.90)
print(f"Intervalul HDI de 90% pentru prețul predictiv: {hdi_y}")

Intervalul HDI de 90% pentru prețul predictiv: [1465.76321468 2513.63270917]


Subiectul 6:

In [12]:

y = data['Price'].values
x1 = data['Speed'].values

x2 = np.log(data['HardDrive'].values)
x3 = [1 if value == 'yes' else 0 for value in data['Premium']]
print(x3)

with pm.Model() as model:

    alpha = pm.Normal('alpha', mu=0, sigma=10)
    beta1 = pm.Normal('beta1', mu=0, sigma=10)
    beta2 = pm.Normal('beta2', mu=0, sigma=10)
    beta3 = pm.Normal('beta3', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=10)
    
    mu = alpha + beta1 * x1 + beta2 * x2 + beta3 * x3
    
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)

    trace = pm.sample(2000, tune=1000, return_inferencedata=False, random_seed=42)
#az.plot_posterior(trace, var_names=["alpha", "sigma"])

[1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta1, beta2, beta3, sigma]


Output()

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 22 seconds.


In [13]:
hdi_beta3 = az.hdi(trace['beta3'], hdi_prob=0.95)
print(f"95% HDI pentru β3: {hdi_beta3}")

95% HDI pentru β3: [ 0.62862604 38.24413715]


LABORATOR:


In [29]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az

data = pd.read_csv('real_estate_data.csv')

y = data['Price'].values
x1 = data['Surface_area'].values
x2=  data['Rooms'].values
x3 = data['Distance_to_center'].values
y = (y- np.mean(y)) / np.std(y)
x1 = (x1 - np.mean(x1)) / np.std(x1)
x2 = (x2 - np.mean(x2)) / np.std(x2)
x3 = (x3 - np.mean(x3)) / np.std(x3)
with pm.Model() as model:

    alpha = pm.Normal('alpha', mu=0, sigma=10)
    beta1 = pm.Normal('beta1', mu=0, sigma=10)
    beta2 = pm.Normal('beta2', mu=0, sigma=10)
    beta3 = pm.Normal('beta3', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=10)
    
    mu = alpha + beta1 * x1 + beta2 * x2 + beta3*x3 
    
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)

    trace = pm.sample(2000, tune=1000, return_inferencedata=False, random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta1, beta2, beta3, sigma]


Output()

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 16 seconds.


In [30]:
hdi_beta1 = az.hdi(trace['beta1'], hdi_prob=0.95)
hdi_beta2 = az.hdi(trace['beta2'], hdi_prob=0.95)
hdi_beta3 = az.hdi(trace['beta3'], hdi_prob=0.95)

print("95% HDI for β1", hdi_beta1)
print("95% HDI for β2", hdi_beta2)
print("95% HDI for β3", hdi_beta3)

95% HDI for β1 [0.97837004 1.00820876]
95% HDI for β2 [0.1324751  0.16189656]
95% HDI for β3 [-0.13304985 -0.10312754]


In [31]:

mean_x1, std_x1 = np.mean(np.log(data['Surface_area'].values)), np.std(np.log(data['Surface_area'].values))
mean_x2, std_x2 = np.mean(data['Rooms'].values), np.std(data['Rooms'].values)
mean_x3, std_x3 = np.mean(np.log(data['Distance_to_center'].values)), np.std(np.log(data['Distance_to_center'].values))

beta1_mean_original = np.mean(trace['beta1']) / std_x1
beta2_mean_original = np.mean(trace['beta2']) / std_x2
beta3_mean_original = np.mean(trace['beta3']) / std_x3

alpha_mean_original = np.mean(trace['alpha']) - (
    beta1_mean_original * mean_x1 +
    beta2_mean_original * mean_x2 +
    beta3_mean_original * mean_x3
)

print("β1 denormalizat:", beta1_mean_original)
print("β2 denormalizat:", beta2_mean_original)
print("β3 denormalizat:", beta3_mean_original)
print("y denormalizat α:", alpha_mean_original)


β1 denormalizat: 3.1060379890435392
β2 denormalizat: 0.12691156598029507
β3 denormalizat: -0.16167896278946334
y denormalizat α: -14.132732778876738
