In [94]:
import pyro
import pyro.distributions as dist
from pyro.infer import Importance, EmpiricalMarginal
from statistics import mean
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.multicomp
# Fix the random seed up front so the sampling done later in the notebook is repeatable.
pyro.set_rng_seed(101)

In [95]:
# Load the dataset; later cells read its Rent, BPB, Zest and latlng columns.
df = pd.read_csv("data/test.csv")

```r
# Candidate DAG structures, written as model strings: "[node|parent1:parent2]";
# a bare "[node]" is a root with no parents.

# dag1: Neigh and BPB are roots; Zest and Rent each depend on both; ROI depends on Zest and Rent.
dag1 <- model2network("[Neigh][BPB][Zest|BPB:Neigh][Rent|BPB:Neigh][ROI|Zest:Rent]")
# dag2: same shape as dag1, but location enters as separate latitude and longitude roots instead of Neigh.
dag2 <- model2network("[latitude][longitude][BPB][Zest|BPB:latitude:longitude][Rent|BPB:latitude:longitude][ROI|Zest:Rent]")
# dag3: single latlng root plus a Type root; Type is a parent of BPB and of Zest, but not of Rent.
dag3 <- model2network("[latlng][Type][BPB|Type][Zest|BPB:latlng:Type][Rent|BPB:latlng][ROI|Zest:Rent]")
# dag4: same shape as dag1 with latlng in place of Neigh.
dag4 <- model2network("[latlng][BPB][Zest|BPB:latlng][Rent|BPB:latlng][ROI|Zest:Rent]")
# dag5: same as dag1 except Rent depends on BPB only.
dag5 <- model2network("[Neigh][BPB][Zest|BPB:Neigh][Rent|BPB][ROI|Zest:Rent]")
```

# Building Models and CPTs for Nodes

The ANOVA workflow below is adapted from [this Python for Data Science tutorial on two-way and N-way ANOVA](https://pythonfordatascience.org/anova-2-way-n-way/).

- Grab the standard error from the fitted rent model and use it as `Nrent` (the rent noise term)

In [118]:
# Regress Rent on BPB, with BPB wrapped in C(...) so it is treated as a categorical factor.
rent_model = ols(formula='Rent ~ C(BPB)', data=df).fit()

# Overall F-test: is the model as a whole significant?
print(f"Overall model F({rent_model.df_model: .0f},{rent_model.df_resid: .0f}) = {rent_model.fvalue: .3f}, p = {rent_model.f_pvalue: .4f}")


Overall model F( 4, 200) =  13.359, p =  0.0000


In [120]:
# Fitted rent for a single hypothetical row with BPB == 4 (BPB is categorical in rent_model).
rent_model.predict(pd.DataFrame({'BPB':[4]}))

0    196.4375
dtype: float64

In [121]:
# ANOVA table (typ=2) for the rent model; the bare name on the last line makes
# the notebook render the resulting table.
res = sm.stats.anova_lm(rent_model, typ=2)
res

Unnamed: 0,sum_sq,df,F,PR(>F)
C(BPB),242221.823818,4.0,13.359283,1.148343e-09
Residual,906567.424962,200.0,,


- Do OLS model for Zestimates
- Get Neighborhood Marginal
- Get BPB marginal
- Get ROI function in the code

In [122]:
# zest_lr_dict = {}
# for bpb in df.BPB.unique():
#     subset = df[df.BPB == bpb]
#     lr = LinearRegression()
#     lr.fit(subset[['latlng']], subset.Zest)
#     zest_lr_dict[bpb] = lr
#     print(r2_score(subset.Zest, lr.predict(subset[['latlng']])), subset.shape[0])
    
# print("\n\n")
# rent_lr_dict = {}

# for bpb in df.BPB.unique():
#     subset = df[df.BPB == bpb]
#     lr = LinearRegression()
#     lr.fit(subset[['latlng']], subset.Rent)
#     rent_lr_dict[bpb] = lr
#     print(r2_score(subset.Rent, lr.predict(subset[['latlng']])), subset.shape[0])

In [None]:
# Bernoulli(0.5) distributions for the three exogenous noise terms that `model` samples.
exogenous_dists = {
    noise_name: dist.Bernoulli(torch.tensor(.5))
    for noise_name in ('Nlatlng', 'Ny', 'Nz')
}

# Linear regressions of Zest and of Rent on the BPB and latlng columns.
# `fit` returns the fitted estimator, so construction and fitting can be chained.
zest_lr = LinearRegression().fit(df[['BPB', 'latlng']], df['Zest'])
rent_lr = LinearRegression().fit(df[['BPB', 'latlng']], df['Rent'])

def model(exogenous_dists):
    """Structural causal model over the variables X, Y and Z.

    Each endogenous variable is a deterministic (Delta) function of sampled
    exogenous noise:

    * ``X`` copies the ``'Nlatlng'`` noise and ``Y`` copies the ``'Ny'`` noise.
    * ``Z`` is ``X OR Y`` when the ``'Nz'`` noise is 1 and ``X AND Y`` when it
      is 0 (for 0/1-valued inputs).

    Args:
        exogenous_dists: Mapping with the keys ``'Nlatlng'``, ``'Ny'`` and
            ``'Nz'``, each holding the distribution to sample that noise
            term from.

    Returns:
        Tuple ``(X, Y, Z)`` of the sampled values.
    """
    Nx = pyro.sample('Nlatlng', exogenous_dists['Nlatlng'])
    Ny = pyro.sample('Ny', exogenous_dists['Ny'])
    Nz = pyro.sample('Nz', exogenous_dists['Nz'])

    X = pyro.sample('X', dist.Delta(Nx))
    Y = pyro.sample('Y', dist.Delta(Ny))

    # TODO: add the 'Zest' site. The earlier draft overwrote the sampled `Nz`
    # with `dist.Normal(sd_zest_lr)` (undefined name, missing scale argument,
    # and a distribution object rather than a sample) and then built
    # `dist.Delta( + Nz)` with no left operand, so the function could never
    # run. Zest needs its own noise variable and a prediction term before it
    # can be reinstated; it must not reuse the name `Nz`, which Z needs below.

    # Logical OR / AND for 0/1-valued tensors. torch.clamp is used instead of
    # the builtin min so the result keeps the dtype of X + Y and also works
    # element-wise on batched samples.
    or_part = torch.clamp(X + Y, max=1)
    and_part = X * Y

    # Nz selects between the OR mechanism (Nz == 1) and the AND mechanism (Nz == 0).
    Z = pyro.sample('Z', dist.Delta(Nz * or_part + (1 - Nz) * and_part))
    return X, Y, Z