# Create Features for edges (h2n) and nodes (N)

In [57]:
import pandas as pd

# Edge (h2n) features: one [covid_rate, social_grade] pair per county.
l_h2n = [
    [0.3, 0.7],
    [0.2, 0.6],
    [0.1, 0.5],
    [0.4, 0.4],
    [0.5, 0.8],
]
h2n_counties = ['Buffalo', 'Hand', 'Hyde', 'Lyman', 'Faulk']

# Keying each row by its county and using orient='index' turns the keys into
# the row labels; `columns` names the two features.
h2n = pd.DataFrame.from_dict(
    dict(zip(h2n_counties, l_h2n)),
    orient='index',
    columns=['covid_rate', 'social_grade'],
)

# Node (N) features: one [medical_preparedness, county_health, Social_status]
# triple per county.
l_N = [
    [0.3, 0.8, 0.4],
    [0.3, 0.8, 0.4],
    [0.3, 0.8, 0.4],
]
N_counties = ['Buffalo', 'Hand', 'Hyde']

N = pd.DataFrame.from_dict(
    dict(zip(N_counties, l_N)),
    orient='index',
    columns=['medical_preparedness', 'county_health', 'Social_status'],
)


# Probability formulas for P(h2n) and P(N)

#### Starting with the simplistic view of multiplying the likelihoods

In [58]:
# Simplistic combined likelihood: multiply the feature columns together,
# row by row, and store the result as a new column on each table.
h2n['ph2n'] = h2n['covid_rate'].mul(h2n['social_grade'])
N['pN'] = (
    N['medical_preparedness']
    .mul(N['county_health'])
    .mul(N['Social_status'])
)

# Defining the list of neighbours 

In [17]:
# Each key is a county; its list holds the names of that county's neighbours.
l_neighbours = {
    'Buffalo': ['Hand', 'Hyde', 'Lyman'],
    'Hand': ['Faulk', 'Hyde', 'Buffalo'],
    'Hyde': ['Faulk', 'Hand', 'Buffalo'],
}

# Column-wise layout: one column per county, one row per neighbour slot.
neighbours = pd.DataFrame(l_neighbours)

# Collecting the P(H-N) probabilities of neighbours spreading covid

In [19]:
# Plain-dict view of h2n: column name -> {county -> value}.
h2n_dict = h2n.to_dict()
ph2n_by_county = h2n_dict['ph2n']

# For every county column, add a companion "<county>ph2n" column holding the
# ph2n value looked up for each listed neighbour. assign() returns a new frame,
# so `neighbours` itself is left unchanged; names missing from the lookup map
# to NaN.
neighboursprob = neighbours.assign(
    **{
        name + 'ph2n': members.map(ph2n_by_county)
        for name, members in neighbours.items()
    }
)

In [20]:
# Display the neighbour table together with the "<county>ph2n" columns mapped
# from h2n for each listed neighbour.
neighboursprob

Unnamed: 0,Buffalo,Hand,Hyde,Buffaloph2n,Handph2n,Hydeph2n
0,Hand,Faulk,Faulk,0.12,0.4,0.4
1,Hyde,Hyde,Hand,0.05,0.05,0.12
2,Lyman,Buffalo,Buffalo,0.16,0.21,0.21


# Probability of a county getting covid
#### P(n) = P(N) * P(H-N)
#### P(N) = P(N.medical_preparedness) * P(N.county_health) * P(N.social_status)

In [62]:
# pn[county] = (sum of the ph2n values of the county's neighbours) * (the
# county's own pN).
pn = {}
for county in neighbours.columns:
    # neighboursprob stores the neighbours' ph2n values in "<county>ph2n".
    countyph2n = county + "ph2n"
    ph2nsum = neighboursprob[countyph2n].sum()
    print(county)
    # Look up pN for the county being processed rather than a fixed row, so
    # each county is scaled by its own node probability. A county missing from
    # N's index raises KeyError here.
    pn[county] = ph2nsum * N.at[county, "pN"]
    

Buffalo
Hand
Hyde


# We sort pn and find the counties with the highest probabilities

In [71]:
# One row per county, with its pn value in column 0.
pn_table = pd.DataFrame.from_dict(pn, orient='index')

# Show the counties ordered from highest to lowest pn.
pn_table.sort_values(by=0, ascending=False)

Unnamed: 0,0
Hyde,0.07008
Hand,0.06336
Buffalo,0.03168
