# Create Features for neighbour edges (H-N) and nodes (N)

### Currently we use neighbour (H) as simplified features for (H-N)
### We need to prepare the dataframes of features below:

In [83]:
import pandas as pd

# Placeholder H-N features: one row per county, columns covid_rate and social_grade.
l_h2n = [
    [0.3, 0.7],
    [0.2, 0.6],
    [0.1, 0.5],
    [0.4, 0.4],
    [0.5, 0.8],
]
h2n = pd.DataFrame(
    data=l_h2n,
    index=['Buffalo', 'Hand', 'Hyde', 'Lyman', 'Faulk'],
    columns=['covid_rate', 'social_grade'],
)

# Placeholder node (N) features; every row currently holds the same values.
l_N = [
    [0.3, 0.8, 0.4],
    [0.3, 0.8, 0.4],
    [0.3, 0.8, 0.4],
]
N = pd.DataFrame(
    data=l_N,
    index=['Buffalo', 'Hand', 'Hyde'],
    columns=['medical_preparedness', 'county_health', 'Social_status'],
)


# Probability formulas for P(H-N)  and P(N)

#### Starting with the simplistic view of multiplying likelihood

In [58]:
# P(H-N): product of the two neighbour-edge features.  ##tune here
h2n['ph2n'] = h2n['covid_rate'].mul(h2n['social_grade'])

# P(N): product of the three node features, multiplied left to right.  ##tune here
node_product = N['medical_preparedness'].mul(N['county_health'])
N['pN'] = node_product.mul(N['Social_status'])

# Defining the list of neighbours 
### We need to create a dictionary of neighbours for each county below:

In [85]:
def neighbour_table(adjacency):
    """Return a DataFrame with one column per county listing its neighbours.

    Args:
        adjacency: dict mapping a county name to the list of its neighbours.

    Returns:
        DataFrame whose columns are the dict keys (in insertion order) and
        whose rows are neighbour positions 0..k-1. Counties with fewer
        neighbours than the longest list are padded with NaN.
    """
    # Wrapping each list in a Series lets pandas align on position, so lists of
    # different lengths no longer raise ValueError as they do with from_dict.
    return pd.DataFrame(
        {county: pd.Series(names) for county, names in adjacency.items()}
    )


# County -> neighbouring counties. Lists may have different lengths.
l_neighbours = {
    'Buffalo': ['Hand', 'Hyde', 'Lyman'],
    'Hand': ['Faulk', 'Hyde', 'Buffalo'],
    'Hyde': ['Faulk', 'Hand', 'Buffalo'],
}
neighbours = neighbour_table(l_neighbours)


# Collecting the P(H-N) probabilities of neighbours spreading covid

In [19]:
# Dict view of h2n ({column: {county: value}}); only the 'ph2n' column is looked up here.
h2n_dict = h2n.to_dict()
ph2n_by_county = h2n_dict['ph2n']

# Copy of the neighbour table plus, for every county column, a '<county>ph2n'
# column holding the P(H-N) value of each neighbour listed in that column.
neighboursprob = neighbours.assign(
    **{
        name + 'ph2n': neighbours[name].map(ph2n_by_county)
        for name in neighbours.columns
    }
)

# Probability of a county getting covid
#### P(n) = P(N) * P(H-N), where P(H-N) is summed over the county's neighbours
#### P(N) = product(P(N.medical_preparedness), P(N.county_health), P(N.Social_status))

In [78]:
# Score each county: the sum of its neighbours' P(H-N) times the county's own P(N).
pn = {}
for name in neighbours.columns:
    neighbour_total = neighboursprob[name + "ph2n"].sum()  ##tune here
    print(name)
    pn[name] = neighbour_total * N.at[name, "pN"]  ##tune here
    

Buffalo
Hand
Hyde


# We sort pn and find the counties with the highest probabilities
### E.g. if we used features for Feb 2020, these probabilities would be the prediction for Mar 2020

In [80]:
# One row per county with a single column labelled 0; shown highest score first.
pn_table = pd.DataFrame.from_dict(pn, orient='index')
pn_table.sort_values(by=0, ascending=False)

Unnamed: 0,0
Hyde,0.07008
Hand,0.06336
Buffalo,0.03168


# We need to get some ground truth to compare with

### The above uses example features from Feb 2020; we will need to compare the output with the Mar 2020 covid counts for each county