# Checking Results

In [1]:
import pandas as pd
from pyhere import here

# Directories holding the two sets of output files compared in this notebook
path = {}
path["ddpub"] = here("data", "derived", "public", "version1")           # files produced by our code
path["og_out"] = here("data", "raw", "public", "spielman", "output")    # files provided by Spielman et al

In [2]:
def check_it(file, rounder = False, nan_equal = False):
    '''
    Compare one of our output files against the file of the same name provided by Spielman et al.

    Our file is read from path["ddpub"] and theirs from path["og_out"]. The two tables are then
    compared cell by cell.

    Parameters
    ----------
    file : str
        Name of the CSV file; the same name is looked up in both directories.
    rounder : int or False, optional
        Number of decimal places both tables are rounded to before comparing. False or None
        (the default) skips this step. 0 is a valid value and rounds to whole numbers.
    nan_equal : bool, optional
        If True, a cell that is NaN in both tables counts as a match. With the default (False)
        NaN never equals NaN, so such cells are counted as mismatches.

    Returns
    -------
    pandas.Series or None
        None (after printing "All values match!") when every cell matches; otherwise the
        number of matching rows for each column.

    Side effects
    ------------
    The two tables and the boolean comparison table are stored in the globals rpl (ours),
    og (theirs) and test, so that later cells can inspect the rows that differ.
    '''
    global rpl
    global og
    global test

    rpl = pd.read_csv( here(path["ddpub"], file) )
    og = pd.read_csv( here(path["og_out"], file) )
    og = og.rename(columns = {"Geo_FIPS": "GEOID"})

    # SoVI scores are always compared at two decimal places.
    if "sovi" in rpl.columns and "sovi" in og.columns:
        rpl["sovi"] = rpl["sovi"].round(2)
        og["sovi"] = og["sovi"].round(2)

    # An "Unnamed: 0" column holds row labels; use it as the index rather than as data.
    if "Unnamed: 0" in rpl.columns:
        rpl = rpl.set_index("Unnamed: 0")
    if "Unnamed: 0" in og.columns:
        og = og.set_index("Unnamed: 0")

    # Put the columns of both tables in the same (alphabetical) order when they start differently.
    if og.columns[0] != rpl.columns[0]:
        og = og.reindex(sorted(og.columns), axis = 1)
        rpl = rpl.reindex(sorted(rpl.columns), axis = 1)

    # Identity checks, not `!= False`: 0 == False in Python, which would silently skip rounder = 0.
    if rounder is not False and rounder is not None:
        og = og.round(rounder)
        rpl = rpl.round(rounder)

    test = rpl.eq(og)

    if nan_equal:
        # Labels present in only one table are filled with False so they still count as mismatches.
        rpl_nan = rpl.isna().reindex(index = test.index, columns = test.columns, fill_value = False)
        og_nan = og.isna().reindex(index = test.index, columns = test.columns, fill_value = False)
        test = test | (rpl_nan & og_nan)

    # Require every compared cell to match, including rows that exist in only one of the tables.
    if test.all().all():
        print("All values match!")
        return None

    return test.sum()

### US SoVI Scores & Rankings

In [3]:
# Compare the national SoVI scores and ranks
check_it(file = "US_Sovi_Score.csv")

GEOID    3143
sovi     3143
rank     3141
dtype: int64

In [4]:
# Put their values (_x) next to ours (_y), then show only the rows whose rank did not match
merged = pd.merge(og, rpl, how = "inner", on = "GEOID")
merged.loc[~test["rank"], :]

Unnamed: 0,GEOID,sovi_x,rank_x,sovi_y,rank_y
1150,g22075,-4.53,2984.0,-4.53,2983.0
3120,g56001,-4.53,2983.0,-4.53,2984.0


It looks like these two counties have SoVI values so close together (identical when rounded to two decimal places) that their ranks were swapped.

### FEMA Region SoVI Scores & Rankings

In [5]:
# TODO: NaN never compares equal to NaN in check_it's test, so rows that are NaN in both files are counted as mismatches below.
# TODO: report how many records are discarded because their SoVI score is null.
# TODO: if a row's SoVI score is NaN, discard the row.

In [6]:
# Compare the FEMA-region SoVI scores and ranks
check_it(file = "FEMA_Region_Sovi_Score.csv")

GEOID          3143
sovi           3109
rank           3109
fema_region    3109
dtype: int64

In [7]:
# Put their values (_x) next to ours (_y), then keep every row where at least one of
# rank, sovi or fema_region failed to match
merged = pd.merge(og, rpl, how = "inner", on = "GEOID")
merged.loc[~test[["rank", "sovi", "fema_region"]].all(axis = 1)]

Unnamed: 0,GEOID,sovi_x,rank_x,fema_region_x,sovi_y,rank_y,fema_region_y
67,g02013,,,,,,
68,g02016,,,,,,
69,g02020,,,,,,
70,g02050,,,,,,
71,g02060,,,,,,
72,g02068,,,,,,
73,g02070,,,,,,
74,g02090,,,,,,
75,g02100,,,,,,
76,g02105,,,,,,


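Why is data missing from both our output and theirs? (Rows that are NaN in both files are counted as mismatches above because NaN never compares equal to NaN.)
These rows represent all counties and county equivalents in: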
- 15: Hawaii
- 02: Alaska

Checking their code, when they define the states in FEMA region IX, they do not include HI, and when they define the states in FEMA region X, they do not include AK.

### State SoVI Scores & Rankings

In [8]:
# Compare the state-level SoVI scores and ranks
check_it(file = "State_Sovi_Score.csv")

All values match!


### County in State Rank

In [9]:
# Compare the rank of each county within its state
check_it(file = "County_in_State_Rank.csv")

All values match!


### Variable Contributions

In [10]:
# Compare the variable contributions at three decimal places
check_it(file = "variable_contributions.csv", rounder = 3)

All values match!


### State FEMA US Rank Correlations

***REPORTED IN SPIELMAN ET AL'S PAPER AS TABLE 2*** 

One value is reported as 0.65 in their paper, whereas the output files give 0.68. All other values match their paper.

In [11]:
# Compare the rank correlations at fourteen decimal places
check_it(file = "state_fema_us_rank_correlations.csv", rounder = 14)

All values match!
