# Visualizations

In [1]:
# Import modules, define directories

import pandas as pd
from pyhere import here
from scipy.stats import rankdata
import geopandas as gpd
from matplotlib.colors import ListedColormap    
import matplotlib.pyplot as plt

# Project directories used throughout the notebook, each built with `here(...)`.
path = dict(
    dscr=here("data", "scratch"),                         # data/scratch
    drpub=here("data", "raw", "public"),                  # data/raw/public
    drpriv=here("data", "raw", "private"),                # data/raw/private
    ddpub=here("data", "derived", "public", "version1"),  # data/derived/public/version1
    ddpriv=here("data", "derived", "private"),            # data/derived/private
    rfig=here("results", "figures"),                      # results/figures
    roth=here("results", "other"),                        # results/other
    rtab=here("results", "tables"),                       # results/tables
)

## Figure 2

I can't find any code in their repo that creates this figure, although there is code that processes the underlying data.

Note: the input data for this figure match between my results and the data they provided. However, if any observations are supposed to be dropped before building the figure, I have not done that.

In [2]:
# SoVI input variables, one row per variable:
#   [column name, expected sign of the variable's contribution ('pos' / 'neg'),
#    'person' or 'hu' tag, human-readable label]
# The row order must match the row order of the variable-contribution table,
# because later cells compare the two position by position.
# NOTE(review): the 'person' / 'hu' tag is not read anywhere in this notebook;
# presumably it distinguishes person-based from housing-unit-based measures — confirm.
input_names = [
    ['MEDAGE_ACS', 'pos', 'person', 'Median Age'],
    ['BLACK_ACS', 'pos', 'person', 'Pop African-American (%)'],
    ['QNATAM_ACS', 'pos', 'person', 'Pop Native American (%)'],
    ['QASIAN_ACS', 'pos', 'person', 'Pop Asian (%)'],
    ['QHISP_ACS', 'pos', 'person', 'Pop Hispanic (%)'],
    ['QAGEDEP_ACS', 'pos', 'person', 'Age Dependency (%)'],
    ['QPUNIT_ACS', 'pos', 'person', 'Persons Per Housing Unit'],
    ['PRENTER_ACS', 'pos', 'hu', 'Rental Housing (%)'],
    ['QNRRES_ACS', 'pos', 'person', 'Nursing Home Residents (%)'],
    ['QFEMALE_ACS', 'pos', 'person', 'Pop Female (%)'],
    ['QFHH_ACS', 'pos', 'hu', 'Female-Headed Households (%)'],
    ['QUNOCCHU_ACS', 'pos', 'hu', 'Vacant Housing (%)'],
    ['PERCAP_ALT', 'neg', 'person', 'Per-Capita Income'],
    ['QESL_ALT', 'pos', 'person', 'English as Second Language (%)'],
    ['QCVLUN', 'pos', 'person', 'Unemployment (%)'],
    ['QPOVTY', 'pos', 'person', 'Poverty (%)'],
    ['QMOHO', 'pos', 'hu', 'Mobile Homes (%)'],
    ['QED12LES_ALT', 'pos', 'person', 'Adults Completed <Grade 12 (%)'],
    ['QFEMLBR', 'pos', 'person', 'Female Employment (%)'],
    ['QEXTRCT_ALT', 'pos', 'person', 'Extractive Sector Employment (%)'],
    ['QSERV_ALT', 'pos', 'person', 'Service Sector Employment (%)'],
    ['QSSBEN', 'pos', 'hu', 'Social Security Income (%)'],
    ['QNOAUTO_ALT', 'pos', 'hu', 'No Automobile (%)'],
    ['QFAM', 'neg', 'person', 'Children in Married Families (%)'],
    ['QRICH200K', 'neg', 'hu', 'Annual Income >$200K (%)'],
    ['MDGRENT_ALT', 'neg', 'hu', 'Median Rent'],
    ['MHSEVAL_ALT', 'neg', 'hu', 'Median Home Value'],
    ['POPDENS', 'pos', 'person', 'Population Density'],
]

In [3]:
# Load the variable-contribution table and use its first (unnamed) column as
# the row index. The index keeps the name "Unnamed: 0", which later cells rely on.
varContrib = (
    pd.read_csv(here(path["ddpub"], "variable_contributions.csv"))
    .set_index("Unnamed: 0")
)

In [4]:
# Determine all reversals from expected sign.
# `reversals` has the same shape as varContrib: 1 where a variable's contribution
# has the opposite sign to the one listed in input_names, 0 where it agrees.

# The comparison is positional, so the two tables must list the same variables
# in the same order. Fail loudly instead of leaving a half-filled table behind.
expected_names = [row[0] for row in input_names]
if list(varContrib.index) != expected_names:
    raise ValueError(
        "Rows of varContrib do not line up with input_names; "
        "cannot compare contributions against expected signs."
    )

# +1 for a variable expected to contribute positively, -1 for negatively
expected_sign = pd.Series(
    [{"pos": 1.0, "neg": -1.0}[row[1]] for row in input_names],
    index=varContrib.index,
)

# Cells that are neither positive nor negative (exactly zero, or missing)
# have no sign to compare; they keep their original value, as before.
no_sign = ~((varContrib > 0) | (varContrib < 0))
if no_sign.to_numpy().any():
    print("WATCH OUT THERE'S A ZERO BRO")

# contribution * expected sign is negative exactly when the two signs disagree
reversals = (
    varContrib.mul(expected_sign, axis=0)
    .lt(0)
    .astype(float)
    .where(~no_sign, varContrib)
)

In [5]:
reversals

Unnamed: 0_level_0,USA,FEMA_1,FEMA_2,FEMA_3,FEMA_4,FEMA_5,FEMA_6,FEMA_7,FEMA_8,FEMA_9,...,g23g33g25,g36,g51,g13,g17,g48,g29,g46,g06,g16
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MEDAGE_ACS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
BLACK_ACS,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
QNATAM_ACS,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0
QASIAN_ACS,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
QHISP_ACS,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
QAGEDEP_ACS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
QPUNIT_ACS,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
PRENTER_ACS,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,...,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
QNRRES_ACS,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
QFEMALE_ACS,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0


In [6]:
# Rank variables within each model (column) by the magnitude of their contribution.
# rankdata ranks ascending (smallest magnitude = 1), so flip the scale to make
# rank 1 the largest contribution. The flip uses the actual number of variables
# rather than a hard-coded count.
n_variables = len(varContrib.index)
rankContrib = varContrib.abs().apply(rankdata, axis=0, method='average')
rankContrib = (n_variables - rankContrib) + 1

In [7]:
# Order the variables by their rank in the USA model. Sorting keeps the
# variable names as the row index (still named "Unnamed: 0").
rankContrib = rankContrib.sort_values("USA", ascending=True)

In [8]:
# Per-variable summary of the ranks across all models (one row per variable):
# lowest rank, highest rank, their difference, and the mean rank.
summary_stats = (
    pd.DataFrame({
        "min": rankContrib.min(axis=1),
        "max": rankContrib.max(axis=1),
    })
    .assign(
        range=lambda ranks: ranks["max"] - ranks["min"],
        avg=rankContrib.mean(axis=1),
    )
)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  summary_stats = pd.DataFrame( {"min": np.int(rankContrib.min(axis = 1)),


TypeError: cannot convert the series to <class 'int'>

In [None]:
def pos_neg(x):
    """Label a number by its sign.

    Returns "pos" when ``x`` is strictly greater than zero and "neg" for
    everything else, which includes zero and NaN.
    """
    return "pos" if x > 0 else "neg"

# Sign label ("pos" / "neg") of each variable's contribution in the USA model
usa = varContrib["USA"].map(pos_neg)

In [None]:
# For each variable: how many models show a sign reversal, next to the
# expected sign taken from input_names (rows are matched by position).
reversal_sum = pd.DataFrame({
    "reversals": reversals.sum(axis=1),
    "expected": [expected for _, expected, *_ in input_names],
})

In [None]:
# Attach the reversal counts, expected signs and USA-model signs to the rank
# summary, matching rows on the variable-name index.
# Start from the four rank columns only, so that re-running this cell rebuilds
# the table instead of merging onto its own previous output (which would
# duplicate columns with _x / _y suffixes).
summary_stats = (
    summary_stats[["min", "max", "range", "avg"]]
    .merge(reversal_sum, left_index=True, right_index=True)
    .merge(usa, left_index=True, right_index=True)
)
summary_stats

The above table should match up to Figure 2, but there are a number of discrepancies in the reversals column.

## Figure 1

Their caption indicates HI is included in the FEMA region, but I don't think they actually include it in their analysis.

I can't find any code to create this map in their repo, so I'm writing my own.

Note: all 3 files read in here match in my analysis and their provided data

In [None]:
# Read the county geometries and prefix GEOID with "g" so it can be joined
# to the GEOID column of the rank table.
counties = gpd.read_file(here(path["ddpub"], "counties.gpkg"))
counties["GEOID"] = "g" + counties["GEOID"]

# The score tables (US_Sovi_Score.csv, FEMA_Region_Sovi_Score.csv,
# State_Sovi_Score.csv) are not loaded here; only the rank table is mapped.

# Keep only the rows whose GEOID starts with "g06" (the counties mapped as
# California below). A prefix test is used because the state code is the
# leading part of the GEOID, not an arbitrary substring.
rank = (
    pd.read_csv(here(path["ddpub"], "County_in_State_Rank.csv"))
    .loc[lambda table: table["GEOID"].str.startswith("g06")]
)
counties_rank = counties.merge(rank, on="GEOID", how="inner")

In [None]:
# Single-colour (light grey) colormap used to draw every county as a base layer.
# The colour is wrapped in a list because ListedColormap expects a sequence of
# colours; a bare string only works by accident of its length.
mycolor = ListedColormap(['#DBDBDB'])

In [None]:
# Create overarching plot: three rank maps (one per analysis extent) plus a
# map of how far each county's rank moves between the three analyses.
fig, ax = plt.subplots(1, 4, figsize=(20, 8))

for panel in ax:
    panel.axis('off')

# (rank column, panel title) for the three rank maps, left to right
rank_panels = [
    ("state_sovi_rank", "California Analysis"),
    ("fema_region_sovi_rank", "FEMA Region IX Analysis"),
    ("us_sovi_rank", "United States Analysis"),
]

for panel, (rank_col, title) in zip(ax, rank_panels):
    # Highlight ranks 1-5 and ranks above 53.
    # NOTE(review): 53 assumes ranks run 1-58, so "> 53" is the last five — confirm.
    top5_CA = counties_rank.loc[counties_rank[rank_col] < 6]
    bottom5_CA = counties_rank.loc[counties_rank[rank_col] > 53]

    panel.set_title(title)
    # Grey base layer first, then the highlighted counties on top of it
    counties_rank.plot(ax=panel, cmap=mycolor, edgecolor='black', linewidth=.1)
    for highlighted, shade in ((top5_CA, "Reds_r"), (bottom5_CA, "Blues")):
        highlighted.plot(ax=panel, column=rank_col, cmap=shade)
        # Label each highlighted county with its rank at the polygon centroid
        for _, county in highlighted.iterrows():
            panel.annotate(
                text=round(county[rank_col]),
                xy=county.geometry.centroid.coords[0],
                ha='center',
            )

# Create range rank map: spread between a county's best and worst rank
ax[3].set_title("Range of SoVI Rankings")
rank_cols = [rank_col for rank_col, _ in rank_panels]
counties_rank["min_rank"] = counties_rank[rank_cols].min(axis=1)
counties_rank["max_rank"] = counties_rank[rank_cols].max(axis=1)
counties_rank["range_rank"] = counties_rank["max_rank"] - counties_rank["min_rank"]
counties_rank.plot(ax=ax[3], column="range_rank", cmap="Reds", edgecolor='black', linewidth=.1,
                   scheme="User_Defined", legend=True,
                   classification_kwds=dict(bins=[5, 15, 25, 35, 45]))

# tight_layout only accounts for what exists when it is called, so run it
# after the titles, maps and legend have been added.
fig.tight_layout();

In [None]:
# Reference for the labelling calls used in the maps: matplotlib's
# Axes.annotate (text, xy, ha, ...). The interactive `?ax.annotate` lookup that
# used to live here was removed: `ax` is an array of Axes at this point, so the
# lookup resolves nothing on a top-to-bottom run.

In [None]:
import matplotlib.patheffects as pe

In [None]:
# Stand-alone map of the state-level ranks. Labels are drawn with ax.text and a
# white outline so the black numbers stay readable on dark fills.
state_rank = counties_rank["state_sovi_rank"]
top5_CA = counties_rank.loc[state_rank < 6]
bottom5_CA = counties_rank.loc[state_rank > 53]

fig, ax = plt.subplots(figsize=(6, 6))
counties_rank.plot(ax=ax, cmap=mycolor, edgecolor='black', linewidth=.1)
for highlighted, shade in ((top5_CA, "Reds_r"), (bottom5_CA, "Blues")):
    highlighted.plot(ax=ax, column="state_sovi_rank", cmap=shade)
    # One rank label per highlighted county, placed at the polygon centroid
    for _, county in highlighted.iterrows():
        label_x, label_y = county.geometry.centroid.coords[0][:2]
        ax.text(
            x=label_x,
            y=label_y,
            s=round(county['state_sovi_rank']),
            color='black',
            ha='center',
            path_effects=[pe.withStroke(linewidth=1.5, foreground="white")],
        )

Is 3 in a different place here?

In [None]:
# Stand-alone map of the FEMA-region ranks, labelled with ax.annotate.
fema_rank = counties_rank["fema_region_sovi_rank"]
top5_CA = counties_rank.loc[fema_rank < 6]
bottom5_CA = counties_rank.loc[fema_rank > 53]

fig, ax = plt.subplots(figsize=(6, 6))
counties_rank.plot(ax=ax, cmap=mycolor, edgecolor='black', linewidth=.1)
for highlighted, shade in ((top5_CA, "Reds_r"), (bottom5_CA, "Blues")):
    highlighted.plot(ax=ax, column="fema_region_sovi_rank", cmap=shade)
    # One rank label per highlighted county, placed at the polygon centroid
    for _, county in highlighted.iterrows():
        ax.annotate(
            text=round(county['fema_region_sovi_rank']),
            xy=county.geometry.centroid.coords[0],
            ha='center',
        )

Why did 4 and 5 switch places here?

In [None]:
# Stand-alone map of the national-model ranks, labelled with ax.annotate.
us_rank = counties_rank["us_sovi_rank"]
top5_CA = counties_rank.loc[us_rank < 6]
bottom5_CA = counties_rank.loc[us_rank > 53]

fig, ax = plt.subplots(figsize=(6, 6))
counties_rank.plot(ax=ax, cmap=mycolor, edgecolor='black', linewidth=.1)
for highlighted, shade in ((top5_CA, "Reds_r"), (bottom5_CA, "Blues")):
    highlighted.plot(ax=ax, column="us_sovi_rank", cmap=shade)
    # One rank label per highlighted county, placed at the polygon centroid
    for _, county in highlighted.iterrows():
        ax.annotate(
            text=round(county['us_sovi_rank']),
            xy=county.geometry.centroid.coords[0],
            ha='center',
        )

In [None]:
# Stand-alone map of the spread between each county's lowest and highest rank
# across the state, FEMA-region and national analyses.
rank_cols = ["state_sovi_rank", "fema_region_sovi_rank", "us_sovi_rank"]
ranks_by_model = counties_rank[rank_cols]
counties_rank["min_rank"] = ranks_by_model.min(axis=1)
counties_rank["max_rank"] = ranks_by_model.max(axis=1)
counties_rank["range_rank"] = counties_rank["max_rank"] - counties_rank["min_rank"]

fig, ax = plt.subplots(figsize=(6, 6))
# Classify the range into the fixed bins below and shade darker for larger spreads
counties_rank.plot(
    ax=ax,
    column="range_rank",
    cmap="Reds",
    edgecolor='black',
    linewidth=.1,
    scheme="User_Defined",
    legend=True,
    classification_kwds=dict(bins=[5, 15, 25, 35, 45]),
);