## Week 4 Coding Tasks

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd

First, re-run the data preparation code from the past two weeks to rebuild the `physicians` DataFrame.

In [None]:
# Physician counts: load the file and keep only the Tennessee rows.
physicians = pd.read_csv('../data/primary_care_physicians.csv')
physicians = physicians.loc[physicians['state'] == 'Tennessee']

# Bring in each county's `population` and `urban` columns, matching on FIPS
# (the only column the two selections share).
population = pd.read_csv('../data/population_by_county.csv')
physicians = physicians[['FIPS', 'county', 'primary_care_physicians']].merge(
    population[['FIPS', 'population', 'urban']],
    on = 'FIPS',
)

# Number of residents for every primary care physician in the county.
physicians['residents_per_pcp'] = physicians['population'].div(physicians['primary_care_physicians'])

# Bucket each county by that ratio. The three ranges do not overlap:
#   below 1500              -> adequate
#   1500 up to (excl.) 3500 -> moderately inadequate
#   3500 and above          -> low inadequate
# Rows matching none of the conditions are left without a category.
ratio = physicians['residents_per_pcp']
for in_bucket, label in [
    (ratio < 1500, 'adequate'),
    ((ratio >= 1500) & (ratio < 3500), 'moderately inadequate'),
    (ratio >= 3500, 'low inadequate'),
]:
    physicians.loc[in_bucket, 'shadac_category'] = label

# Unemployment data: keep only the text before ' County' in `Name` so that it
# can be matched against the `county` column of `physicians`.
unemployment = pd.read_csv('../data/tn_unemployment.csv')
unemployment['Name'] = unemployment['Name'].str.split(' County').str[0]

# Attach the unemployment rate, matching on the county name.
physicians = physicians.merge(
    unemployment[['Name', 'unemployment_rate']].rename(columns = {'Name': 'county'}),
    on = 'county',
)

# Physicians per 100,000 residents.
physicians['pcp_per_100k'] = physicians['primary_care_physicians'] / physicians['population'] * 100000

In [None]:
# Read the county shapefile (tncounty.shp) with geopandas; `counties` holds the
# geometries that the choropleths below are drawn from.
counties = gpd.read_file('../data/county/tncounty.shp')

In [None]:
# Peek at the first two rows of `counties` to see its columns before merging.
counties.head(2)

In [None]:
# Peek at the first two rows of `physicians` to see its columns before merging.
physicians.head(2)

In [None]:
# Attribute join: attach the physician/unemployment columns to the county
# geometries. `county` is renamed to `NAME` first so it lines up with the
# shapefile's column; the join keys are whatever columns the two frames share.
physicians_by_name = physicians.rename(columns = {'county': 'NAME'})
counties = counties.merge(physicians_by_name)

 Coding tasks:
  1. Create a choropleth showing the unemployment rate for each Tennessee county.

In [None]:
fig, ax = plt.subplots(figsize = (16, 4))

# Shade every county by its unemployment rate, with the values grouped into
# classes by the NaturalBreaks scheme and drawn with the 'Blues' colormap.
counties.plot(
    ax = ax,
    column = 'unemployment_rate',
    scheme = "NaturalBreaks",
    cmap = 'Blues',
    edgecolor = 'black',
    legend = True,
)

# Re-anchor the legend that geopandas created at the point (1, 0.5).
ax.get_legend().set_bbox_to_anchor((1, 0.5))

# Title the map and hide the axis frame and ticks.
ax.set_title('Unemployment Rate by County, 2018', fontsize = 18)
ax.set_axis_off();

The legend can be customized further, for example by rewriting the class labels and redrawing the markers.

In [None]:
from matplotlib.lines import Line2D

fig, ax = plt.subplots(figsize=(16,4))

# Shade every county by its unemployment rate, grouped into NaturalBreaks classes.
counties.plot(column = 'unemployment_rate', 
              edgecolor = 'black', 
              legend = True,
              cmap = 'Blues',
              scheme="NaturalBreaks",
              ax = ax)

# Add a title
ax.set_title('Unemployment Rate by County, 2018', fontsize = 18)

ax.axis('off')

# ADVANCED: Adjusting format of legend
# The automatically generated legend is only used as a source of the class
# bounds and marker styling; it is replaced by a rebuilt legend below, so there
# is no point positioning it first.
leg = ax.get_legend()
legend_texts = leg.get_texts()
n = len(legend_texts)

labels = []
for i, lbl in enumerate(legend_texts):
    # Each label is expected to hold the two class bounds separated by a comma,
    # e.g. "2.60, 3.25" (optionally wrapped in interval brackets). Split on the
    # comma and strip only padding/brackets, so that no digit of either bound is
    # lost (slicing off the last character would turn 3.25 into 3.2).
    lower, upper = (float(bound.strip(' []()')) for bound in lbl.get_text().split(','))
    if i == 0:
        # First class: open-ended at the bottom.
        new_text = "Below " + "{:,.2f}".format(upper + .01)
    elif i == n - 1:
        # Last class: open-ended at the top.
        new_text = "Above " + "{:,.2f}".format(lower)
    else:
        # Middle classes: nudge the lower bound up by .01 so adjacent ranges do
        # not repeat the same number (assumes two-decimal class bounds).
        new_text = "{:,.2f}".format(lower + .01) + " - " + "{:,.2f}".format(upper)

    labels.append(new_text)

# Rebuild one round marker per class, reusing the size and fill colour of the
# original legend entries and adding a thin black outline.
markers = [
    Line2D([0], [0], marker = 'o',
           markersize = line.get_markersize(),
           color = line.get_markerfacecolor(),
           linestyle = 'None',
           markeredgecolor = 'black',
           markeredgewidth = 1)
    for line in leg.get_lines()
]

# Replace the legend with the reformatted one and position it.
leg = ax.legend(markers, labels, fontsize = 12)
leg.set_bbox_to_anchor((1, 0.5));

  2. Create a choropleth showing the SHADAC category for each county (adequate, low inadequate, and moderately inadequate).

In [None]:
fig, ax = plt.subplots(figsize = (16, 4))

# Colour every county by its SHADAC category.
counties.plot(
    ax = ax,
    column = 'shadac_category',
    edgecolor = 'black',
    legend = True,
)

# Re-anchor the legend that geopandas created at the point (1, 0.5).
ax.get_legend().set_bbox_to_anchor((1, 0.5))

# Title the map and hide the axis frame and ticks.
ax.set_title('SHADAC Category by County, 2018', fontsize = 18)
ax.set_axis_off();

Warning: Tricky Code

Putting the legend categories in a meaningful order is much harder, but it can be done by following the approach described at https://stackoverflow.com/questions/54370302/changing-the-order-of-entries-for-a-geopandas-choropleth-map-legend:

In [None]:
# Desired legend order for the SHADAC categories, encoded as integers so that
# the categorical plot lists them in this order.
shad_to_int = {'adequate': 1,
               'moderately inadequate': 2,
               'low inadequate': 3}

# Reverse lookup keyed by the text form of each code ('1', '2', '3'), which is
# how the codes appear in the legend. Derived from shad_to_int so the two
# mappings cannot drift apart.
int_to_shad = {str(code): category for category, code in shad_to_int.items()}

counties['shadac_num'] = counties['shadac_category'].map(shad_to_int)

fig, ax = plt.subplots(figsize=(16,4))

counties.plot(column = 'shadac_num',
              categorical = True,
              edgecolor = 'black', 
              legend = True,
              ax = ax)

# Swap the numeric codes in the legend back to the category names
leg = ax.get_legend()
for txt in leg.get_texts():
    code = txt.get_text()
    try:
        # If any county has no category the column is float and the legend
        # shows '1.0' instead of '1'; normalise before looking it up.
        code = str(int(float(code)))
    except (ValueError, OverflowError):
        pass
    # Leave a label untouched when it is not a known code instead of blanking it.
    txt.set_text(int_to_shad.get(code, txt.get_text()))

# Position the legend
leg.set_bbox_to_anchor((1, 0.5))

# Add a title
ax.set_title('SHADAC Category by County, 2018', fontsize = 18)

ax.axis('off');