### Geopandas Example

Choropleth and points

In [None]:
import pandas as pd
import requests
import os
import matplotlib.pyplot as plt
import geopandas
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [None]:
# Local data directory; machine-specific, so edit before running elsewhere.
path = '/Users/Sarah/Documents/GitHub/Sarah-Discussion-Notebooks/Data'
# Spreadsheet loaded first (passed to read_data below).
filename = 'College_Graduation_or_More.xlsx'
# Relative path to the community-area shapefile; it is resolved against the
# working directory, which the os.chdir(path) call below sets.
chi_comm_area_boundries = os.path.join('Boundaries - Community Areas (current)','geo_export_8d7b31fe-6ff9-40e2-b9c6-91236f0af785.shp')
# Value matched against the 'Year' column in parse_chi_health_atlas_df.
year = '2012-2016'
# Make the data directory the working directory for the rest of the notebook.
os.chdir(path)
        
def read_data(path, filename):
    """Load a tabular data file from ``path`` into a pandas DataFrame.

    The reader is picked from the file extension, compared
    case-insensitively: ``.csv`` uses ``pd.read_csv`` and ``.xls`` /
    ``.xlsx`` use ``pd.read_excel``.

    Args:
        path: Directory that contains the file.
        filename: Name of the file inside ``path``.

    Returns:
        A pandas DataFrame with the file's contents.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    full_path = os.path.join(path, filename)
    lowered = filename.lower()

    if lowered.endswith('.csv'):
        return pd.read_csv(full_path)
    if lowered.endswith(('.xls', '.xlsx')):
        return pd.read_excel(full_path)

    # Fail loudly: printing and then falling through to `return df` would
    # only surface as an unrelated UnboundLocalError.
    raise ValueError(
        'unexpected file type in read_data: {!r}'.format(filename))


def parse_chi_health_atlas_df(df, date_range):
    """Keep one period's rows and the three columns used for mapping.

    Rows are kept where the 'Year' column equals ``date_range``; only the
    'Geo_Group', 'Geo_ID' and 'Percent' columns are returned.
    """
    wanted_columns = ['Geo_Group', 'Geo_ID', 'Percent']
    in_period = df['Year'] == date_range
    return df.loc[in_period, wanted_columns]

  
# Load the spreadsheet, then keep only the rows for `year` and the
# Geo_Group / Geo_ID / Percent columns.
df = read_data(path, filename)
df = parse_chi_health_atlas_df(df, year)

In [None]:
df.head()

In [None]:
# Read the community-area shapefile and draw its geometries as a quick check.
df_shp = geopandas.read_file(chi_comm_area_boundries)
df_shp.plot();
# Cite: https://geopandas.org/mapping.html

In [None]:
df_shp.head(3)

In [None]:
# First merge attempt, keyed on the shapefile's 'area_numbe' column.
# NOTE(review): a later cell casts 'area_num_1' to int and merges on that
# instead, so the key dtypes presumably do not match here -- confirm.
use_df = df.merge(df_shp, left_on = 'Geo_ID', right_on = 'area_numbe', how = 'inner')

In [None]:
df_shp.dtypes

In [None]:
df.dtypes

In [None]:
# Cast the shapefile's 'area_num_1' key to int before merging on it.
# NOTE(review): presumably needed so it matches the dtype of df['Geo_ID'] -- confirm.
df_shp['area_num_1'] = df_shp['area_num_1'].astype(int)
#Cite: https://datatofish.com/string-to-integer-dataframe/
# Show the dtypes again to check the cast.
df_shp.dtypes

In [None]:
df_shp.head(3)

In [None]:
# Inner join: keeps only rows whose Geo_ID matches a shapefile 'area_num_1'.
use_df = df.merge(df_shp, left_on = 'Geo_ID', right_on = 'area_num_1', how = 'inner')

In [None]:
use_df.head(3)

In [None]:
use_df.plot();

In [None]:
# Wrap the merged table in a GeoDataFrame so .plot() draws the geometries.
# NOTE(review): presumably the merge result is a plain DataFrame because the
# left-hand side came from pandas -- confirm.
use_gdf = geopandas.GeoDataFrame(use_df)

In [None]:
use_gdf.plot();

In [None]:
use_gdf.plot(column = 'Percent');

In [None]:
# Choropleth of 'Percent' with the legend drawn in its own axes.
fig, ax = plt.subplots(figsize=(8,8))

# Add a separate axes to the right of the map (5% of its width) and pass it
# to plot() as `cax` so the legend is drawn there.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

ax = use_gdf.plot(ax=ax, column='Percent', legend=True, cax=cax)

# Hide the coordinate axes.
ax.axis('off')
ax.set_title('Percent of Community Area Residents with a College Degree');

In [None]:
# Same choropleth, now with an explicit colormap and grey polygon edges.
fig, ax = plt.subplots(figsize=(8,8))

# Separate axes to the right of the map, passed to plot() as `cax`.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

ax = use_gdf.plot(ax=ax, column='Percent', cmap = 'YlGn', edgecolor='grey',
                  legend=True, cax=cax)

# Hide the coordinate axes.
ax.axis('off')
ax.set_title('Percent of Community Area Residents with a College Degree');

# Cite: https://geopandas.org/mapping.html
# Cite: https://matplotlib.org/tutorials/colors/colormaps.html

In [None]:
# Hand-entered table of three universities with longitude/latitude values,
# later turned into point geometries.
uni_df = pd.DataFrame(
        {'University': ['University of Chicago', 'Loyola University', 'University of Illinois -Chicago'],
         'Longitude': [-87.599724, -87.6578, -87.6496],
         'Latitude': [41.789722, 41.9999, 41.8696]})

In [None]:
# Build point geometries from the Longitude (x) and Latitude (y) columns.
# No `crs` is passed here; a later cell rebuilds uni_gdf with one.
uni_gdf = geopandas.GeoDataFrame(uni_df, 
                                 geometry=geopandas.points_from_xy(uni_df['Longitude'], uni_df['Latitude'])) 

In [None]:
uni_gdf.plot()

In [None]:
print(uni_gdf.crs)

In [None]:
df_shp.crs

In [None]:
# Rebuild the point GeoDataFrame, this time declaring its CRS explicitly.
# NOTE(review): presumably chosen to match df_shp.crs shown above -- confirm.
uni_gdf = geopandas.GeoDataFrame(uni_df, 
                                 geometry=geopandas.points_from_xy(uni_df['Longitude'], uni_df['Latitude']), 
                                 crs = 'epsg:4326')
# Cite: https://geopandas.org/projections.html

In [None]:
# Choropleth with the university points drawn on top.
fig, ax = plt.subplots(figsize=(8,8))

# Separate axes to the right of the map, passed to plot() as `cax`.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

ax = use_gdf.plot(ax=ax, column='Percent', cmap = 'YlGn', edgecolor='grey',
                  legend=True, cax=cax)


# Passing the same `ax` overlays the points on the existing map.
uni_gdf.plot(ax=ax, color='midnightblue')


# Hide the coordinate axes.
ax.axis('off')
ax.set_title('Percent of Community Area Residents with a College Degree')

# Cite: https://geopandas.org/mapping.html
# Cite: https://matplotlib.org/tutorials/colors/colormaps.html
# Cite: https://matplotlib.org/3.1.0/gallery/color/named_colors.html

In [None]:
# Plot the university points alone and label each one with its name.
fig, ax = plt.subplots(figsize=(8,8))


ax = uni_gdf.plot(ax=ax, color='midnightblue')


x = 'Longitude'
y = 'Latitude'

# Pair each name with its coordinates by position. Looking the values up as
# uni_gdf[x][i] treats the enumerate counter as an index *label*, which is
# only correct while the index happens to be 0..n-1.
for txt, x_coord, y_coord in zip(uni_gdf['University'], uni_gdf[x], uni_gdf[y]):
    ax.annotate(txt, (x_coord, y_coord))

# Cite: https://stackoverflow.com/questions/14432557/matplotlib-scatter-plot-with-different-text-at-each-data-point

In [None]:
# Choropleth, university points and their name labels on a single map.
fig, ax = plt.subplots(figsize=(8,8))

# Separate axes to the right of the map, passed to plot() as `cax`.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.5)

ax = use_gdf.plot(ax=ax, column='Percent', cmap = 'YlGn', edgecolor='grey',
                  legend=True, cax=cax)


# Passing the same `ax` overlays the points on the existing map.
ax = uni_gdf.plot(ax=ax, color='midnightblue')


# Pair each name with its coordinates by position rather than looking them
# up with the enumerate counter, which would be read as an index *label*.
for txt, x_coord, y_coord in zip(uni_gdf['University'],
                                 uni_gdf['Longitude'],
                                 uni_gdf['Latitude']):
    ax.annotate(txt, (x_coord, y_coord))


# Hide the coordinate axes.
ax.axis('off')
ax.set_title('Percent of Community Area Residents with a College Degree')

# Cite: https://geopandas.org/mapping.html
# Cite: https://matplotlib.org/tutorials/colors/colormaps.html
# Cite: https://matplotlib.org/3.1.0/gallery/color/named_colors.html
# Cite: https://stackoverflow.com/questions/14432557/matplotlib-scatter-plot-with-different-text-at-each-data-point

### Let's make it a function

In [None]:
def choropleth_and_point(gdf, pt_gdf, choro_color, pt_color, boundry_color, title = '',
                         column='Percent', label_col='University'):
    '''
    Draw a choropleth of polygons with labelled points on top.

    Input is
    gdf: GeoDataFrame with boundary polygons (community areas) and the data
        used to color them
    pt_gdf: GeoDataFrame with point geometries and the names to annotate
    choro_color: matplotlib colormap name for the choropleth
    pt_color: matplotlib single color for the points
    boundry_color: matplotlib single color for the polygon boundaries
        (community areas)
    title: text placed above the map (default: no title)
    column: name of the gdf column that drives the polygon color
        (default 'Percent')
    label_col: name of the pt_gdf column holding the annotation text
        (default 'University')

    Returns None; the figure is drawn through matplotlib.
    '''
    fig, ax = plt.subplots(figsize=(8,8))

    # Separate axes to the right of the map, passed to plot() as `cax` so the
    # legend is drawn there.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.5)

    ax = gdf.plot(ax=ax, column=column, cmap=choro_color,
                  edgecolor=boundry_color, legend=True, cax=cax)

    # Passing the same `ax` overlays the points on the choropleth.
    ax = pt_gdf.plot(ax=ax, color=pt_color)

    # Anchor each label on the geometry that was actually plotted, paired by
    # position. This avoids treating a loop counter as an index label and
    # does not require separate 'Longitude'/'Latitude' columns.
    for label, point in zip(pt_gdf[label_col], pt_gdf.geometry):
        if point is None or point.is_empty:
            # Nothing is drawn for a missing point, so there is nothing to label.
            continue
        ax.annotate(label, (point.x, point.y))

    # Hide the coordinate axes.
    ax.axis('off')
    ax.set_title(title)

    # Cite: https://geopandas.org/mapping.html
    # Cite: https://matplotlib.org/tutorials/colors/colormaps.html
    # Cite: https://matplotlib.org/3.1.0/gallery/color/named_colors.html
    # Cite: https://stackoverflow.com/questions/14432557/matplotlib-scatter-plot-with-different-text-at-each-data-point



In [None]:
choropleth_and_point(use_gdf, uni_gdf, 'YlGn', 'midnightblue', 'grey', 'Percent of Community Area Residents with a College Degree')

In [None]:
choropleth_and_point(use_gdf, uni_gdf, 'inferno', 'black', 'maroon')

In [None]:
choropleth_and_point(use_gdf, uni_gdf, 'inferno_r', 'whitesmoke', 'maroon')
# Cite: https://stackoverflow.com/questions/3279560/reverse-colormap-in-matplotlib 

### Let's look at a new dataset

In [None]:
# Repeat the load -> filter -> merge -> plot pipeline for a second spreadsheet.
# Note this rebinds the notebook-level filename, df, use_df and use_gdf.
filename = 'Household_poverty.xlsx'
df = read_data(path, filename)
df = parse_chi_health_atlas_df(df, year)
use_df = df.merge(df_shp, left_on = 'Geo_ID', right_on = 'area_num_1', how = 'inner')
use_gdf = geopandas.GeoDataFrame(use_df)
# No title argument is passed, so the function's default empty title is used.
choropleth_and_point(use_gdf, uni_gdf, 'YlGn', 'midnightblue', 'grey')

In [None]:
# (spreadsheet filename, choropleth colormap, plot title) for each map to draw.
filenames = [('College_Graduation_or_More.xlsx','YlGn', 'Percent of Community Area Residents with a College Degree'), 
             ('Household_poverty.xlsx', 'PuBu', 'Percent of Households in Poverty')]

In [None]:
def do_the_thing(path, filename, color, title):
    """Load one spreadsheet and draw its choropleth with the university points.

    Reads ``filename`` from ``path``, filters it to the notebook-level
    ``year``, inner-joins it to the notebook-level ``df_shp`` on
    Geo_ID == area_num_1, and plots the result with ``uni_gdf`` overlaid.

    ``color`` is the choropleth colormap and ``title`` the plot title.
    """
    atlas = parse_chi_health_atlas_df(read_data(path, filename), year)
    merged = atlas.merge(
        df_shp,
        left_on='Geo_ID',
        right_on='area_num_1',
        how='inner',
    )
    choropleth_and_point(
        geopandas.GeoDataFrame(merged),
        uni_gdf,
        color,
        'midnightblue',
        'grey',
        title,
    )

# Draw one map per (filename, colormap, title) entry.
# Note: the loop variables rebind the notebook-level `filename`.
for filename, color, title in filenames:
    do_the_thing(path, filename, color, title)

### More to do:
* Write a function for parsing the shapefile (or put this step in another function)
* Clean up my full script, removing intermediate output and bundling into functions where still needed
* Maybe find a dataframe with all of the universities in Chicago and plot them all
* Change my title to be more descriptive
* Give my choropleth legend a title
* Maybe clean up my annotation (e.g. better spacing)
* etc