## Compare demographics

This notebook joins Census tracts to the entire service area and to the identified poor-service areas, then aggregates demographics across each area and compares the two.

You will need to first load Census data with [`Get Census data`](Get%20Census%20data.ipynb)

You will also need to collect Citi Bike data, then run [`Build dataset`](../Build%20dataset.ipynb), [`Build service measures`](Build%20service%20measures.ipynb), and [`Compute clusters of poor service`](Compute%20clusters%20of%20poor%20service.ipynb).


Or, use this as a template for comparing demographics of any other service areas.


In [1]:
import pandas as pd
import geopandas as gpd
import altair as alt

In [2]:
# Census tract polygons with their demographic columns, reprojected to
# EPSG:2263 so they share a CRS with the other layers loaded in this notebook.
tracts = gpd.read_file('../tract_data.geojson').to_crs(epsg=2263)

In [3]:
# Station locations, reprojected to EPSG:2263 so that buffering them later
# works in the same projected CRS as the tracts.
stations_geo = gpd.read_file('../stations_geo.geojson').to_crs(epsg=2263)

In [4]:
# Pre-computed poor-service polygons, reprojected to EPSG:2263 so they can be
# spatially joined against the tracts.
poor_service_areas_buffer = gpd.read_file(
    '../poor_service_areas_buffer.geojson'
).to_crs(epsg=2263)

In [5]:
# Radius drawn around every station to approximate the area it serves.
# EPSG:2263 is measured in US survey feet, so this is 500 ft (roughly 150 m).
SERVICE_AREA_BUFFER_FT = 500

station_buffers = stations_geo.geometry.buffer(SERVICE_AREA_BUFFER_FT)

# Dissolve all station buffers into one (multi)polygon so the service area is
# a single feature; a one-row frame cannot match the same tract twice in a
# spatial join.
service_area = gpd.GeoDataFrame(
    geometry=[
        # `unary_union` is deprecated since GeoPandas 1.0 in favour of
        # `union_all()`; fall back to it only on older installations.
        station_buffers.union_all()
        if hasattr(station_buffers, 'union_all')
        else station_buffers.unary_union
    ],
    crs=2263
)

In [6]:
# Keep every tract that intersects the service area polygon. The keyword
# arguments spell out the sjoin defaults the analysis relies on.
tracts_in_service_area = tracts.sjoin(
    service_area,
    how='inner',
    predicate='intersects',
)

In [7]:
# Keep every tract that intersects at least one poor-service polygon.
tracts_in_poor_service_areas_buffer = (
    tracts
    .sjoin(
        poor_service_areas_buffer
    )
    # An inner spatial join emits one row per (tract, polygon) match, so a
    # tract touching several poor-service polygons would appear several times
    # and be double-counted in every population sum below. The join result
    # keeps the tract index, so de-duplicating on it leaves each tract once.
    .loc[lambda joined: ~joined.index.duplicated(keep='first')]
)

In [8]:
# Total 2020 population summed over the tracts joined to the service area.
(
    tracts_in_service_area
    ['Total population__2020']
    .sum()
)

4672870.0

In [9]:
# Total 2020 population summed over the tracts joined to the poor-service areas.
(
    tracts_in_poor_service_areas_buffer
    ['Total population__2020']
    .sum()
)

942156.0

In [10]:
# Tract-level count columns to compare, mapped to the labels used in the chart.
DEMOGRAPHIC_LABELS = {
    'White__2020': 'White (%)',
    'Black__2020': 'Black (%)',
    'Asian__2020': 'Asian (%)',
    'Hispanic__2020': 'Hispanic/Latino (%)',
    'below_150_pct_poverty_level__ACS_2021': r'up to 150% of poverty level (%)',
}


def demographic_shares(tracts_subset, area_name):
    """Return each demographic count as a share of the subset's total population.

    Parameters
    ----------
    tracts_subset : DataFrame
        Tract rows containing every column in ``DEMOGRAPHIC_LABELS`` plus
        ``'Total population__2020'``.
    area_name : str
        Name given to the returned Series (becomes the ``area`` value).

    Returns
    -------
    Series
        Indexed by the raw count column names; values are fractions (0-1).
    """
    # NOTE(review): the poverty count comes from an ACS 2021 column but is
    # divided by the 2020 total population, like the race columns — confirm
    # that this denominator is the intended one.
    return (
        tracts_subset[list(DEMOGRAPHIC_LABELS)]
        .sum()
        .div(tracts_subset['Total population__2020'].sum())
        .rename(area_name)
    )


# Long-format table: one row per (demographic, area) pair.
comparison_demog = (
    demographic_shares(tracts_in_service_area, 'entire service area')
    .to_frame()
    .join(
        demographic_shares(
            tracts_in_poor_service_areas_buffer,
            'poor service areas'
        )
    )
    .rename(index=DEMOGRAPHIC_LABELS)
    .rename_axis('race/demographic')
    .reset_index()
    .melt(
        id_vars='race/demographic',
        var_name='area',
        value_name='pct of population'
    )
)

In [11]:
# Label of the single poverty row; it is drawn in its own panel below the
# race/ethnicity rows.
POVERTY_LABEL = r'up to 150% of poverty level (%)'


def demographic_panel(rows, y_title, x_title, **y_axis_options):
    """Build one grouped horizontal bar panel comparing the two areas.

    Parameters
    ----------
    rows : DataFrame
        Subset of ``comparison_demog`` to draw (columns ``race/demographic``,
        ``area`` and ``pct of population``).
    y_title : str
        Title of the category axis.
    x_title : str
        Title of the percentage axis (``''`` hides it).
    **y_axis_options
        Extra keyword arguments forwarded to the y ``alt.Axis``.

    Returns
    -------
    alt.Chart
    """
    return (
        alt.Chart(rows)
        .mark_bar()
        .encode(
            y=alt.Y(
                'race/demographic',
                title=y_title,
                # Same title placement in every panel so the titles line up.
                axis=alt.Axis(
                    titlePadding=20,
                    titleX=-160,
                    **y_axis_options
                )
            ),
            x=alt.X(
                'pct of population',
                title=x_title,
                axis=alt.Axis(
                    format='%',
                    labelFlush=False
                )
            ),
            color=alt.Color(
                'area',
            ),
            # One bar per area within each category.
            yOffset='area',
        )
    )


is_poverty_row = comparison_demog['race/demographic'] == POVERTY_LABEL

comparison_demog_chart = (
    demographic_panel(
        comparison_demog[~is_poverty_row],
        y_title='Race/ethnicity',
        x_title='',
    )
    & demographic_panel(
        comparison_demog[is_poverty_row],
        y_title='Poverty',
        x_title='Percent of population',
        # Large limit so the long poverty label is not truncated.
        labelLimit=10000,
    )
).resolve_scale(x='shared')  # both panels share one percentage scale

comparison_demog_chart