# Background

The census tract for the South End begins just on the southeast (SE) side of Columbus Avenue, as shown in this map:

![Map of the South End](south_end.png)

Let's explore the demographics of the South End.

# Setup

In [None]:
import pandas as pd
import altair as alt
from altair import datum

# Loading Data

In [None]:
# Load south_end.csv into a DataFrame. Ending the cell with the bare name
# makes the notebook render the table.
df = pd.read_csv(filepath_or_buffer='south_end.csv')
df

In [None]:
# Show only the two classification columns, keeping every row.
df.loc[:, ['Category', 'Subcategory']]

In [None]:
# List each distinct (Category, Subcategory) pair once; the first occurrence
# of every pair is the one retained.
df.loc[:, ['Category', 'Subcategory']].drop_duplicates(keep='first')

# Creating Visualizations

## Age Distribution by Decade

In [None]:
# First pass: a bar chart with only Decade (ordinal) encoded on the x axis.
alt.Chart(df).mark_bar().encode(alt.X('Decade:O'))

In [None]:
# Second pass: add the quantitative Count field on the y axis.
# No filter is applied yet, so rows from every Category are included.
alt.Chart(df).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
)

In [None]:
# Keep only the rows whose Category is 'Age', then colour the bars by
# Subcategory (nominal).
alt.Chart(df).transform_filter(
    datum.Category == 'Age'
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N'),
)

In [None]:
# Age chart with a chart title and a legend title.
# Rows are restricted to Category == 'Age'; bars are coloured by Subcategory.
alt.Chart(df).mark_bar().encode(
    x='Decade:O',
    y='Count:Q',
    # Declare the nominal type explicitly (':N'), as every other chart in this
    # notebook does, instead of relying on type inference from the DataFrame.
    color=alt.Color('Subcategory:N', title='Age Range'),
).transform_filter(
    (datum.Category == 'Age')
).properties(
    title='Age Distribution by Decade'
)

In [None]:
# Age bar chart with explicit ordering: the colour scale (and its legend) is
# sorted descending, while the order channel sorts Subcategory ascending.
alt.Chart(df).transform_filter(
    datum.Category == 'Age'
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title='Age Range', sort='descending'),
    alt.Order('Subcategory:N', sort='ascending'),
).properties(
    title='Age Distribution by Decade'
)

In [None]:
# Same encodings as the ordered age bar chart, drawn with line marks instead.
alt.Chart(df).transform_filter(
    datum.Category == 'Age'
).mark_line().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title='Age Range', sort='descending'),
    alt.Order('Subcategory:N', sort='ascending'),
).properties(
    title='Age Distribution by Decade'
)

In [None]:
# Same encodings as the ordered age bar chart, drawn with area marks instead.
alt.Chart(df).transform_filter(
    datum.Category == 'Age'
).mark_area().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title='Age Range', sort='descending'),
    alt.Order('Subcategory:N', sort='ascending'),
).properties(
    title='Age Distribution by Decade'
)

## Educational Attainment by Decade

In [None]:
# Category value for the education rows. The education charts use it both as
# the filter value and as the legend/column title.
eduField = "Educational Attainment (age 25+)"

In [None]:

# Education bar chart: keep only rows whose Category equals eduField and
# colour by Subcategory, with the colour scale sorted descending.
alt.Chart(df).transform_filter(
    datum.Category == eduField
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title=eduField, sort='descending'),
).properties(
    title='Educational Attainment Distribution by Decade'
)

In [None]:
# Explicit ordering of the education subcategories, from highest attainment
# to lowest; passed as the `sort` argument in the education charts.
eduSortOrder = [
    "Bachelor's Degree or Higher",
    "Some College or Associate's Degree",
    "High School or GED",
    "less than High School",
]

In [None]:
# Education bar chart using the hand-written eduSortOrder for the colour
# scale instead of a plain descending sort.
alt.Chart(df).transform_filter(
    datum.Category == eduField
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title=eduField, sort=eduSortOrder),
).properties(
    title='Educational Attainment Distribution by Decade'
)

In [None]:
# Education chart split into one column per Subcategory via the column
# channel; the colour scale still follows eduSortOrder.
alt.Chart(df).transform_filter(
    datum.Category == eduField
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title=eduField, sort=eduSortOrder),
    alt.Column('Subcategory:N', title=eduField),
).properties(
    title='Educational Attainment Distribution by Decade'
)

In [None]:
# Faceted education chart whose columns are sorted by eduSortOrder reversed,
# while the colour scale keeps the original eduSortOrder.
alt.Chart(df).transform_filter(
    datum.Category == eduField
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title=eduField, sort=eduSortOrder),
    alt.Column(
        'Subcategory:N',
        title=eduField,
        sort=list(reversed(eduSortOrder)),
    ),
).properties(
    title='Educational Attainment Distribution by Decade'
)

## Race by Decade

In [None]:
# Race bar chart: keep only the 'Race/ Ethnicity' rows and colour the bars
# by Subcategory.
alt.Chart(df).transform_filter(
    datum.Category == 'Race/ Ethnicity'
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q'),
    alt.Color('Subcategory:N', title='Race'),
).properties(
    title='Race Distribution by Decade'
)

In [None]:
# Race chart with normalized stacking (stack='normalize') on the y axis,
# restricted to the 'Race/ Ethnicity' rows and coloured by Subcategory.
chart_race = alt.Chart(df).mark_bar().encode(
    x='Decade:O',
    y=alt.Y('Count:Q', stack='normalize', title='Demographic Split'),
    color=alt.Color('Subcategory:N', title='Race')
).transform_filter(
    (datum.Category == 'Race/ Ethnicity')
).properties(
    title='Race Distribution by Decade'
)
# An assignment alone produces no cell output; end with the bare name so the
# notebook renders the chart, as the chart_occupancy cell does.
chart_race

## Types of Occupancy by Decade

In [None]:
# Occupancy chart with normalized stacking: keep the 'Housing Tenure' rows
# but drop the 'Occupied Housing Units' subcategory.
alt.Chart(df).transform_filter(
    (datum.Category == 'Housing Tenure')
    & (datum.Subcategory != 'Occupied Housing Units')
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q', stack='normalize', title='Occupancy Split'),
    alt.Color('Subcategory:N', title='Type of Occupancy'),
).properties(
    title='Types of Occupancy Distribution by Decade'
)

# Saving Your Results

In [None]:
# Build the occupancy chart again, this time binding it to a name so it can
# be saved later. The trailing bare name renders it in the notebook.
chart_occupancy = alt.Chart(df).transform_filter(
    (datum.Category == 'Housing Tenure')
    & (datum.Subcategory != 'Occupied Housing Units')
).mark_bar().encode(
    alt.X('Decade:O'),
    alt.Y('Count:Q', stack='normalize', title='Occupancy Split'),
    alt.Color('Subcategory:N', title='Type of Occupancy'),
).properties(
    title='Types of Occupancy Distribution by Decade'
)
chart_occupancy

## Saving to a web page...

In [None]:
# Write the chart to an HTML file, passing renderer='svg' through the
# embed options.
chart_occupancy.save(
    'chart_occupancy.html',
    embed_options={'renderer': 'svg'},
)

## Saving an SVG, PNG, Vega Editor...

Here is an example infographic you could create by exporting the chart as an SVG and editing it in Illustrator:

![South End Occupancy Infographic](south_end_occupancy_infographic.png)

In Illustrator:
* Open the SVG.
* Use the magic wand tool to select the owner series & the legend.
* Edit->Edit Colors->Saturate and reduce saturation.
* Add `apartment_building.svg` and scale it to ~20%.