In [105]:
import pandas as pd
import altair as alt
from altair import datum
# Turn off Altair's max-rows check on the active data transformer so the
# charts below can be built from the whole DataFrame.
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [177]:
# Load the MBTA line/stop table and discard the columns dropped before plotting.
dropped_columns = ['FID', 'mode', 'direction_id', 'day_type_id', 'day_type_name']
source = pd.read_csv("MBTA_Line_and_Stop.csv").drop(columns=dropped_columns)

# Keep only Fall 2019 rows, leaving out time periods 10 and 11.
is_fall_2019 = source['season'] == 'Fall 2019'
is_excluded_period = source['time_period_id'].isin(['time_period_10', 'time_period_11'])
fall_2019 = source.loc[is_fall_2019 & ~is_excluded_period]


In [178]:
# Single-click selection keyed on the route; it is attached to both panels.
brush = alt.selection(type="single", fields=['route_name'])


def _route_color():
    """Return the color encoding: route color (viridis) for marks matching
    the selection, light gray for the rest."""
    by_route = alt.Color('route_name', type='nominal', scale=alt.Scale(scheme='viridis'))
    return alt.condition(brush, by_route, alt.value('lightgray'))


def _stop_point_encoding(color):
    """Return the encode() keyword arguments shared by both scatter layers."""
    return dict(
        x='ons:Q',
        color=color,
        y='offs:Q',
        tooltip=['route_name', 'stop_name'],
        size='average_flow_of_stop:Q',
    )


def _per_stop_means(chart):
    """Aggregate the chart's data to one row per (stop_name, route_name)."""
    return chart.transform_aggregate(
        offs='mean(total_offs)',
        ons='mean(total_ons)',
        average_flow_of_stop='average(average_flow)',
        groupby=['stop_name', 'route_name'],
    )


# Left panel: average boardings per route across the time periods.
line_chart = (
    alt.Chart(fall_2019, title="Average Onboarding of Line by Time Period")
    .mark_line()
    .encode(
        x='time_period_id:O',
        color=_route_color(),
        y='average(total_ons)',
    )
    .add_selection(brush)
)

# Right panel, base layer: mean boardings vs. mean alightings per stop and route.
scatter_chart = _per_stop_means(
    alt.Chart(fall_2019, title="Average On,Offboarding of Station & Line")
    .mark_circle()
    .encode(**_stop_point_encoding(_route_color()))
).add_selection(brush)

# Right panel, top layer: the two highlighted stops drawn in opaque red.
# The stop filter runs before the aggregation, as in the base layer's pipeline.
ruggles = _per_stop_means(
    alt.Chart(fall_2019)
    .mark_circle(opacity=1.0)
    .encode(**_stop_point_encoding(alt.value("red")))
    .transform_filter(
        alt.FieldOneOfPredicate(
            field='stop_name',
            oneOf=['Ruggles', 'Northeastern University'],
        )
    )
)

line_chart | (scatter_chart + ruggles)


# Line Graph Chart:
This captures how the average intake of a train-line changes over the period of a day, starting at the very start of the T's onboarding and ending at closure.
Clicking on a line highlights only that line in its original color and grays out the others. The most interesting phenomenon was how little inflow the Green Line ever receives, despite how far it branches out into the neighborhoods of Boston.

# Scatter Plot Chart:
This chart details the relationship between passenger inflow and outflow at each stop, looking to identify whether there were many stops that people traveled to and from by different means. The only really interesting finding was how consistent and linear the data ended up being, with specific outliers like "Community College" taking in 10k fewer people than it saw leave the station.

# How we used pop-out:
I picked a color palette that was full of green and blue colors for my regular data and had a red coloration for Northeastern and Ruggles. This was to provide ample contrast for the two stops, which are smaller than and overlapped by other stops on the plot.