In [None]:
import altair as alt
import geopandas as gpd
import pandas as pd
from vega_datasets import data

In [None]:
# Folder that holds the notebook's input files.
# NOTE(review): `dir` shadows Python's built-in dir(); the name is kept because
# other cells may read it -- consider renaming once all uses are known.
dir = './Data'

# Load the collisions CSV into a DataFrame.
collisions_path = f'{dir}/collisions_clean.csv'
collisions = pd.read_csv(collisions_path)

# Which weather condition and type of vehicle were present in the majority of accidents each month? And across all months combined?

In [None]:
# Parallel-coordinates style line chart of the iris dataset.
# `alt` and `data` are imported once in the notebook's import cell, so they are
# not re-imported here.
source = data.iris()

# Columns that are folded into (key, value) pairs and laid out along the x axis.
measurements = ['petalLength', 'petalWidth', 'sepalLength', 'sepalWidth']

alt.Chart(source, width=500).transform_window(
    # Per-row count, used below as `detail` so every input row is drawn as its own line.
    index='count()'
).transform_fold(
    measurements,
    # Spell out the output field names the encodings below rely on.
    as_=['key', 'value']
).mark_line().encode(
    x='key:N',
    y='value:Q',
    color='species:N',
    detail='index:N',
    opacity=alt.value(0.5)
).interactive()

In [None]:
# Centre-stacked area chart of the unemployment-across-industries dataset.
# `alt` and `data` are imported once in the notebook's import cell, so they are
# not re-imported here.
source = data.unemployment_across_industries.url  # the chart is given the dataset URL

alt.Chart(source).mark_area().encode(
    # Year-only tick labels, with the axis line and tick marks hidden.
    alt.X('yearmonth(date):T').axis(format='%Y', domain=False, tickSize=0),
    # Summed counts stacked around the centre; the y axis itself is not drawn.
    alt.Y('sum(count):Q').stack('center').axis(None),
    # One colour per `series` value.
    alt.Color('series:N').scale(scheme='category20b')
)


# In which area and at what hour did the majority of accidents happen each month? And across all months combined?

In [None]:
# Zip-code map: the selected zip code stays opaque and the others are faded.
nyc_map = gpd.read_file('Data/new-york-city-zipcodes-ny_.geojson')

# Point selection keyed on the `postalCode` field of the picked shape.
click = alt.selection_point(fields=['postalCode'])

# Encodings are named up front so the chart definition below reads as a short pipeline.
borough_fill = alt.Color('borough:N').scale(scheme='yellowgreenblue').legend(None)
fade_unselected = alt.condition(click, alt.value(1), alt.value(0.2))
zip_tooltips = [
    alt.Tooltip('postalCode:N', title='Zip Code'),
    alt.Tooltip('borough:N', title='Borough'),
]

nyc = (
    alt.Chart(nyc_map)
    .mark_geoshape(stroke='white', strokeWidth=1, filled=True, tooltip=True)
    .encode(color=borough_fill, opacity=fade_unselected, tooltip=zip_tooltips)
    .project(type='identity', reflectY=True)
)

# Attach the selection; as the cell's last expression this also displays the chart.
nyc.add_params(click)

In [None]:
# Linked view: collisions per borough as a bar chart next to a borough map.
# Hovering a borough in either chart keeps it opaque and fades the others in both.
nyc_map = gpd.read_file('Data/new-york-city-boroughs-ny_.geojson')
nyc_map = nyc_map[['name', 'geometry']]
df = collisions[['BOROUGH']]

# Count collisions per borough in pandas instead of handing Altair one row per
# collision: Altair embeds the data in the chart spec and by default raises
# MaxRowsError above 5,000 rows. The borough column is named `name` so it
# matches the map's field directly, which removes the need for a
# transform_lookup that re-embedded the geometry frame just to rename a column.
# Rows with a missing BOROUGH are dropped by value_counts().
# NOTE(review): assumes BOROUGH values are spelled exactly like the GeoJSON
# `name` values -- confirm, otherwise the two charts will not link.
borough_counts = (
    df['BOROUGH']
    .value_counts()
    .rename_axis('name')
    .reset_index(name='collisions')
)

# Shared selection on the borough name; despite the variable name it fires on hover.
click = alt.selection_point(fields=['name'], on='mouseover')

# Same highlight rule for both charts: selected borough opaque, the rest faded.
highlight = alt.condition(click, alt.value(1), alt.value(0.2))

nyc = alt.Chart(nyc_map).mark_geoshape(
    stroke='white',
    strokeWidth=1,
    filled=True,
    tooltip=True
).encode(
    color=alt.Color('name:N',
                    scale=alt.Scale(scheme='yellowgreenblue'),
                    legend=None),
    opacity=highlight,
    tooltip=[alt.Tooltip('name:N', title='Borough')]
).project(
    type='identity', reflectY=True
).properties(
    width=500,
    height=500
)

bars = alt.Chart(borough_counts).mark_bar().encode(
    x=alt.X('collisions:Q', title='Number of Collisions'),
    y=alt.Y('name:N',
            sort='-x',  # longest bar first
            title='Borough'),
    color=alt.Color('name:N', legend=None),
    opacity=highlight,
    tooltip=[alt.Tooltip('name:N', title='Borough'),
             alt.Tooltip('collisions:Q', title='Number of Collisions')]
).properties(
    width=500,
    height=500
)

# Register the selection once on the concatenated chart so both panels respond to it.
(bars | nyc).add_params(click)

# Which area presented the majority of taxi accidents during rainy days in June on Mondays at noon (12 pm)?

# Which day had the most accidents during clear days in July in Manhattan?