In [2]:
import geopandas as gpd
import altair as alt
import pandas as pd

In [3]:
dir = './Data'
collisions = pd.read_csv(dir + '/collisions_clean.csv')
weather = pd.read_csv(dir + '/weather2018.csv')

In [4]:
# create a new column HOUR that contains the hour of the day based on the CRASH TIME column
collisions['HOUR'] = pd.to_datetime(collisions['CRASH TIME']).dt.hour
collisions['MONTH'] = pd.to_datetime(collisions['CRASH DATE']).dt.month

collisions.head(1)

  collisions['HOUR'] = pd.to_datetime(collisions['CRASH TIME']).dt.hour


Unnamed: 0,COLLISION_ID,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,STREET NAME,CONTRIBUTING FACTOR VEHICLE 1,VEHICLE TYPE CODE 1,TOTAL INJURED,TOTAL KILLED,HOUR,MONTH
0,3975659,2018-09-05,23:00,Brooklyn,11219.0,40.64412,-73.98907,Fort Hamilton Parkway,Unspecified,TAXI,0.0,0.0,23,9


In [5]:
weather = weather[['datetime', 'icon']]
weather.shape, collisions.shape

((122, 2), (4003, 14))

In [6]:
# merge the two dataframes on the datetime column
merged = pd.merge(collisions, weather, left_on='CRASH DATE', right_on='datetime', how='left')
merged.shape

(4003, 16)

Which weather condition and type of vehicle were present in the majority of accidents each month? And in the combination of all the months?

Which day had more accidents during clear days in July in Manhattan?

Which area presented the majority of taxi accidents during rainy days in June on Mondays at noon, 12am?

In which area and at what hour did the majority of accidents each month happen? And in the combination of all the months?

In [7]:
merged['icon'] = merged['icon'].replace('rain', 'rainny')
merged.head(1)

Unnamed: 0,COLLISION_ID,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,STREET NAME,CONTRIBUTING FACTOR VEHICLE 1,VEHICLE TYPE CODE 1,TOTAL INJURED,TOTAL KILLED,HOUR,MONTH,datetime,icon
0,3975659,2018-09-05,23:00,Brooklyn,11219.0,40.64412,-73.98907,Fort Hamilton Parkway,Unspecified,TAXI,0.0,0.0,23,9,2018-09-05,rainny


In [8]:
df = merged[['COLLISION_ID', 'icon', 'VEHICLE TYPE CODE 1', 'MONTH']]

df = df.groupby(['icon', 'VEHICLE TYPE CODE 1', 'MONTH']).count().reset_index()

alt.Chart(df).mark_line().encode(
    x='icon:N',
    y='COLLISION_ID:Q',
    color=alt.Color('VEHICLE TYPE CODE 1:N')
).properties(
    width=800,
    height=400
).interactive()

In [9]:
df = merged[['COLLISION_ID', 'icon', 'VEHICLE TYPE CODE 1', 'MONTH']]

alt.Chart(df).mark_bar().encode(
    x='icon:N',
    y=alt.Y('count():Q', stack=True),
    color=alt.Color('VEHICLE TYPE CODE 1:N')
).properties(
    width=800,
    height=400
).interactive()

In [10]:
collisions['WEEKDAY'] = pd.to_datetime(collisions['CRASH DATE']).dt.day_name()

In [11]:
df = collisions[['CRASH DATE', 'HOUR', 'WEEKDAY', 'BOROUGH']]

click = alt.selection_point(encodings=['y'], on='mouseover', nearest=True, clear='mouseout')

heat = alt.Chart(df).mark_rect().encode(
    x=alt.X('HOUR:O'),
    y=alt.Y('WEEKDAY:O', sort=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']),
    color=alt.Color('count():Q')
).properties(
    width=600
)

text = heat.mark_text().encode(
    text=alt.Text('count():Q'),
    color=alt.ColorValue('white')
).properties(
    width=600
).transform_filter(
    click
)

# circ = heat.mark_point().encode(
#     alt.ColorValue('grey'),
#     alt.Size('count()')
# ).transform_filter(
#     click
# )

bars = alt.Chart(df).mark_bar().encode(
    x=alt.X('count():Q'),
    y=alt.Y('BOROUGH:N', sort='-x'),
    color=alt.Color('BOROUGH:N'),
    opacity=alt.condition(click, alt.value(1), alt.value(0.2))
).add_params(
    click
).properties(
    width=600
)

alt.vconcat(
    heat + text,
    bars
).resolve_legend(
    color="independent",
    size="independent"
)

In [12]:
nyc_map = gpd.read_file('Data/new-york-city-boroughs-ny_.geojson')
nyc_map = nyc_map[['name', 'geometry']]

nyc = alt.Chart(nyc_map).mark_geoshape(
    stroke='white',
    strokeWidth=1,
    filled=True,
    tooltip=False
).encode(
    color=alt.ColorValue('lightblue')
).project(
    type='identity', reflectY=True
).properties(
    width=500,
    height=500
)

collisions = collisions[collisions['LONGITUDE'] != 0]
df = collisions[['LONGITUDE', 'LATITUDE', 'BOROUGH', 'ZIP CODE', 'VEHICLE TYPE CODE 1']]

slider = alt.binding_range(min=0, max=1, step=0.05, name='Opacity: ')
op_var = alt.param(value=0, bind=slider)

points = alt.Chart(df).mark_circle(
    size=40,
    opacity=op_var,
    tooltip=True
).encode(
    longitude='LONGITUDE:Q',
    latitude='LATITUDE:Q',
    color=alt.Color('VEHICLE TYPE CODE 1:N'),
    tooltip=[alt.Tooltip('BOROUGH:N'), alt.Tooltip('ZIP CODE:Q'), alt.Tooltip('VEHICLE TYPE CODE 1:N')]
).project(
    type='identity', reflectY=True
).add_params(
    op_var
)

nyc + points

In [25]:
collisions = collisions[collisions['LONGITUDE'] != 0]
df = collisions[['LONGITUDE', 'LATITUDE', 'BOROUGH', 'ZIP CODE', 'VEHICLE TYPE CODE 1', 'CRASH DATE', 'HOUR', 'WEEKDAY']]

click = alt.selection_point(fields=['BOROUGH'], on='mouseover', clear='mouseout')
# click = alt.selection_point(encodings=['y'], on='mouseover', nearest=True, clear='mouseout')

heat = alt.Chart(df).mark_rect().encode(
    x=alt.X('HOUR:O'),
    y=alt.Y('WEEKDAY:O', sort=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']),
    color=alt.Color('count():Q')
).properties(
    width=600,
    height=200
)

text = heat.mark_text().encode(
    text=alt.Text('count():Q'),
    color=alt.ColorValue('white')
).properties(
    width=600,
    height=200
).transform_filter(
    click
)

bars = alt.Chart(df).mark_bar().encode(
    x=alt.X('count():Q'),
    y=alt.Y('BOROUGH:N', sort='-x'),
    color=alt.Color('BOROUGH:N'),
    opacity=alt.condition(click, alt.value(1), alt.value(0.2))
).add_params(
    click
).properties(
    width=600,
    height=200
)

zips = gpd.read_file('Data/new-york-city-zipcodes-ny_.geojson')

nyc = alt.Chart(zips).transform_lookup(
    lookup='borough',
    from_=alt.LookupData(df, 'BOROUGH', ['BOROUGH'])
).mark_geoshape(
    stroke='white',
    strokeWidth=1,
    filled=True,
    tooltip=False
).encode(
    color=alt.ColorValue('lightblue'),
    opacity=alt.condition(click, alt.value(1), alt.value(0.2))
).project(
    type='identity', reflectY=True
).add_params(
    click
).properties(
    width=500,
    height=500
)

slider = alt.binding_range(min=0, max=1, step=0.05, name='Opacity: ')
op_var = alt.param(value=0, bind=slider)

points = alt.Chart(df).mark_circle(
    size=40,
    opacity=op_var,
    tooltip=True
).encode(
    longitude='LONGITUDE:Q',
    latitude='LATITUDE:Q',
    color=alt.Color('VEHICLE TYPE CODE 1:N',
                    legend=alt.Legend(title='Vehicle Type', orient='bottom')),
    tooltip=[alt.Tooltip('VEHICLE TYPE CODE 1:N'), alt.Tooltip('ZIP CODE:Q')]
).project(
    type='identity', reflectY=True
).add_params(
    op_var
)

# poder seleccionar el tipo de coche
# poner bordes o nombres por barrio

(nyc + points) | ((heat + text) & bars)