In [2]:
import pandas as pd
import altair as alt

# Folder holding the cleaned CSV exports, relative to this notebook.
# NOTE(review): the name `dir` shadows Python's builtin dir(); it is kept because
# other cells may interpolate it into paths.
dir = '../Data'

# Read both cleaned datasets in one step (weather first, then collisions).
weather, collisions = (
    pd.read_csv(f'{dir}/weather_clean.csv'),
    pd.read_csv(f'{dir}/collisions_clean.csv'),
)

# Route Altair's data handling through the "vegafusion" data transformer.
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

In [3]:
# Derive the crash year as a 4-character string by slicing the start of
# CRASH DATE (assumes the date text begins with the year, e.g. 'YYYY-MM-DD' —
# TODO confirm against collisions_clean.csv).
collisions['YEAR'] = collisions['CRASH DATE'].astype(str).str[:4]

# Per-year subsets consumed by the cells below. They were referenced without
# being created anywhere in the notebook, so a fresh "Restart & Run All" stopped
# with a NameError. YEAR is dropped so each subset keeps the column layout shown
# in the saved `coll_2018.columns` output; `.drop` also returns an independent
# copy, so later edits to a subset never touch `collisions`.
coll_2018 = collisions[collisions['YEAR'] == '2018'].drop(columns='YEAR')
coll_2020 = collisions[collisions['YEAR'] == '2020'].drop(columns='YEAR')

# At what time of the day are accidents more common?

In [None]:
# List the column labels of `coll_2018` (presumably the 2018 slice of the
# collisions data — verify where it is created) to see which fields are available.
coll_2018.columns

Index(['COLLISION_ID', 'DAY NAME', 'CRASH DATE', 'CRASH MOMENT',
       'CRASH TIME INTERVAL', 'BOROUGH', 'ZIP CODE', 'LOCATION', 'STREET NAME',
       'NUMBER OF INJURED', 'NUMBER OF KILLED',
       'CONTRIBUTING FACTOR VEHICLE 1', 'VEHICLE TYPE CODE 1'],
      dtype='object')

In [None]:
def _collisions_per_interval(frame, year_label):
    """Count COLLISION_ID entries per CRASH TIME INTERVAL and tag them with a year.

    Parameters
    ----------
    frame : DataFrame providing 'COLLISION_ID' and 'CRASH TIME INTERVAL' columns.
    year_label : value written into the 'YEAR' column of every returned row.

    Returns
    -------
    DataFrame with columns 'CRASH TIME INTERVAL', 'COLLISION_ID' (the non-null
    count for that interval) and 'YEAR'.
    """
    per_interval = (
        frame[['COLLISION_ID', 'CRASH TIME INTERVAL']]
        .groupby(['CRASH TIME INTERVAL'])
        .count()
        .reset_index()
    )
    per_interval['YEAR'] = year_label
    return per_interval


# One table per year, then both stacked into a single long-format frame
# (2018 rows first, followed by 2020 rows) for plotting.
hour_data_18 = _collisions_per_interval(coll_2018, '2018')
hour_data_20 = _collisions_per_interval(coll_2020, '2020')

hour_data = pd.concat([hour_data_18, hour_data_20], ignore_index=True)

In [None]:
# Encoding channels for the hourly comparison: interval on x, collision count
# on y, one fixed colour per year.
hour_axis = alt.X(
    'CRASH TIME INTERVAL:O',
    title='Hour of the Day',
    axis=alt.Axis(labelAngle=0),
)
collision_axis = alt.Y('COLLISION_ID:Q', title='Number of Collisions')
year_palette = alt.Color(
    'YEAR:N',
    title='Year',
    scale=alt.Scale(domain=['2018', '2020'], range=['lightblue', 'lightgreen']),
)

# Smoothed area chart of collisions per interval, coloured by year.
# NOTE(review): area marks with a colour field are stacked by default, so the
# upper band reads as the sum of both years — pass stack=None to alt.Y if an
# overlay was intended; confirm which reading is wanted.
(
    alt.Chart(hour_data)
    .mark_area(fillOpacity=0.8, line=True, interpolate='monotone')
    .encode(x=hour_axis, y=collision_axis, color=year_palette)
    .properties(
        width=700,
        height=200,
        title='Number of Collisions by Hour of the Day',
    )
)

In [None]:
# Encoding channels shared by the area layer.
hour_axis = alt.X(
    'CRASH TIME INTERVAL:O',
    title='Hour of the Day',
    axis=alt.Axis(labelAngle=0),
)
collision_axis = alt.Y('COLLISION_ID:Q', title='Number of Collisions')
year_palette = alt.Color(
    'YEAR:N',
    title='Year',
    scale=alt.Scale(domain=['2018', '2020'], range=['lightblue', 'lightgreen']),
)

# Base layer: smoothed area of collisions per interval, coloured by year.
area_plot = (
    alt.Chart(hour_data)
    .mark_area(fillOpacity=0.8, line=True, interpolate='monotone')
    .encode(x=hour_axis, y=collision_axis, color=year_palette)
    .properties(
        width=700,
        height=200,
        title='Number of Collisions by Hour of the Day',
    )
)

# Guide layer: a faint gray rule at every interval to help read values off the x axis.
vertical_lines = (
    alt.Chart(hour_data)
    .mark_rule(color='gray', strokeWidth=1, opacity=0.1)
    .encode(x='CRASH TIME INTERVAL:O')
)

# `+` builds the same layered chart as alt.layer(area_plot, vertical_lines).
final_chart = area_plot + vertical_lines

final_chart

In [None]:
# Bar-chart variant of the hourly comparison, coloured by year.
# NOTE(review): bars with a colour field are stacked by default, so each bar's
# height is the two years combined — confirm that is the intended reading.
hourly_bars = alt.Chart(hour_data).mark_bar(opacity=0.8)

hourly_bars.encode(
    x=alt.X(
        'CRASH TIME INTERVAL:O',
        title='Hour of the Day',
        axis=alt.Axis(labelAngle=0),
    ),
    y=alt.Y('COLLISION_ID', title='Number of Collisions'),
    color=alt.Color(
        'YEAR:N',
        title='Year',
        scale=alt.Scale(domain=['2018', '2020'], range=['lightblue', 'lightgreen']),
    ),
).properties(
    width=400,
    height=200,
    title='Number of Collisions by Hour of the Day',
)

In [None]:
# Only the columns needed for the per-year density estimate.
density_source = collisions[['COLLISION_ID', 'CRASH TIME INTERVAL', 'YEAR']]

# Violin-style plot: a density estimate of CRASH TIME INTERVAL over 0-23,
# computed separately for each YEAR and drawn as a horizontally centred area,
# with one facet column per year.
(
    alt.Chart(density_source)
    .transform_density(
        'CRASH TIME INTERVAL',
        as_=['CRASH TIME INTERVAL', 'density'],
        extent=[0, 23],
        groupby=['YEAR'],
    )
    .mark_area(orient='horizontal')
    .encode(
        # Density is centred around zero so each area is symmetric; the axis
        # shows a single unlabeled tick and no grid.
        x=alt.X(
            'density:Q',
            stack='center',
            impute=None,
            title=None,
            axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
        ),
        y=alt.Y(
            'CRASH TIME INTERVAL:Q',
            title='Hour of the Day',
            axis=alt.Axis(labelAngle=0),
        ),
        color=alt.Color(
            'YEAR:O',
            scale=alt.Scale(domain=['2018', '2020'], range=['lightblue', 'lightgreen']),
            legend=None,
        ),
        # Facet header placed underneath each column, with no gap between columns.
        column=alt.Column(
            'YEAR:O',
            header=alt.Header(titleOrient='bottom', labelOrient='bottom', labelPadding=0),
            spacing=0,
            title=None,
        ),
    )
    .configure_view(stroke=None)
    .properties(width=100, height=300)
)

In [None]:
# Calendar order for the y axis. Without an explicit sort, a nominal field is
# ordered alphabetically (Friday, Monday, Saturday, ...), which hides the
# weekday/weekend pattern.
# NOTE(review): assumes DAY NAME holds full English day names (as produced by
# pandas' `dt.day_name()`) — confirm against the data; labels not listed here
# are placed after the listed ones.
weekday_order = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday',
]

# Heatmap of collision counts: one cell per (hour interval, weekday) pair,
# coloured by the number of rows falling in that cell.
alt.Chart(collisions[['CRASH TIME INTERVAL', 'DAY NAME']]).mark_rect().encode(
    # Ordinal, matching how the other hourly charts encode the interval.
    x=alt.X('CRASH TIME INTERVAL:O',
            title='Hour of the Day',
            axis=alt.Axis(labelAngle=0)),
    y=alt.Y('DAY NAME:N',
            title='Day of the Week',
            sort=weekday_order),
    color=alt.Color('count():Q',
                    title='Number of Collisions',
                    legend=None)
).properties(
    width=600,
    height=200,
    title='Number of Collisions by Hour of the Day and Day of the Week'
)