In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
import datetime

df = pd.read_csv("Motor_Vehicle_Collisions_Crashes.csv", dtype=str)
idx = df['LATITUDE'].isna() | (df['LATITUDE'] == '0.0000000')
df['time'] = pd.to_datetime(df['CRASH DATE'] + " " + df['CRASH TIME'])
df.drop(['CRASH DATE', 'CRASH TIME'], axis=1, inplace=True)

numeric_columns = ['LATITUDE',
                   'LONGITUDE',
                   'NUMBER OF PERSONS INJURED',
                   'NUMBER OF PERSONS KILLED',
                   'NUMBER OF PEDESTRIANS INJURED',
                   'NUMBER OF PEDESTRIANS KILLED',
                   'NUMBER OF CYCLIST INJURED',
                   'NUMBER OF CYCLIST KILLED',
                   'NUMBER OF MOTORIST INJURED',
                   'NUMBER OF MOTORIST KILLED']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df.drop(['LOCATION', 'COLLISION_ID'], axis=1, inplace=True)

df1 = df.loc[~idx] # where LAT/LON are known 
now = datetime.datetime.now()
earliest_date = now - datetime.timedelta(hours=24*38)
stage_df = df1[df1['time'] > earliest_date]


In [16]:

# Create a Map instance
m = folium.Map(location=[40.4342494,-73.5998745], tiles='Stamen Terrain',
                   zoom_start=10)

from folium.plugins import MarkerCluster

mc = MarkerCluster()

for each in stage_df.iterrows():
    mc.add_child(folium.Marker(
        location = [each[1]['LATITUDE'],each[1]['LONGITUDE']])) #, 
        #clustered_marker = True)

m.add_child(mc)

display(m)

# Line chart

In [56]:
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from bokeh.plotting import figure
from bokeh.transform import dodge
import pandas as pd

# Load the collision data into a pandas dataframe
df = pd.read_csv('Motor_Vehicle_Collisions_Crashes.csv', parse_dates=['CRASH DATE'])

# Rename columns to remove spaces and make them lowercase
df = df.rename(columns={'CRASH DATE': 'crash_date', 'LATITUDE': 'latitude', 'LONGITUDE': 'longitude', 'BOROUGH': 'borough'})

# Drop any rows with missing borough or date values
df = df.dropna(subset=['borough', 'crash_date'])

# Format the date as YYYY-MM-DD
df['crash_date'] = df['crash_date'].dt.strftime('%Y-%m-%d')

# Extract the year and month from the crash_date column
df['year'] = pd.to_datetime(df['crash_date']).dt.year
df['month'] = pd.to_datetime(df['crash_date']).dt.month

# Group the data by borough and month and count the number of collisions
grouped = df.groupby(['borough', 'month']).size().reset_index(name='count')

# Create a ColumnDataSource object for the plot
source = ColumnDataSource(grouped)

# Define the categories and colors for the boroughs
boroughs = ['BROOKLYN', 'BRONX', 'MANHATTAN', 'QUEENS', 'STATEN ISLAND']
colors = Category10[len(boroughs)]

# Create a figure object
p = figure(x_range=[str(i) for i in range(1, 13)], plot_height=400, plot_width=800, title='Collisions by Month and Borough')

# Add a line for each borough
for i, borough in enumerate(boroughs):
    borough_data = grouped[grouped['borough'] == borough]
    source = ColumnDataSource(borough_data)
    p.line(x='month', y='count', source=source, color=colors[i], legend_label=borough)

# Set the axis labels and legend location
p.xaxis.axis_label = 'Month'
p.yaxis.axis_label = 'Number of Collisions'
p.legend.location = 'top_left'

# Show the plot
show(p)


  df = pd.read_csv('Motor_Vehicle_Collisions_Crashes.csv', parse_dates=['CRASH DATE'])


In [55]:
from bokeh.plotting import figure, show
from bokeh.palettes import Category10
from bokeh.models import ColumnDataSource
import pandas as pd

# 读取数据并进行必要的数据处理
df = pd.read_csv('Motor_Vehicle_Collisions_Crashes.csv', parse_dates=['CRASH DATE'])
df = df.rename(columns={'CRASH DATE': 'crash_date', 'BOROUGH': 'borough'})
df = df.dropna(subset=['borough', 'crash_date'])
df['year'] = pd.to_datetime(df['crash_date']).dt.year
df = df[df['year'] != 2023]
grouped = df.groupby(['borough', 'year']).size().reset_index(name='count')

# 创建一个 Bokeh 的 Figure 对象
p = figure(title='Collisions by Year and Borough',plot_height=400, plot_width=800, x_axis_label='Year', y_axis_label='Number of Collisions')

# 为每个地区创建一个数据源
brooklyn_source = ColumnDataSource(grouped[grouped['borough'] == 'BROOKLYN'])
bronx_source = ColumnDataSource(grouped[grouped['borough'] == 'BRONX'])
manhattan_source = ColumnDataSource(grouped[grouped['borough'] == 'MANHATTAN'])
queens_source = ColumnDataSource(grouped[grouped['borough'] == 'QUEENS'])
staten_island_source = ColumnDataSource(grouped[grouped['borough'] == 'STATEN ISLAND'])

# 绘制每个地区的折线
colors = Category10[5]
p.line(x='year', y='count', source=brooklyn_source, color=colors[0], legend_label='Brooklyn')
p.line(x='year', y='count', source=bronx_source, color=colors[1], legend_label='Bronx')
p.line(x='year', y='count', source=manhattan_source, color=colors[2], legend_label='Manhattan')
p.line(x='year', y='count', source=queens_source, color=colors[3], legend_label='Queens')
p.line(x='year', y='count', source=staten_island_source, color=colors[4], legend_label='Staten Island')

# 将图例添加到图表中，并显示图表
p.legend.location = 'top_left'
show(p)


  df = pd.read_csv('Motor_Vehicle_Collisions_Crashes.csv', parse_dates=['CRASH DATE'])
