In [2]:
# Import libraries
import pandas as pd
import datetime
from geopy.distance import geodesic
import folium
from math import radians, sin, cos, sqrt, atan2
from sodapy import Socrata

### Data extraction from Socrata using SQL & data cleaning

In [5]:
# Connect to the Socrata API
client = Socrata("data.cityofnewyork.us", None)

# Define the current date and one year ago
today = datetime.date.today()
one_year_ago = today - datetime.timedelta(days=365)

# Query the collisions in all boroughs
query_params = (
    f"(number_of_persons_injured>0 OR number_of_persons_killed>0) AND "
    f"crash_date>='{one_year_ago}'"
)

# Query the data
results = client.get("h9gi-nx95", where=query_params, limit=10000)

# Convert the results into a DataFrame
df = pd.DataFrame.from_records(results)

# Clean the data and ensure correct types
df['latitude'] = df['latitude'].astype(float)
df['longitude'] = df['longitude'].astype(float)

# Remove rows with missing latitude or longitude
df = df.dropna(subset=["latitude", "longitude"])

# Query ambulance stations in all boroughs
query_params = "factype='AMBULANCE STATION'"
ambulance_stations = client.get("ji82-xba5", where=query_params)

# Convert the ambulance station data into a DataFrame
ambulance_stations_df = pd.DataFrame.from_records(ambulance_stations)
ambulance_stations_df['latitude'] = ambulance_stations_df['latitude'].astype(float)
ambulance_stations_df['longitude'] = ambulance_stations_df['longitude'].astype(float)

# Query hospitals in all boroughs
query_params = "factype='HOSPITAL'"
hospitals = client.get("ji82-xba5", where=query_params)

# Convert the hospital data into a DataFrame
hospitals_df = pd.DataFrame.from_records(hospitals)
hospitals_df['latitude'] = hospitals_df['latitude'].astype(float)
hospitals_df['longitude'] = hospitals_df['longitude'].astype(float)



### Create new function to calculate the respomse time & identify overburdened stations

In [6]:
# Haversine function for distance calculation
def haversine(lat1, lon1, lat2, lon2):
    R = 6371000  # Earth's radius in meters
    phi1, phi2 = radians(lat1), radians(lat2)
    dphi = radians(lat2 - lat1)
    dlambda = radians(lon2 - lon1)
    a = sin(dphi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(dlambda / 2) ** 2
    return R * 2 * atan2(sqrt(a), sqrt(1 - a))

# Function to calculate total response time for an ambulance station based on collision proximity
def calculate_response_time(station, collisions_df):
    station_location = (station['latitude'], station['longitude'])
    total_distance = 0
    total_collisions = 0
    
    for _, collision in collisions_df.iterrows():
        collision_location = (collision['latitude'], collision['longitude'])
        distance = haversine(station_location[0], station_location[1], collision_location[0], collision_location[1])
        total_distance += distance
        total_collisions += 1
    
    if total_collisions == 0:
        return 0  # No collisions in the area
    else:
        return total_distance / total_collisions  # Average response time

# Function to find the most overburdened ambulance station (highest response time)
def find_most_overburdened_station(ambulance_stations_df, collisions_df):
    highest_response_time = 0
    most_overburdened_station = None
    
    for _, station in ambulance_stations_df.iterrows():
        response_time = calculate_response_time(station, collisions_df)
        if response_time > highest_response_time:
            highest_response_time = response_time
            most_overburdened_station = station
            
    return most_overburdened_station, highest_response_time

# Identify the most overburdened ambulance station
most_overburdened_station, highest_response_time = find_most_overburdened_station(ambulance_stations_df, df)

print(f"The ambulance station that needs the most help is: {most_overburdened_station['facname']}")
print(f"Highest average response time (meters): {highest_response_time:.2f}")



The ambulance station that needs the most help is: FDNY EMS CMND - SEAVIEW CAMPUS
Highest average response time (meters): 48540.27


### Data visualisation using Folium map

In [14]:
# Create the Folium map centered around NYC
collision_map = folium.Map(location=[40.7128, -74.0060], zoom_start=10)  # Default zoom for NYC

# Initialize bounds for collision markers
collision_bounds = []

# Add ambulance station markers to the map
for _, row in ambulance_stations_df.iterrows():
    if row['facname'] == most_overburdened_station['facname']:
        # Highlight the most overburdened station with red color
        color = 'red'
        popup_content = (
            f"<b>Borough:</b> {row['boro']}<br>"
            f"<b>Ambulance Station:</b> {row['facname']}<br>"
            f"<b>Highest Avg Response Time:</b> {highest_response_time:.2f} m"
        )
    else:
        # Normal ambulance stations with blue color
        color = 'blue'
        popup_content = (
            f"<b>Borough:</b> {row['boro']}<br>"
            f"<b>Ambulance Station:</b> {row['facname']}<br>"
        )
    
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=popup_content,
        icon=folium.Icon(color=color, icon='ambulance', prefix='fa')
    ).add_to(collision_map)

# Add collision markers with grey color and track bounds
for _, row in df.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=3,
        color='grey',  # Set marker color to grey
        fill=True,
        fill_color='grey',  # Set fill color to grey
        fill_opacity=0.6
    ).add_to(collision_map)
    # Add marker coordinates to bounds
    collision_bounds.append([row['latitude'], row['longitude']])

# Set the bounds for the entire view to show only the five boroughs of NYC
boroughs_bounds = [
    [40.4774, -74.2591],  # South-West corner of NYC (approximately)
    [40.9176, -73.7004]   # North-East corner of NYC (approximately)
]

# Adjust map view to fit all boroughs
collision_map.fit_bounds(boroughs_bounds)

# Save the map to an HTML file
collision_map.save("nyc_ambulance_station_map.html")