# Importing Libraries

In [28]:
import numpy as np
import pandas as pd
import re
import plotly.express as px



# Loading Datasets

In [30]:
# Read the FDNY monthly response-time export into a DataFrame
fdny_df = pd.read_csv(
    filepath_or_buffer="FDNY_Monthly_Response_Times_20250304.csv"
)

# Read the historic Open Restaurant applications export into a DataFrame
restaurant_df = pd.read_csv(
    filepath_or_buffer="Open_Restaurant_Applications__Historic__20250304.csv"
)


# Dataset Exploration

In [31]:
# Print a plain-text preview (first five rows) of each loaded dataset
for preview_frame in (fdny_df, restaurant_df):
    print(preview_frame.head())

  YEARMONTH        INCIDENTCLASSIFICATION INCIDENTBOROUGH  INCIDENTCOUNT  \
0   2009/07  All Fire/Emergency Incidents        Citywide          40850   
1   2009/07  All Fire/Emergency Incidents       Manhattan          10709   
2   2009/07  All Fire/Emergency Incidents           Bronx           8137   
3   2009/07  All Fire/Emergency Incidents   Staten Island           2205   
4   2009/07  All Fire/Emergency Incidents        Brooklyn          11505   

  AVERAGERESPONSETIME  
0               04:27  
1               04:32  
2               04:37  
3               04:45  
4               04:01  
   objectid                                globalid  \
0     13610  {FD87ABAA-860E-4762-845D-8F0403D0246B}   
1      5900  {3B07E4C0-07B7-4079-8333-64446CC3EE03}   
2     13018  {137C575D-DC14-4F9D-83D9-A3FFE513B3B8}   
3     11630  {15270732-2A78-4C24-89DD-BE8DD916F115}   
4     13137  {EF9C8173-91D1-496E-8BD4-B02BEADC2A21}   

  Seating Interest (Sidewalk/Roadway/Both)            Restaurant Nam

In [32]:
# Display basic information about the datasets.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line
# after each summary.
fdny_df.info()
restaurant_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6552 entries, 0 to 6551
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   YEARMONTH               6552 non-null   object
 1   INCIDENTCLASSIFICATION  6552 non-null   object
 2   INCIDENTBOROUGH         6552 non-null   object
 3   INCIDENTCOUNT           6552 non-null   int64 
 4   AVERAGERESPONSETIME     6552 non-null   object
dtypes: int64(1), object(4)
memory usage: 256.1+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14428 entries, 0 to 14427
Data columns (total 35 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   objectid                                  14428 non-null  int64  
 1   globalid                                  14428 non-null  object 
 2   Seating Interest (Sidewalk/Roadway/Both)  14428 non-null  object 
 3   Rest

# Dataset Cleaning

In [38]:
# Convert AVERAGERESPONSETIME from "MM:SS" to total minutes (float)
def convert_time_to_minutes(time_str):
    """Convert an "MM:SS" string into a float number of minutes.

    Parameters
    ----------
    time_str : str
        Text made of exactly two colon-separated integer fields
        (minutes, then seconds).

    Returns
    -------
    float or None
        ``minutes + seconds / 60`` on success; ``None`` when the value cannot
        be parsed (it has no usable ``split`` method, it does not contain
        exactly two fields, or a field is not an integer).
    """
    try:
        minutes, seconds = map(int, time_str.split(":"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .split (e.g. None or a float NaN)
        # TypeError:      .split rejected the separator (e.g. bytes input)
        # ValueError:     wrong number of fields or a non-integer field
        # Anything else (KeyboardInterrupt, SystemExit, ...) is deliberately
        # allowed to propagate instead of being silently swallowed.
        return None
    return minutes + seconds / 60

# Parse every response-time string into minutes, then write the result back
# over the original column.
# NOTE: this overwrites the column in place, so re-running the cell feeds the
# already-converted floats back into the parser; those have no .split(), every
# conversion yields None, and the dropna below then removes every row.
parsed_response_minutes = fdny_df["AVERAGERESPONSETIME"].apply(convert_time_to_minutes)
fdny_df["AVERAGERESPONSETIME"] = parsed_response_minutes

# Discard the rows whose response time could not be parsed
fdny_df.dropna(subset=["AVERAGERESPONSETIME"], inplace=True)

# Handle mixed date formats in YEARMONTH
def clean_yearmonth(value):
    """Parse a YEARMONTH cell into a pandas Timestamp.

    Two layouts are recognized:

    * ``YYYY/MM`` -> the first day of that month.
    * ``FY YYYY`` -> 1 January of that year. Whitespace between the ``FY``
      prefix and the four-digit year is optional.

    Parameters
    ----------
    value : object
        Raw cell value; it is converted with ``str()`` and stripped first.

    Returns
    -------
    pandas.Timestamp or pandas.NaT
        The parsed date, or ``pd.NaT`` when the text matches neither layout or
        the date itself is invalid. ``pd.NaT`` is treated as missing by
        ``dropna``/``isna``, so unparseable rows can be dropped uniformly.

    Notes
    -----
    An ``FY YYYY`` value and a ``YYYY/01`` value map to the same Timestamp, so
    a group-by on the parsed column cannot tell the two kinds of row apart.
    """
    text = str(value).strip()

    if "/" in text:  # Format: YYYY/MM
        return pd.to_datetime(text, format="%Y/%m", errors="coerce")

    # Format: FY YYYY. A regex is used instead of split(" ")[1] so that
    # "FY2010", "FY  2010" or a bare "FY" cannot raise IndexError/ValueError.
    fiscal_match = re.search(r"FY\s*(\d{4})\b", text)
    if fiscal_match:
        return pd.to_datetime(f"{fiscal_match.group(1)}-01", format="%Y-%m", errors="coerce")

    return pd.NaT  # Unexpected format

# Normalize YEARMONTH to Timestamps, then discard the rows that failed to parse
parsed_yearmonth = fdny_df["YEARMONTH"].apply(clean_yearmonth)
fdny_df["YEARMONTH"] = parsed_yearmonth
fdny_df.dropna(subset=["YEARMONTH"], inplace=True)

# Unweighted mean of the per-row average response times.
# NOTE(review): every remaining row counts equally here, including aggregate
# rows such as the "Citywide" borough shown in the head() preview, and rows
# are not weighted by INCIDENTCOUNT - confirm this is the intended statistic.
overall_avg_response_time = fdny_df["AVERAGERESPONSETIME"].mean()
print(f"Overall Average Response Time: {overall_avg_response_time:.2f} minutes")



Overall Average Response Time: 4.59 minutes


# Visualizations

In [40]:
# ---- Plot 1: Bar Chart of Average Response Time per Borough ----
# Mean of the per-row average response time for each INCIDENTBOROUGH value
borough_avg_response = (
    fdny_df.groupby("INCIDENTBOROUGH")["AVERAGERESPONSETIME"]
    .mean()
    .reset_index()
)

fig1_labels = {
    "AVERAGERESPONSETIME": "Avg Response Time (minutes)",
    "INCIDENTBOROUGH": "Borough",
}
fig1 = px.bar(
    borough_avg_response,
    x="INCIDENTBOROUGH",
    y="AVERAGERESPONSETIME",
    title="Average Response Time by Borough",
    labels=fig1_labels,
    color="AVERAGERESPONSETIME",  # shade each bar by its own value
    color_continuous_scale="Blues",
)
fig1.show()


In [39]:
# ---- Plot 2: Time-Series Line Chart of Response Times ----
# One point per YEARMONTH: the mean of all rows sharing that timestamp
response_by_month = fdny_df.groupby("YEARMONTH")["AVERAGERESPONSETIME"].mean()
df_time_series = response_by_month.reset_index()

fig2 = px.line(
    df_time_series,
    x="YEARMONTH",
    y="AVERAGERESPONSETIME",
    title="Response Time Trend Over Time",
    labels={
        "YEARMONTH": "Year-Month",
        "AVERAGERESPONSETIME": "Avg Response Time (minutes)",
    },
    markers=True,
)
fig2.show()

In [41]:
# ---- Plot 3: Heatmap of Incident Count by Month and Borough ----
# Add a string version of the month (adds a "YearMonth" column to fdny_df)
# so the heatmap rows are labelled as YYYY-MM.
fdny_df["YearMonth"] = fdny_df["YEARMONTH"].dt.strftime('%Y-%m')

# Rows: month label, columns: borough, cells: summed incident count
heatmap_data = fdny_df.pivot_table(
    index="YearMonth",
    columns="INCIDENTBOROUGH",
    values="INCIDENTCOUNT",
    aggfunc="sum",
)

fig3 = px.imshow(
    heatmap_data,
    aspect="auto",
    labels={"color": "Incident Count"},
    title="Heatmap of Incident Counts by Borough and Month",
    color_continuous_scale="Reds",
)
fig3.show()


In [47]:
# ---- Visualization 4: Distribution of Response Times ----
# Histogram of the per-row average response time, with a box plot in the margin
fig4_options = dict(
    x="AVERAGERESPONSETIME",
    nbins=20,
    title="Distribution of Average Response Times",
    labels={"AVERAGERESPONSETIME": "Avg Response Time (minutes)"},
    marginal="box",
    color_discrete_sequence=["blue"],
)
fig4 = px.histogram(fdny_df, **fig4_options)
fig4.show()


In [48]:
# ---- Visualization 5: Monthly Trend of Emergency Incidents ----
# Sum INCIDENTCOUNT over every row that shares a YEARMONTH timestamp.
# NOTE(review): the sum runs over all boroughs and classifications present in
# fdny_df, including aggregate rows such as "Citywide" seen in the head()
# preview - confirm the totals are not double counted.
incidents_by_month = fdny_df.groupby("YEARMONTH")["INCIDENTCOUNT"].sum()
df_incident_trend = incidents_by_month.reset_index()

fig5 = px.line(
    df_incident_trend,
    x="YEARMONTH",
    y="INCIDENTCOUNT",
    title="Monthly Trend of Emergency Incidents",
    labels={"YEARMONTH": "Year-Month", "INCIDENTCOUNT": "Total Incidents"},
    markers=True,
)
fig5.show()


In [49]:
# ---- Visualization 6: Incident Counts by Borough & Classification ----
# Aggregate before plotting: px.bar draws one rectangle per input row, so
# passing the raw frame would stack thousands of thin segments per borough.
# Summing first gives one segment per (borough, classification) pair with the
# same overall bar heights.
borough_class_counts = (
    fdny_df.groupby(["INCIDENTBOROUGH", "INCIDENTCLASSIFICATION"])["INCIDENTCOUNT"]
    .sum()
    .reset_index()
)

fig6 = px.bar(borough_class_counts, x="INCIDENTBOROUGH", y="INCIDENTCOUNT", color="INCIDENTCLASSIFICATION",
              title="Incident Counts by Borough and Classification",
              labels={"INCIDENTBOROUGH": "Borough", "INCIDENTCOUNT": "Total Incidents"},
              barmode="stack", color_discrete_sequence=px.colors.qualitative.Set2)
fig6.show()


In [50]:
# ---- Visualization 7: Scatter Plot of Response Time vs Incident Count ----
# One marker per row: colored by borough, sized by incident count, with the
# incident classification shown on hover.
fig7_labels = {
    "INCIDENTCOUNT": "Incident Count",
    "AVERAGERESPONSETIME": "Avg Response Time (minutes)",
}
fig7 = px.scatter(
    fdny_df,
    x="INCIDENTCOUNT",
    y="AVERAGERESPONSETIME",
    color="INCIDENTBOROUGH",
    size="INCIDENTCOUNT",
    title="Response Time vs Incident Count",
    labels=fig7_labels,
    hover_data=["INCIDENTCLASSIFICATION"],
)
fig7.show()


In [51]:
# ---- Visualization 8: Pie Chart of Incident Distribution by Borough ----
# "Citywide" appears as an INCIDENTBOROUGH value in the data (see the head()
# preview) but it is an aggregate, not a borough. Leaving it in would give it
# its own slice and shrink every real borough's share, so it is excluded
# before the per-borough totals are computed.
borough_only = fdny_df[fdny_df["INCIDENTBOROUGH"] != "Citywide"]
df_pie = borough_only.groupby("INCIDENTBOROUGH")["INCIDENTCOUNT"].sum().reset_index()

fig8 = px.pie(df_pie, names="INCIDENTBOROUGH", values="INCIDENTCOUNT",
              title="Proportion of Incidents by Borough",
              color_discrete_sequence=px.colors.qualitative.Pastel)
fig8.show()
