In [5]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering
from sklearn.metrics import silhouette_score
import os

# Locate the CAD reconciliation export in the current working directory and
# load it into a DataFrame.
csv_name = r"Redacted-Department _ CAD Reconciliation.csv"
working_dir = os.getcwd()
path = os.path.join(working_dir, csv_name)

df = pd.read_csv(path)

# Task A1: What is the range of data for the cases (dispatches) ?

# Task A2: What % of data is missing, by each column?
#XREF ID                   0.000000%
#DISPATCH UNIT             0.000000%
#DISPATCH CREATED DATE     0.000000%
#INCIDENT NUMBER           0.000000%
#1ST UNIT ON SCENE        19.454545%
#ALARM DATE TIME           1.409091%
#CALL COMPLETE             1.409091%
#SHIFT                     3.136364%

# Fraction of null cells per column (mean of the boolean null mask), scaled
# to a percentage.
missing_fraction = df.isnull().mean()
missing_percent = missing_fraction.mul(100)
print("Missing data % by column:\n", missing_percent)



# a3.

# a4.
# Keep only rows with no missing value in any column. The explicit copy makes
# df_clean an independent frame, so the column assignments below never touch df.
df_clean = df.dropna().copy()


# b.1
# Parse both timestamp columns; values that cannot be parsed become NaT
# (errors="coerce") instead of raising.
# NOTE(review): no explicit format= is passed, so parsing relies on pandas'
# inference. The recorded average (125722 minutes) looks suspiciously large —
# confirm the CSV's timestamp layout (e.g. day/month order) and check outliers.
timestamp_columns = ["ALARM DATE TIME", "CALL COMPLETE"]
for column in timestamp_columns:
    df_clean[column] = pd.to_datetime(df_clean[column], errors="coerce")

# Discard rows where either timestamp failed to parse.
df_clean = df_clean.dropna(subset=timestamp_columns)

# Resolution time = call complete minus alarm time, expressed in minutes.
elapsed = df_clean["CALL COMPLETE"] - df_clean["ALARM DATE TIME"]
df_clean["Resolution_Time"] = elapsed.dt.total_seconds() / 60

avg_resolution = df_clean["Resolution_Time"].mean()
print(f"Average resolution time: {avg_resolution:.2f} minutes")

# b.2
# Count the comma-separated entries in each DISPATCH UNIT cell: the number of
# entries is the number of commas plus one.
# NOTE(review): assumes several units on one alarm are listed comma-separated
# in a single cell — confirm against the CSV.
unit_text = df_clean["DISPATCH UNIT"].astype(str)
df_clean["Num_Units"] = unit_text.str.count(",") + 1

avg_units = df_clean["Num_Units"].mean()
print(f"Average number of fire units per alarm: {avg_units:.2f}")

# b.3
# Tally the rows per SHIFT value; value_counts() orders the tally from the most
# to the least frequent shift.
shift_column = df_clean["SHIFT"]
shift_counts = shift_column.value_counts()
print("Number of alarms per shift:\n", shift_counts)

# Label of the shift with the highest count.
busiest_shift = shift_counts.idxmax()
print(f"\nThe busiest shift is: {busiest_shift}")

# b.4
# Build an hour-of-day x day-of-week matrix of alarm counts.
# "ALARM DATE TIME" is already datetime64 here (parsed and NaT-filtered in
# b.1), so it is not parsed a second time.
alarm_times = df_clean["ALARM DATE TIME"]
df_clean["DayOfWeek"] = alarm_times.dt.day_name()
df_clean["Hour"] = alarm_times.dt.hour

week_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Count rows per (Hour, DayOfWeek) cell, then force a fixed 24 x 7 grid.
# reindex() inserts an all-zero row/column for any hour or weekday that never
# occurs in the data, where plain column selection would raise KeyError and
# missing hours would silently shrink the matrix.
alarm_counts = df_clean.pivot_table(
    index="Hour",
    columns="DayOfWeek",
    values="XREF ID",
    aggfunc="count",
    fill_value=0,
).reindex(
    index=pd.RangeIndex(24, name="Hour"),
    columns=pd.Index(week_order, name="DayOfWeek"),
    fill_value=0,
)

# Matrix used for clustering: the raw counts only, taken before any totals are
# added so no "Total" row or column has to be stripped back out afterwards.
alarm_matrix_clustering = alarm_counts.copy()

# Display version with marginal totals. The "Total" row is computed after the
# "Total" column exists, so its last cell is the grand total.
alarm_matrix = alarm_counts.copy()
alarm_matrix['Total'] = alarm_matrix.sum(axis=1)
alarm_matrix.loc['Total'] = alarm_matrix.sum(axis=0)

print(alarm_matrix)


# c.1
# Cluster the 24 hourly rows (one 7-dimensional weekday-count vector per hour)
# into two groups with two algorithms and compare them by silhouette score.

# K-Means
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> "auto", with a FutureWarning in between), so leaving it
# unset makes the result depend on the installed version.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(alarm_matrix_clustering)

silhouette_kmeans = silhouette_score(alarm_matrix_clustering, kmeans_labels)
print(f"K-Means silhouette score: {silhouette_kmeans:.4f}")


# Spectral Clustering
# NOTE(review): the RBF affinity is applied to raw, unscaled counts with the
# default gamma; if the score stays low, consider scaling the features or
# tuning gamma — confirm against the assignment requirements first.
spectral = SpectralClustering(n_clusters=2, affinity="rbf", random_state=42)
spectral_labels = spectral.fit_predict(alarm_matrix_clustering)

silhouette_spectral = silhouette_score(alarm_matrix_clustering, spectral_labels)
print(f"Spectral Clustering silhouette score: {silhouette_spectral:.4f}")



Missing data % by column:
 XREF ID                   0.000000
DISPATCH UNIT             0.000000
DISPATCH CREATED DATE     0.000000
INCIDENT NUMBER           0.000000
1ST UNIT ON SCENE        19.454545
ALARM DATE TIME           1.409091
CALL COMPLETE             1.409091
SHIFT                     3.136364
dtype: float64
Average resolution time: 125722.06 minutes
Average number of fire units per alarm: 1.48
Number of alarms per shift:
 SHIFT
A    593
C    570
B    557
Name: count, dtype: int64

The busiest shift is: A
DayOfWeek  Monday  Tuesday  Wednesday  Thursday  Friday  Saturday  Sunday  \
Hour                                                                        
0               7        6          4         1       2         7       4   
1               8        7          7         4       3         5       7   
2               4        3          2         2       3         7       7   
3               6        8          9         1      10         4       4   
4              

  df_clean["ALARM DATE TIME"] = pd.to_datetime(df_clean["ALARM DATE TIME"], errors="coerce")
  df_clean["CALL COMPLETE"] = pd.to_datetime(df_clean["CALL COMPLETE"], errors="coerce")


Spectral Clustering silhouette score: 0.1170
