In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.cluster import SpectralClustering
from sklearn.metrics import silhouette_score
import os

# Load the CAD reconciliation export from the current working directory.
path = os.path.join(os.getcwd(), r"Redacted-Department _ CAD Reconciliation.csv")

df = pd.read_csv(path)

# a1. Data range
# NOTE(review): 'Dispatch_Date' is not one of the columns listed by the
# missing-data report below; presumably "DISPATCH CREATED DATE" was meant --
# confirm before re-enabling these two lines.
#print("Earliest dispatch:", df['Dispatch_Date'].min())
#print("Latest dispatch:", df['Dispatch_Date'].max())
# a2. % of missing data
# isna().mean() is the fraction of missing cells per column; * 100 -> percent.
missing_percent = df.isna().mean() * 100
print("Missing data % by column:\n", missing_percent)
# a3.

# a4.
# Keep only rows with no missing value in any column. The explicit .copy()
# makes df_clean an independent DataFrame, so the column assignments in the
# later steps no longer trigger pandas' SettingWithCopyWarning.
df_clean = df.dropna().copy()

# b.1
# Average resolution time: minutes elapsed between the alarm timestamp and
# the call-complete timestamp.
# Work on an explicit copy so the assignments below write to an independent
# frame rather than to a frame pandas tracks as derived from `df`
# (which is what raises SettingWithCopyWarning).
df_clean = df_clean.copy()
df_clean["ALARM DATE TIME"] = pd.to_datetime(df_clean["ALARM DATE TIME"], errors="coerce")
df_clean["CALL COMPLETE"] = pd.to_datetime(df_clean["CALL COMPLETE"], errors="coerce")

# errors="coerce" turns unparseable values into NaT; discard those rows.
df_clean = df_clean.dropna(subset=["ALARM DATE TIME", "CALL COMPLETE"]).copy()

# Timedelta -> seconds -> minutes.
df_clean["Resolution_Time"] = (df_clean["CALL COMPLETE"] - df_clean["ALARM DATE TIME"]).dt.total_seconds() / 60

# NOTE(review): a recorded run printed ~125,722 minutes (roughly 87 days) here.
# A plain mean is sensitive to extreme or negative durations -- verify the
# timestamps parse as intended and consider inspecting the distribution.
avg_resolution = df_clean["Resolution_Time"].mean()
print(f"Average resolution time: {avg_resolution:.2f} minutes")

# b.2
# Units per alarm: "DISPATCH UNIT" is treated as a comma-separated list, so
# the number of entries is the number of commas plus one.
df_clean["Num_Units"] = df_clean["DISPATCH UNIT"].map(
    lambda units: str(units).count(",") + 1
)
avg_units = df_clean["Num_Units"].mean()
print(f"Average number of fire units per alarm: {avg_units:.2f}")

# b.3
# Tally alarms per SHIFT value and report the shift with the highest count.
shift_counts = df_clean.loc[:, "SHIFT"].value_counts()
print("Number of alarms per shift:\n", shift_counts)

# Positional lookup of the largest count, mapped back to its shift label.
busiest_shift = shift_counts.index[shift_counts.argmax()]
print(f"\nThe busiest shift is: {busiest_shift}")

# b.4
# Hour-of-day x day-of-week table of alarm counts, with row/column totals.
# "ALARM DATE TIME" was already converted to datetime in step b.1, so the
# .dt accessors can be used directly without parsing it again.
df_clean["DayOfWeek"] = df_clean["ALARM DATE TIME"].dt.day_name()
df_clean["Hour"] = df_clean["ALARM DATE TIME"].dt.hour

# Count rows (via the "XREF ID" column) for each hour/weekday combination.
alarm_matrix = df_clean.pivot_table(
    index="Hour",
    columns="DayOfWeek",
    values="XREF ID",
    aggfunc="count",
    fill_value=0,
)

week_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# reindex instead of alarm_matrix[week_order]: a weekday with no alarms then
# becomes an all-zero column rather than a KeyError, and an hour with no
# alarms becomes an all-zero row rather than being silently absent.
alarm_matrix = alarm_matrix.reindex(
    index=pd.Index(range(24), name="Hour"),
    columns=pd.Index(week_order, name="DayOfWeek"),
    fill_value=0,
)

# Add Total column (sum across rows)
alarm_matrix['Total'] = alarm_matrix.sum(axis=1)

# Add Total row (sum across columns, including the Total column)
alarm_matrix.loc['Total'] = alarm_matrix.sum(axis=0)

# Optional: convert to int for cleaner display
alarm_matrix = alarm_matrix.astype(int)

print(alarm_matrix)


# c.1
# Cluster the hours of the day by their per-weekday alarm counts and compare
# K-Means against Spectral Clustering using the silhouette score.
#
# alarm_matrix carries a 'Total' row and a 'Total' column that exist only for
# display. Left in, the 'Total' row would be clustered as if it were an hour
# and the 'Total' column would act as an extra (redundant, large-magnitude)
# feature, so both are removed first. errors="ignore" keeps this safe if the
# totals are not present.
hourly_profile = alarm_matrix.drop(index="Total", columns="Total", errors="ignore")

# K-Means
# n_init is set explicitly so the number of restarts does not depend on the
# installed scikit-learn version's default.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(hourly_profile)

silhouette_kmeans = silhouette_score(hourly_profile, kmeans_labels)
print(f"K-Means silhouette score: {silhouette_kmeans:.4f}")


#Spectral Clustering
spectral = SpectralClustering(n_clusters=3, affinity='nearest_neighbors', random_state=42)
spectral_labels = spectral.fit_predict(hourly_profile)

silhouette_spectral = silhouette_score(hourly_profile, spectral_labels)
print(f"Spectral Clustering silhouette score: {silhouette_spectral:.4f}")


Missing data % by column:
 XREF ID                   0.000000
DISPATCH UNIT             0.000000
DISPATCH CREATED DATE     0.000000
INCIDENT NUMBER           0.000000
1ST UNIT ON SCENE        19.454545
ALARM DATE TIME           1.409091
CALL COMPLETE             1.409091
SHIFT                     3.136364
dtype: float64
Average resolution time: 125722.06 minutes
Average number of fire units per alarm: 1.48
Number of alarms per shift:
 SHIFT
A    593
C    570
B    557
Name: count, dtype: int64

The busiest shift is: A


  df_clean["ALARM DATE TIME"] = pd.to_datetime(df_clean["ALARM DATE TIME"], errors="coerce")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean["ALARM DATE TIME"] = pd.to_datetime(df_clean["ALARM DATE TIME"], errors="coerce")
  df_clean["CALL COMPLETE"] = pd.to_datetime(df_clean["CALL COMPLETE"], errors="coerce")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean["CALL COMPLETE"] = pd.to_datetime(df_clean["CALL COMPLETE"], errors="coerce")


DayOfWeek  Monday  Tuesday  Wednesday  Thursday  Friday  Saturday  Sunday  \
Hour                                                                        
0               7        6          4         1       2         7       4   
1               8        7          7         4       3         5       7   
2               4        3          2         2       3         7       7   
3               6        8          9         1      10         4       4   
4               2        4          5         2       6         4       4   
5               8        3          6         5       3         2       4   
6               5        5          6         6       8         6       9   
7              13       13          6         7      11         4       7   
8              11        9         13         7       6         4      11   
9               8       14         11        12      12         7      11   
10             16       12          9        10      11        13      11   