In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [None]:
# Load the cleaned incident table; `df` keeps every column for later plotting/export.
df = pd.read_csv("cleaned_gunviolence.csv")

# Columns left out of the clustering input (presumably identifiers/labels rather
# than features -- confirm against the dataset schema).
non_feature_cols = ["Unnamed: 0", "incident_id", "date", "state"]
df_kmeans = df.drop(non_feature_cols, axis=1)

# Standardize each remaining column (zero mean, unit variance) so that no single
# feature dominates the Euclidean distances K-Means relies on.
scaler = StandardScaler().fit(df_kmeans)
scaled_data = scaler.transform(df_kmeans)

In [None]:
# Fit K-Means for k = 1..10 and record each model's inertia
# (within-cluster sum of squared distances) for the elbow plot.
k_values = range(1, 11)
inertia = [
    KMeans(n_clusters=k, random_state=1, n_init=10).fit(scaled_data).inertia_
    for k in k_values
]

# Inertia against k: the "elbow" is where adding clusters stops paying off.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(k_values, inertia, marker='o')
ax.set_title("Elbow Method")
ax.set_xlabel("Number of clusters")
ax.set_ylabel("Inertia")
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Final model: 3 clusters, with the same seed and n_init as the elbow search.
kmeans = KMeans(n_clusters=3, random_state=1, n_init=10).fit(scaled_data)

# Attach each row's cluster id to the full frame; scaled_data was built from df's
# rows in order, so the labels line up positionally.
df["cluster"] = kmeans.labels_

In [None]:
def plot_clusters(data, x, y, title):
    """Scatter two columns of ``data``, colouring each point by its ``cluster`` id.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the ``x``, ``y`` and ``cluster`` columns.
    x, y : str
        Column names for the horizontal and vertical axes.
    title : str
        Figure title.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    # Cluster ids are arbitrary labels, not magnitudes: naming a qualitative
    # palette makes seaborn map hue categorically instead of on a numeric ramp.
    sns.scatterplot(
        data=data, x=x, y=y, hue="cluster", palette="deep", alpha=0.5, ax=ax
    )
    ax.set_title(title)
    ax.grid(True)
    fig.tight_layout()
    plt.show()


plot_clusters(df, "n_killed", "n_injured", "Clusters by Killed vs Injured")
plot_clusters(df, "n_guns_involved", "n_victims", "Clusters by Guns Involved vs Victims")

In [None]:
# Profile each cluster by the mean of the features that were actually clustered on.
# Restricting to df_kmeans' columns leaves out columns such as "Unnamed: 0" and
# "incident_id", which were excluded from clustering and whose averages would
# only add noise to the table.
feature_cols = list(df_kmeans.columns)
cluster_stats = (
    df.groupby("cluster")[feature_cols]
    .mean(numeric_only=True)
    .round(2)
)
display(cluster_stats)

### Interpretation

- **Cluster 0**: small-scale incidents.
- **Cluster 1**: moderate incidents with more victims and suspects.
- **Cluster 2**: severe incidents (mass shootings, many victims and guns involved).

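**How the clusters can be used**:
- Severity labeling: tag each incident with its cluster as a coarse severity level.
- Geographic/temporal trend detection: compare how the clusters are distributed across states and over time.
- Feature in predictive modeling: feed the cluster id to downstream models as a categorical feature.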

**Why k=3?**
The elbow plot bends clearly at k=3: adding clusters beyond three reduces inertia only marginally.


In [None]:
# Persist the full frame, now including the "cluster" column, without the index.
# NOTE(review): df still carries "Unnamed: 0" (it was dropped only from df_kmeans),
# so that column is written as well -- confirm this is intended.
output_path = "gunviolence_clustered.csv"
df.to_csv(output_path, index=False)