In [1]:
import pandas as pd

# Example data: one row per region. "Incident Rate" is incidents per 100,000
# orders; every other numeric column is that incident type's share (in %) of
# the region's incidents.
incident_data = {
    "Region": ["Region 1", "Region 2", "Region 3", "Region 4"],
    "Incident Rate": [16, 5, 3, 4],  # incidents per 100,000 orders
    "Dangerous Driving": [38, 19, 36, 28],
    "Vehicle Crash": [21, 29, 26, 31],
    "Verbal Assault": [20, 24, 16, 21],
    "Theft/Robbery": [10, 22, 18, 15],
    "Physical Assault": [6, 4, 3, 3],
    "Harassment": [5, 2, 1, 2],
}

df = pd.DataFrame(incident_data)

# Pick the incident-type columns by excluding the non-type columns by name
# (rather than by position), so reordering or adding columns cannot silently
# change which columns are scored. Raises KeyError if either label is missing.
incident_types = df.columns.drop(["Region", "Incident Rate"])

# Sanity check: the shares are percentages, so each region's row must total 100.
assert (df[incident_types].sum(axis=1) == 100).all(), \
    "incident shares must sum to 100% in every region"

# Weighted score per region and incident type: (incident % / 100) * incident rate.
# axis=0 aligns the rate Series with the rows (regions), not the columns.
weighted = df[incident_types].div(100).mul(df["Incident Rate"], axis=0)

# Keep the per-region weighted values on df under "<incident type> Weighted".
df = df.join(weighted.add_suffix(" Weighted"))

# Sum across regions for each incident type
scores = weighted.sum().to_dict()

# Normalize the scores so the highest-scoring incident type is exactly 1.0
max_score = max(scores.values())
assert max_score > 0, "cannot normalize: every weighted score is zero"
normalized_scores = {k: round(v / max_score, 3) for k, v in scores.items()}

# Convert to DataFrame for sorting and visualization.
# A stable sort keeps tied incident types in their original column order.
score_df = pd.DataFrame(list(normalized_scores.items()), columns=["Incident Type", "Score"])
score_df = score_df.sort_values(by="Score", ascending=False, kind="stable")

print(score_df)

       Incident Type  Score
0  Dangerous Driving  1.000
1      Vehicle Crash  0.740
2     Verbal Assault  0.620
3      Theft/Robbery  0.416
4   Physical Assault  0.148
5         Harassment  0.109


In [3]:
# Incident types, in the same order as the percentage lists below
incident_types = [
    "Dangerous Driving", "Vehicle Crash", "Verbal Assault",
    "Theft/Robbery", "Physical Assault", "Harassment"
]

# Incident percentages per region (one value per entry in incident_types)
incident_distribution = {
    "Region 1": [38, 21, 20, 10, 6, 5],
    "Region 2": [19, 29, 24, 22, 4, 2],
    "Region 3": [36, 26, 16, 18, 3, 1],
    "Region 4": [28, 31, 21, 15, 3, 2]
}

# Incidents per 100,000 orders
incident_rates = {
    "Region 1": 16,
    "Region 2": 5,
    "Region 3": 3,
    "Region 4": 4
}

# How many of the highest-scoring incident types to report per region
TOP_N = 3

# Sanity checks: the three hand-written structures above must line up, since
# percentages are matched to incident types purely by position.
assert incident_distribution.keys() == incident_rates.keys(), \
    "every region needs both a distribution and a rate"
assert all(len(p) == len(incident_types) for p in incident_distribution.values()), \
    "each region needs exactly one percentage per incident type"

# Generate full dataset: one record per (region, incident type), where
# Score = (incident % / 100) * incident rate, rounded to 2 decimals.
data = [
    {
        "Region": region,
        "Incident Type": incident_type,
        "Score": round((pct / 100) * incident_rates[region], 2),
    }
    for region, percentages in incident_distribution.items()
    for incident_type, pct in zip(incident_types, percentages)
]

# Create DataFrame
df2 = pd.DataFrame(data)

# Normalize scores within each region so each region's top score is 1.0
df2["Normalized Score"] = df2["Score"] / df2.groupby("Region")["Score"].transform("max")

# Get the top TOP_N incidents per region.
# Sorting and then taking each group's head avoids groupby.apply operating on
# the grouping column, which emits a DeprecationWarning on pandas >= 2.2, and
# keeps the "Region" column in the result.
top_incidents = (
    df2.sort_values(["Region", "Score"], ascending=[True, False])
    .groupby("Region", sort=False)
    .head(TOP_N)
    .reset_index(drop=True)
)

# Display results, one section per region (rows are already sorted by region)
for region, subset in top_incidents.groupby("Region", sort=False):
    print(f"\n{region} - Top {TOP_N} Incident Types:")
    for incident_type, score, normalized in zip(
        subset["Incident Type"], subset["Score"], subset["Normalized Score"]
    ):
        print(f" - {incident_type}: Score = {score}, Normalized = {round(normalized, 2)}")


Region 1 - Top 3 Incident Types:
 - Dangerous Driving: Score = 6.08, Normalized = 1.0
 - Vehicle Crash: Score = 3.36, Normalized = 0.55
 - Verbal Assault: Score = 3.2, Normalized = 0.53

Region 2 - Top 3 Incident Types:
 - Vehicle Crash: Score = 1.45, Normalized = 1.0
 - Verbal Assault: Score = 1.2, Normalized = 0.83
 - Theft/Robbery: Score = 1.1, Normalized = 0.76

Region 3 - Top 3 Incident Types:
 - Dangerous Driving: Score = 1.08, Normalized = 1.0
 - Vehicle Crash: Score = 0.78, Normalized = 0.72
 - Theft/Robbery: Score = 0.54, Normalized = 0.5

Region 4 - Top 3 Incident Types:
 - Vehicle Crash: Score = 1.24, Normalized = 1.0
 - Dangerous Driving: Score = 1.12, Normalized = 0.9
 - Verbal Assault: Score = 0.84, Normalized = 0.68


  .apply(lambda group: group.nlargest(3, "Score"))
