# Weighted Area Risk Level Analysis

This notebook converts the `add_weighted_area_risk_level.py` script to an interactive Jupyter notebook. It adds sophisticated risk level columns to the crime dataset based on three key factors:

## Methodology:
1. **Crime Count**: Total number of crimes per area
2. **Crime Severity**: Part 1 crimes (serious) are weighted twice as heavily as Part 2 crimes
3. **Temporal Patterns**: Recent crimes weighted more heavily using exponential decay

## Output Columns:
- `area_risk_level`: Categorical (Low Risk, Medium Risk, High Risk, Very High Risk)
- `area_risk_level_numeric`: Numeric (1, 2, 3, 4) for ML models
- `area_risk_score`: Raw risk score (0-100) used for classification

## Risk Score Formula:
**Final Score = 100 × [0.7 × (Weighted Score / Max Weighted Score) + 0.3 × Serious Crime Ratio]**

Where:
- **Weighted Score** = Σ(Recency Weight × Severity Weight) for each crime
- **Recency Weight** = e^(-days_ago/365), where days_ago is counted back from the most recent crime date in the dataset (exponential decay with a 365-day time constant)
- **Severity Weight** = 2.0 for Part 1 crimes, 1.0 for Part 2 crimes


In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import os
import warnings
warnings.filterwarnings('ignore')

# Set up plotting style
plt.style.use('default')
sns.set_palette("husl")



In [None]:
# Load dataset and execute the complete weighted risk analysis
#
# The CSV path is relative to the notebook's working directory. The frame is
# bound to the module-level name `df`, which the analysis below reads.

df = pd.read_csv('data/cleaned_crime_data.csv')
# The original literal was mojibake (UTF-8 emoji bytes decoded as cp1252);
# it is restored to the intended check-mark emoji.
print("✅ Loaded cleaned dataset")

# Quick sanity check: (rows, columns) of the loaded frame.
print(f"Dataset shape: {df.shape}")

def calculate_weighted_risk_score(df):
    """Compute a recency- and severity-weighted risk score for each area.

    Each crime is scored as ``recency_weight * severity_weight``:

    * ``recency_weight = exp(-days_ago / 365)``, where ``days_ago`` is the
      whole-day gap between the crime's ``DATE OCC`` and the latest
      ``DATE OCC`` found in ``df``.
    * ``severity_weight`` is 2.0 when ``Part 1-2`` is 1 and 1.0 when it is 2.
      Any other value maps to NaN, and NaN scores are skipped by the
      per-area sum, so such a crime adds nothing to ``weighted_score``.

    The per-area score is
    ``100 * (0.7 * weighted_score / max(weighted_score) + 0.3 * serious_crime_ratio)``.

    Args:
        df: Crime records containing the columns ``DATE OCC``, ``Part 1-2``,
            ``AREA NAME`` and ``DR_NO``. The frame is only read; the
            intermediate columns live on a private working copy, so nothing
            is added to the caller's data.

    Returns:
        A DataFrame with one row per ``AREA NAME`` and the columns
        ``AREA NAME``, ``crime_count`` (non-null ``DR_NO`` values),
        ``weighted_score``, ``serious_crimes`` (rows with ``Part 1-2 == 1``),
        ``avg_days_ago``, ``serious_crime_ratio`` and ``risk_score``.
    """
    # Copy only the columns needed for grouping so the scratch columns below
    # never leak into the caller's frame.
    work = df[['AREA NAME', 'DR_NO', 'Part 1-2']].copy()

    # Age of each crime in whole days, relative to the newest record.
    occurred = pd.to_datetime(df['DATE OCC'])
    work['days_ago'] = (occurred.max() - occurred).dt.days

    # Exponential recency decay times the severity weight (Part 1 counts 2x).
    recency_weight = np.exp(-work['days_ago'] / 365)
    severity_weight = work['Part 1-2'].map({1: 2.0, 2: 1.0})
    work['crime_score'] = recency_weight * severity_weight

    # Integer flag so the per-area sum is a plain count of Part 1 crimes.
    work['is_serious'] = (work['Part 1-2'] == 1).astype(int)

    # Named aggregation ties each output name to its source column instead
    # of relying on positional renaming afterwards.
    area_risk_scores = work.groupby('AREA NAME').agg(
        crime_count=('DR_NO', 'count'),
        weighted_score=('crime_score', 'sum'),
        serious_crimes=('is_serious', 'sum'),
        avg_days_ago=('days_ago', 'mean'),
    ).reset_index()

    # Share of an area's crimes that are Part 1.
    area_risk_scores['serious_crime_ratio'] = (
        area_risk_scores['serious_crimes'] / area_risk_scores['crime_count']
    )

    # 70% relative weighted volume (normalised by the busiest area) plus
    # 30% serious-crime share, scaled to the 0-100 range.
    relative_volume = (
        area_risk_scores['weighted_score'] / area_risk_scores['weighted_score'].max()
    )
    area_risk_scores['risk_score'] = (
        0.7 * relative_volume + 0.3 * area_risk_scores['serious_crime_ratio']
    ) * 100

    return area_risk_scores

def add_area_risk_levels(df):
    """Attach per-area risk level, numeric level and score to every crime row.

    Area scores come from ``calculate_weighted_risk_score(df)``. Areas are
    split into four quantile bins of ``risk_score`` with ``pd.qcut``; the
    lowest bin is labelled 'Low Risk' / 1 and the highest 'Very High Risk' / 4.

    Args:
        df: Crime records; must contain ``AREA NAME`` plus whatever
            ``calculate_weighted_risk_score`` requires. The three result
            columns are assigned onto this frame directly, so pass a copy
            if the original must stay untouched.

    Returns:
        A ``(df, area_risk_scores)`` tuple: the crime frame with
        ``area_risk_level``, ``area_risk_level_numeric`` and
        ``area_risk_score`` added (and the scratch columns removed when
        present), and the per-area statistics sorted by descending
        ``risk_score``.
    """
    # Per-area statistics, highest risk first.
    ranked = calculate_weighted_risk_score(df).sort_values('risk_score', ascending=False)

    # Quartile binning, once with text labels and once with 1-4 codes.
    scores = ranked['risk_score']
    ranked['risk_level'] = pd.qcut(
        scores,
        q=4,
        labels=['Low Risk', 'Medium Risk', 'High Risk', 'Very High Risk'],
    )
    numeric_bins = pd.qcut(scores, q=4, labels=[1, 2, 3, 4])
    ranked['risk_level_numeric'] = numeric_bins.astype(int)

    # Broadcast each area-level value onto the crime rows via an
    # area-name -> value lookup.
    area_names = ranked['AREA NAME']
    column_pairs = (
        ('area_risk_level', 'risk_level'),
        ('area_risk_level_numeric', 'risk_level_numeric'),
        ('area_risk_score', 'risk_score'),
    )
    for target_col, source_col in column_pairs:
        lookup = dict(zip(area_names, ranked[source_col]))
        df[target_col] = df['AREA NAME'].map(lookup)

    # Strip the intermediate scoring columns if they are on the frame.
    scratch_names = ('date_occurred', 'days_ago', 'recency_weight', 'severity_weight', 'crime_score')
    present = [name for name in scratch_names if name in df.columns]
    if present:
        df = df.drop(columns=present)

    return df, ranked

# Execute the analysis
#
# NOTE: the emoji in the messages below were stored as mojibake (UTF-8 bytes
# decoded as cp1252); they are restored to the intended characters.
print("\n" + "="*60)
print("CALCULATING WEIGHTED AREA RISK LEVELS")
print("="*60)

# Work on a copy so the frame loaded above keeps its original columns.
df_enhanced, area_statistics = add_area_risk_levels(df.copy())

# Display results: area_statistics is already sorted by descending risk
# score, so head(10) yields the ten riskiest areas.
print("\n🔥 Top 10 Highest Risk Areas:")
print("-" * 80)
print(f"{'Area':<20} {'Risk Score':<12} {'Crimes':<10} {'Serious %':<12} {'Risk Level':<15}")
print("-" * 80)

for _, row in area_statistics.head(10).iterrows():
    print(f"{row['AREA NAME']:<20} {row['risk_score']:>10.2f} {row['crime_count']:>10,} "
          f"{row['serious_crime_ratio']*100:>10.1f}% {row['risk_level']:<15}")

# Save results: the row-level dataset goes under data/, while the per-area
# table is written to the current working directory.
output_filename = 'data/crime_data_with_weighted_risk_levels.csv'
df_enhanced.to_csv(output_filename, index=False)
print(f"\n✅ Enhanced dataset saved as: {output_filename}")

area_stats_filename = 'area_weighted_risk_statistics.csv'
area_statistics.to_csv(area_stats_filename, index=False)
print(f"✅ Area statistics saved as: {area_stats_filename}")

print("\n📊 Process completed! Added 3 new columns:")
print("- area_risk_level (categorical)")
print("- area_risk_level_numeric (numeric 1-4)")
print("- area_risk_score (raw score 0-100)")
