In [None]:
!pip install collections-extended folium scikit-learn

In [2]:
import csv
import folium
from sklearn.cluster import DBSCAN
from collections import Counter

In [3]:
def render_data_on_map(unified_csv_file: str, output_map_file: str) -> None:
    # Read the unified dataset and extract the accidents with severity 1
    data = []
    with open(unified_csv_file, "r") as file:
        csv_reader = csv.DictReader(file)
        data = [row for row in csv_reader if row["Severity"] == "1"]

    if not data:
        print("No accidents with severity 1 found in the dataset.")
        return

    latitudes = [float(row["Latitude"]) for row in data]
    longitudes = [float(row["Longitude"]) for row in data]

    mean_lat = sum(latitudes) / len(latitudes)
    mean_lon = sum(longitudes) / len(longitudes)

    map_center = [mean_lat, mean_lon]
    accident_map = folium.Map(location=map_center, zoom_start=6)

    # Add markers for each accident location with severity 1
    for row in data:
        lat = float(row["Latitude"])
        lon = float(row["Longitude"])
        severity = row["Severity"]

        marker_color = "red"  # Use red color for severity 1 accidents

        folium.Marker(
            location=[lat, lon],
            popup=f"Severity: {severity}",
            icon=folium.Icon(color=marker_color)
        ).add_to(accident_map)

    # Save the map as an HTML file
    accident_map.save(output_map_file)
    print(f"Map saved to {output_map_file}")

    # Print the severity count message
    severity_count = len(data)
    print(f"{severity_count} accidents with severity 1 in Manchester.")

def analyze_high_risk_areas(unified_csv_file: str) -> None:
    # Read the unified dataset and extract the accidents with severity 1 or 2
    data = []
    with open(unified_csv_file, "r") as file:
        csv_reader = csv.DictReader(file)
        data = [row for row in csv_reader if row["Severity"] in ["1", "2"]]

    # Extract latitude and longitude coordinates
    coordinates = [[float(row["Latitude"]), float(row["Longitude"])] for row in data]

    # Perform clustering analysis using DBSCAN
    dbscan = DBSCAN(eps=0.01, min_samples=5)
    clusters = dbscan.fit_predict(coordinates)

    # Count the number of accidents in each cluster
    cluster_counts = Counter(clusters)

    # Calculate the center coordinates of each cluster
    cluster_centers = {}
    for idx, cluster_id in enumerate(clusters):
        if cluster_id != -1:  # Exclude noise points
            if cluster_id not in cluster_centers:
                cluster_centers[cluster_id] = [coordinates[idx]]
            else:
                cluster_centers[cluster_id].append(coordinates[idx])

    for cluster_id, center_coords in cluster_centers.items():
        latitudes = [coord[0] for coord in center_coords]
        longitudes = [coord[1] for coord in center_coords]
        cluster_centers[cluster_id] = [sum(latitudes) / len(latitudes), sum(longitudes) / len(longitudes)]

    # Print out the top high-risk areas with their coordinates and the number of severe accidents in each area
    print("Top High-Risk Areas:")
    for cluster_id, count in cluster_counts.most_common(5):
        if cluster_id != -1:  # Exclude noise points
            center_lat, center_lon = cluster_centers[cluster_id]
            print(f"Cluster {cluster_id}: {count} severe accidents")
            print(f"  Latitude: {center_lat:.6f}, Longitude: {center_lon:.6f}")

def analyze_contributing_factors(unified_csv_file: str) -> None:
    # Read the unified dataset and extract the accidents with severity 1 or 2
    data = []
    with open(unified_csv_file, "r") as file:
        csv_reader = csv.DictReader(file)
        data = [row for row in csv_reader if row["Severity"] in ["1", "2"]]

    # Analyze the contributing factors and calculate their frequency
    contributing_factors = [
        "RoadSurface", "LightingCondition", "WeatherCondition", "JunctionDetail"
    ]
    factor_counts = {factor: Counter() for factor in contributing_factors}

    for row in data:
        for factor in contributing_factors:
            factor_counts[factor][row[factor]] += 1

    # Print out the top contributing factors associated with severe accidents
    print("Top Contributing Factors:")
    for factor, counts in factor_counts.items():
        print(f"{factor}:")
        for value, count in counts.most_common(3):
            print(f"- {value}: {count} accidents")
        print()

def main() -> None:
    unified_dataset_file = "./data/unified_dataset.csv"
    output_map_file = "./data/severity_1_accident_map.html"

    render_data_on_map(unified_dataset_file, output_map_file)
    analyze_high_risk_areas(unified_dataset_file)
    analyze_contributing_factors(unified_dataset_file)

if __name__ == "__main__":
    main()

Map saved to ./data/severity_1_accident_map.html
977 accidents with severity 1 in Manchester.
Top High-Risk Areas:
Cluster 0: 10191 severe accidents
  Latitude: 53.501460, Longitude: -2.275125
Cluster 1: 101 severe accidents
  Latitude: 53.592716, Longitude: -2.549845
Cluster 4: 37 severe accidents
  Latitude: 53.411820, Longitude: -2.435307
Cluster 3: 30 severe accidents
  Latitude: 53.585847, Longitude: -2.042128
Top Contributing Factors:
RoadSurface:
- 1: 7391 accidents
- 2: 3057 accidents
- 4: 139 accidents

LightingCondition:
- 1: 5959 accidents
- 4: 3501 accidents
- 2: 508 accidents

WeatherCondition:
- 1: 8524 accidents
- 2: 1333 accidents
- 9: 314 accidents

JunctionDetail:
- 3: 4118 accidents
- 0: 3531 accidents
- 6: 1896 accidents

