In [None]:
import folium
from folium.plugins import HeatMap, MarkerCluster
import geopandas as gpd
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

In [None]:
def run_dashboard(df: pd.DataFrame):
    """
    Launch a Streamlit dashboard that provides interactive filtering of crime data,
    displays summary statistics, and shows geo-spatial maps.

    Args:
        df: Crime records. Must contain the "PdDistrict" and "Category" columns
            used by the sidebar filters, plus the columns that
            ``create_cluster_map`` reads.

    Side effects:
        Writes ``temp_cluster_map.html`` through ``create_cluster_map`` and, when
        the ``google.colab`` package is importable, asks it to download that file.
    """
    import streamlit as st
    from streamlit_folium import st_folium

    map_path = "temp_cluster_map.html"

    st.title("CityX Crime Analysis Dashboard")
    st.sidebar.header("Filters")

    # Every district / category is pre-selected, so the dashboard starts unfiltered.
    districts = df["PdDistrict"].unique()
    categories = df["Category"].unique()
    district_filter = st.sidebar.multiselect("Select District(s):",
                                             options=districts,
                                             default=districts)
    crime_filter = st.sidebar.multiselect("Select Crime Category:",
                                          options=categories,
                                          default=categories)

    filtered_df = df[df["PdDistrict"].isin(district_filter) & df["Category"].isin(crime_filter)]

    st.subheader("Crime Data Overview")
    st.write(filtered_df.head())
    st.write(f"Total crimes: {len(filtered_df)}")

    st.subheader("Crime Clusters Map")
    if filtered_df.empty:
        # Nothing to plot: a map cannot be centred on an empty selection.
        st.warning("No crimes match the selected filters.")
        return

    # Render the map that was actually built (previously an empty folium.Map()
    # was displayed and the cluster map was only written to disk).
    cluster_map = create_cluster_map(filtered_df, output_html=map_path)
    st_folium(cluster_map, width=700, height=500)

    # The download helper only exists inside Google Colab; skip it elsewhere
    # instead of failing after the dashboard has already rendered.
    try:
        from google.colab import files
    except ImportError:
        return
    files.download(map_path)



In [None]:
def create_heatmap(df: pd.DataFrame, output_html: str = "crime_heatmap.html"):
    """Generate and save an interactive heatmap of crime incidents using Folium.

    Args:
        df: Crime records with numeric "Latitude" and "Longitude" columns.
            The frame is not modified.
        output_html: Path of the HTML file the map is written to.

    Raises:
        ValueError: If no row has both a latitude and a longitude.
    """
    # Rows without coordinates cannot be placed on the map, so drop them first
    # (create_cluster_map cleans its input the same way).
    points = df.dropna(subset=["Latitude", "Longitude"])
    if points.empty:
        raise ValueError("No rows with valid Latitude/Longitude to plot.")

    # Centre the map on the mean incident location.
    center_coords = [points["Latitude"].mean(), points["Longitude"].mean()]
    crime_map = folium.Map(location=center_coords, zoom_start=12)

    heat_data = points[["Latitude", "Longitude"]].values.tolist()
    HeatMap(heat_data, radius=15, blur=10).add_to(crime_map)

    crime_map.save(output_html)
    print(f"Heatmap saved as {output_html}")



def create_cluster_map(df: pd.DataFrame, output_html: str = "crime_clusters.html"):
    """Generate and save an interactive cluster map using Folium with DBSCAN hotspot detection.

    The map contains one MarkerCluster layer per severity level, a heat layer, and
    a circle marker at the mean coordinate of every DBSCAN cluster ("hotspot").

    Args:
        df: Crime records with "Latitude", "Longitude", "Severity", "Category" and
            "Dates" columns; "Address" is optional. The frame is not modified.
        output_html: Path of the HTML file the map is written to.

    Returns:
        The folium.Map that was saved.

    Raises:
        ValueError: If no row has both a latitude and a longitude.
    """
    # DBSCAN settings, shared by the neighbour graph and the clustering step.
    eps = 0.1  # neighbourhood radius, measured on the standardized coordinates
    min_samples = 4  # neighbourhood size required for a core point

    # Clean data
    df_clean = df.dropna(subset=["Latitude", "Longitude"])
    if df_clean.empty:
        raise ValueError("No rows with valid Latitude/Longitude to plot.")
    center_coords = [df_clean["Latitude"].mean(), df_clean["Longitude"].mean()]

    # Create base map
    cluster_map = folium.Map(location=center_coords, zoom_start=12, tiles="cartodbpositron")

    # (minimum severity, marker colour, icon), checked from most to least severe;
    # anything below the last threshold falls back to the default style.
    severity_styles = (
        (5, 'red', 'exclamation-triangle'),
        (4, 'purple', 'exclamation-triangle'),
        (3, 'orange', 'info-sign'),
        (2, 'blue', 'info-sign'),
    )
    default_style = ('green', 'info-sign')

    # Create marker clusters by severity
    for severity, group in df_clean.groupby('Severity'):
        marker_cluster = MarkerCluster(name=f"Severity {severity}").add_to(cluster_map)
        color, icon = next(
            ((c, i) for threshold, c, i in severity_styles if severity >= threshold),
            default_style,
        )

        # Add markers for each crime in this severity group
        for _, row in group.iterrows():
            popup_content = (f"<b>Category:</b> {row['Category']}<br>"
                            f"<b>Date:</b> {row['Dates']}<br>"
                            f"<b>Severity:</b> {severity}<br>"
                            f"<b>Address:</b> {row.get('Address', 'Unknown')}")
            folium.Marker(
                location=[row["Latitude"], row["Longitude"]],
                popup=folium.Popup(popup_content, max_width=300),
                icon=folium.Icon(color=color, icon=icon)
            ).add_to(marker_cluster)

    # Add heat map layer (coordinates only, extracted in one vectorized step)
    coords = df_clean[['Latitude', 'Longitude']].values
    HeatMap(coords.tolist()).add_to(cluster_map)

    # Identify hotspots using DBSCAN clustering. With fewer than min_samples
    # points no cluster can form and the k-neighbour graph cannot be built, so
    # the step is skipped.
    if len(coords) >= min_samples:
        # Standardize the data
        coords_scaled = StandardScaler().fit_transform(coords)

        # Sparse k-nearest-neighbour distance graph, used as a precomputed metric
        # so DBSCAN never needs a dense pairwise distance matrix.
        neighbors = NearestNeighbors(n_neighbors=min_samples, radius=eps, metric='euclidean')
        neighbors.fit(coords_scaled)
        distance_matrix = neighbors.kneighbors_graph(coords_scaled, mode='distance')

        # Labels stay in a local array rather than being written onto df_clean,
        # which would be an assignment on a frame derived from the caller's data.
        labels = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed').fit(distance_matrix).labels_

        in_cluster = labels != -1  # -1 marks noise points
        if in_cluster.any():
            # One pass over the clustered rows gives every hotspot's centre and size.
            hotspots = (
                df_clean.loc[in_cluster, ['Latitude', 'Longitude']]
                .groupby(labels[in_cluster])
                .agg(lat=('Latitude', 'mean'), lon=('Longitude', 'mean'), n=('Latitude', 'size'))
            )

            # Add hotspot markers
            for spot in hotspots.itertuples():
                folium.CircleMarker(
                    location=[spot.lat, spot.lon],
                    radius=10,
                    popup=f'Hotspot #{spot.Index}: {spot.n} incidents',
                    color='black',
                    fill=True,
                    fill_color='yellow',
                    fill_opacity=0.7
                ).add_to(cluster_map)

    # Add layer control
    folium.LayerControl().add_to(cluster_map)

    # Save the map
    cluster_map.save(output_html)
    print(f"Cluster map saved as {output_html}")

    return cluster_map



In [None]:


def create_cluster_map(df: pd.DataFrame, output_html: str = "crime_clusters.html"):
    """Generate and save an interactive cluster map using Folium with DBSCAN hotspot detection.

    This version adds the incidents as a single GeoJSON layer (written to
    ``crime_data.geojson`` first) rather than as individual markers; the stated
    aim is a smaller output file. A heat layer and one circle marker per DBSCAN
    cluster ("hotspot") are added on top.

    Args:
        df: Crime records with "Latitude", "Longitude", "Category", "Dates" and
            "Severity" columns; "Address" is included when present. The frame is
            not modified.
        output_html: Path of the HTML file the map is written to.

    Returns:
        The folium.Map that was saved.

    Raises:
        ValueError: If no row has both a latitude and a longitude.

    Side effects:
        Writes ``crime_data.geojson`` to the current working directory.
    """
    geojson_filename = 'crime_data.geojson'

    # DBSCAN settings, shared by the neighbour graph and the clustering step.
    eps = 0.1  # neighbourhood radius, measured on the standardized coordinates
    min_samples = 4  # neighbourhood size required for a core point

    # Clean data
    df_clean = df.dropna(subset=["Latitude", "Longitude"])
    if df_clean.empty:
        raise ValueError("No rows with valid Latitude/Longitude to plot.")
    center_coords = [df_clean["Latitude"].mean(), df_clean["Longitude"].mean()]

    # Create base map
    cluster_map = folium.Map(location=center_coords, zoom_start=12, tiles="cartodbpositron")

    # Create GeoDataFrame from the cleaned dataframe. The coordinates are taken
    # to be longitude/latitude degrees, so the CRS is declared as EPSG:4326
    # instead of being left undefined in the written file.
    gdf = gpd.GeoDataFrame(
        df_clean,
        geometry=gpd.points_from_xy(df_clean['Longitude'], df_clean['Latitude']),
        crs="EPSG:4326",
    )

    # keep Category, Dates, Severity, and (when the data has it) Address
    property_columns = ['Category', 'Dates', 'Severity']
    if 'Address' in gdf.columns:
        property_columns.append('Address')
    gdf = gdf[property_columns + ['geometry']]

    # Save the GeoDataFrame as a GeoJSON file and add it to the map as one layer
    gdf.to_file(geojson_filename, driver='GeoJSON')
    folium.GeoJson(
        geojson_filename,
        name="Crime Data"
    ).add_to(cluster_map)

    # Add heat map layer for additional context (coordinates only, extracted in
    # one vectorized step)
    coords = df_clean[['Latitude', 'Longitude']].values
    HeatMap(coords.tolist()).add_to(cluster_map)

    # Identify hotspots using DBSCAN clustering. With fewer than min_samples
    # points no cluster can form and the k-neighbour graph cannot be built, so
    # the step is skipped.
    if len(coords) >= min_samples:
        # Standardize the data
        coords_scaled = StandardScaler().fit_transform(coords)

        # Sparse k-nearest-neighbour distance graph, used as a precomputed metric
        # so DBSCAN never needs a dense pairwise distance matrix.
        neighbors = NearestNeighbors(n_neighbors=min_samples, radius=eps, metric='euclidean')
        neighbors.fit(coords_scaled)
        distance_matrix = neighbors.kneighbors_graph(coords_scaled, mode='distance')

        # Labels stay in a local array rather than being written onto df_clean,
        # which would be an assignment on a frame derived from the caller's data.
        labels = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed').fit(distance_matrix).labels_

        in_cluster = labels != -1  # -1 marks noise points
        if in_cluster.any():
            # One pass over the clustered rows gives every hotspot's centre and size.
            hotspots = (
                df_clean.loc[in_cluster, ['Latitude', 'Longitude']]
                .groupby(labels[in_cluster])
                .agg(lat=('Latitude', 'mean'), lon=('Longitude', 'mean'), n=('Latitude', 'size'))
            )

            # Add hotspot markers as circle markers
            for spot in hotspots.itertuples():
                folium.CircleMarker(
                    location=[spot.lat, spot.lon],
                    radius=10,
                    popup=f'Hotspot #{spot.Index}: {spot.n} incidents',
                    color='black',
                    fill=True,
                    fill_color='yellow',
                    fill_opacity=0.7
                ).add_to(cluster_map)

    # Add layer control
    folium.LayerControl().add_to(cluster_map)

    # Save the map to HTML
    cluster_map.save(output_html)
    print(f"Cluster map saved as {output_html}")

    return cluster_map


In [None]:
import pandas as pd
import folium
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import st_folium

# Load the cleaned incident data and correct its coordinate headers, which are
# swapped in the CSV:
#   'Latitude (Y)'  contains longitude values (-122.5 to -120.5)
#   'Longitude (X)' contains latitude values (37.7 to 90.0)
df = pd.read_csv('cleaned_data.csv').rename(
    columns={
        'Latitude (Y)': 'Longitude',
        'Longitude (X)': 'Latitude',
    }
)


In [None]:
# Create geo-spatial visualizations
# With no output path given, the heatmap goes to the function's default file,
# crime_heatmap.html.
create_heatmap(df)




In [None]:
# Build the cluster map; with no output path given it is saved to the function's
# default file, crime_clusters.html.
# NOTE(review): as the last expression of the cell, the returned map is also
# rendered inline. The recorded output of this cell hit the output size limit,
# presumably because of that rendering -- consider ending the call with `;` if
# the inline copy is not needed. TODO confirm.
create_cluster_map(df)

  write(


Cluster map saved as crime_clusters.html
Buffered data was truncated after reaching the output size limit.

In [None]:
# Render the Streamlit dashboard for the full dataset.
# NOTE(review): the NameError recorded for this cell means create_cluster_map was
# not defined in the kernel when it ran; run the definition cells first.
# NOTE(review): Streamlit apps are normally started with `streamlit run` on a
# script -- confirm that calling this from a notebook kernel is intended.
run_dashboard(df)



NameError: name 'create_cluster_map' is not defined