In [10]:
import pandas as pd
from sklearn.cluster import KMeans
import folium
from IPython.display import IFrame, display

# Load the dataset
def load_data(filepath):
    """Read a CSV file into a pandas DataFrame.

    Args:
        filepath: Path (or any source accepted by ``pd.read_csv``) of the
            CSV to load.

    Returns:
        The parsed contents as a ``pandas.DataFrame``.
    """
    frame = pd.read_csv(filepath)
    return frame

# Perform KMeans clustering
def perform_clustering(data, num_clusters, random_state=None):
    """Cluster rows by their coordinates with KMeans and label each row.

    The model is fitted on the ``latitude`` and ``longitude`` columns only.
    The resulting label is written to a ``cluster`` column on ``data`` itself
    (the frame is modified in place and also returned).

    Args:
        data: DataFrame containing ``latitude`` and ``longitude`` columns.
        num_clusters: Number of clusters to fit.
        random_state: Optional seed forwarded to ``KMeans``. Pass an integer
            to get the same labels and counts on every run; the default
            ``None`` keeps the original unseeded behaviour.

    Returns:
        A ``(data, cluster_centers)`` tuple: the labelled frame and the
        fitted ``cluster_centers_`` array from the model.
    """
    features = data[['latitude', 'longitude']]
    # n_init is pinned to 10 (the value the implicit default resolved to)
    # so results do not shift when scikit-learn changes its default, and so
    # the FutureWarning about that change is no longer emitted.
    kmeans = KMeans(
        n_clusters=num_clusters,
        n_init=10,
        random_state=random_state,
    )
    data['cluster'] = kmeans.fit_predict(features)
    return data, kmeans.cluster_centers_

# Count incidents in each cluster
def get_cluster_counts(data):
    """Tally how many rows belong to each cluster.

    Args:
        data: DataFrame with a ``cluster`` column.

    Returns:
        A dict mapping each cluster label to the number of rows carrying it.
    """
    labels = data['cluster']
    tally = labels.value_counts()
    return tally.to_dict()

# Generate a folium map with clusters
def create_cluster_map(data, cluster_counts, map_center, colors):
    """Build a folium map with one coloured circle marker per row.

    Args:
        data: DataFrame with ``latitude``, ``longitude`` and ``cluster``
            columns.
        cluster_counts: Mapping from cluster label to its row count; shown
            in each marker's popup.
        map_center: ``[latitude, longitude]`` used as the initial map centre.
        colors: Sequence of colour names indexed by cluster label, so it
            must have at least as many entries as there are clusters.

    Returns:
        The populated ``folium.Map``.
    """
    map_clusters = folium.Map(location=map_center, zoom_start=5)

    # Iterate the three needed columns directly instead of using iterrows():
    # iterrows() packs each row into a single-dtype Series, which turns the
    # integer cluster label into a float when every column is numeric and
    # then breaks list indexing into `colors`. It is also much slower.
    points = zip(data['latitude'], data['longitude'], data['cluster'])
    for latitude, longitude, cluster in points:
        cluster = int(cluster)  # plain int for list indexing and display
        marker_color = colors[cluster]
        folium.CircleMarker(
            location=[latitude, longitude],
            radius=5,
            color=marker_color,
            fill=True,
            fill_color=marker_color,
            fill_opacity=0.7,
            popup=f"Cluster: {cluster}<br>Incidents: {cluster_counts[cluster]}",
        ).add_to(map_clusters)
    return map_clusters

# Main function
def main():
    """Run the full pipeline: load, cluster, map, save, and report.

    Reads the incident CSV, assigns each row to one of four KMeans clusters,
    renders the points on a folium map saved as ``cluster_map.html``, shows
    that file inline through an IFrame, and prints the per-cluster counts.
    """
    # Inputs: data location, cluster count, and one colour per cluster
    csv_path = r"/Users/charishyadavali/Downloads/IndiaPoliticalViolence(no Islands_Kashmir_Ladakh).csv"
    cluster_total = 4
    palette = ['red', 'blue', 'green', 'yellow']

    # Read the incidents and label each one with its cluster
    incidents, _centers = perform_clustering(load_data(csv_path), cluster_total)

    # Rows per cluster, reused for the popups and the final report
    counts_by_cluster = get_cluster_counts(incidents)

    # Centre the map on the mean coordinate of all incidents
    centre = [incidents['latitude'].mean(), incidents['longitude'].mean()]
    rendered = create_cluster_map(incidents, counts_by_cluster, centre, palette)

    # Write the map to disk as HTML
    output_html = "cluster_map.html"
    rendered.save(output_html)
    print("Map saved as cluster_map.html")

    # Embed the saved HTML file in the notebook output
    display(IFrame(output_html, width=900, height=800))

    # Report how many incidents landed in each cluster
    print("Number of incidents in each cluster:")
    print(pd.Series(counts_by_cluster))

# Run the pipeline only when this code is executed directly (as a script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


  super()._check_params_vs_input(X, default_n_init=10)


Map saved as cluster_map.html


Number of incidents in each cluster:
3    11162
0    10080
2     8011
1     7130
dtype: int64
