In [7]:
!pip install xlrd

Collecting xlrd
  Downloading xlrd-2.0.1-py2.py3-none-any.whl.metadata (3.4 kB)
Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
   ---------------------------------------- 0.0/96.5 kB ? eta -:--:--
   ------------ --------------------------- 30.7/96.5 kB 1.3 MB/s eta 0:00:01
   ---------------------------------------- 96.5/96.5 kB 1.8 MB/s eta 0:00:00
Installing collected packages: xlrd
Successfully installed xlrd-2.0.1


In [13]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.impute import SimpleImputer
import folium

def load_data(filepath):
    """Read an Excel file into a DataFrame and report its missing values.

    The per-column null counts of the freshly loaded data are printed. No
    values are filled in here; this function only reports them.

    Args:
        filepath: Location of the Excel file, forwarded to ``pd.read_excel``.

    Returns:
        The loaded DataFrame, or ``None`` if reading or reporting raised
        (the error is printed instead of being propagated).
    """
    try:
        frame = pd.read_excel(filepath)
        print("Before handling missing values:")
        print(frame.isnull().sum())
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        print("Error loading data:", err)
        return None
    return frame

def handle_missing_values(dataset, columns):
    """Mean-impute the selected columns and print the resulting null counts.

    The imputed values are written back into ``dataset`` itself, so the
    caller's frame is modified and the same object is returned.

    Args:
        dataset: DataFrame containing the columns to impute.
        columns: Column labels passed to ``SimpleImputer(strategy='mean')``.

    Returns:
        ``dataset`` with the given columns replaced by their imputed values.
    """
    mean_imputer = SimpleImputer(strategy='mean')
    imputed_values = mean_imputer.fit_transform(dataset[columns])
    dataset[columns] = imputed_values

    print("After handling missing values:")
    print(dataset.isnull().sum())
    return dataset

def perform_clustering(dataset, features, num_clusters=3):
    """Cluster ``features`` with KMeans and label each row of ``dataset``.

    A ``cluster`` column holding the fitted labels is added to ``dataset``
    in place.

    Args:
        dataset: DataFrame that receives the ``cluster`` column.
        features: Data handed to ``KMeans.fit_predict``.
        num_clusters: Number of clusters to fit (default 3).

    Returns:
        A ``(dataset, counts)`` tuple, where ``counts`` is the
        ``value_counts()`` of the new ``cluster`` column.
    """
    model = KMeans(n_clusters=num_clusters, random_state=42)
    dataset['cluster'] = model.fit_predict(features)
    counts_per_cluster = dataset['cluster'].value_counts()
    return dataset, counts_per_cluster

def create_map(dataset, cluster_counts, output_file="cluster_map2.html"):
    """Create a folium map with one coloured marker per row and save it as HTML.

    Args:
        dataset: DataFrame with ``latitude``, ``longitude`` and ``cluster``
            columns. The map is centred on the mean latitude/longitude.
        cluster_counts: Mapping from cluster label to the number of rows in
            that cluster (looked up as ``cluster_counts[label]``); shown in
            each marker's popup.
        output_file: Path of the HTML file the map is written to.

    Returns:
        The ``folium.Map`` that was built and saved.
    """
    map_center = [dataset['latitude'].mean(), dataset['longitude'].mean()]
    map_clusters = folium.Map(location=map_center, zoom_start=5)

    # The first three entries keep the original colours; the palette is
    # indexed modulo its length so any number of clusters can be drawn
    # without an IndexError.
    palette = ['orange', 'blue', 'green', 'red', 'purple',
               'darkred', 'cadetblue', 'black']

    # Iterate the columns directly instead of iterrows(): iterrows() builds a
    # Series per row and may upcast the integer cluster label (e.g. to float
    # on an all-numeric frame), which would break list indexing.
    points = zip(dataset['latitude'], dataset['longitude'], dataset['cluster'])
    for lat, lon, raw_label in points:
        label = int(raw_label)
        marker_color = palette[label % len(palette)]
        folium.CircleMarker(
            location=[lat, lon],
            radius=5,
            color=marker_color,
            fill=True,
            fill_color=marker_color,
            fill_opacity=0.7,
            popup=f"Cluster: {label}<br>Incidents: {cluster_counts[label]}",
        ).add_to(map_clusters)

    # Save the map; displaying it is left to the caller via the return value.
    map_clusters.save(output_file)
    print(f"Map saved as '{output_file}'")
    return map_clusters

def main(filepath):
    """Run the load -> impute -> cluster -> map pipeline for one Excel file.

    Args:
        filepath: Path to the Excel dataset. The data must provide
            ``latitude`` and ``longitude`` columns.

    Returns:
        The folium map produced by ``create_map``, or ``None`` when the
        dataset could not be loaded.
    """
    # Load the data
    dataset = load_data(filepath)
    if dataset is None:
        return None

    # Handle missing values in latitude and longitude
    dataset = handle_missing_values(dataset, ['latitude', 'longitude'])

    # Perform clustering
    features = dataset[['latitude', 'longitude']]
    dataset, cluster_counts = perform_clustering(dataset, features)

    # Create and save the map
    map_clusters = create_map(dataset, cluster_counts)

    # Display cluster counts
    print("Number of incidents in each cluster:")
    print(cluster_counts)

    # Hand the map back so the notebook can display it; previously it only
    # existed as a local variable and the cell relied on leftover kernel state.
    return map_clusters

# Defined up front so the final expression is valid even if main() is not run
# or the data fails to load.
map_clusters = None

# Filepath for the dataset
if __name__ == "__main__":
    filepath = r"C:\Users\ashwi\Downloads\fatal-police-shootings-data-continental-us.xls"
    map_clusters = main(filepath)

# create_map() has already written "cluster_map2.html", so no second save is
# needed here.

# Display the map
map_clusters

Before handling missing values:
id                           0
name                       262
date                         0
manner_of_death              0
armed                      203
age                        304
gender                       3
race                       711
city                         0
state                        0
signs_of_mental_illness      0
threat_level                 0
flee                       472
body_camera                  0
latitude                   303
longitude                  303
is_geocoding_exact           0
dtype: int64
After handling missing values:
id                           0
name                       262
date                         0
manner_of_death              0
armed                      203
age                        304
gender                       3
race                       711
city                         0
state                        0
signs_of_mental_illness      0
threat_level                 0
flee                     