In [2]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Define the file path
file_path = "crime_data.csv"

# Start from a known-empty state: in a notebook, a DataFrame left over from an
# earlier run of this cell would otherwise make a failed load look successful
# and the analysis below would silently run on stale data.
crime_data = None

# Attempt to load the crime dataset. Only the read itself is guarded, so an
# unrelated error is never reported as a missing file.
try:
    crime_data = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"Error: File '{file_path}' not found. Please provide the correct file path.")
else:
    print("Crime dataset loaded successfully!")

# Proceed with the analysis only if the dataset was loaded by this run
if crime_data is not None:
    # Select relevant features for clustering
    features = ['Population', 'Unemployment Rate', 'Education Level', 'Income Level', 'Crime Rate']

    # Fail early, with a readable message, when the CSV lacks an expected column
    missing_columns = [name for name in features if name not in crime_data.columns]
    if missing_columns:
        raise KeyError(f"Columns missing from '{file_path}': {missing_columns}")

    # Normalize the features (z-score: subtract the column mean, divide by the
    # column's sample standard deviation).
    # NOTE(review): this assumes every feature column is numeric - confirm, and
    # encode any categorical column before this step if that is not the case.
    feature_frame = crime_data[features]
    normalized_data = (feature_frame - feature_frame.mean()) / feature_frame.std()

    # KMeans rejects NaN input. NaN appears here when a value is missing in the
    # CSV or when a column has zero variance (0 / 0). A z-score of 0 is the
    # column mean, so filling with 0 is mean imputation and keeps every row.
    undefined_count = int(normalized_data.isna().sum().sum())
    if undefined_count:
        print(f"Note: {undefined_count} missing or undefined feature value(s) replaced with the column mean.")
        normalized_data = normalized_data.fillna(0.0)

    # Perform K-means clustering. n_init is pinned because its default differs
    # between scikit-learn releases, which would change the labels produced.
    kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
    clusters = kmeans.fit_predict(normalized_data)

    # Add cluster labels to the dataset
    crime_data['Cluster'] = clusters

    # Separate data for each cluster (KMeans labels are 0-based)
    cluster_1_data = crime_data[crime_data['Cluster'] == 0]
    cluster_2_data = crime_data[crime_data['Cluster'] == 1]
    cluster_3_data = crime_data[crime_data['Cluster'] == 2]

    # Visualize the clusters (optional)
    plt.scatter(normalized_data['Population'], normalized_data['Crime Rate'], c=clusters, cmap='viridis')
    plt.xlabel('Population (Normalized)')
    plt.ylabel('Crime Rate (Normalized)')
    plt.title('Clustering of Districts based on Crime Rate and Population')
    plt.show()

    # Analysis and Report
    # Example:
    # - Analyze socio-economic factors contributing to higher crime rates in sensitive areas.
    # - Recommend interventions to reduce crime based on cluster characteristics.
    # - Identify safest and most unsafe districts within each cluster.
    # - Visualize key findings using plots or graphs.

    # Print the first rows of each cluster; a blank line separates the sections
    cluster_frames = (cluster_1_data, cluster_2_data, cluster_3_data)
    for number, frame in enumerate(cluster_frames, start=1):
        separator = "" if number == 1 else "\n"
        print(f"{separator}Cluster {number} Data:")
        print(frame.head())


Error: File 'crime_data.csv' not found. Please provide the correct file path.
