In [None]:
import pandas as pd
import numpy as np

# Build a synthetic accident dataset: random coordinates inside a fixed
# latitude/longitude box, plus a random accident count for each location.
N_ACCIDENT_POINTS = 150  # number of rows generated for df

np.random.seed(42)  # For reproducible results

# Generate random latitudes and longitudes within a specific range (e.g., around a city).
# The draw order (lat, lon, accidents) is part of the seeded result; keep it stable.
latitudes = np.random.uniform(low=28.4, high=28.9, size=N_ACCIDENT_POINTS)
longitudes = np.random.uniform(low=77.0, high=77.5, size=N_ACCIDENT_POINTS)
# randint excludes `high`, so accident counts fall in 1..49.
accidents = np.random.randint(low=1, high=50, size=N_ACCIDENT_POINTS)

# Create the DataFrame
data = {
    'lat': latitudes,
    'lon': longitudes,
    'accidents': accidents
}

df = pd.DataFrame(data)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Endpoints of the straight highway, as (start, end) latitude and longitude.
highway_lat_start, highway_lat_end = 28.4, 28.9
highway_lon_start, highway_lon_end = 77.0, 77.5

# Sample 100 evenly spaced points between the endpoints for drawing the path.
highway_latitudes = np.linspace(highway_lat_start, highway_lat_end, num=100)
highway_longitudes = np.linspace(highway_lon_start, highway_lon_end, num=100)

# Place each accident on the highway: keep its longitude and take the highway
# latitude at that longitude (linear interpolation between the two endpoints).
lon_endpoints = [highway_lon_start, highway_lon_end]
lat_endpoints = [highway_lat_start, highway_lat_end]
projected_lats = np.interp(df['lon'], lon_endpoints, lat_endpoints)
df['projected_lat'] = projected_lats

In [None]:
# Draw the highway as a black line on a 12x8 figure.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(highway_longitudes, highway_latitudes, 'k-', label='Highway Path')

# Overlay the accidents at their projected highway positions; marker area
# grows with the accident count (x10 so small counts remain visible).
bubble_sizes = df['accidents'] * 10
ax.scatter(
    df['lon'],
    df['projected_lat'],
    s=bubble_sizes,
    c='red',
    alpha=0.5,
    label='Accidents',
)

# Axis labels, title, legend and grid.
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Highway Path with Accident Locations')
ax.legend()
ax.grid(True)

# Render the figure.
plt.show()


In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt


# Step 1: Remove Spatial Outliers
coords = df[['lat', 'lon']].values

# Compute pairwise distances between every pair of accident locations.
# The Euclidean metric is applied directly to the lat/lon values.
distances = cdist(coords, coords, metric='euclidean')
# Mean and std are taken over the full matrix, including the zero diagonal.
mean_distance = np.mean(distances)
std_distance = np.std(distances)

# Identify outliers (points with distances greater than mean + 3*std).
# A row is flagged as soon as ANY of its pairwise distances exceeds the
# threshold, so both endpoints of a single far-apart pair are flagged.
# NOTE(review): confirm this is the intended criterion rather than, e.g.,
# a per-point average distance.
outlier_threshold = mean_distance + 3 * std_distance
outliers = np.any(distances > outlier_threshold, axis=1)

# Remove outliers
filtered_coords = coords[~outliers]
# Take an explicit copy so that adding columns to filtered_data later does not
# operate on a slice of df (which raises pandas' SettingWithCopyWarning).
filtered_data = df[~outliers].copy()

# Step 2: Clustering Using KMeans
# We will use the Elbow Method to determine the optimal number of clusters

# Define a function to calculate WSS (within-cluster sum of squares) for different k values
def calculate_wss(data, max_k):
    """Return the KMeans inertia for every cluster count from 1 to ``max_k``.

    Args:
        data: Samples to cluster; passed unchanged to ``KMeans.fit``.
        max_k: Largest number of clusters to try (inclusive).

    Returns:
        A list of ``max_k`` inertia values (within-cluster sum of squares);
        the entry at index ``i`` belongs to ``k = i + 1``.
    """
    # Each model is fitted with the same fixed random_state.
    return [
        KMeans(n_clusters=n_clusters, random_state=42).fit(data).inertia_
        for n_clusters in range(1, max_k + 1)
    ]

# Evaluate the within-cluster sum of squares for k = 1..20.
max_k = 20
wss = calculate_wss(filtered_coords, max_k)

# Elbow curve: WSS against the number of clusters; the bend suggests a good k.
candidate_ks = range(1, max_k + 1)
plt.plot(candidate_ks, wss, marker='o')
plt.title('Elbow Method for Optimal Number of Clusters')
plt.xlabel('Number of Clusters')
plt.ylabel('Within-Cluster Sum of Squares (WSS)')
plt.show()

In [None]:
optimal_k = 3  # This value should be chosen based on the elbow plot
# Perform KMeans clustering with the optimal number of clusters
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
kmeans.fit(filtered_coords)
# assign() returns a new frame with the label column added. This avoids writing
# into a filtered slice of df (SettingWithCopyWarning) and makes the cell safe
# to re-run.
filtered_data = filtered_data.assign(cluster=kmeans.labels_)

# Plot the clusters: one point per retained accident, coloured by cluster label.
plt.scatter(filtered_data['lon'], filtered_data['lat'], c=filtered_data['cluster'], cmap='rainbow')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Accident Clusters')
plt.show()

In [None]:
optimal_k = 5
# Feature matrix for clustering: one (lat, lon) row per accident in df.
# All rows of df are used, because the labels are written back to df below.
# NOTE(review): assumes the raw coordinates are the intended features;
# confirm, since the plot below shows the projected highway positions.
accident_coords = df[['lat', 'lon']].to_numpy()
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
df['cluster'] = kmeans.fit_predict(accident_coords)

# Step 4: Visualizing Clusters
plt.figure(figsize=(12, 8))
plt.plot(highway_longitudes, highway_latitudes, 'k-', label='Highway Path')

# Plot each cluster with a different color. Points are drawn at their
# projected highway position, and marker area scales with the accident count.
for cluster in range(optimal_k):
    cluster_data = df[df['cluster'] == cluster]
    plt.scatter(cluster_data['lon'], cluster_data['projected_lat'], s=cluster_data['accidents'] * 10, alpha=0.5, label=f'Cluster {cluster + 1}')

# Adding labels and title
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Highway Path with Accident Clusters')
plt.legend()
plt.grid(True)

# Display the plot
plt.show()