In [None]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score
import numpy as np

# Build a single two-column array of [latitude, longitude] rows: all start-station
# rows first, followed by all end-station rows from the same filtered frame.
start_latlon = san_fransisco_df_copy_filtered[['start_station_latitude', 'start_station_longitude']].values
end_latlon = san_fransisco_df_copy_filtered[['end_station_latitude', 'end_station_longitude']].values
coords = np.vstack((start_latlon, end_latlon))

# Candidate cluster counts to score (2 through 10 inclusive).
n_clusters_range = range(2, 11)

# silhouette_scores[i] is the silhouette score obtained with n_clusters_range[i].
silhouette_scores = []

for n_clusters in n_clusters_range:
    # Fit agglomerative clustering with this many clusters and label every row.
    clustering = AgglomerativeClustering(n_clusters=n_clusters)
    labels = clustering.fit_predict(coords)

    # Score the resulting partition and record it in candidate order.
    score = silhouette_score(coords, labels)
    silhouette_scores.append(score)

# Pick the candidate whose score is highest; on ties the first (smallest) candidate wins.
best_position = max(range(len(silhouette_scores)), key=silhouette_scores.__getitem__)
optimal_n_clusters = n_clusters_range[best_position]
print(f"Optimal number of clusters: {optimal_n_clusters}")

In [None]:
import matplotlib.pyplot as plt

# Fit the final clustering with the cluster count selected by the silhouette search
# (`optimal_n_clusters`) instead of a hard-coded 2, so that `optimal_labels` really
# holds the labels of the best-scoring partition.
n_clusters = optimal_n_clusters
clustering = AgglomerativeClustering(n_clusters=n_clusters)
optimal_labels = clustering.fit_predict(coords)

# `coords` stacks the start rows on top of the end rows (same number of each), so the
# first half of the labels belongs to start locations and the second half to end
# locations.
midpoint = len(optimal_labels) // 2
start_labels = optimal_labels[:midpoint]
end_labels = optimal_labels[midpoint:]
start_coords = coords[:midpoint]
end_coords = coords[midpoint:]

# Plotting: one panel per half, longitude on x and latitude on y.
fig, axes = plt.subplots(1, 2, figsize=(14, 7))
panels = (
    ('Start Locations', start_coords, start_labels),
    ('End Locations', end_coords, end_labels),
)

for ax, (panel_title, points, point_labels) in zip(axes, panels):
    # Pin the colour scale to the full label range (0 .. n_clusters - 1). Without
    # vmin/vmax each panel normalises over only the labels it contains, so the same
    # cluster ID could be drawn in different colours in the two panels.
    scatter = ax.scatter(points[:, 1], points[:, 0],
                         c=point_labels, cmap='viridis', alpha=0.5,
                         vmin=0, vmax=n_clusters - 1)
    ax.set_title(panel_title)
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    fig.colorbar(scatter, ax=ax, label='Cluster ID')

fig.tight_layout()
plt.show()

In [None]:
import folium
from folium.plugins import MarkerCluster

# Split the stacked coordinates and their cluster labels back into start and end
# halves: `coords` holds the start rows followed by an equal number of end rows, and
# `optimal_labels` has one label per row of `coords`.
midpoint = len(coords) // 2
start_coords = coords[:midpoint]
end_coords = coords[midpoint:]
start_labels = optimal_labels[:midpoint]
end_labels = optimal_labels[midpoint:]

# Initialize a map centered on the mean latitude / longitude of all points
map_center = [np.mean(coords[:, 0]), np.mean(coords[:, 1])]
folium_map = folium.Map(location=map_center, zoom_start=12)

# One colour per cluster ID, looked up by label.
# NOTE(review): some of these names (e.g. 'lightred', 'darkpurple') look like folium
# Icon palette names rather than CSS colour names; confirm they render for
# CircleMarker before relying on them for higher cluster IDs.
cluster_colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'white', 'pink', 'lightblue', 'lightgreen', 'gray', 'black', 'lightgray']


def add_cluster_markers(target_map, points, point_labels, kind):
    """Draw one circle marker per distinct (latitude, longitude, cluster) row.

    Parameters
    ----------
    target_map : folium.Map
        Map the markers are added to.
    points : array of [latitude, longitude] rows
        Locations to draw.
    point_labels : array of int
        Cluster ID for each row of ``points`` (same length).
    kind : str
        Prefix for the popup text, e.g. ``'Start'`` gives ``'Start Cluster: 0'``.
    """
    # Many rows repeat the same location with the same label; drawing each repeat
    # only stacks identical markers and inflates the rendered map, so collapse them.
    distinct_rows = np.unique(np.column_stack((points, point_labels)), axis=0)
    for lat, lon, raw_label in distinct_rows:
        # column_stack promoted the labels to float; restore the integer ID.
        cluster_label = int(raw_label)
        # Wrap around the palette so a label beyond its length cannot raise IndexError.
        marker_color = cluster_colors[cluster_label % len(cluster_colors)]
        folium.CircleMarker(location=(lat, lon),
                            radius=5,
                            color=marker_color,
                            fill=True,
                            fill_color=marker_color,
                            fill_opacity=0.6,
                            popup=f'{kind} Cluster: {cluster_label}').add_to(target_map)


# Add start and end location markers. Both use the same marker style; only the popup
# text distinguishes a start location from an end location.
add_cluster_markers(folium_map, start_coords, start_labels, 'Start')
add_cluster_markers(folium_map, end_coords, end_labels, 'End')

# Display the map
folium_map