In [None]:
## @author: Busra Zenbilci 
## 20170808054

In [None]:
import pandas as pd
import numpy as np
import folium
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy.spatial import Delaunay
from sklearn.metrics import silhouette_score

In [None]:
# Load the boarding dataset; the path is relative to the current working directory.
file_path = '24b_datamining_bsboarding (3).csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Keep only the two coordinate columns as the feature matrix.
features = data.loc[:, ['Latitude', 'Longitude']]

In [None]:
# Standardise both coordinate columns: learn the scaling parameters from
# `features`, then apply them to the same data.
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)

In [None]:
# Silhouette analysis: fit K-Means for every candidate k and record the mean
# silhouette score, so the number of clusters can be justified from the data.
# KMeans needs n_clusters <= n_samples and silhouette_score accepts at most
# n_samples - 1 distinct labels, so the upper bound is capped for small datasets
# (for 15 or more rows this is the same 2..14 range as before).
max_candidate_k = min(14, len(scaled_features) - 1)
candidate_k_values = list(range(2, max_candidate_k + 1))
silhouette_scores = []
for k in candidate_k_values:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans_labels = kmeans.fit_predict(scaled_features)
    silhouette_scores.append(silhouette_score(scaled_features, kmeans_labels))

# Display the scores indexed by k as the cell output instead of discarding them.
pd.Series(silhouette_scores, index=candidate_k_values, dtype=float, name='silhouette_score')

In [None]:
# Final model: K-Means with a fixed cluster count of 4.
# NOTE(review): presumably 4 was picked from the silhouette scores - confirm.
k_value = 4
kmeans = KMeans(n_clusters=k_value, random_state=42).fit(scaled_features)
clusters = kmeans.labels_

In [None]:
# Store each row's cluster label in the dataset's 'Cluster' column
# (labels are aligned to the existing row index).
data['Cluster'] = pd.Series(clusters, index=data.index)

In [None]:
# Create the base Folium map, centred on the mean latitude/longitude of all rows.
map_center = [data[axis].mean() for axis in ('Latitude', 'Longitude')]
map_bus_stops = folium.Map(zoom_start=12, location=map_center)

In [None]:
# Hex colour palette used to tell the clusters apart on the map.
colors = [
    '#FF6347',  # tomato
    '#87CEFA',  # light sky blue
    '#FFD700',  # gold
    '#FF69B4',  # hot pink
    '#B0E0E6',  # powder blue
]

In [None]:
# Marker sizing: circle radii lie between min_radius and max_radius and are
# scaled against the largest BoardingCount in the dataset.
min_radius, max_radius = 3, 30
max_boarding_count = data['BoardingCount'].max()

In [None]:
# Add BusStops to the map
# The columns are iterated directly instead of using iterrows(): iterrows()
# builds one Series per row and coerces its values to a common dtype, which can
# turn integer counts / cluster labels into floats in the popup text.
radius_span = max_radius - min_radius
# Guard the denominator: if the largest boarding count is 0 (or NaN) every
# marker falls back to min_radius instead of dividing by zero.
boarding_scale = max_boarding_count if max_boarding_count > 0 else 1
stop_columns = zip(
    data['Latitude'],
    data['Longitude'],
    data['BusStop'],
    data['BoardingCount'],
    data['Cluster'],
)
for latitude, longitude, bus_stop, boarding_count, cluster in stop_columns:
    cluster = int(cluster)
    # The modulo keeps the lookup valid even if there are more clusters than colours.
    cluster_color = colors[cluster % len(colors)]
    # The radius grows linearly with the stop's share of the maximum boarding
    # count, so busier stops get larger circles; a missing count is drawn at
    # the minimum radius rather than producing a NaN radius.
    boarding_share = boarding_count / boarding_scale if pd.notna(boarding_count) else 0
    radius = min_radius + radius_span * boarding_share
    folium.CircleMarker(
        location=[latitude, longitude],
        radius=radius,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.6,
        popup=f"Bus Stop: {bus_stop}<br>Boarding Count: {boarding_count}<br>Cluster: {cluster}"
    ).add_to(map_bus_stops)

In [None]:
## Delaunay Triangulation method
# Triangulate the stop coordinates. A Delaunay triangulation maximizes the
# minimum angle of its triangles, which yields natural-looking links between
# neighbouring points.
coordinate_columns = ['Latitude', 'Longitude']
points = data[coordinate_columns].values
tri = Delaunay(points)

In [None]:
# Add edges to the map
# Edge lengths are Euclidean distances in coordinate (degree) units. Very short
# and very long edges are skipped so only links between nearby, distinct stops
# are drawn.
min_edge_length = 0.001
max_edge_length = 0.01
# Every interior edge of the triangulation belongs to two triangles; remember
# which vertex pairs were already handled so each edge is drawn only once.
drawn_edges = set()
for simplex in tri.simplices:
    for i in range(3):
        first = int(simplex[i])
        second = int(simplex[(i + 1) % 3])
        # Order-independent key: (a, b) and (b, a) are the same edge.
        edge = (first, second) if first < second else (second, first)
        if edge in drawn_edges:
            continue
        drawn_edges.add(edge)
        start = points[first]
        end = points[second]
        distance = np.linalg.norm(start - end)
        if min_edge_length <= distance <= max_edge_length:
            folium.PolyLine(
                locations=[start, end],
                color='gray',
                weight=1,
                opacity=0.7
            ).add_to(map_bus_stops)

In [None]:
# Write the map to a standalone HTML file, then leave the map object as the
# last expression so the notebook renders it inline.
output_html = 'bus_stops_kmeans_clusters.html'
map_bus_stops.save(output_html)
map_bus_stops