In [2]:
import pandas as pd
import random
from datetime import datetime, timedelta

# --- Step 1: Generate a Set Map of Feasible Bus Stops (latitude, longitude) ---

# Simulate 30 feasible bus stop locations in a small city grid
random.seed(42)
base_lat, base_lon = 43.65, -79.38  # Central Toronto-style coordinates
bus_stops = []

for i in range(30):
    bus_stops.append({
        'stop_id': f"Stop_{i+1}",
        'latitude': round(base_lat + random.uniform(-0.01, 0.01), 6),
        'longitude': round(base_lon + random.uniform(-0.01, 0.01), 6)
    })

df_stops = pd.DataFrame(bus_stops)

# --- Step 2: Define 3 Bus Routes (each using a subset of the 30 stops) ---

routes = {
    "Route_A": df_stops.iloc[0:10].copy(),
    "Route_B": df_stops.iloc[10:20].copy(),
    "Route_C": df_stops.iloc[20:30].copy()
}

# Assign route and order to each stop
for route_name, route_df in routes.items():
    route_df['route'] = route_name
    route_df['order'] = range(1, len(route_df) + 1)
    routes[route_name] = route_df

# Combine all into a master route-stop map
df_routes = pd.concat(routes.values(), ignore_index=True)

# --- Step 3: Generate 300 Feedback Samples Spread Across These 30 Stops ---

remarks_positive = ["Very punctual", "Great service", "Driver was friendly", "Smooth ride", "Always on time"]
remarks_negative = ["Too late", "Bus was crowded", "Terrible timing", "Driver was rude", "Unreliable service"]
remarks_neutral  = ["Okay", "Average", "Nothing special", "Routine trip", "Mediocre experience"]
all_remarks = remarks_positive + remarks_negative + remarks_neutral

samples = []
start_time = datetime.strptime("2025-07-22 18:00", "%Y-%m-%d %H:%M")

for _ in range(300):
    stop_row = df_routes.sample(1).iloc[0]
    route = stop_row['route']
    stop_id = stop_row['stop_id']
    timestamp = (start_time + timedelta(minutes=random.randint(0, 180))).strftime("%Y-%m-%d %H:%M:%S")
    remark = random.choice(all_remarks)
    wait_time = round(random.uniform(2, 25), 2)
    ride_time = round(random.uniform(10, 40), 2)
    rating = round(random.uniform(1.0, 5.0), 1)

    samples.append({
        "route": route,
        "stop_id": stop_id,
        "remarks": remark,
        "timestamp": timestamp,
        "wait_time": wait_time,
        "ride_time": ride_time,
        "rating": rating
    })

df_feedback = pd.DataFrame(samples)

# --- Step 4: Save the Generated Files ---

df_routes.to_csv("bus_route_stops_map.csv", index=False)
df_feedback.to_csv("sample_route_feedback.csv", index=False)

print("✅ Files generated:")
print("- bus_route_stops_map.csv")
print("- sample_route_feedback.csv")


✅ Files generated:
- bus_route_stops_map.csv
- sample_route_feedback.csv


In [3]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic

# Load current route stop map and feedback
df_routes = pd.read_csv("bus_route_stops_map.csv")
df_feedback = pd.read_csv("sample_route_feedback.csv")

# Step 1: Compute average wait time, ride time, rating, and feedback volume per stop
agg_stats = df_feedback.groupby(['route', 'stop_id']).agg(
    avg_wait_time=('wait_time', 'mean'),
    avg_ride_time=('ride_time', 'mean'),
    avg_rating=('rating', 'mean'),
    feedback_volume=('remarks', 'count')
).reset_index()

# Merge with stop coordinates
agg_with_coords = agg_stats.merge(df_routes, on=['route', 'stop_id'])

# Step 2: Generate a city grid of candidate coordinates for new bus stops
lat_min, lat_max = df_routes['latitude'].min(), df_routes['latitude'].max()
lon_min, lon_max = df_routes['longitude'].min(), df_routes['longitude'].max()

# Create a grid of lat/lon points (~200m apart)
grid_points = []
step = 0.002  # ≈200 meters
lat_vals = np.arange(lat_min, lat_max, step)
lon_vals = np.arange(lon_min, lon_max, step)

for lat in lat_vals:
    for lon in lon_vals:
        grid_points.append((lat, lon))

# Step 3: Filter candidate locations that are far from all existing stops
min_distance_km = 0.25  # New stops must be at least 250m away from all current ones
new_stop_suggestions = []

for lat, lon in grid_points:
    distances = df_routes.apply(
        lambda row: geodesic((lat, lon), (row['latitude'], row['longitude'])).km, axis=1
    )
    if distances.min() > min_distance_km:
        new_stop_suggestions.append({'latitude': lat, 'longitude': lon})

df_suggested_stops = pd.DataFrame(new_stop_suggestions)

# Step 4: Annotate each stop with active time buckets from feedback data
df_feedback['timestamp'] = pd.to_datetime(df_feedback['timestamp'])
df_feedback['hour'] = df_feedback['timestamp'].dt.hour
df_feedback['time_bucket'] = pd.cut(
    df_feedback['hour'],
    bins=[0, 6, 9, 12, 15, 18, 21, 24],
    labels=['Late Night', 'Morning Rush', 'Late Morning', 'Afternoon', 'Evening Rush', 'Evening', 'Night'],
    right=False
)

# Aggregate by stop and time_bucket
stop_time_activity = df_feedback.groupby(['stop_id', 'time_bucket'], observed=True).size().reset_index(name='feedback_count')

# Only keep high-activity periods (more than 3 feedbacks)
high_activity_periods = stop_time_activity[stop_time_activity['feedback_count'] > 3]

# Save outputs
agg_with_coords.to_csv("bus_stop_summary_with_coords.csv", index=False)
df_suggested_stops.to_csv("proposed_new_stop_coords.csv", index=False)
high_activity_periods.to_csv("stop_active_periods.csv", index=False)

print("✅ Analysis complete. Files saved:")
print("- bus_stop_summary_with_coords.csv")
print("- proposed_new_stop_coords.csv")
print("- stop_active_periods.csv")


✅ Analysis complete. Files saved:
- bus_stop_summary_with_coords.csv
- proposed_new_stop_coords.csv
- stop_active_periods.csv


In [28]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from sklearn.metrics.pairwise import haversine_distances
from math import radians

# Load required inputs
df_routes = pd.read_csv("bus_route_stops_map.csv")
df_suggested_stops = pd.read_csv("proposed_new_stop_coords.csv")
high_activity_periods = pd.read_csv("stop_active_periods.csv")

# Convert coordinates to radians
coords_existing = df_routes[['latitude', 'longitude']].applymap(radians).to_numpy()
coords_candidates = df_suggested_stops[['latitude', 'longitude']].applymap(radians).to_numpy()

# Compute Haversine distances (km)
distance_matrix = haversine_distances(coords_existing, coords_candidates) * 6371

# Parameters
min_improvement_distance = 0.25  # km
top_suggestions = []

# Match each stop to a better new suggestion
for i, stop_row in df_routes.iterrows():
    stop_id = stop_row['stop_id']
    route = stop_row['route']
    original_lat = stop_row['latitude']
    original_lon = stop_row['longitude']

    distances = distance_matrix[i]
    candidate_idxs = np.where(distances > min_improvement_distance)[0]

    if len(candidate_idxs) > 0:
        best_idx = candidate_idxs[np.argmin(distances[candidate_idxs])]
        new_stop = df_suggested_stops.iloc[best_idx]

        # Get most active time for this stop
        times = high_activity_periods[high_activity_periods['stop_id'] == stop_id]
        active_periods = times.sort_values('feedback_count', ascending=False)['time_bucket'].tolist()
        active_time = active_periods[0] if active_periods else 'General'

        top_suggestions.append({
            'original_stop_id': stop_id,
            'route': route,
            'original_lat': original_lat,
            'original_lon': original_lon,
            'suggested_lat': new_stop['latitude'],
            'suggested_lon': new_stop['longitude'],
            'active_time': active_time
        })

# Save final suggestions
df_top_suggestions = pd.DataFrame(top_suggestions)
df_top_suggestions.to_csv("final_top_stop_suggestions.csv", index=False)
print("✅ Saved: final_top_stop_suggestions.csv")

✅ Saved: final_top_stop_suggestions.csv


  coords_existing = df_routes[['latitude', 'longitude']].applymap(radians).to_numpy()
  coords_candidates = df_suggested_stops[['latitude', 'longitude']].applymap(radians).to_numpy()
