In [1]:
import pandas as pd
from geopy.distance import geodesic

In [2]:


# Points of interest (POIs) keyed by display name.
# Each value is a (latitude, longitude) pair; find_nearby_pois() measures the
# geodesic distance between these pairs and a stop's (lat, lon).
# Insertion order is kept exactly as listed.
pois = dict([
    ("KLCC Twin Towers", (3.15785, 101.7123)),
    ("Menara Kuala Lumpur", (3.1528, 101.7039)),
    ("TRX Exchange Tower", (3.1579, 101.7165)),
    ("Menara 106", (3.1394, 101.7049)),
    ("Lake Gardens", (3.139, 101.681)),
    ("KL Forest Eco Park", (3.151, 101.702)),
    ("Titiwangsa Lake Gardens", (3.174, 101.704)),
    ("National Mosque", (3.1411, 101.6935)),
    ("Thean Hou Temple", (3.1175, 101.6878)),
    ("Batu Caves", (3.237, 101.683)),
    ("Muzium Negara", (3.1369, 101.6867)),
    ("Pavilion Kuala Lumpur", (3.1496, 101.7124)),
    ("Changkat Bukit Bintang", (3.147, 101.708)),
    ("Petaling Street", (3.1445, 101.6955)),
    ("Central Market", (3.1449, 101.6947)),
    ("Aquaria KLCC", (3.1575, 101.7125)),
    ("Zoo Negara", (3.212, 101.756)),
    ("KidZania Kuala Lumpur", (3.157, 101.598)),
    ("Sunway Lagoon", (3.073, 101.607)),
    ("Berjaya Times Square", (3.1425, 101.709)),
    ("Sultan Abdul Samad Building", (3.1475, 101.6935)),
    ("Dataran Merdeka", (3.147, 101.693)),
    ("Istana Negara", (3.137, 101.684)),
])


# Function to find nearby POIs for a given location
def find_nearby_pois(lat, lon, pois, threshold=2000):
    """Return the POIs lying within ``threshold`` metres of a location.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the location to search around.
    pois : dict
        Mapping of POI name -> (latitude, longitude).
    threshold : float, default 2000
        Maximum geodesic distance, in metres (inclusive), for a POI to count
        as "nearby".

    Returns
    -------
    list of (str, float)
        ``(poi_name, distance_in_metres)`` tuples sorted by ascending
        distance. An empty list is returned when nothing is in range or when
        either coordinate is missing.
    """
    # A row read from CSV may carry NaN/None coordinates. geopy refuses
    # non-finite coordinates, so treat such a location as having no nearby
    # POIs instead of aborting the whole batch on one incomplete row.
    if pd.isna(lat) or pd.isna(lon):
        return []

    stop_location = (lat, lon)
    nearby = []
    for poi_name, poi_location in pois.items():
        distance = geodesic(stop_location, poi_location).meters
        if distance <= threshold:
            nearby.append((poi_name, distance))

    # Nearest POI first; the sort is stable, so equal distances keep dict order.
    nearby.sort(key=lambda entry: entry[1])
    return nearby

# Load GTFS data files
stops = pd.read_csv("stops.csv")
stop_times = pd.read_csv("stop_times.csv")

# Fail fast, before the per-stop distance pass, if a column used below is absent.
# NOTE(review): the standard GTFS stop_times table has no route_id column (it
# normally lives in trips); this script assumes stop_times.csv was already
# enriched with it -- confirm against the data export.
_missing_cols = {'stop_id', 'trip_id', 'route_id'} - set(stop_times.columns)
if _missing_cols:
    raise KeyError(
        f"stop_times.csv is missing required column(s): {sorted(_missing_cols)}"
    )


def _closest_unique_pois(poi_lists):
    """Collapse per-stop POI lists into one (name, distance) entry per POI.

    Each element of ``poi_lists`` is a list of ``(poi_name, distance)`` tuples
    as produced by ``find_nearby_pois``. The same POI is usually reachable
    from several stops at different distances; only the smallest distance is
    kept so every POI appears exactly once. Non-list elements (NaN left by the
    left merge for stop_ids with no match in ``stops``) are skipped.

    Returns a list of ``(poi_name, distance)`` sorted by distance, then name,
    so the output is deterministic from run to run.
    """
    closest = {}
    for stop_pois in poi_lists:
        if not isinstance(stop_pois, list):
            continue
        for name, distance in stop_pois:
            if name not in closest or distance < closest[name]:
                closest[name] = distance
    return sorted(closest.items(), key=lambda item: (item[1], item[0]))


def _aggregate_pois_by(frame, key, out_col):
    """Group ``frame`` by ``key`` and gather each group's unique nearby POIs."""
    return (
        frame.groupby(key)['nearby_pois']
        .apply(_closest_unique_pois)
        .reset_index()
        .rename(columns={'nearby_pois': out_col})
    )


# Attach nearby POIs to stops.
# A plain comprehension avoids DataFrame.apply(axis=1) building a Series per
# row and also works when `stops` has no rows.
stops['nearby_pois'] = [
    find_nearby_pois(stop_lat, stop_lon, pois)
    for stop_lat, stop_lon in zip(stops['stop_lat'], stops['stop_lon'])
]

# Merge stop_times with updated stops to get POIs for stops in trips
stop_times_with_pois = stop_times.merge(
    stops[['stop_id', 'nearby_pois']], on='stop_id', how='left'
)

# Aggregate POIs for each route and for each trip (one entry per POI, nearest first)
routes_with_pois = _aggregate_pois_by(stop_times_with_pois, 'route_id', 'route_pois')
trips_with_pois = _aggregate_pois_by(stop_times_with_pois, 'trip_id', 'trip_pois')

# Save results to CSV files
routes_with_pois.to_csv("routes_with_pois.csv", index=False)
trips_with_pois.to_csv("trips_with_pois.csv", index=False)

print("Files generated: routes_with_pois.csv and trips_with_pois.csv")


Files generated: routes_with_pois.csv and trips_with_pois.csv
