In [1]:
from geopy.distance import geodesic
import pandas as pd

# Load the enriched apartments dataset and the public transport stations dataset
apartments = pd.read_csv("apartments_data_enriched_lat_lon_combined.csv")
stations = pd.read_csv("zurich_transport_stations.csv")  # Your public transport dataset

# Fail fast if a required column is missing. The messages name the columns
# that are actually read ('lat'/'lon', not 'latitude'/'longitude') so the
# error points at the real fix.
if not {"lat", "lon"}.issubset(apartments.columns):
    raise ValueError("🚨 The apartments dataset must have 'lat' and 'lon' columns.")

# The nearest-station lookup reads these three columns from every station row;
# without this check a missing one only surfaces later as a bare KeyError.
if not {"lat", "lon", "name"}.issubset(stations.columns):
    raise ValueError("🚨 The stations dataset must have 'lat', 'lon' and 'name' columns.")

def nearest_station(lat, lon):
    """Find the transport station closest to a coordinate.

    Compares the given point against every row of the module-level
    ``stations`` DataFrame (columns ``lat``, ``lon`` and ``name``) using
    geopy's geodesic distance.

    Args:
        lat: Latitude of the query point, in the form ``geodesic`` accepts.
        lon: Longitude of the query point, in the form ``geodesic`` accepts.

    Returns:
        A ``(distance_km, station_name)`` tuple for the closest station.
        When several stations are equally close, the first one in
        ``stations`` wins. If ``stations`` has no rows, the result is
        ``(float("inf"), "")``.
    """
    origin = (lat, lon)

    # Iterate the three columns in lockstep rather than via ``iterrows()``:
    # this function runs once per apartment, and materialising a Series for
    # every station row on every call is needlessly slow.
    candidates = (
        (geodesic(origin, (station_lat, station_lon)).km, station_name)
        for station_lat, station_lon, station_name in zip(
            stations["lat"], stations["lon"], stations["name"]
        )
    )

    # Key on the distance alone so ties keep the first station encountered
    # and station names are never compared with each other.
    return min(
        candidates,
        key=lambda candidate: candidate[0],
        default=(float("inf"), ""),
    )

# For every apartment row, look up the closest station; ``result_type="expand"``
# turns each returned (distance, name) tuple into two columns.
transport_info = apartments.apply(
    lambda apartment: nearest_station(apartment["lat"], apartment["lon"]),
    axis=1,
    result_type="expand",
)
apartments[["distance_to_transport", "nearest_station"]] = transport_info

# Persist the enriched dataset (without the index column) and report success
apartments.to_csv("apartments_with_transport.csv", index=False)
print("✅ Saved: apartments_with_transport.csv with new transport feature")

✅ Saved: apartments_with_transport.csv with new transport feature
