 ### Public Transportation (FINISHED)

In [13]:
#pip install geopandas


In [14]:
#pip install overpy


In [15]:
import overpy

api = overpy.Overpass()

# Query for public transport stations in Denmark using the bounding box
result = api.query("""
[out:json];
(
  node["public_transport"="station"](54.5,8.0,57.9,15.2);
  way["public_transport"="station"](54.5,8.0,57.9,15.2);
  relation["public_transport"="station"](54.5,8.0,57.9,15.2);
);
(._;>;);
out body;
""")

# Structure the data
nodes = [{'name': station.tags.get('name', 'n/a'), 'location': (station.lon, station.lat)} for station in result.nodes]
ways = [{'name': way.tags.get('name', 'n/a'), 'nodes': [(node.lon, node.lat) for node in way.nodes]} for way in result.ways]

# For relations, fetch associated ways from the result
relations = []
for relation in result.relations:
    relation_ways = [way for way in result.ways if way.id in [member.ref for member in relation.members if member.role == "outer"]]
    relations.append({'name': relation.tags.get('name', 'n/a'), 'ways': relation_ways})


In [16]:
import csv

def get_centroid(nodes):
    """Calculate the centroid of a list of nodes."""
    x_coords = [node[0] for node in nodes]
    y_coords = [node[1] for node in nodes]
    centroid_x = sum(x_coords) / len(nodes)
    centroid_y = sum(y_coords) / len(nodes)
    return (centroid_x, centroid_y)

stations_centroids = {}

# Handle nodes:
for node in nodes:
    station_name = node['name']
    if station_name not in stations_centroids:
        stations_centroids[station_name] = node['location']

# Handle ways:
for way in ways:
    station_name = way['name']
    if station_name not in stations_centroids:
        stations_centroids[station_name] = get_centroid(way['nodes'])

# Handle relations:
for relation in relations:
    station_name = relation['name']
    if station_name not in stations_centroids:
        relation_centroids = [get_centroid([(node.lon, node.lat) for node in way.nodes]) for way in relation['ways']]
        stations_centroids[station_name] = get_centroid(relation_centroids)


In [17]:
import csv

# Combine all data
all_centroids = {
    "Public Transport Stations": stations_centroids,
}

# Write data to CSV with UTF-8 encoding
with open("public_transport_stations.csv", "w", newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    # Write header
    writer.writerow(["Type", "Name", "Longitude", "Latitude"])
    
    for category, centroids in all_centroids.items():
        for name, location in centroids.items():
            if location != "No Centroid":
                writer.writerow([category, name, location[0], location[1]])
            else:
                writer.writerow([category, name, "No Centroid", "No Centroid"])

print("Data written to 'public_transport_stations.csv'")


Data written to 'public_transport_stations.csv'


In [18]:
import pandas as pd

# Assuming you've loaded your data into a DataFrame named 'df'
df = pd.read_csv('public_transport_stations.csv')

# Reordering the columns
df = df[['Type', 'Name', 'Latitude', 'Longitude']]

# Saving it back to the CSV (or to a new CSV if you prefer)
df.to_csv('public_transport_stations_right_order.csv', index=False)


In [3]:
import geopandas as gpd
import pandas as pd

# 1. Load the Denmark boundary shapefile
denmark_boundary = gpd.read_file("Danmark Shapefile/DNK_adm0.shp")

# 2. Load the CSV data
stations_df = pd.read_csv('public_transport_stations_right_order.csv')

# Convert the CSV dataframe to a geodataframe
gdf_stations = gpd.GeoDataFrame(stations_df,
                                geometry=gpd.points_from_xy(stations_df.Longitude, stations_df.Latitude))

# 3. Perform spatial join to filter stations within Denmark's boundary
stations_inside_denmark = gpd.sjoin(gdf_stations, denmark_boundary, op="within")

# Select only the first 4 columns and save to a new CSV
columns_to_save = ["Type", "Name", "Latitude", "Longitude"]
stations_inside_denmark[columns_to_save].to_csv('stations_inside_denmark.csv', index=False)

print(f"{len(stations_df) - len(stations_inside_denmark)} data points were outside Denmark and have been removed.")

# 4. Save the filtered data back to a new CSV
#stations_inside_denmark.drop(columns=['geometry', 'index_right']).to_csv('stations_inside_denmark.csv', index=False)

#print(f"{len(stations_df) - len(stations_inside_denmark)} stations were outside Denmark and have been removed.")


318 data points were outside Denmark and have been removed.


  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  stations_inside_denmark = gpd.sjoin(gdf_stations, denmark_boundary, op="within")
