In [None]:
import overpy
import pandas as pd 
import requests
import json
import pymssql
from sqlalchemy import Integer, String, Float, DATETIME, create_engine

In [None]:
# Load configuration from config/db_config.json
with open('../config/db_config.json', 'r') as f:
    db_config = json.load(f)

# Get database credentials
server = db_config['server']
database = db_config['database']
db_user = db_config['db_user']
db_password = db_config['db_password']

In [None]:
# Connect to SQL Database
conn = pymssql.connect(server, db_user, db_password, database)

# Create connection string for SQLAlchemy
connection_string = f"mssql+pymssql://{db_user}:{db_password}@{server}/{database}"
engine = create_engine(connection_string)

In [76]:
# SQL-Abfrage für die gewünschten Spalten
query = "SELECT DISTINCT id FROM OVRP_HikingRoutes"

# Daten aus der Azure SQL-Datenbank laden
relation_ids = pd.read_sql_query(query, con=engine)

# Ergebnis anzeigen
print(relation_ids)


            id
0        22614
1       120125
2       121950
3       121951
4       121952
...        ...
2819  18242844
2820  18242846
2821  18242856
2822  18260265
2823  18260266

[2824 rows x 1 columns]


In [78]:
# Funktion zur Überprüfung der Gültigkeit
def check_valid_ids(ids):
    invalid_ids = []
    for rid in ids:
        if not isinstance(rid, int) or rid <= 0:  # Prüfen, ob die ID eine positive Ganzzahl ist
            invalid_ids.append(rid)
    return invalid_ids

# Ungültige IDs finden
invalid_ids = check_valid_ids(relation_ids)

# Ergebnis anzeigen
if invalid_ids:
    print("Ungültige IDs gefunden:", invalid_ids)
else:
    print("Alle IDs sind gültig.")

Ungültige IDs gefunden: ['id']


In [79]:
relation_ids = relation_ids['id'].tolist()

In [None]:
relation_ids = [12624412, 15527560, 15527560]  # Beispiel-IDs

In [80]:
api = overpy.Overpass(url="http://overpass.osm.ch/api/interpreter")

timestamp_apicall = pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S")


# Leere Liste für die Ergebnisse
results = []

# Overpass-Abfragen iterativ durchführen
for relation_id in relation_ids:
    # Overpass-Abfrage
    
    query = f"""
    [out:json];
    relation["route"="hiking"](id:{relation_id});
    out ids;  // Gibt die Relation-ID aus
    way(r);
    out ids center tags;  // Gibt die Way-IDs und ihre Mittelpunkte aus
    """
    result = api.query(query)
    # Anfrage an die Overpass-API senden
   
    for way in result.ways:
            if way.center_lat and way.center_lon:
                # Daten als Dictionary speichern
                results.append({
                    "relation_id": relation_id,
                    "way_id": way.id,
                    "lat": way.center_lat,
                    "lon": way.center_lon,
                    "highway": way.tags.get("highway", None),
                    "surface": way.tags.get("surface", None),
                    "timestamp_apicall": timestamp_apicall
                })

# Convert lat and lon to numeric, timestamp to datetime



# Ergebnisse in einen DataFrame konvertieren
df_wege = pd.DataFrame(results)
df_wege['lat'] = pd.to_numeric(df_wege['lat'], errors='coerce')
df_wege['lon'] = pd.to_numeric(df_wege['lon'], errors='coerce')
df_wege['timestamp_apicall'] = pd.to_datetime(df_wege['timestamp_apicall'], errors='coerce')

# Ergebnis anzeigen
print(df_wege)




       relation_id      way_id        lat        lon  highway  surface  \
0            22614    25795627  46.647699  10.266808     path     None   
1            22614    25795655  46.638655  10.211410     path     None   
2            22614    25795657  46.640812  10.203378     path     None   
3            22614    36913824  46.639047  10.242167     path     None   
4            22614    59062846  46.664058  10.210320  primary  asphalt   
...            ...         ...        ...        ...      ...      ...   
30472     18260265   497875016  47.700263   8.858103    track  asphalt   
30473     18260265   497875017  47.701347   8.859138    track   gravel   
30474     18260265  1331682671  47.698765   8.857494    track     None   
30475     18260266   226764756  47.701344   8.874168    track   gravel   
30476     18260266   497874970  47.703585   8.873041    track     None   

        timestamp_apicall  
0     2024-11-20 17:13:12  
1     2024-11-20 17:13:12  
2     2024-11-20 17:13:12  

In [82]:
print(f"Anzahl der Zeilen im DataFrame: {df_wege.shape[0]}")

Anzahl der Zeilen im DataFrame: 30477


In [None]:
# Create table if it doesn't exist
table_name = "OVRP_ways"
query = f"""
        CREATE TABLE OVRP_ways_test (
            id                      INT         NOT NULL PRIMARY KEY,
            id_relation             INT         NOT NULL,
            lat                     FLOAT       NOT NULL,
            lon                     FLOAT       NOT NULL,
            highway                 VARCHAR(255) NULL,
            surface                 VARCHAR(255) NULL,
            timestamp_apicall       DATETIME    NULL,
            FOREIGN KEY (id_relation) REFERENCES OVRP_HikingRoutes (id)
        );
    """

conn = pymssql.connect(server, db_user, db_password, database)
cursor = conn.cursor()
cursor.execute(query)

conn.commit()
conn.close()

In [87]:
# Create connection string for SQLAlchemy
connection_string = f"mssql+pymssql://{db_user}:{db_password}@{server}/{database}"
engine = create_engine(connection_string)

# Ingest data to tabledatabase table
df_wege.to_sql(table_name, con=engine, if_exists='replace', index=False)
print("DataFrame erfolgreich in die MSSQL-Datenbank geladen!")

DataFrame erfolgreich in die MSSQL-Datenbank geladen!
