In [2]:
# Required packages
import getpass
import os
import xml.etree.ElementTree as ET
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
import requests
import sqlalchemy
from pyproj import Transformer, Geod
from sqlalchemy import create_engine


In [2]:
# Basic settings

# Components of the request URL
## all numbers are in degrees

lon = [13.5]  # left edge (minimum longitude)
lat = [50.9]  # lower edge (minimum latitude)
zellweite_lon = [0.5]  # each 0.1 makes the cell roughly 7 km wider/narrower
zellweite_lat = [0.3]  # each 0.1 makes the cell roughly 11 km taller/shorter
url1 = 'https://api.openstreetmap.org/api/0.6/trackpoints?bbox='  # URL part in front of the bbox
url2 = '&page='  # URL part in front of the page number

# Bounding box in the order "min_lon,min_lat,max_lon,max_lat".
# The computed upper edges are rounded so that binary floating-point noise
# (50.9 + 0.3 evaluates to 51.199999999999996) does not leak into the URL.
_BBOX_DECIMALS = 7
bbox_str = ",".join(
    str(edge)
    for edge in (
        lon[0],
        lat[0],
        round(float(lon[0] + zellweite_lon[0]), _BBOX_DECIMALS),
        round(float(lat[0] + zellweite_lat[0]), _BBOX_DECIMALS),
    )
)

In [11]:
# Download every page of GPS track points inside the bounding box and store
# each page as osmtracks/tracks_<page>.xml.
GPX_NS = "{http://www.topografix.com/GPX/1/0}"
POINTS_PER_PAGE = 5000  # documented page size of the OSM trackpoints endpoint

bbox = bbox_str  # bounding box part of the URL
page = 0
os.makedirs("osmtracks", exist_ok=True)  # folder that receives the downloaded pages
while True:
    url = f"{url1}{bbox}{url2}{page}"
    # A timeout keeps the cell from hanging forever; raise_for_status() stops
    # an HTTP error body from being saved and mistaken for the last page.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # Write the page straight to its final .xml name (no temporary .gpx file
    # and no rename, which fails on Windows when the target already exists).
    new_files = f"osmtracks/tracks_{page}.xml"
    with open(new_files, "wb") as file:
        file.write(response.content)

    page += 1  # next page

    # Stop condition: a page holding fewer points than a full page is the last
    # one. Counting points replaces the former file-size threshold, which also
    # triggered on full pages whose points are small (e.g. no timestamps).
    point_count = sum(1 for _ in ET.fromstring(response.content).iter(f"{GPX_NS}trkpt"))
    if point_count < POINTS_PER_PAGE:
        break


In [31]:
# Required data frames: two independent, initially empty tables with one
# column per track-point attribute.
_point_columns = ("lat", "lon", "time", "trk_id", "trkseg_id", "pt_id", "name")
tracks_df, track_df = (pd.DataFrame(columns=list(_point_columns)) for _ in range(2))

In [32]:
# Parse the downloaded GPX pages into one flat table with one row per track
# point. `tracks_df` is rebuilt from scratch on every run, so re-running this
# cell does not duplicate rows.
GPX_NS = "{http://www.topografix.com/GPX/1/0}"
TRACK_COLUMNS = ["lat", "lon", "time", "trk_id", "trkseg_id", "pt_id", "name"]
track_dir = "/Forschungsprojekt/G613/osmtracks/"

# NOTE(review): only files ending in "_0.xml" are read, i.e. later pages such
# as "tracks_1.xml" are skipped -- confirm that this is intended.
# Sorted so the running ids below are reproducible between runs.
files = sorted(file for file in os.listdir(track_dir) if file.endswith("_0.xml"))

records = []   # one list per track point, in TRACK_COLUMNS order
trkseg_id = 0  # running segment id across all files
trk_id = 0     # running track id across all files
for file in files:
    tree = ET.parse(os.path.join(track_dir, file))
    root = tree.getroot()

    for trk in root.findall(f".//{GPX_NS}trk"):
        trk_id += 1
        # In GPX 1.0 a <name> may be a direct child of <trk>; it is used as
        # the fallback for points that carry no <name> of their own.
        trk_name_element = trk.find(f"{GPX_NS}name")

        for trkseg in trk.findall(f".//{GPX_NS}trkseg"):
            trkseg_id += 1
            pt_id = 0  # point counter restarts at 1 in every segment
            for trkpt in trkseg.findall(f".//{GPX_NS}trkpt"):
                pt_id += 1

                time_element = trkpt.find(f".//{GPX_NS}time")
                if time_element is not None:
                    timestamp = time_element.text
                    # Year 0000 cannot be parsed as a timestamp; only the
                    # time of day is kept, so any valid year will do.
                    if timestamp.startswith('0000'):
                        timestamp = '1970' + timestamp[4:]
                    time_str = pd.to_datetime(timestamp).strftime('%H:%M:%S')
                else:
                    time_str = None

                name_element = trkpt.find(f".//{GPX_NS}name")
                if name_element is None:
                    name_element = trk_name_element
                name = name_element.text if name_element is not None else "NoName"

                # Dedicated expressions instead of variables called lat/lon,
                # which would overwrite the bounding-box settings of the
                # configuration cell.
                records.append(
                    [trkpt.get("lat"), trkpt.get("lon"), time_str, trk_id, trkseg_id, pt_id, name]
                )

# Build the frame once instead of appending row by row.
tracks_df = pd.DataFrame(records, columns=TRACK_COLUMNS)
track_df = pd.DataFrame(columns=TRACK_COLUMNS)  # left empty, as before


In [33]:
# Derive the time gap (in seconds) between consecutive points of every track
# segment, plus the per-segment median and mean of those gaps.

transformer = Transformer.from_crs("EPSG:4326", "EPSG:25832", always_xy=True)
geod = Geod(ellps="WGS84")
# No longer needed by this cell (no chained assignment below); kept so that
# the global pandas option stays as it was for any other cell.
pd.options.mode.chained_assignment = None
tracks_df = tracks_df.sort_values(['trkseg_id', 'pt_id'], ascending=[True, True])

tracks_df_time = tracks_df.copy()
segment_key = tracks_df_time['trkseg_id']

# 'time' holds "HH:MM:SS" strings (or None); missing values become NaT.
parsed_time = pd.to_datetime(tracks_df_time['time'], format='%H:%M:%S')
tracks_df_time['time'] = parsed_time.dt.time

# Gap to the previous point of the same segment. Only the time of day is
# available, so the gap is a difference of seconds since midnight.
# NOTE(review): a segment that crosses midnight therefore yields a negative
# gap -- confirm whether that case needs handling.
seconds_of_day = (
    parsed_time.dt.hour * 3600 + parsed_time.dt.minute * 60 + parsed_time.dt.second
)
gap = seconds_of_day.groupby(segment_key).diff()  # NaN for the first point

# The first point of a segment has no predecessor: its gap is reported as 0.
# Gaps that involve a missing timestamp stay NaN.
is_first_point = ~segment_key.duplicated()
zeit_abstand = gap.mask(is_first_point, 0)
if zeit_abstand.notna().all():
    zeit_abstand = zeit_abstand.astype("int64")  # whole seconds when nothing is missing
tracks_df_time['zeit_abstand'] = zeit_abstand

# Median / mean over all points but the first of each segment (the first
# point's NaN in `gap` is skipped); NaN for segments with a single point.
gap_by_segment = gap.groupby(segment_key)
tracks_df_time['median_zeit_abstand'] = gap_by_segment.transform('median')
tracks_df_time['durch_zeit_abstand'] = gap_by_segment.transform('mean')

In [23]:
# Write the track points to PostgreSQL (table forschungsprojekt.daten).
# Credentials are not stored in the notebook: they come from the standard
# PG* environment variables, and the password is prompted for when
# PGPASSWORD is not set.
db_user = os.environ.get("PGUSER", "postgres")
db_host = os.environ.get("PGHOST", "localhost")
db_name = os.environ.get("PGDATABASE", "postgres")
db_password = os.environ.get("PGPASSWORD") or getpass.getpass(
    f"PostgreSQL password for {db_user}@{db_host}: "
)
# quote_plus keeps special characters in user name / password from breaking the URL
conn_string = f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}/{db_name}"
db = create_engine(conn_string)

column_types = {
    "lat": sqlalchemy.Float,
    "lon": sqlalchemy.Float,
    "time": sqlalchemy.Time,
    "trk_id": sqlalchemy.Integer,
    "trkseg_id": sqlalchemy.Integer,
    "pt_id": sqlalchemy.Integer,
    "name": sqlalchemy.String
}

# The context manager closes the connection even if the upload fails.
# `schema` is passed by keyword: positional use is deprecated in pandas.
with db.connect() as conn:
    tracks_df.to_sql(
        'daten',
        conn,
        schema='forschungsprojekt',
        if_exists="replace",
        index=False,
        dtype=column_types,
    )