In [3]:
import pandas as pd
import numpy as np
import os
from shapely.geometry import LineString, box
from datetime import datetime, timedelta


class SyntheticFlightsGenerator:
    """Generates synthetic flight paths with dates constrained by station data availability."""

    def __init__(self, stations_file, num_flights=1000, output_dir='data/synthetic_flights'):
        self.stations_file = stations_file
        self.num_flights = num_flights
        self.output_dir = output_dir
        self.stations_df = None
        self.bbox = None

    def load_stations(self):
        """Loads radiosonde stations with date ranges from the filtered CSV file."""
        self.stations_df = pd.read_csv(self.stations_file)
        self.stations_df['start_date'] = pd.to_datetime(
            self.stations_df['date_range'].str.split(' to ').str[0])
        self.stations_df['end_date'] = pd.to_datetime(
            self.stations_df['date_range'].str.split(' to ').str[1])
        lats = self.stations_df['latitude']
        lons = self.stations_df['longitude']
        self.bbox = (lons.min(), lats.min(), lons.max(), lats.max())

    def generate_flight(self):
        """Generates a single synthetic flight with a date within the overlapping data range of departure and arrival stations."""
        dep = self.stations_df.sample(1)
        arr = self.stations_df.sample(1)
        while dep['station_id'].values[0] == arr['station_id'].values[0]:
            arr = self.stations_df.sample(1)
        dep_start = dep['start_date'].values[0]
        dep_end = dep['end_date'].values[0]
        arr_start = arr['start_date'].values[0]
        arr_end = arr['end_date'].values[0]
        overlap_start = max(dep_start, arr_start)
        overlap_end = min(dep_end, arr_end)
        if overlap_start > overlap_end:
            return None  # No overlapping date range
        flight_date = overlap_start + \
            timedelta(days=np.random.randint(
                0, (overlap_end - overlap_start).days))
        dep_lat, dep_lon = dep['latitude'].values[0], dep['longitude'].values[0]
        arr_lat, arr_lon = arr['latitude'].values[0], arr['longitude'].values[0]
        num_points = np.random.randint(5, 10)
        lats = np.linspace(dep_lat, arr_lat, num_points)
        lons = np.linspace(dep_lon, arr_lon, num_points)
        route = list(zip(lats, lons))
        flight_bbox = LineString(route).bounds
        return {
            'departure': dep['station_id'].values[0],
            'arrival': arr['station_id'].values[0],
            'date': flight_date,
            'route': route,
            'bbox': flight_bbox
        }

    def generate_flights(self):
        """Generates multiple synthetic flights with dates and saves them to a CSV file."""
        flights = []
        while len(flights) < self.num_flights:
            flight = self.generate_flight()
            if flight:
                flights.append(flight)
        df = pd.DataFrame(flights)
        os.makedirs(self.output_dir, exist_ok=True)
        df.to_csv(os.path.join(self.output_dir,
                  'synthetic_flights.csv'), index=False)
        with open(os.path.join(self.output_dir, 'flights_bbox.txt'), 'w') as f:
            f.write(
                f"{self.bbox[0]},{self.bbox[1]},{self.bbox[2]},{self.bbox[3]}")
        print(
            f"Synthetic flights saved to '{self.output_dir}/synthetic_flights.csv'")
        print(f"Bounding box saved to '{self.output_dir}/flights_bbox.txt'")

    def run(self):
        """Executes the flight generation process."""
        self.load_stations()
        self.generate_flights()


if __name__ == "__main__":
    stations_file = 'data/european_stations_2010_2025.csv'
    generator = SyntheticFlightsGenerator(stations_file)
    generator.run()

KeyboardInterrupt: 

In [9]:
import pandas as pd
import numpy as np
import os
from shapely.geometry import LineString, box
from datetime import datetime

# Configuration dictionary for easy customization
CONFIG = {
    'num_flights': 10000,                 # Number of flights to generate
    'min_route_points': 5,            # Minimum number of points in a flight route
    'max_route_points': 10,           # Maximum number of points in a flight route
    'output_dir': './data',           # Directory to save output files
    'stations_file': './data/european_stations_2010_2025.csv',  # Path to stations CSV
    'altitude_variation': 1000,       # Max altitude variation in meters
    'noise_level': 0.01               # Coordinate noise level (degrees)
}


class SyntheticFlightsGenerator:
    """Generates synthetic flight paths with data augmentation for single-station flights."""

    def __init__(self, config):
        self.config = config
        self.stations_df = None
        self.bbox = None

    def load_stations(self):
        """Loads radiosonde stations with exact datetimes from the filtered CSV file."""
        print("Loading stations...")
        self.stations_df = pd.read_csv(self.config['stations_file'])
        self.stations_df['datetime'] = pd.to_datetime(
            self.stations_df['date_range'].str.split(' to ').str[0])
        lats = self.stations_df['latitude']
        lons = self.stations_df['longitude']
        self.bbox = (lons.min(), lats.min(), lons.max(), lats.max())
        print(f"Stations loaded: {len(self.stations_df)} stations found.")

    def generate_flight(self, station, flight_num, total_flights):
        """Generates a single synthetic flight with a loop path and altitude variations for a given station."""
        station_id = station['station_id']
        lat, lon = station['latitude'], station['longitude']
        flight_datetime = station['datetime']
        num_points = np.random.randint(
            self.config['min_route_points'], self.config['max_route_points'] + 1)

        # Create a loop by generating intermediate points around the station
        angles = np.linspace(0, 2 * np.pi, num_points)
        radius = 0.1  # degrees, adjust as needed
        lats = lat + radius * \
            np.sin(
                angles) + np.random.uniform(-self.config['noise_level'], self.config['noise_level'], num_points)
        lons = lon + radius * \
            np.cos(
                angles) + np.random.uniform(-self.config['noise_level'], self.config['noise_level'], num_points)

        # Add altitude variations (in meters)
        altitudes = np.random.uniform(
            0, self.config['altitude_variation'], num_points)
        # Ensure start and end are at ground level (altitude = 0)
        altitudes[0] = 0
        altitudes[-1] = 0

        # Combine lat, lon, and altitude into route points
        route = list(zip(lats, lons, altitudes))
        flight_bbox = LineString([(lon, lat)
                                 for lat, lon, alt in route]).bounds
        print(
            f"Flight {flight_num}/{total_flights} generated for station {station_id} at {flight_datetime}")
        return {
            'station_id': station_id,
            'datetime': flight_datetime,
            'route': route,  # Now includes (lat, lon, altitude)
            'bbox': flight_bbox
        }

    def generate_flights(self):
        """Generates multiple synthetic flights and saves them to a CSV file."""
        print(f"Generating {self.config['num_flights']} flights...")
        flights = []
        for i in range(self.config['num_flights']):
            station = self.stations_df.sample(1).iloc[0]
            flight = self.generate_flight(
                station, i+1, self.config['num_flights'])
            flights.append(flight)
        df = pd.DataFrame(flights)
        print("All flights generated.")
        print("Saving flights to CSV...")
        os.makedirs(self.config['output_dir'], exist_ok=True)
        df.to_csv(os.path.join(
            self.config['output_dir'], 'synthetic_flights.csv'), index=False)
        print(
            f"Flights saved to '{self.config['output_dir']}/synthetic_flights.csv'")
        print("Saving bounding box...")
        with open(os.path.join(self.config['output_dir'], 'flights_bbox.txt'), 'w') as f:
            f.write(
                f"{self.bbox[0]},{self.bbox[1]},{self.bbox[2]},{self.bbox[3]}")
        print(
            f"Bounding box saved to '{self.config['output_dir']}/flights_bbox.txt'")

    def run(self):
        """Executes the flight generation process."""
        print("Starting flight generation...")
        self.load_stations()
        self.generate_flights()
        print("Flight generation completed.")


if __name__ == "__main__":
    generator = SyntheticFlightsGenerator(CONFIG)
    generator.run()

Starting flight generation...
Loading stations...
Stations loaded: 9 stations found.
Generating 10000 flights...
Flight 1/10000 generated for station GMM00010113 at 2011-06-20 18:00:00
Flight 2/10000 generated for station RSM00037099 at 2013-12-05 06:00:00
Flight 3/10000 generated for station SPM00008383 at 2018-12-12 00:00:00
Flight 4/10000 generated for station TUM00017196 at 2016-01-04 12:00:00
Flight 5/10000 generated for station ITM00016045 at 2016-06-16 18:00:00
Flight 6/10000 generated for station TUM00017196 at 2016-01-04 12:00:00
Flight 7/10000 generated for station UKM00033966 at 2011-01-01 00:00:00
Flight 8/10000 generated for station ITM00016037 at 2010-01-04 00:00:00
Flight 9/10000 generated for station GMM00010113 at 2011-06-20 18:00:00
Flight 10/10000 generated for station ITM00016037 at 2010-01-04 00:00:00
Flight 11/10000 generated for station UKM00033966 at 2011-01-01 00:00:00
Flight 12/10000 generated for station ITM00016546 at 2012-02-28 12:00:00
Flight 13/10000 gene