In [1]:
import pandas as pd
import numpy as np

import random
from datetime import datetime, timedelta


In [2]:
# Airport codes used as keys throughout this notebook:
#   LHR  London, Heathrow
#   CDG  Paris, Charles De Gaulle
#   AMS  Amsterdam, Schiphol
#   FRA  Frankfurt
#   BCN  Barcelona, El Prat
#   MXP  Milan, Malpensa
#   DUB  Dublin
#   CPH  Copenhagen, Kastrup
#   PRG  Prague, Vaclav Havel
#   ZUR  Zurich, Kloten
#   FAO  Faro
#   LIS  Lisbon
#   KEF  Reykjavik, Keflavik
#   ROM  Rome, Fiumicino
#
# NOTE(review): the published IATA codes for Zurich and Rome Fiumicino appear
# to be ZRH and FCO; "ZUR" and "ROM" are kept because the rest of the notebook
# uses these exact strings as lookup keys -- confirm before renaming.

# Sample airport and aircraft pools
airports = [
    "LHR", "CDG", "AMS", "FRA", "BCN", "MXP", "DUB",
    "CPH", "PRG", "ZUR", "FAO", "LIS", "KEF", "ROM",
]
aircraft_types = [
    "A320",
    "A319",
    "B737",
]

In [3]:
def minute2hours(mins):
    """Convert a duration in minutes to a whole number of hours.

    The quotient ``mins / 60`` is rounded to zero decimals with ``np.round``
    and returned as a built-in ``int``. ``np.round`` rounds exact halves to
    the nearest even value, so 90 minutes gives 2 and 150 minutes also gives 2.
    """
    rounded_hours = np.round(mins / 60)
    return int(rounded_hours)

# Read the airport-to-airport flight-duration table from disk and display it.
# The file's first column is loaded as an ordinary data column (no index_col),
# which is why the lookups in later cells offset the column position by one.
duration_lookup = pd.read_csv(filepath_or_buffer="data/flight_durations.csv")
duration_lookup

Unnamed: 0.1,Unnamed: 0,LHR,CDG,AMS,FRA,BCN,MXP,DUB,CPH,PRG,ZUR,FAO,LIS,KEF,ROM
0,LHR,0,80,75,75,140,135,75,95,95,85,170,145,185,135
1,CDG,80,0,65,70,120,125,95,95,85,90,150,145,200,125
2,AMS,75,65,0,70,140,145,110,80,85,95,200,185,210,140
3,FRA,75,70,70,0,125,60,120,70,75,55,185,170,200,95
4,BCN,140,120,140,125,0,85,165,160,125,160,55,100,210,95
5,MXP,135,125,145,60,85,0,150,95,80,60,185,170,230,85
6,DUB,75,95,110,120,165,150,0,160,140,170,210,195,185,180
7,CPH,95,95,80,70,160,95,160,0,70,85,220,215,185,125
8,PRG,95,85,85,75,125,80,140,70,0,65,200,190,195,80
9,ZUR,85,90,95,55,160,60,170,85,65,0,195,180,230,90


In [4]:
origin = "LHR"
destination = "FAO"

# Get flight duration (minutes) from the lookup table.
# Assumes the table's rows and airport columns follow the order of `airports`;
# +1 on the column position accounts for the first column being airport codes.
# A single two-axis `.iloc[row, col]` replaces the chained `.iloc[row][col]`:
# the chained form indexes a label-indexed Series by integer position, which
# pandas has deprecated and reports with a FutureWarning.
duration = duration_lookup.iloc[airports.index(origin), airports.index(destination) + 1]
print(f"Flight duration from {origin} to {destination} is {duration} minutes.")

Flight duration from LHR to FAO is 170 minutes.


  duration = duration_lookup.iloc[airports.index(origin)][airports.index(destination)+1]


In [9]:


def generate_flight_schedule(days=5, flights_per_day=50, seed=None):
    """Generate a random multi-day flight schedule.

    Relies on two names defined at notebook level: ``duration_lookup`` (the
    flight-duration table, in minutes) and ``minute2hours``.

    Parameters
    ----------
    days : int
        Number of days to generate; days are numbered from 1.
    flights_per_day : int
        Flights generated per day. Flight numbers are ``FR100``..``FR999`` and
        are unique within a day, so at most 900 flights per day are possible.
    seed : int or None
        When given, a private ``random.Random(seed)`` is used so the schedule
        is reproducible. When ``None`` (default) the module-level ``random``
        generator is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        One row per flight with columns ``day`` (str), ``id``, ``origin``,
        ``dest``, ``dep`` and ``arr`` ("HH:MM" strings), ``dur`` (whole hours)
        and ``type``.

    Raises
    ------
    ValueError
        If ``flights_per_day`` is negative or exceeds the 900 available
        flight numbers (the previous retry loop would never terminate).
    """
    # Define parameters
    airports = ["LHR", "CDG", "AMS", "FRA", "BCN", "MXP", "DUB", "CPH", "PRG", "ZUR", "FAO", "LIS", "KEF", "ROM"]
    aircraft_types = ["A320", "A319", "B737"]
    # Departure windows as (first hour, end hour); the end hour is exclusive.
    time_blocks = {
        'morning': (6, 9),
        'midday': (11, 14),
        'evening': (17, 21)
    }

    flight_numbers = range(100, 1000)
    if not 0 <= flights_per_day <= len(flight_numbers):
        raise ValueError(
            f"flights_per_day must be between 0 and {len(flight_numbers)}, got {flights_per_day}"
        )

    # The `random` module exposes the same choice/randint/sample API as a
    # Random instance, so both can be used interchangeably below.
    rng = random if seed is None else random.Random(seed)

    schedule = []

    for day in range(1, days + 1):
        # Sampling without replacement guarantees unique flight numbers per day.
        for number in rng.sample(flight_numbers, flights_per_day):
            flight_number = f"FR{number}"

            # Select origin and destination (always two different airports)
            origin = rng.choice(airports)
            destination = rng.choice([a for a in airports if a != origin])

            # Select time block and generate realistic departure time
            block = rng.choice(list(time_blocks.values()))
            dep_hour = rng.randint(block[0], block[1] - 1)
            dep_minute = rng.choice([0, 15, 30, 45])
            # The calendar date is a placeholder; only the clock time is kept.
            dep_time = datetime(2025, 1, 1, dep_hour, dep_minute)

            # Flight duration in minutes from the lookup table. Assumes the
            # table follows the order of `airports`; +1 skips the first column,
            # which holds airport codes. Two-axis `.iloc[row, col]` avoids the
            # deprecated positional indexing of a label-indexed Series.
            duration_minutes = int(
                duration_lookup.iloc[airports.index(origin), airports.index(destination) + 1]
            )
            arr_time = dep_time + timedelta(minutes=duration_minutes)

            # Format times. Because only HH:MM is emitted, an arrival past
            # midnight wraps around to an early-morning clock time.
            dep_str = dep_time.strftime("%H:%M")
            arr_str = arr_time.strftime("%H:%M")

            # Assign aircraft
            aircraft = rng.choice(aircraft_types)

            schedule.append({
                "day": f"{day}",
                "id": flight_number,
                "origin": origin,
                "dest": destination,
                "dep": dep_str,
                "arr": arr_str,
                "dur": minute2hours(duration_minutes),
                "type": aircraft,
            })

    return pd.DataFrame(schedule)

# Example usage: build a schedule with the default size (5 days, 50 flights
# per day) and display the resulting frame.
flights_df = generate_flight_schedule(days=5, flights_per_day=50)
flights_df


  duration = duration_lookup.iloc[airports.index(origin)][airports.index(destination)+1]


Unnamed: 0,day,id,origin,dest,dep,arr,dur,type
0,1,FR889,LIS,DUB,11:45,15:00,3,A319
1,1,FR408,FRA,KEF,12:45,16:05,3,A320
2,1,FR120,ROM,AMS,08:45,11:05,2,A319
3,1,FR306,LHR,KEF,08:15,11:20,3,A320
4,1,FR141,ROM,KEF,08:00,11:35,4,A320
...,...,...,...,...,...,...,...,...
245,5,FR750,FRA,ROM,18:00,19:35,2,B737
246,5,FR856,PRG,BCN,17:45,19:50,2,B737
247,5,FR295,ZUR,CDG,20:45,22:15,2,B737
248,5,FR807,KEF,CPH,11:45,14:50,3,A320


In [None]:
# random seed for reproducibility
np.random.seed(1223)

# Generate 250 flights: 50 per day for Day 1 to Day 5
flights = []
for day in range(1, 6):
    for i in range(50):
        fid = f"D{day}_F{i:03d}"
        # Two distinct airports; converted to plain str so the frame does not
        # carry numpy string scalars.
        origin, dest = map(str, np.random.choice(airports, 2, replace=False))
        dep_hour = np.random.randint(6, 20)

        # Duration in whole hours, taken from the lookup table (minutes).
        # `duration_lookup` is a DataFrame, so the former
        # `duration_lookup.get((origin, dest), default)` never found the tuple
        # key and always fell back to the random default. Assumes the table
        # follows the order of `airports`; +1 skips the airport-code column.
        duration_minutes = duration_lookup.iloc[airports.index(origin), airports.index(dest) + 1]
        duration = minute2hours(duration_minutes)

        # Wrap at midnight so the "HH:00" string always parses with %H:%M below.
        arr_hour = (dep_hour + duration) % 24
        aircraft = np.random.choice(aircraft_types)

        flights.append({
            "day": day,
            "id": fid,
            "origin": origin,
            "dest": dest,
            "dep": f"{dep_hour:02d}:00",
            "arr": f"{arr_hour:02d}:00",
            "duration": duration,
            "type": aircraft
        })



flights_df = pd.DataFrame(flights)

# Convert the "HH:MM" strings to datetime.time values so sorting is
# chronological, then order flights by day, departure and arrival.
flights_df['dep'] = pd.to_datetime(flights_df['dep'], format='%H:%M').dt.time
flights_df['arr'] = pd.to_datetime(flights_df['arr'], format='%H:%M').dt.time
flights_df = flights_df.sort_values(by=['day', 'dep', 'arr'])
flights_df = flights_df.reset_index(drop=True)

flights_df

In [10]:
# Persist the schedule as JSON.
# NOTE(review): this writes whichever `flights_df` was assigned last -- both
# the function-based cell and the loop-based cell assign that name.
flights_df.to_json(path_or_buf='data/flights.json')