In [102]:
import random
import pandas as pd
from faker import Faker
from datetime import datetime, timedelta
import string
import numpy as np
from pathlib import Path

In [103]:
# Shared Faker instance; the cells below use it for city names, airport letter codes and person names.
fake = Faker()

In [104]:
# --- Column layouts: one list per generated table, in output column order ---
aircraft_columns = ["Aircraft ID", "Aircraft Type", "Current Status"]
airport_columns = ["Airport ID", "Name", "City"]
gate_columns = ["Gate ID", "Terminal Name", "Gate Number", "Status", "Airport ID"]
weather_columns = ["Weather ID", "Conditions", "Update Time", "Airport ID"]
flight_columns = [
    "Flight Number", "Departure Time", "Arrival Time", "Flight Status", "Aircraft ID",
    "Departure Gate ID", "Departure Airport ID", "Arrival Gate ID", "Arrival Airport ID",
]
crew_columns = ["Crew ID", "Crew Type", "Member Name", "Flight Number"]
passenger_columns = ["Passenger ID", "Name", "Ticket Number", "Flight Number"]

# --- Value pools that the generator cells sample from with random.choice ---
aircraft_types = [
    "Airbus A320",
    "Airbus A380",
    "Boeing 737",
    "Boeing 747",
    "Boeing 787",
]
weather_conditions = [
    "Clear skies",
    "Partly cloudy",
    "Light rain",
    "Snow",
    "Heavy rain",
    "Foggy",
    "Strong wind",
    "Thunderstorms with rain",
    "Thunderstorms with lightning",
    "Frost",
    "Icy runway",
]
# Terminals are named by single uppercase letters, "A" through "Z".
terminal_names = [*string.ascii_uppercase]
gate_statuses = ["Available", "Occupied", "Under maintenance"]
crew_types = ["Pilot", "First Officer", "Steward"]
aircraft_statuses = [
    "In flight",
    "Boarding",
    "Deboarding",
    "Taxiing",
    "Under maintenance",
    "Parked",
]
flight_statuses = [
    "Boarding",
    "Gate Closed",
    "On time",
    "Delayed",
    "Departed",
    "Diverted",
    "Cancelled",
    "En Route",
]

# Table names listed in the same order in which the cells below build them
# (a table appears after the tables whose IDs it samples).
# NOTE(review): this list is not referenced by any cell visible here.
init_order = ["Aircraft", "Airport", "Gate", "Weather", "Flight", "Crew", "Passenger"]

In [105]:
# Set to True for a quick, small dataset; leave False to generate the full-size tables.
test = False

# Fixed seed so that Restart Kernel -> Run All reproduces the same random draws
# from the standard-library `random` module and from NumPy's global generator.
# NOTE(review): values produced through the Faker instance are presumably drawn
# from Faker's own generator and are not covered by this seed - confirm, and seed
# Faker separately if fully reproducible names/cities are required.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Row counts per table: the small value when `test` is set, otherwise the full-size value.
passenger_length = 10000 if test else 100000
gate_length = 250 if test else 25000
flights_length = 1000 if test else 50000
airport_length = 25 if test else 5000
aircraft_length = 100 if test else 20000
weather_length = 500 if test else 50000
crew_length = 5000 if test else 50000

In [106]:
def generate_ids_series(num_rows: int):
    """Draw ``num_rows`` distinct random integer IDs that all share one digit count.

    IDs are sampled without replacement from the full range of ``d``-digit
    integers. ``d`` is normally the number of digits of ``num_rows`` itself; it
    is widened by one when that range holds fewer than ``num_rows`` values
    (for example, 95 IDs cannot be drawn from the 90 two-digit numbers).

    Args:
        num_rows: Number of IDs to produce; must be non-negative.

    Returns:
        A one-dimensional NumPy integer array holding ``num_rows`` unique IDs
        in random order. Draws come from NumPy's global random generator.

    Raises:
        ValueError: If ``num_rows`` is negative.
    """
    if num_rows < 0:
        raise ValueError(f"num_rows must be non-negative, got {num_rows}")

    num_digits = len(str(num_rows))

    # There are exactly 9 * 10**(d - 1) integers with d digits. When num_rows
    # exceeds that (91-99, 901-999, ...), sampling without replacement would
    # fail, so move up to the next digit count, which always has enough room.
    if 9 * 10 ** (num_digits - 1) < num_rows:
        num_digits += 1

    min_id = 10 ** (num_digits - 1)
    max_id = (10 ** num_digits) - 1

    ids = np.random.choice(np.arange(min_id, max_id + 1), size=num_rows, replace=False)

    return ids

def generate_random_datetimes(count: int, seed=None):
    """Generate one random datetime, or an ordered pair of nearby datetimes.

    The first datetime falls on a random day between 2020-01-01 and 2023-12-31
    (inclusive) at a random time of day with one-second resolution. When a pair
    is requested, the second datetime is either on the same day or on the
    following day (chosen at random) and is never earlier than the first.

    Args:
        count: ``1`` returns a single datetime; any other value returns a pair.
        seed: Optional seed. When given, all draws come from a private
            ``random.Random(seed)`` so the result is reproducible and the
            module-level ``random`` generator is left untouched. When ``None``,
            the module-level generator is used.

    Returns:
        A ``datetime`` when ``count == 1``; otherwise a tuple
        ``(datetime1, datetime2)`` with ``datetime1 <= datetime2``.
    """
    # A private generator yields the same sequence as random.seed(seed) would,
    # without reseeding the global generator that the calling cells also use.
    rng = random.Random(seed) if seed is not None else random

    start_date = datetime(2020, 1, 1)  # First possible day (midnight)
    end_date = datetime(2023, 12, 31)  # Last possible day

    def _random_time_of_day():
        # Offset from midnight; hours, minutes and seconds are drawn in that order.
        return timedelta(hours=rng.randint(0, 23), minutes=rng.randint(0, 59), seconds=rng.randint(0, 59))

    # Pick the day first, then the time of day on that day.
    random_date1 = start_date + timedelta(days=rng.randint(0, (end_date - start_date).days))
    datetime1 = random_date1 + _random_time_of_day()

    if count == 1:
        return datetime1

    # Decide whether the second datetime shares the day or falls on the next one.
    same_day = rng.choice([True, False])
    random_date2 = random_date1 if same_day else random_date1 + timedelta(days=1)
    datetime2 = random_date2 + _random_time_of_day()

    # Only a same-day pair can come out reversed; swap so the earlier one is first.
    if same_day and datetime2 < datetime1:
        datetime1, datetime2 = datetime2, datetime1

    return datetime1, datetime2

def generate_ticket_number(length):
    """Return a random ticket code of ``length`` characters.

    Each character is picked independently from the uppercase letters A-Z and
    the digits 0-9 using the module-level ``random`` generator, so a character
    may repeat within one code.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)

In [107]:
# One record per aircraft; "Aircraft ID" starts as None and is filled in for
# the whole table once the frame exists.
aircraft_dict = []

for _ in range(aircraft_length):
    record = dict.fromkeys(aircraft_columns)  # every column present, default None
    record["Aircraft Type"] = random.choice(aircraft_types)
    record["Current Status"] = random.choice(aircraft_statuses)
    aircraft_dict.append(record)

aircraft_values_df = pd.DataFrame.from_records(aircraft_dict)
# Unique random IDs, one per row.
aircraft_values_df["Aircraft ID"] = generate_ids_series(aircraft_length)

In [108]:
# One record per airport: a Faker city plus a random three-letter uppercase
# code used as the airport name. "Airport ID" is filled in after the loop.
airport_dict = []

for _ in range(airport_length):
    city_name = fake.city()
    letter_code = "".join(fake.random_letters(length=3)).upper()

    record = dict.fromkeys(airport_columns)  # every column present, default None
    record.update({"Name": letter_code, "City": city_name})
    airport_dict.append(record)

airport_values_df = pd.DataFrame.from_records(airport_dict)
# Unique random IDs, one per row.
airport_values_df["Airport ID"] = generate_ids_series(airport_length)

In [109]:
# One record per gate. Each gate gets a terminal letter, a gate number made of
# that letter plus a number from 1 to 99, a status, and a randomly chosen
# existing airport. "Gate ID" is filled in after the loop.
gate_dict = []

for _ in range(gate_length):
    terminal_letter = random.choice(terminal_names)
    number_in_terminal = random.choice(range(1, 100))

    record = dict.fromkeys(gate_columns)  # every column present, default None
    record["Terminal Name"] = terminal_letter
    record["Gate Number"] = f"{terminal_letter}{number_in_terminal}"
    record["Status"] = random.choice(gate_statuses)
    record["Airport ID"] = random.choice(airport_values_df["Airport ID"])
    gate_dict.append(record)

gate_values_df = pd.DataFrame.from_records(gate_dict)
# Unique random IDs, one per row.
gate_values_df["Gate ID"] = generate_ids_series(gate_length)

In [110]:
# One record per weather report: a condition, an update timestamp and a
# randomly chosen existing airport. "Weather ID" is filled in after the loop.
weather_dict = []

for row_number in range(weather_length):
    record = dict.fromkeys(weather_columns)  # every column present, default None
    record["Conditions"] = random.choice(weather_conditions)
    # The timestamp helper is seeded with the row index.
    record["Update Time"] = generate_random_datetimes(count=1, seed=row_number)
    record["Airport ID"] = random.choice(airport_values_df["Airport ID"])
    weather_dict.append(record)

weather_values_df = pd.DataFrame.from_records(weather_dict)
# Unique random IDs, one per row.
weather_values_df["Weather ID"] = generate_ids_series(weather_length)

In [111]:
# One record per flight: departure/arrival timestamps, a status, an aircraft,
# and a departure and arrival gate together with the airport each gate
# belongs to. "Flight Number" is filled in after the loop.
flights_dict = []

# Gate ID -> Airport ID lookup, built once. Scanning the whole gate table with
# a boolean mask for every flight (twice) costs O(flights * gates); a dict
# makes each lookup constant time. Gate IDs are drawn without replacement, so
# each ID maps to exactly one airport.
gate_ids = gate_values_df["Gate ID"]
airport_by_gate = dict(zip(gate_ids.tolist(), gate_values_df["Airport ID"].tolist()))

for i in range(flights_length):
    # The timestamp helper is seeded with the row index.
    departure, arrival = generate_random_datetimes(count=2, seed=i)
    status = random.choice(flight_statuses)
    aircraft_id = random.choice(aircraft_values_df["Aircraft ID"])
    departure_gate_id = random.choice(gate_ids)
    departure_airport_id = airport_by_gate[departure_gate_id]
    arrival_gate_id = random.choice(gate_ids)
    arrival_airport_id = airport_by_gate[arrival_gate_id]

    column_values = {key: None for key in flight_columns}
    column_values = column_values | {"Departure Time": departure,
                     "Arrival Time": arrival,
                     "Flight Status": status,
                     "Aircraft ID": aircraft_id,
                     "Departure Gate ID": departure_gate_id,
                     "Departure Airport ID": departure_airport_id,
                     "Arrival Gate ID": arrival_gate_id,
                     "Arrival Airport ID": arrival_airport_id}

    flights_dict.append(column_values)

flights_values_df = pd.DataFrame.from_records(flights_dict)
# Unique random flight numbers, one per row.
flights_values_df["Flight Number"] = generate_ids_series(flights_length)



In [112]:
# One record per crew member: a role, a Faker name and a randomly chosen
# existing flight. "Crew ID" is filled in after the loop.
crew_dict = []

for _ in range(crew_length):
    record = dict.fromkeys(crew_columns)  # every column present, default None
    record["Crew Type"] = random.choice(crew_types)
    record["Member Name"] = fake.name()
    record["Flight Number"] = random.choice(flights_values_df["Flight Number"])
    crew_dict.append(record)

crew_values_df = pd.DataFrame.from_records(crew_dict)
# Unique random IDs, one per row.
crew_values_df["Crew ID"] = generate_ids_series(crew_length)


In [113]:
# One record per passenger: a Faker name, a 10-character ticket number and a
# randomly chosen existing flight. Ticket numbers are kept unique within each
# flight. "Passenger ID" is filled in after the loop.
passenger_dict = []
flights_tickets_dict = {}  # flight number -> list of ticket numbers already issued for it
for _ in range(passenger_length):
    passenger_name = fake.name()
    ticket = generate_ticket_number(length=10)
    flight = random.choice(flights_values_df["Flight Number"])

    # Redraw until the ticket is new for this flight, then record it.
    issued_tickets = flights_tickets_dict.setdefault(flight, [])
    while ticket in issued_tickets:
        ticket = generate_ticket_number(length=10)
    issued_tickets.append(ticket)

    record = dict.fromkeys(passenger_columns)  # every column present, default None
    record["Ticket Number"] = ticket
    record["Name"] = passenger_name
    record["Flight Number"] = flight
    passenger_dict.append(record)

# The frame name keeps its existing spelling because the CSV export function refers to it.
pasenger_values_df = pd.DataFrame.from_records(passenger_dict)
# Unique random IDs, one per row.
pasenger_values_df["Passenger ID"] = generate_ids_series(passenger_length)

In [117]:
# Output directory for the generated CSV files, relative to the current working directory.
data_folder = Path("Data")

# Create the directory (and any missing parents); exist_ok=True makes re-running this cell safe.
data_folder.mkdir(parents=True, exist_ok=True)

In [118]:
def save_sheets_to_csv(path: Path):
    """Write every generated table to its own CSV file inside ``path``.

    The tables are read from the notebook-level DataFrames and written
    without the index column.

    Args:
        path: Directory that receives the CSV files.
    """
    tables_by_file = (
        ('flights.csv', flights_values_df),
        ('gates.csv', gate_values_df),
        ('crew.csv', crew_values_df),
        ('passengers.csv', pasenger_values_df),
        ('weather.csv', weather_values_df),
        ('aircrafts.csv', aircraft_values_df),
        ('airports.csv', airport_values_df),
    )
    for file_name, frame in tables_by_file:
        frame.to_csv(path / file_name, index=False)


In [119]:
# Export all generated tables as CSV files into the data folder.
save_sheets_to_csv(data_folder)