In [20]:
from functools import reduce
import random
import pandas as pd
import names
from faker import Faker
from datetime import datetime, timedelta
import string

fake = Faker()

In [45]:
gate_columns = [
    "Terminal Name",
    "Gate Number",
    "Status",
    "Airport ID"
]

airport_columns = [
    "Name",
    "City"
]

weather_columns = [
    "Conditions",
    "Update Time",
    "Airport ID"
]

aircraft_columns = [
    "Aircraft Type",
    "Current Status"
]

passenger_columns = [
    "Name",
    "Ticket Number",
    "Flight Number"
]

crew_columns = [
    "Crew Type",
    "Member Name",
    "Flight Number"
]

flight_columns = [
    "Departure Time",
    "Arrival Time",
    "Flight Status",
    "Aircraft ID",
    "Departure Gate ID",
    "Departure Airport ID",
    "Arrival Gate ID",
    "Arrival Airport ID"
]


aircraft_types = ["Airbus A320", "Airbus A380", "Boeing 737", "Boeing 747", "Boeing 787"]

weather_conditions = ["Clear skies", "Partly cloudy", "Light rain", "Snow", 
                                "Heavy rain", "Foggy", "Strong wind", "Thunderstorms with rain", 
                                "Thunderstorms with lightning", "Frost", "Icy runway"]


terminal_names = list(string.ascii_uppercase)

gate_status = ["Available", "Occupied", "Under maintenance"]

crew_types = ["Pilot", "First Officer", "Steward"]

aircraft_status = ["In flight", "Boarding", "Deboarding", "Taxiing", "Under maintenance",
                                "Parked"]

flight_status = ["Boarding", "Gate Closed", "On time", "Delayed", "Departed",
                            "Diverted", "Cancelled", "En Route"]

In [58]:
passenger_length = 100000
gate_length = 25000
flights_length = 50000
airport_length = 5000
aircraft_length = 20000
weather_length = 50000
crew_length = 50000

In [21]:
def generate_random_datetimes(count: int, seed=None):
    if seed is not None:
        random.seed(seed)
    
    # Generate a random date within a given range
    start_date = datetime(2020, 1, 1)  # Start date
    end_date = datetime(2023, 12, 31)  # End date
    delta = end_date - start_date
    random_days = random.randint(0, delta.days)
    
    # First random datetime
    random_date1 = start_date + timedelta(days=random_days)
    random_time1 = timedelta(hours=random.randint(0, 23), minutes=random.randint(0, 59), seconds=random.randint(0, 59))
    datetime1 = datetime.combine(random_date1, (datetime.min + random_time1).time())
    
    if count == 1:
        return datetime1

    # Decide if the second datetime will be on the same day or the next day
    same_day = random.choice([True, False])
    
    if same_day:
        # Second datetime on the same day
        random_time2 = timedelta(hours=random.randint(0, 23), minutes=random.randint(0, 59), seconds=random.randint(0, 59))
        datetime2 = datetime.combine(random_date1, (datetime.min + random_time2).time())
    else:
        # Second datetime on the next day
        random_date2 = random_date1 + timedelta(days=1)
        random_time2 = timedelta(hours=random.randint(0, 23), minutes=random.randint(0, 59), seconds=random.randint(0, 59))
        datetime2 = datetime.combine(random_date2, (datetime.min + random_time2).time())
    
    # Ensure the datetime2 is after datetime1 if they are on the same day
    if datetime2 < datetime1 and same_day:
        datetime1, datetime2 = datetime2, datetime1

    return datetime1, datetime2

# Example usage with a seed:
seed_value = 42
dt1, dt2 = generate_random_datetimes(count=2, seed=seed_value)
print("Datetime 1:", dt1)
print("Datetime 2:", dt2)

    

Datetime 1: 2023-08-02 03:01:47
Datetime 2: 2023-08-03 07:14:08


In [40]:

flights_dict = []

for i in range(flights_length):
    departure, arrival = generate_random_datetimes(count=2, seed=i)
    status = random.choice(flight_status)
    column_values = {key: None for key in flight_columns}
    column_values = column_values | {"Departure Time": departure,
                     "Arrival Time": arrival,
                     "Flight Status": status}
    flights_dict.append(column_values)

flights_values_df = pd.DataFrame.from_records(flights_dict)



In [41]:
flights_values_df.head()

Unnamed: 0,Departure Time,Arrival Time,Flight Status,Aircraft ID,Departure Gate ID,Departure Airport ID,Arrival Gate ID,Arrival Airport ID
0,2022-02-27 13:02:16,2022-02-28 12:58:50,Departed,,,,,
1,2020-10-02 08:07:31,2020-10-02 18:54:51,En Route,,,,,
2,2020-04-25 02:05:23,2020-04-25 23:51:42,Departed,,,,,
3,2021-05-02 18:34:08,2021-05-03 19:30:40,Gate Closed,,,,,
4,2021-04-28 09:06:46,2021-04-29 15:09:05,Gate Closed,,,,,


In [46]:

crew_dict = []

for i in range(crew_length):
    crew_type = random.choice(crew_types)
    member_name = fake.name()
    column_values = {key: None for key in crew_columns}
    column_values = column_values | {"Crew Type": crew_type,
                     "Member Name": member_name}
    crew_dict.append(column_values)

crew_values_df = pd.DataFrame.from_records(crew_dict)


In [47]:
crew_values_df.head()

Unnamed: 0,Crew Type,Member Name,Flight Number
0,Steward,Chris Nunez,
1,Steward,Kaylee Choi,
2,Pilot,Amanda Ayers,
3,First Officer,Rebecca Smith,
4,First Officer,Michael Davis,


In [48]:
def generate_ticket_number(length):
    # Define the characters to include: digits and uppercase letters
    characters = string.ascii_uppercase + string.digits
    
    # Generate a random string of the specified length
    random_string = ''.join(random.choices(characters, k=length))
    
    return random_string

In [49]:

passenger_dict = []

for i in range(passenger_length):
    name = fake.name()
    ticket_number = generate_ticket_number(length=10)
    column_values = {key: None for key in passenger_columns}
    column_values = column_values | {"Ticket Number": ticket_number,
                     "Name": name}
    passenger_dict.append(column_values)

pasenger_values_df = pd.DataFrame.from_records(passenger_dict)

In [50]:
pasenger_values_df.head()

Unnamed: 0,Name,Ticket Number,Flight Number
0,Erik Smith,IQQDBK468F,
1,James Hensley,KT9SRES18I,
2,Jenna Morton,6GJWUY23PC,
3,Jennifer Walker,8L10RBPQSB,
4,Chase Hanson,W5XSEGGM5B,


In [51]:

aircraft_dict = []

for i in range(aircraft_length):
    aircraft_type = random.choice(aircraft_types)
    status = random.choice(aircraft_status)
    column_values = {key: None for key in aircraft_columns}
    column_values = column_values | {"Aircraft Type": aircraft_type,
                     "Current Status": status}
    aircraft_dict.append(column_values)

aircraft_values_df = pd.DataFrame.from_records(aircraft_dict)

In [52]:
aircraft_values_df.head()

Unnamed: 0,Aircraft Type,Current Status
0,Airbus A320,In flight
1,Boeing 737,In flight
2,Boeing 787,In flight
3,Boeing 787,Under maintenance
4,Boeing 787,In flight


In [53]:

weather_dict = []

for i in range(weather_length):
    weather_condition = random.choice(weather_conditions)
    update_time = generate_random_datetimes(count=1, seed=i)
    column_values = {key: None for key in weather_columns}
    column_values = column_values | {"Conditions": weather_condition,
                     "Update Time": update_time}
    weather_dict.append(column_values)

weather_values_df = pd.DataFrame.from_records(weather_dict)

In [54]:
weather_values_df.head()

Unnamed: 0,Conditions,Update Time,Airport ID
0,Thunderstorms with lightning,2022-02-27 13:02:16,
1,Thunderstorms with lightning,2020-10-02 18:54:51,
2,Partly cloudy,2020-04-25 02:05:23,
3,Light rain,2021-05-02 18:34:08,
4,Foggy,2021-04-28 09:06:46,


In [38]:
airport_dict = []

for i in range(airport_length):
    city = fake.city()
    airport_name = ("".join(fake.random_letters(length=3))).upper()
    column_values = {key: None for key in airport_columns}
    column_values = column_values | {"Name": airport_name,
                     "City": city}
    airport_dict.append(column_values)

airport_values_df = pd.DataFrame.from_records(airport_dict)

In [39]:
airport_values_df['Name']

0     KRE
1     LUX
2     HGL
3     WPR
4     VNW
     ... 
95    JBK
96    LGM
97    QNE
98    TOB
99    KRM
Name: Name, Length: 100, dtype: object

In [56]:
gate_dict = []

for i in range(gate_length):
    terminal = random.choice(list(string.ascii_uppercase))
    gate_number = terminal + str(random.choice(range(1,100)))
    gate_status = random.choice(gate_status)
    column_values = {key: None for key in gate_columns}
    column_values = column_values | {"Terminal Name": terminal,
                     "Gate Number": gate_number,
                     "Status": gate_status}
    gate_dict.append(column_values)

gate_values_df = pd.DataFrame.from_records(gate_dict)

In [57]:
gate_values_df.head()

Unnamed: 0,Terminal Name,Gate Number,Status,Airport ID
0,H,H18,Available,
1,I,I94,b,
2,Q,Q88,b,
3,T,T63,b,
4,N,N93,b,
