In [385]:
import psycopg2
from faker import Faker
import random
from datetime import datetime, timedelta
import duckdb
import pandas as pd

In [386]:
# Connect to PostgreSQL database
connection = psycopg2.connect(
    database="postgres",
    user="postgres",
    password="1234",
    host="localhost",
    port="5432"
)
connection.autocommit = True
cursor = connection.cursor()

cursor.execute("SELECT * FROM CUSTOMER")
rows = cursor.fetchall()
print(rows)

[]


In [387]:
cursor.execute("SELECT * FROM CUSTOMER")
rows = cursor.fetchall()
print(rows)

[]


In [388]:
# Initialize Faker
fake = Faker()
fake.seed_instance(42)  # Set seed for reproducibility

In [389]:
# Set pre-defined lists of fake data from which we pull data 

# Aircraft lLsts 
aircraft_types = [
    "Boeing 737", "Boeing 747", "Boeing 777", "Airbus A320", 
    "Airbus A380", "Embraer E190", "Bombardier CRJ700", "Cessna 172", 
    "Piper PA-28", "Gulfstream G650"
]

# Aircraft Companies 
aircraft_companies = [
    "American Airlines", "Delta Air Lines", "United Airlines", "Lufthansa", 
    "Air France", "Qatar Airways", "Singapore Airlines", "British Airways", 
    "Emirates", "Cathay Pacific"
]

# Flight status list 
flight_statuses = ["scheduled", "delayed", "canceled"]

# Airports list 
airports = ["JFK", "LAX", "ORD", "LHR", "CDG", "SIN", "DXB"]

# Maintenance type list 
maintenance_types = ["Scheduled", "Unscheduled"]

# Slot types 
slot_types = ['Flight', 'Maintenance']

# Reporter class list 
reporter_classes = ["Pilot", "Maintenance Personnel"]

# Work Order Status list 
work_order_statuses = ["in-progress", "pending", "completed"]

# AOS Types 
aos_types = ["MaintenanceService", "RevisionService"]

# OI Types 
oi_types = ["delayGenerating", "safetyGenerating"]

# Task Types 
task_types = ["Inspection", "Repair", "Servicing"]

# List of realistic delay codes (limited to 10 characters)
delay_codes = [
    "A1",  # Weather-related delays
    "B2",  # Aircraft mechanical issues
    "C3",  # Crew-related delays
    "D4",  # Airport operational issues
    "E5",  # Security delays
    "F6",  # Maintenance issues
    "G7",  # Late arrival of aircraft
    "H8",  # Late check-in of passengers
    "I9",  # Missing luggage
    "J10"  # Air traffic control delays
]

In [390]:
def generate_customer_data(num_entries=100):
    customers = []
    for _ in range(num_entries):
        customer = (
            fake.unique.random_int(min=1, max=1000),  # customer_id
            fake.name(),  # customer_name
            fake.unique.email(),  # email
            fake.random_number(digits=10),  # phone_number
            fake.address()  # address
        )
        customers.append(customer)
    return customers

customer_data = generate_customer_data(100) 

In [391]:
def generate_aircraft_data(num_entries=100):
    aircrafts = []
    for _ in range(num_entries):
        aircraft = (
            fake.unique.random_int(min=1, max=1000),  # aircraft_id
            random.choice(aircraft_types),  # aircraft_type
            random.choice(aircraft_companies),  # aircraft_company
            random.randint(50, 300)  # capacity
        )
        aircrafts.append(aircraft)
    return aircrafts

aircraft_data = generate_aircraft_data(100)



In [392]:
from datetime import timedelta

# Adjusted function to generate realistic AircraftSlot data
def generate_aircraft_slot_data(num_entries=50, aircraft_ids=None):
    slots = []
    for _ in range(num_entries):
        slot_type = random.choice(slot_types)  # Choose either 'Flight' or 'Maintenance'
        
        # Set start_time as a random datetime
        # Generate a start time between the beginning of this year and up to one year from today
        start_time = fake.date_time_between(start_date='-1y', end_date='+1y')

        
        # Set end_time based on slot_type
        if slot_type == 'Flight':
            # For flights, set end_time between 1 to 15 hours after start_time
            duration = timedelta(hours=random.randint(1, 15))
        else:
            # For maintenance, set end_time between 2 to 48 hours after start_time for variety
            duration = timedelta(hours=random.randint(2, 48))
        
        end_time = start_time + duration

        slot = (
            fake.unique.random_int(min=1, max=1000),  # slot_id
            random.choice(aircraft_ids),  # aircraft_id
            slot_type,
            start_time,
            end_time
        )
        slots.append(slot)
    return slots

# Generate the slot data with realistic start and end times
aircraft_slot_data = generate_aircraft_slot_data(250, aircraft_ids=[a[0] for a in aircraft_data])

In [393]:
def generate_flight_data(num_entries=20):
    flights = []
    for _ in range(num_entries):
        flight_status = random.choices(
            ["scheduled", "delayed", "canceled"],
            weights=[80, 15, 5],  # 80% scheduled, 15% delayed, 5% canceled
            k=1
        )[0]
        
        flight = (
            fake.unique.random_int(min=1, max=1000),  # flight_id
            random.choice(airports),  # departure_airport
            random.choice(airports),  # arrival_airport (ensure it’s different from departure)
            random.choice(delay_codes) if flight_status == "delayed" else None,  # delay_code only for delayed flights
            random.randint(0, 250),  # number_of_passengers
            random.randint(0, 10),  # number_of_cabin_crew
            random.randint(0, 5),  # number_of_flight_crew
            flight_status,  # flight_status
            fake.date_this_year(),  # date_of_flight
            fake.time_object(),  # actual_departure_time
            fake.time_object()  # actual_arrival_time
        )
        flights.append(flight)
    return flights

flight_data = generate_flight_data(200)

In [394]:
def generate_flight_slot_data(flight_ids=None, slot_ids=None):
    flight_slots = []
    for flight_id, slot_id in zip(flight_ids, slot_ids):
        flight_slot = (
            slot_id,  # slot_id from AircraftSlot
            flight_id  # flight_id from Flight
        )
        flight_slots.append(flight_slot)
    return flight_slots

flight_slot_data = generate_flight_slot_data(flight_ids=[f[0] for f in flight_data], slot_ids=[s[0] for s in aircraft_slot_data[:len(flight_data)]])

In [395]:
def generate_booking_data(num_entries=30, customer_ids=None):
    generated_ids = set()
    bookings = []
    
    for _ in range(num_entries):
        # Retry to generate a unique booking_id within a limited number of attempts
        for _ in range(100):  
            booking_id = fake.random_int(min=1, max=1000)
            if booking_id not in generated_ids:
                generated_ids.add(booking_id)
                booking = (
                    booking_id,
                    random.choice(customer_ids),
                    random.choice(["Economy", "Business", "First"]),
                    round(random.uniform(50, 1000), 2),
                    fake.boolean(),
                    random.choice(["confirmed", "cancelled", "pending"]),
                    fake.boolean(),
                    random.randint(1, 5),
                    fake.date_time_this_year(),
                    fake.date_time_this_year()
                )
                bookings.append(booking)
                break
        else:
            print("Warning: Could not generate unique booking_id within 100 attempts.")
    return bookings

booking_data = generate_booking_data(300, customer_ids=[c[0] for c in customer_data])

In [396]:
def generate_maintenance_event_data(num_entries=150):
    events = []
    for _ in range(num_entries):
        event = (
            fake.unique.random_int(min=1, max=10000),  # maintenance_event_id
            random.choice(maintenance_types),  # maintenance_type
            random.choice(airports),  # airport
            fake.word()  # subsystem
        )
        events.append(event)
    return events

maintenance_event_data = generate_maintenance_event_data(150)

In [397]:
def generate_trip_data(booking_data, flight_ids):
    trips = []
    existing_pairs = set()  # To track (booking_id, flight_id) pairs and avoid duplicates
    
    for booking in booking_data:
        booking_id = booking[0]
        is_roundtrip = booking[6]  # `is_roundtrip` column
        
        # Generate first leg
        for _ in range(100):  # Retry up to 100 times for uniqueness
            flight_id = random.choice(flight_ids)
            pair = (booking_id, flight_id)
            if pair not in existing_pairs:
                existing_pairs.add(pair)
                trip = (
                    fake.unique.random_int(min=1, max=10000),  # Unique trip_id
                    booking_id,
                    flight_id
                )
                trips.append(trip)
                break
        else:
            print(f"Warning: Could not generate a unique trip for booking_id {booking_id}")

        # Generate return leg if it's a round-trip
        if is_roundtrip:
            for _ in range(100):  # Retry up to 100 times for uniqueness
                return_flight_id = random.choice([fid for fid in flight_ids if fid != flight_id])
                return_pair = (booking_id, return_flight_id)
                if return_pair not in existing_pairs:
                    existing_pairs.add(return_pair)
                    return_trip = (
                        fake.unique.random_int(min=1, max=10000),  # Unique trip_id
                        booking_id,
                        return_flight_id
                    )
                    trips.append(return_trip)
                    break
            else:
                print(f"Warning: Could not generate a unique return trip for booking_id {booking_id}")

    return trips

trip_data = generate_trip_data(booking_data, [f[0] for f in flight_data])

In [398]:
# Function to generate data for ScheduledMaintenanceEvent
def generate_scheduled_maintenance_event_data(maintenance_event_ids, num_entries=10):
    scheduled_events = []
    for event_id in random.sample(maintenance_event_ids, num_entries):
        aos_type = random.choice(["MaintenanceService", "RevisionService"])
        
        # Set duration based on aos_type
        if aos_type == "RevisionService":
            duration = timedelta(hours=random.randint(25, 48))  # More than 24 hours
        else:
            duration = timedelta(hours=random.randint(1, 23))  # Less than 24 hours
        
        scheduled_event = (
            event_id,  # scheduled_maintenance_event_id
            duration,
            aos_type,
            fake.date_time_this_year(),  # forecasted_date
            random.choice(["Weekly", "Monthly", "Quarterly", "Annually"])  # frequency
        )
        scheduled_events.append(scheduled_event)
    return scheduled_events

# Function to generate data for UnscheduledMaintenanceEvent
def generate_unscheduled_maintenance_event_data(maintenance_event_ids, flight_ids, num_entries=10):
    unscheduled_events = []
    for event_id in random.sample(maintenance_event_ids, num_entries):
        oi_type = random.choice(["delayGenerating", "safetyGenerating"])
        
        # Set duration based on oi_type
        duration = None if oi_type == "safetyGenerating" else timedelta(minutes=random.randint(30, 180))
        
        unscheduled_event = (
            event_id,  # unscheduled_maintenance_event_id
            random.choice(flight_ids),  # flight_id
            duration,
            oi_type,
            random.choice(["Pilot", "Maintenance Personnel"]),  # reporter_class
            fake.random_int(min=1, max=1000),  # reporter_id
            fake.date_time_this_year()  # reporting_date
        )
        unscheduled_events.append(unscheduled_event)
    return unscheduled_events

In [399]:
# Adjusted function to generate multiple WorkOrderAOS entries per scheduled event
def generate_work_order_aos_data(scheduled_event_ids, max_orders_per_event=3):
    aos_orders = []
    for event_id in scheduled_event_ids:
        # Generate a random number of work orders for each maintenance event
        num_orders = random.randint(1, max_orders_per_event)
        for _ in range(num_orders):
            aos_order = (
                fake.unique.random_int(min=1, max=10000),  # aos_work_order_id
                event_id,  # scheduled_maintenance_event_id
                random.choice(["Inspection", "Repair", "Servicing"]),  # task_type
                fake.date_time_this_year(),  # execution_date
                random.randint(1, 10),  # number_of_workers
                random.choice(["in-progress", "pending", "completed"])  # status
            )
            aos_orders.append(aos_order)
    return aos_orders

# Adjusted function to generate multiple WorkOrderOI entries per unscheduled event
def generate_work_order_oi_data(unscheduled_event_ids, max_orders_per_event=3):
    oi_orders = []
    for event_id in unscheduled_event_ids:
        # Generate a random number of work orders for each maintenance event
        num_orders = random.randint(1, max_orders_per_event)
        for _ in range(num_orders):
            oi_order = (
                fake.unique.random_int(min=1, max=10000),  # oi_work_order_id
                event_id,  # unscheduled_maintenance_event_id
                fake.word() + " parts",  # required_parts
                random.randint(30, 180),  # estimated_completion_time in minutes
                random.choice(["in-progress", "pending", "completed"])  # status
            )
            oi_orders.append(oi_order)
    return oi_orders

In [400]:
# Define other tables similarly, such as AircraftSlot, MaintenanceEvent, Booking, etc.
def insert_data(query, data):
    try:
        with connection.cursor() as cursor:
            cursor.executemany(query, data)
            connection.commit()
            print(f"{len(data)} records inserted successfully.")
    except Exception as e:
        print(f"Error: {e}")
        connection.rollback()

In [401]:
# Define the insert queries
customer_query = """
    INSERT INTO Customer (customer_id, customer_name, email, phone_number, address)
    VALUES (%s, %s, %s, %s, %s)
"""

aircraft_query = """
    INSERT INTO Aircraft (aircraft_id, aircraft_type, aircraft_company, capacity)
    VALUES (%s, %s, %s, %s)
"""

flight_query = """
    INSERT INTO Flight (flight_id, departure_airport, arrival_airport, delay_code, 
                        number_of_passengers, number_of_cabin_crew, number_of_flight_crew, 
                        flight_status, date_of_flight, actual_departure_time, actual_arrival_time)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

aircraft_slot_query = """
    INSERT INTO AircraftSlot (slot_id, aircraft_id, slot_type, start_time, end_time)
    VALUES (%s, %s, %s, %s, %s);
"""

maintenance_slot_query = """
    INSERT INTO MaintenanceSlot (slot_id, m)
    VALUES (%s, %s, %s, %s, %s);
"""

flight_slots_query = """
    INSERT INTO FlightSlot (slot_id, flight_id)
    VALUES (%s, %s);
"""

booking_query = """
    INSERT INTO Booking (booking_id, customer_id, seat_class, price, 
                         payment_status, booking_status, is_roundtrip, 
                         number_passengers, created_at, updated_at)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

trip_query = """
    INSERT INTO Trip (trip_id, booking_id, flight_id)
    VALUES (%s, %s, %s)
"""

maintenance_event_query = """
    INSERT INTO MaintenanceEvent (maintenance_event_id, maintenance_type, airport, subsystem)
    VALUES (%s, %s, %s, %s)
"""

maintenance_slots_query = """
    INSERT INTO MaintenanceSlot (slot_id, maintenance_event_id, is_scheduled)
    VALUES (%s, %s, %s)
"""

scheduled_maintenance_event_query = """
    INSERT INTO ScheduledMaintenanceEvent (scheduled_maintenance_event_id, duration, aos_type, 
                                           forecasted_date, frequency)
    VALUES (%s, %s, %s, %s, %s)
"""

unscheduled_maintenance_event_query = """
    INSERT INTO UnscheduledMaintenanceEvent (unscheduled_maintenance_event_id, flight_id, duration, 
                                             oi_type, reporter_class, reporter_id, reporting_date)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
"""

work_order_aos_query = """
    INSERT INTO WorkOrderAOS (aos_work_order_id, scheduled_maintenance_event_id, task_type, 
                              execution_date, number_of_workers, status)
    VALUES (%s, %s, %s, %s, %s, %s)
"""

work_order_oi_query = """
    INSERT INTO WorkOrderOI (oi_work_order_id, unscheduled_maintenance_event_id, required_parts, 
                             estimated_completion_time, status)
    VALUES (%s, %s, %s, %s, %s)
"""


# Generate and insert data into tables
insert_data(customer_query, customer_data)
insert_data(aircraft_query, aircraft_data)
insert_data(aircraft_slot_query, aircraft_slot_data)
insert_data(flight_query, flight_data)
insert_data(flight_slots_query, flight_slot_data)
insert_data(booking_query, booking_data)
insert_data(maintenance_event_query, maintenance_event_data)

# Retrieve necessary IDs from MaintenanceEvent and Flight tables
cursor.execute("SELECT maintenance_event_id FROM MaintenanceEvent")
maintenance_event_ids = [row[0] for row in cursor.fetchall()]

cursor.execute("SELECT flight_id FROM Flight")
flight_ids = [row[0] for row in cursor.fetchall()]

# Generate data
scheduled_maintenance_data = generate_scheduled_maintenance_event_data(maintenance_event_ids, num_entries=75)
unscheduled_maintenance_data = generate_unscheduled_maintenance_event_data(maintenance_event_ids, flight_ids, num_entries=75)

# Continue inserting 
insert_data(trip_query, trip_data)
insert_data(scheduled_maintenance_event_query, scheduled_maintenance_data )
insert_data(unscheduled_maintenance_event_query, unscheduled_maintenance_data)


# Retrieve necessary IDs from ScheduledMaintenanceEvent and UnscheduledMaintenanceEvent tables
cursor.execute("SELECT scheduled_maintenance_event_id FROM ScheduledMaintenanceEvent")
scheduled_event_ids = [row[0] for row in cursor.fetchall()]

cursor.execute("SELECT unscheduled_maintenance_event_id FROM UnscheduledMaintenanceEvent")
unscheduled_event_ids = [row[0] for row in cursor.fetchall()]

# Generate data
work_order_aos_data = generate_work_order_aos_data(scheduled_event_ids, max_orders_per_event=3)
work_order_oi_data = generate_work_order_oi_data(unscheduled_event_ids, max_orders_per_event=3)


insert_data(work_order_aos_query, work_order_aos_data )
insert_data(work_order_oi_query, work_order_oi_data)

100 records inserted successfully.
100 records inserted successfully.
250 records inserted successfully.
200 records inserted successfully.
200 records inserted successfully.
300 records inserted successfully.
150 records inserted successfully.
449 records inserted successfully.
75 records inserted successfully.
75 records inserted successfully.
144 records inserted successfully.
149 records inserted successfully.


In [402]:
# Close the connection
connection.close()