In [1]:
pip install pandas openpyxl faker

Collecting faker
  Downloading faker-37.8.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.8.0-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ---------------- ----------------------- 0.8/2.0 MB 7.2 MB/s eta 0:00:01
   ---------------------------------------- 2.0/2.0 MB 6.1 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.8.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from faker import Faker
import pandas as pd
import random
from datetime import datetime, timedelta

In [4]:

fake = Faker()

# 1. Generate Passengers
def generate_passengers(n=300):
    passengers = []
    for i in range(1, n+1):
        passengers.append({
            "PassengerID": f"PSG{i:05d}",
            "FullName": fake.name(),
            "PassportNumber": fake.unique.bothify(text='??######'),
            "Nationality": fake.country(),
            "DOB": fake.date_of_birth(minimum_age=18, maximum_age=80).strftime('%Y-%m-%d')
        })
    return pd.DataFrame(passengers)

# 2. Generate Flights
def generate_flights(n=300):
    flights = []
    for i in range(1, n+1):
        flights.append({
            "FlightID": f"FLT{i:03d}",
            "FlightNumber": fake.unique.bothify(text='AI###'),
            "Origin": fake.city(),
            "Destination": fake.city(),
            "DurationMinutes": random.randint(60, 720)
        })
    return pd.DataFrame(flights)

# 3. Generate Aircrafts
def generate_aircrafts(n=300):
    aircrafts = []
    for i in range(1, n+1):
        aircrafts.append({
            "AircraftID": f"ACF{i:03d}",
            "Model": fake.bothify(text='Airbus A###'),
            "Capacity": random.randint(100, 300),
            "Manufacturer": random.choice(['Airbus', 'Boeing', 'Embraer', 'Bombardier'])
        })
    return pd.DataFrame(aircrafts)

# 4. Generate FlightSchedules
def generate_flight_schedules(n=300, flights_df=None, aircrafts_df=None):
    schedules = []
    for i in range(1, n+1):
        flight = flights_df.sample(1).iloc[0]
        aircraft = aircrafts_df.sample(1).iloc[0]
        departure = fake.date_time_between(start_date='-1y', end_date='now')
        duration_minutes = int(flight['DurationMinutes'])  # Fix: Convert numpy.int64 to int
        arrival = departure + timedelta(minutes=duration_minutes)
        schedules.append({
            "ScheduleID": f"SCH{i:04d}",
            "FlightID": flight['FlightID'],
            "AircraftID": aircraft['AircraftID'],
            "DepartureDateTime": departure.strftime('%Y-%m-%d %H:%M:%S'),
            "ArrivalDateTime": arrival.strftime('%Y-%m-%d %H:%M:%S')
        })
    return pd.DataFrame(schedules)

# 5. Generate Reservations
def generate_reservations(n=300, passengers_df=None, schedules_df=None):
    reservations = []
    for i in range(1, n+1):
        passenger = passengers_df.sample(1).iloc[0]
        schedule = schedules_df.sample(1).iloc[0]
        booking_date = fake.date_between(start_date='-1y', end_date='today')
        reservations.append({
            "ReservationID": f"RSV{i:05d}",
            "PassengerID": passenger['PassengerID'],
            "ScheduleID": schedule['ScheduleID'],
            "FlightID": schedule['FlightID'],  # ✅ Add FlightID from schedule
            "SeatNumber": f"{random.randint(1, 30)}{random.choice(['A','B','C','D','E','F'])}",
            "BookingDate": booking_date.strftime('%Y-%m-%d'),
            "Class": random.choice(['Economy', 'Business', 'First'])
        })
    return pd.DataFrame(reservations)


# 6. Generate Payments
def generate_payments(n=300, reservations_df=None):
    payments = []
    for i in range(1, n+1):
        reservation = reservations_df.sample(1).iloc[0]
        booking_date = datetime.strptime(reservation['BookingDate'], '%Y-%m-%d')
        payment_date = fake.date_time_between(start_date=booking_date, end_date='now')
        amount = round(random.uniform(50, 1500), 2)
        payments.append({
            "PaymentID": f"PAY{i:05d}",
            "ReservationID": reservation['ReservationID'],
            "AmountPaid": amount,
            "PaymentMode": random.choice(['Credit Card', 'Debit Card', 'UPI', 'Cash']),
            "PaymentDate": payment_date.strftime('%Y-%m-%d %H:%M:%S')
        })
    return pd.DataFrame(payments)

# Generate all data
passengers_df = generate_passengers()
flights_df = generate_flights()
aircrafts_df = generate_aircrafts()
schedules_df = generate_flight_schedules(flights_df=flights_df, aircrafts_df=aircrafts_df)
reservations_df = generate_reservations(passengers_df=passengers_df, schedules_df=schedules_df)
payments_df = generate_payments(reservations_df=reservations_df)

print("Data is sucessfully created")

Data is sucessfully created


In [6]:
# Preview (optional)
print(passengers_df)
print(flights_df.head())
print(aircrafts_df.head())
print(schedules_df.head())
print(reservations_df.head())
print(payments_df.head())

    PassengerID                  FullName PassportNumber  \
0      PSG00001           Patricia Wright       Sa148158   
1      PSG00002            Tabitha Booker       vv566980   
2      PSG00003  Mrs. Jennifer Mccullough       Vj777163   
3      PSG00004            Scott Benjamin       KS295943   
4      PSG00005            Margaret Allen       Sk082726   
..          ...                       ...            ...   
295    PSG00296               George Barr       xB520859   
296    PSG00297             Michael Gomez       bU655780   
297    PSG00298        Steven Christensen       Di566429   
298    PSG00299               Jacob Moore       Or852606   
299    PSG00300          Laura Villarreal       WJ514570   

                     Nationality         DOB  
0                       Bulgaria  2003-11-07  
1                        Comoros  1986-07-13  
2                   Cook Islands  1984-07-26  
3                           Oman  1948-09-01  
4                  Cote d'Ivoire  1965-01-25

In [7]:
# Save DataFrames to CSV files
passengers_df.to_csv('passengers.csv',index=False)
flights_df.to_csv('flights.csv',index=False)
aircrafts_df.to_csv('aircrafts.csv',index=False)
schedules_df.to_csv('schedules.csv',index=False)
reservations_df.to_csv('reservations.csv',index=False)
payments_df.to_csv('payments.csv',index=False)

print("All CSV files have been saved successfully.")


All CSV files have been saved successfully.
