In [None]:
from os import getenv
from connect_db import connect_db
# Open the Oracle connection. All settings come from environment variables so
# that no credentials are stored in the notebook itself.
con = connect_db(
    username=getenv("ORACLE_USERNAME"),
    password=getenv("ORACLE_PASSWORD"),
    host=getenv("ORACLE_HOST"),
    # getenv returns None when ORACLE_PORT is unset, in which case int() raises TypeError.
    port=int(getenv("ORACLE_PORT"))
)
# One cursor shared by every query cell below.
cur = con.cursor()

In [None]:
# Presumably the number of seats that share one boarding group — TODO confirm.
# Not referenced by any other cell visible in this notebook; the ticket loop
# currently assigns a random boarding group (see its "fix boarding group" TODO).
SEATS_PER_BOARDING_GROUP = 40

In [None]:
import networkx as nx

# Airport connectivity graph: nodes are airport ids and every ROUTE row adds an
# edge carrying its distance (as the path weight) and its route id.
# NOTE(review): nx.Graph is undirected, so a row for A->B and a row for B->A
# share a single edge and the attributes of whichever row is read last win —
# confirm that routes are meant to be direction-agnostic here.
stmt = "SELECT ROUTE_ID, FROM_AIRPORT_ID, TO_AIRPORT_ID, DISTANCE_IN_KM FROM ROUTE"
graph = nx.Graph()

for route_key, origin_id, destination_id, distance_km in cur.execute(stmt):
    graph.add_edge(origin_id, destination_id, weight=distance_km, route_id=route_key)

In [None]:
import numpy as np
stmt = "SELECT FROM_AIRPORT_ID FROM ROUTE"
# Every airport that is the origin of at least one route. The list is sorted so
# that the positional weights below always attach to the same airport: the
# iteration order of a bare set of strings changes from one interpreter run to
# the next, which silently reshuffled the weights between runs.
AIRPORTS = sorted({row[0] for row in cur.execute(stmt)})

# Relative popularity of each airport, positionally aligned with AIRPORTS.
_AIRPORT_RAW_WEIGHTS = [1, 2, 1, 1, 6, 2, 2, 1, 3, 4, 1, 1, 2, 2, 1, 3, 1]
if len(_AIRPORT_RAW_WEIGHTS) != len(AIRPORTS):
    # Fail here with a clear message instead of later inside np.random.choice,
    # which rejects a probability vector whose length differs from the population.
    raise ValueError(
        f"{len(_AIRPORT_RAW_WEIGHTS)} airport weights are configured but "
        f"{len(AIRPORTS)} airports were found in ROUTE"
    )

# Normalised to probabilities (summing to 1) for use as np.random.choice(p=...).
AIRPORT_WEIGHTS = np.array(_AIRPORT_RAW_WEIGHTS) / sum(_AIRPORT_RAW_WEIGHTS)

In [None]:
from typing import Iterable

def get_random_airports(start_n: int = 1) -> Iterable[str]:
    """Lazily yield a random, duplicate-free selection of airports.

    The selection size starts at ``start_n`` and grows by one for every
    consecutive successful draw, where draw ``k`` (counting from 0) succeeds
    with probability ``1 / 2**k``. Draw 0 therefore always succeeds, because
    ``np.random.randint(1, 2)`` can only return 1. The size never exceeds
    ``len(AIRPORTS)``.

    Airports are then sampled without replacement, weighted by
    ``AIRPORT_WEIGHTS``. Being a generator, nothing is drawn until the result
    is iterated.
    """
    sample_size = start_n
    growth_attempts = len(AIRPORTS) - start_n
    for attempt in range(growth_attempts):
        # randint's upper bound is exclusive, so the roll lies in 1..2**attempt.
        roll = np.random.randint(1, np.power(2, attempt) + 1)
        if roll != 1:
            break
        sample_size += 1

    chosen = np.random.choice(AIRPORTS, size=sample_size, replace=False, p=AIRPORT_WEIGHTS)
    for airport_id in chosen:
        yield airport_id

In [None]:
def get_random_number_of_tickets(max_tickets: int = 10) -> int:
    """Draw how many tickets a booking contains, favouring small numbers.

    Candidate counts are tried in increasing order starting at 0; candidate
    ``k`` is returned as soon as a roll in ``1..2**k`` is not 1. For ``k = 0``
    the roll is always 1, so 0 is never returned from the loop and the result
    is at least 1 whenever ``max_tickets >= 1``. If no candidate below
    ``max_tickets`` is accepted, ``max_tickets`` itself is returned.
    """
    candidate = 0
    while candidate < max_tickets:
        # The upper bound of randint is exclusive: the roll lies in 1..2**candidate.
        upper_exclusive = np.power(2, candidate) + 1
        if np.random.randint(1, upper_exclusive) != 1:
            return candidate
        candidate += 1
    return max_tickets

In [None]:
from collections import defaultdict
# account id -> airports that account travels between, drawn once per account
# with the default get_random_airports() settings.
ACCOUNT_AIRPORT_PREFERENCES: defaultdict[str, list[str]] = defaultdict(list)
stmt = "SELECT ACCOUNT_ID FROM ACCOUNT"

for (account_key,) in cur.execute(stmt):
    ACCOUNT_AIRPORT_PREFERENCES[account_key].extend(get_random_airports())

In [None]:
def get_random_travel_rate():
    """Draw an integer travel rate in 20..99, skewed towards high values.

    The range is split into four bands of 20 consecutive rates (20-39, 40-59,
    60-79, 80-99) that receive 10 %, 10 %, 30 % and 50 % of the probability
    mass respectively, spread uniformly within each band. The result is
    returned as a plain Python int.
    """
    rates = np.arange(20, 100)
    band_mass = (0.1, 0.1, 0.3, 0.5)
    probabilities = np.concatenate([np.ones(20) * mass / 20 for mass in band_mass])
    drawn = np.random.choice(rates, p=probabilities)
    return drawn.item()
# account id -> travel rate, one independent draw for every account that has
# airport preferences.
ACCOUNT_TRAVEL_RATE = {}
for _rated_account in ACCOUNT_AIRPORT_PREFERENCES:
    ACCOUNT_TRAVEL_RATE[_rated_account] = get_random_travel_rate()

In [None]:
from functools import cache
import itertools

@cache
def get_airports_stops(from_airport: str, to_airport: str) -> list[str]:
    """Return the airports on the shortest path between two airports.

    The path is found with Dijkstra's algorithm on the module-level ``graph``.
    Results are memoised per (from, to) pair, so the same list object is handed
    to every caller and must not be mutated.
    """
    shortest_path = nx.dijkstra_path(graph, from_airport, to_airport)
    return shortest_path

@cache
def get_routes(from_airport: str, to_airport: str) -> list[str]:
    """Return the route ids flown, in order, to get from one airport to another.

    Each consecutive pair of stops returned by ``get_airports_stops`` is mapped
    to the ``route_id`` stored on the corresponding graph edge. Results are
    memoised per (from, to) pair; the returned list is shared between callers.
    """
    stops = get_airports_stops(from_airport, to_airport)
    return [
        graph.get_edge_data(leg_start, leg_end)["route_id"]
        for leg_start, leg_end in itertools.pairwise(stops)
    ]

In [None]:
# Base fare charged per kilometre of route distance.
PRICE_PER_KM = 0.238


def calc_flight_price(distance: int, number_of_seats: int):
    """Return the base price of a flight: its distance times ``PRICE_PER_KM``.

    ``number_of_seats`` is accepted but does not influence the result.
    """
    base_price = distance * PRICE_PER_KM
    return base_price

# flight id -> base price, computed from the distance of the flight's route and
# the seat count of the model of the aircraft operating it.
stmt = """SELECT FLIGHT_ID, DISTANCE_IN_KM, NUMBER_OF_SEAT FROM FLIGHT
    JOIN ROUTE ON FLIGHT.ROUTE_ID = ROUTE.ROUTE_ID
    JOIN AIRCRAFT ON FLIGHT.AIRCRAFT_ID = AIRCRAFT.AIRCRAFT_ID
    JOIN AIRCRAFT_MODEL ON AIRCRAFT.AIRCRAFT_MODEL_ID = AIRCRAFT_MODEL.AIRCRAFT_MODEL_ID"""
FLIGHT_PRICE = {}
for priced_flight, route_distance, seat_count in cur.execute(stmt):
    FLIGHT_PRICE[priced_flight] = calc_flight_price(route_distance, seat_count)

In [None]:
import random
@cache
def _seats_of_class(aircraft_id: str, seat_class: str) -> tuple[tuple[str, str], ...]:
    """Query, once per (aircraft, seat class), all seats of that class on the aircraft."""
    stmt = f"""SELECT SEAT_ID, SEAT_CLASS_ID FROM SEAT WHERE AIRCRAFT_ID = '{aircraft_id}' AND SEAT_CLASS_ID = '{seat_class}'"""
    # Materialised into an immutable tuple so the cached value cannot be altered by callers.
    return tuple((row[0], row[1]) for row in cur.execute(stmt))


def find_seats(aircraft_id: str, seat_class_weight: tuple[int, int, int] = (20, 5, 1)) -> list[tuple[str, str]]:
    """Return every seat of one randomly chosen class on the given aircraft.

    The seat class is drawn on *every* call, weighted by ``seat_class_weight``
    in the order (ECON, BUSI, FIRS). Only the database lookup is memoised.
    Previously the whole function was wrapped in ``@cache``, which froze the
    first class drawn for an aircraft for the rest of the run, so every flight
    on that aircraft only ever offered seats of that single class.

    Args:
        aircraft_id: Aircraft whose seats are listed.
        seat_class_weight: Relative weights for ECON, BUSI and FIRS.

    Returns:
        A new list of ``(seat_id, seat_class_id)`` pairs; it is empty when the
        aircraft has no seat of the drawn class.
    """
    seat_class = random.choices(("ECON", "BUSI", "FIRS"), weights=seat_class_weight, k=1)[0]
    return list(_seats_of_class(aircraft_id, seat_class))

In [None]:
from datetime import datetime, timedelta
# Lookup tables loaded once from FLIGHT so the simulation does not query per booking.

# route id -> ids of the flights operating that route.
ROUTE_TO_FLIGHT: defaultdict[str, list[str]] = defaultdict(list)
stmt = "SELECT FLIGHT_ID, ROUTE_ID FROM FLIGHT"
for flight_key, route_key in cur.execute(stmt):
    ROUTE_TO_FLIGHT[route_key].append(flight_key)

# flight id -> departure time.
stmt = "SELECT FLIGHT_ID, DEPARTURE_DATETIME FROM FLIGHT"
FLIGHT_DATETIME: dict[str, datetime] = dict(cur.execute(stmt))

# flight id -> estimated duration in hours.
stmt = "SELECT FLIGHT_ID, EST_DURATION_IN_HOUR FROM FLIGHT"
FLIGHT_DURATION: dict[str, float] = dict(cur.execute(stmt))

# flight id -> aircraft operating the flight.
stmt = "SELECT FLIGHT_ID, AIRCRAFT_ID FROM FLIGHT"
FLIGHT_AIRCRAFT: dict[str, str] = dict(cur.execute(stmt))

# flight id -> seat ids already reserved by the simulation.
USED_SEAT: defaultdict[str, set[str]] = defaultdict(set)

def _flights_departing_after(threshold: datetime, flight_ids) -> list[str]:
    """Return the given flights that depart strictly after ``threshold``, earliest first."""
    later = [
        flight_id for flight_id in flight_ids
        if flight_id in FLIGHT_DATETIME and FLIGHT_DATETIME[flight_id] > threshold
    ]
    later.sort(key=FLIGHT_DATETIME.__getitem__)
    return later


def _reserve_seats(flight_id: str, n_tickets: int) -> tuple[str, datetime, list[str], list[str]] | None:
    """Reserve ``n_tickets`` free seats on one flight, or return None if too few are free."""
    seat_classes = dict(find_seats(FLIGHT_AIRCRAFT[flight_id]))
    free_seats = seat_classes.keys() - USED_SEAT[flight_id]
    if len(free_seats) < n_tickets:
        return None

    # Sorted so the pick is reproducible; plain set order differs between runs.
    seats = sorted(free_seats)[:n_tickets]
    USED_SEAT[flight_id].update(seats)
    free_again_at = FLIGHT_DATETIME[flight_id] + timedelta(hours=FLIGHT_DURATION[flight_id] + 12)
    return flight_id, free_again_at, seats, [seat_classes[seat] for seat in seats]


def find_closest_flight(current_dt: datetime, route_id: str, n_tickets: int, dt: datetime) -> tuple[str, datetime, list[str], list[str]] | None:
    """Reserve seats on the earliest suitable flight and return the booking.

    Flights are searched in three passes, each ordered by departure time, and
    the first flight with at least ``n_tickets`` free seats (in the class drawn
    by ``find_seats``) is booked:

    1. flights on ``route_id`` departing after ``current_dt``;
    2. any flight departing after ``current_dt``, regardless of route;
    3. any flight departing after ``dt``, regardless of route.

    Compared with the earlier implementation, candidates are now tried in
    departure order instead of dictionary order, seats are picked in sorted
    order instead of arbitrary set order, and the first pass scans only the
    route's own flights.

    Args:
        current_dt: Earliest acceptable departure for passes 1 and 2.
        route_id: Route preferred in the first pass.
        n_tickets: Number of seats to reserve on the same flight.
        dt: Earliest acceptable departure for the last-resort pass.

    Returns:
        ``(flight_id, departure + estimated duration + 12 hours, seat_ids,
        seat_class_ids)`` with the two lists aligned, or ``None`` when no
        flight can take the whole party. The seats are recorded in
        ``USED_SEAT`` as a side effect; ``cancel_seat`` undoes that.
    """
    searches = (
        (current_dt, ROUTE_TO_FLIGHT.get(route_id, ())),
        (current_dt, FLIGHT_DATETIME),
        (dt, FLIGHT_DATETIME),
    )
    for earliest_departure, flight_ids in searches:
        for flight_id in _flights_departing_after(earliest_departure, flight_ids):
            booking = _reserve_seats(flight_id, n_tickets)
            if booking is not None:
                return booking
    return None
    
def cancel_seat(flight_id: str, seat_ids: list[str]):
    """Release reserved seats on a flight; seats that are not reserved are ignored."""
    for released_seat in seat_ids:
        # discard() is remove() without the KeyError for a missing element.
        USED_SEAT[flight_id].discard(released_seat)

In [None]:
# account id -> earliest time the account may book again; starts at the
# account's join time and is pushed forward by the booking loop.
stmt = "SELECT ACCOUNT_ID, JOINED_DATETIME FROM ACCOUNT"
ACCOUNT_WAIT_UNTIL: dict[str, datetime] = {
    account_key: joined_at for account_key, joined_at in cur.execute(stmt)
}

In [None]:
# account id -> special service as a ready-to-embed SQL literal. Accounts
# without an entry fall back to the bare keyword NULL.
ACCOUNT_SPECIAL_SERVICE: defaultdict[str, str] = defaultdict(lambda: "NULL")
stmt = "SELECT ACCOUNT_ID FROM ACCOUNT"

# (inclusive upper bound of the roll, SQL literal); the first matching band wins.
_SPECIAL_SERVICE_BANDS = (
    (1, "'VVIP'"),
    (200, "'DISA'"),
    (400, "'ENTE'"),
    (600, "'WIFI'"),
)
for (account_key,) in cur.execute(stmt):
    # The upper bound of randint is exclusive, so the roll lies in 1..999;
    # rolls above 600 leave the account without a special service.
    roll = np.random.randint(1, 1000)
    for upper_bound, service_literal in _SPECIAL_SERVICE_BANDS:
        if roll <= upper_bound:
            ACCOUNT_SPECIAL_SERVICE[account_key] = service_literal
            break

In [None]:
def get_payment_method() -> str:
    """Draw a payment method id at random.

    CRCD is chosen with weight 0.3, PYPL with 0.1, and DBCD, BANK and CASH
    with 0.2 each.
    """
    method_ids = ("CRCD", "DBCD", "PYPL", "BANK", "CASH")
    method_weights = [0.3, 0.2, 0.1, 0.2, 0.2]
    (chosen,) = random.choices(method_ids, weights=method_weights, k=1)
    return chosen

In [None]:
from collections import Counter

# Next payment sequence number; the booking loop decrements it again when a
# booking is abandoned.
payment_i = 0


def PAYMENT_ID_GEN():
    """Return the next payment id: ``P`` followed by the zero-padded 8-digit counter."""
    global payment_i
    current = payment_i
    payment_i = current + 1
    return f"P{current:08}"

# "YYMM" -> next ticket sequence number within that month.
flight_ticket_i: Counter = Counter()


def TICKET_ID_GEN(dt: datetime):
    """Return the next ticket id for the month of ``dt``.

    The id is ``SFS`` + two-digit year and month + a zero-padded 6-digit
    sequence number that counts up separately for every month.
    """
    month_key = f"{dt:%y%m}"
    sequence_no = flight_ticket_i[month_key]
    flight_ticket_i[month_key] = sequence_no + 1
    return f"SFS{month_key}{sequence_no:06}"

In [None]:
from utils import paginate_insert_all

# Row templates: every formatted line is one "INTO <table> ... VALUES ..." clause.
# Ticket id, payment id, payment method and seat id are quoted here; account id, flight id,
# boarding group and special service are interpolated as-is (the special-service value
# already is a SQL literal, either a quoted code or NULL).
# NOTE(review): payment_fmt carries no PAYMENT_ID column although FLIGHT_TICKET rows reference
# the ids produced by PAYMENT_ID_GEN — confirm how the database assigns matching payment ids.
flight_ticket_fmt = "    INTO FLIGHT_TICKET (FLIGHT_TICKET_ID, ACCOUNT_ID, PAYMENT_ID, CREATED_AT, BOARDING_GROUP, TICKET_PRICE, TICKET_STATUS, SPECIAL_SERVICE_ID) VALUES ('{}', {}, '{}', TO_TIMESTAMP('{:%Y-%m-%d %H:%M:%S}', 'YYYY-MM-DD HH24:MI:SS'), {}, {:.2f}, 'Completed', {})\n"
flight_sequence_fmt = "    INTO FLIGHT_SEQUENCE (FLIGHT_TICKET_ID, FLIGHT_ID, SEAT_ID, FLIGHT_SEQUENCE) VALUES ('{}', {}, '{}', {})\n"
payment_fmt = "    INTO PAYMENT (PAYMENT_METHOD_ID, ACCOUNT_ID, CREATED_AT, AMOUNT) VALUES ('{}', {}, TO_TIMESTAMP('{:%Y-%m-%d %H:%M:%S}', 'YYYY-MM-DD HH24:MI:SS'), {:.2f})\n"

# Simulation clock. It is also written as CREATED_AT of every ticket and payment.
dt = datetime(2023, 1, 1)
with open("flight_ticket.sql", "w") as flight_ticket_file, \
    open("flight_seq.sql" , "w") as flight_sequence_file, \
    open("payment.sql", "w") as payment_file:
    
    # Raw writers: each appends one formatted row to its output file.
    def insert_flight_ticket(ticket_id: str, account_id: str, payment_id: str, timestamp_created: datetime, boarding_group: int, ticket_price: float, special_service_id: str):
        flight_ticket_file.write(flight_ticket_fmt.format(ticket_id, account_id, payment_id, timestamp_created, boarding_group, ticket_price, special_service_id))
    
    def insert_flight_sequence(ticket_id: str, flight_id: str, seat_id: str, route_seq: int):
        flight_sequence_file.write(flight_sequence_fmt.format(ticket_id, flight_id, seat_id, route_seq))
        
    def insert_payment(payment_method_id: str, account_id: str, timestamp_created: datetime, payment_amount: float):
        payment_file.write(payment_fmt.format(payment_method_id, account_id, timestamp_created, payment_amount))
    
    # NOTE(review): paginate_insert_all is defined in utils and not visible here; presumably it
    # wraps each writer so that rows are grouped into INSERT ALL statements — confirm.
    insert_flight_ticket = paginate_insert_all(f=insert_flight_ticket, fp=flight_ticket_file)
    insert_flight_sequence = paginate_insert_all(f=insert_flight_sequence, fp=flight_sequence_file)
    insert_payment = paginate_insert_all(f=insert_payment, fp=payment_file)
    
    try:
        # Run the clock forward until the fixed end of the simulated period.
        while dt < datetime(2024, 4, 19, 15):
            # Only accounts whose wait-until time lies before the current clock are eligible.
            accounts_to_select = filter(lambda t: dt > t[1], ACCOUNT_WAIT_UNTIL.items())
            
            accounts_selected = []
            for account, _ in accounts_to_select:
                # The roll lies in 1..(100 - rate), so an account books with probability
                # 1 / (100 - rate): the higher the travel rate, the more often it is picked.
                if np.random.randint(1, 101 - ACCOUNT_TRAVEL_RATE[account]) == 1:
                    accounts_selected.append(account)
        
            for account_selected in accounts_selected:
                # Ids are taken up front and handed back below if the booking is abandoned.
                payment_id = PAYMENT_ID_GEN()
                payment_amt = 0
                
                airports_to_choose = set(ACCOUNT_AIRPORT_PREFERENCES[account_selected])
                # One booking in ten additionally considers freshly drawn random airports.
                if random.randint(1, 10) == 1:
                    for airport in get_random_airports(0):
                        airports_to_choose.add(airport)
                start_place, end_place = random.sample(list(airports_to_choose), k=2)
                routes = get_routes(start_place, end_place)
                # Presumably the FLIGHT_SEQUENCE layout for 2 tickets over 2 legs
                # (ticket number, leg number, seat) — TODO confirm:
                # 1 1 S1
                # 2 1 S2
                # 1 2 S3
                # 2 2 S4
                n_tickets = get_random_number_of_tickets()
                ticket_ids = []
                ticket_prices = []
                for n in range(n_tickets):
                    ticket_ids.append(TICKET_ID_GEN(dt))
                    ticket_prices.append(0)
        
                cancel = False
                # One find_closest_flight result per leg, in travel order (None marks a failed leg).
                temps = []
                # NOTE(review): the search starts at the account's wait-until time, which is earlier
                # than dt for a selected account, so a flight departing before this booking's
                # CREATED_AT can be chosen — confirm that this is intended.
                wait_until = ACCOUNT_WAIT_UNTIL[account_selected]
                for route in routes:
                    temp = find_closest_flight(wait_until, route, n_tickets, dt)
                    temps.append(temp)
                    if temp is None:
                        cancel = True
                        break
                    # The next leg is searched from the time returned for this leg.
                    wait_until = temp[1]
        
                if cancel or not temps:
                    # Abandon the booking: release the seats already reserved for earlier legs
                    # and give back the payment id and ticket ids taken above.
                    for temp in temps: 
                        if temp:
                            flight_id, _, seat_ids, _ = temp
                            cancel_seat(flight_id, seat_ids)
                    payment_i -= 1
                    for _ in ticket_ids:
                        flight_ticket_i[f"{dt:%y%m}"] -= 1
                    continue
                # Progress/debug output: one line per successful booking.
                print(account_selected, temps)
                for route_seq, (flight_id, wait_until, seat_ids, seat_classes) in enumerate(temps, start=1):
                    # After the loop this holds the time returned for the last leg.
                    ACCOUNT_WAIT_UNTIL[account_selected] = wait_until
        
                    flight_price = FLIGHT_PRICE[flight_id]
        
                    # Ticket i takes the i-th reserved seat on every leg.
                    for i, (ticket_id, seat_id, seat_class) in enumerate(zip(ticket_ids, seat_ids, seat_classes)):
                        insert_flight_sequence(ticket_id, flight_id, seat_id, route_seq)
        
                        # Price multiplier by seat class: 1 by default, 2 for BUSI, 3 for FIRS.
                        seat_rate = 1
                        if seat_class == "BUSI":
                            seat_rate = 2
                        elif seat_class == "FIRS":
                            seat_rate = 3
        
                        ticket_prices[i] += flight_price * seat_rate
                        payment_amt += flight_price * seat_rate
                        
                for ticket_id, ticket_price in zip(ticket_ids, ticket_prices):
                    insert_flight_ticket(ticket_id, account_selected, payment_id, dt, random.randint(1, 10), ticket_price, ACCOUNT_SPECIAL_SERVICE[account_selected])  # TODO: fix boarding group
        
                # A single payment covers all tickets of the booking.
                insert_payment(get_payment_method(), account_selected, dt, payment_amt)
                
                # Successful bookings are spaced a few minutes apart.
                dt += timedelta(minutes=random.randint(1, 5), seconds=random.randint(1, 60))
            # Gap of up to about two hours before the next selection round.
            dt += timedelta(minutes=random.randint(1, 120), seconds=random.randint(1, 60))
    except KeyboardInterrupt:
        # Allow a manual stop; the files are still terminated in the finally block.
        print("stopping")
    finally:
        # Written to every file on both normal completion and interruption; presumably the
        # subquery that closes the last open INSERT ALL statement — confirm against utils.
        for fp in (flight_ticket_file, flight_sequence_file, payment_file):
            fp.write("SELECT 1 FROM DUAL;\n")

In [None]:
# Close the database connection opened in the first cell.
con.close()