In [179]:
import pandas as pd
from pathlib import Path

In [180]:
# Load the aircraft table, rename `aircraft_code`/`model` to
# `iata_name`/`name`, then append a random 70% sample of its own rows
# (unseeded, so the duplicated rows differ between runs).
airafts_data = (
    pd.read_csv("../data/aircrafts.csv")
    .rename(columns={"aircraft_code": "iata_name", "model": "name"})
    .pipe(
        lambda frame: pd.concat(
            [frame, frame.sample(frac=0.7)], ignore_index=True
        )
    )
)
# Sequential 1-based identifier for every row, duplicates included.
airafts_data["id"] = list(range(1, len(airafts_data) + 1))
airafts_data.head()

Unnamed: 0,iata_name,name,count,id
0,733,Боинг 737-300,130,1
1,773,Боинг 777-300,402,2
2,321,Аэробус A321-200,170,3
3,319,Аэробус A319-100,116,4
4,CR2,Бомбардье CRJ-200,50,5


In [181]:
# Make sure the output directory exists before writing. `parents=True`
# also creates missing intermediate directories, and `exist_ok=True`
# keeps the cell re-runnable.
results_dir = Path("../data/results")
results_dir.mkdir(parents=True, exist_ok=True)
# Persist the augmented aircraft table without the pandas row index.
airafts_data.to_csv(results_dir / "aircrafts.csv", index=False)

In [182]:
# Print every generated aircraft id, one per line.
# The loop variable is intentionally not named `id`: a top-level loop
# variable stays bound in the notebook namespace after the cell runs and
# would shadow the builtin `id()` for all later cells.
for aircraft_id in airafts_data["id"]:
    print(aircraft_id)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15


In [183]:
import datetime
import random
import re
from zoneinfo import ZoneInfo


def __generate_dates_for_flight_num(
    min_year: int,
    max_year: int = 2015,
    timezone: str = "Asia/Yekaterinburg",
) -> list[datetime.datetime]:
    """Generate a chronologically ordered list of random period boundaries.

    The span ``[min_year, max_year)`` is split into consecutive windows of
    ``max(3, (max_year - min_year) // 3)`` years, and one random midnight
    date (day of month 1-28) is drawn inside each window. For every date
    except the first, the day before it is inserted ahead of it, and a
    far-future sentinel (9999-12-31 23:59:59) closes the list::

        [d0, d1 - 1 day, d1, d2 - 1 day, d2, ..., sentinel]

    Consecutive pairs ``(res[0], res[1]), (res[2], res[3]), ...`` therefore
    form non-overlapping ``(start, end)`` intervals.

    Args:
        min_year: First year (inclusive) a random date may fall in.
        max_year: Year bound (exclusive) for the random dates.
        timezone: IANA time zone name attached to every datetime.

    Returns:
        Time-zone-aware datetimes in strictly increasing order. If
        ``min_year >= max_year`` only the sentinel is returned.
    """
    tz = ZoneInfo(timezone)
    step = max(3, (max_year - min_year) // 3)

    res = []
    for period_start in range(min_year, max_year, step):
        # `random.randint` includes both endpoints, so the upper bound is
        # the last year of this window, clamped below `max_year`. Windows
        # must not share a boundary year: otherwise a later date could
        # precede an earlier one and an interval would end before it
        # starts.
        last_year = min(period_start + step, max_year) - 1
        date = datetime.datetime(
            random.randint(period_start, last_year),
            random.randint(1, 12),
            # Days are capped at 28 so every month is valid.
            random.randint(1, 28),
            tzinfo=tz,
        )
        if res:
            # Close the previous interval the day before the new one opens.
            res.append(date - datetime.timedelta(days=1))
        res.append(date)

    # Open-ended sentinel closing the last interval.
    res.append(datetime.datetime(9999, 12, 31, 23, 59, 59, tzinfo=tz))
    return res

In [184]:
import rstr


def generate_aircraft_number_story(
    aircraft_id: str,
    pattern: str,
    min_year: int,
    airlines: pd.DataFrame,
    max_year: int = 2015,
    timezone: str = "Asia/Yekaterinburg",
) -> pd.DataFrame:
    """Build a random registration-number history for one aircraft.

    The date list from ``__generate_dates_for_flight_num`` is consumed in
    consecutive pairs (element 0 with 1, 2 with 3, ...). Each pair becomes
    one row with a freshly generated number and a randomly picked airline.

    Args:
        aircraft_id: Identifier of the aircraft; stored as ``str``.
        pattern: Regular expression passed to ``rstr.xeger`` to generate
            each aircraft number.
        min_year: Forwarded to the date generator.
        airlines: Frame with an ``iata_name`` column to sample from.
        max_year: Forwarded to the date generator.
        timezone: Forwarded to the date generator.

    Returns:
        DataFrame with columns ``aircraft_id``, ``aircraft_num``,
        ``registration_date``, ``deregistration_date`` and
        ``iata_airlines``.
    """
    boundaries = __generate_dates_for_flight_num(
        min_year, max_year, timezone
    )

    # Column name -> list of values; insertion order fixes the column order.
    columns = {
        "aircraft_id": [],
        "aircraft_num": [],
        "registration_date": [],
        "deregistration_date": [],
        "iata_airlines": [],
    }

    # Even-indexed entries open a period, odd-indexed entries close it.
    for registered, deregistered in zip(boundaries[0::2], boundaries[1::2]):
        columns["aircraft_id"].append(str(aircraft_id))
        columns["aircraft_num"].append(rstr.xeger(string_or_regex=pattern))
        random_airline = airlines.sample(n=1)
        columns["iata_airlines"].append(random_airline["iata_name"].iloc[0])
        columns["registration_date"].append(registered)
        columns["deregistration_date"].append(deregistered)

    return pd.DataFrame(columns)

In [185]:
def __get_aircraft_number_story(
    id_series: pd.Series, airlines: pd.DataFrame
) -> pd.DataFrame:
    """Generate and stack registration-number histories for many aircraft.

    Calls ``generate_aircraft_number_story`` once per id with years
    2000-2015 and a pattern producing one of three number shapes:
    ``X-XXXX``, ``XX-XXX`` or ``N`` + three digits + three letters.

    Args:
        id_series: Aircraft ids to generate a history for.
        airlines: Frame with an ``iata_name`` column, forwarded to the
            per-aircraft generator.

    Returns:
        All per-aircraft frames concatenated in input order. The row index
        is not reset, so each aircraft's rows keep their own 0-based index.
        An empty ``id_series`` yields an empty DataFrame.
    """
    pattern = r"^[A-Z]-[A-Z]{4}|[A-Z]{2}-[A-Z]{3}|N[0-9]{3}[A-Z]{3}$"

    # Collect the frames first and concatenate once: calling `pd.concat`
    # inside the loop re-copies the accumulated rows on every iteration.
    stories = [
        generate_aircraft_number_story(
            aircraft_id,
            pattern=pattern,
            min_year=2000,
            max_year=2015,
            airlines=airlines,
        )
        for aircraft_id in id_series
    ]

    # `pd.concat` raises on an empty list, so handle that case explicitly.
    if not stories:
        return pd.DataFrame()
    return pd.concat(stories)

In [186]:
# Load the airline table, then build a registration-number history for
# every aircraft id.
airlines = pd.read_csv("../data/results/airlines.csv")
aircraft_number_story = __get_aircraft_number_story(
    id_series=airafts_data["id"],
    airlines=airlines,
)

In [188]:
# Write the generated number histories to disk without the row index.
aircraft_number_story_path = "../data/results/aircrafts_number.csv"
aircraft_number_story.to_csv(aircraft_number_story_path, index=False)