In [156]:
import pandas as pd
import pytz

from common_utils.utils import get_json_obj
from haversine import haversine, Unit
from datetime import datetime, timedelta


In [157]:
airports_geometry = get_json_obj("/home/magnus9102/Mostafa/Py/Github/data-science/mostafa_vahdani_bachelor_project/source_codes/data/static/airports_geometry.json")

In [158]:
df = pd.read_csv("/home/magnus9102/Mostafa/Py/Github/data-science/mostafa_vahdani_bachelor_project/data/raw/flight_tickets_dataset.csv")

In [171]:
airport_name1, coord1 = airports_geometry["KIH"]["aeroway_name"], airports_geometry["KIH"]["geometry"]


In [172]:
airport_name2, coord2 = airports_geometry["TBZ"]["aeroway_name"], airports_geometry["TBZ"]["geometry"]


In [173]:
coord1, coord2

([26.5299696, 53.975013], [38.1355697, 46.2460114])

In [174]:
airport_name1, airport_name2

('Kish International Airport', 'Tabriz International Airport')

In [177]:
estimate_great_distance = haversine(coord1, coord2, Unit.KILOMETERS, normalize=True)

# 10 to 20 min
max_landing_hour_time = 10 / 60
# 10 to 30 min
max_hour_time_to_cruise_from_take_off = 10 / 60
# 15 to 45 min
max_flight_delay_time = 30 / 60
# max -> 50km
airplane_route_average_changes_km = 50
# speed of airplane on cruise -> 885.13 to 965.6
v_km = 885.13  # 885.13 and 965.6 kmph

time_t = ((estimate_great_distance + airplane_route_average_changes_km) / v_km) + max_landing_hour_time + \
    max_hour_time_to_cruise_from_take_off + max_flight_delay_time

hour_t = int(time_t)
min_t = int((time_t - hour_t) * 60)

max_flight_length = f"duration: {hour_t} H - {min_t} Min"

estimate_great_distance, max_flight_length

(1479.3508295325785, 'duration: 2 H - 33 Min')

In [164]:
departure_timezone = pytz.timezone('Asia/Tehran')
arrival_timezone = pytz.timezone('Asia/Dubai')

departure_time = datetime(2023, 8, 29, 15, 50, tzinfo=departure_timezone)

# Calculate estimated arrival time (e.g., 6 hours flight duration)
estimated_arrival_time = departure_time + timedelta(hours=time_t)

time_difference = estimated_arrival_time - departure_time

converted_departure_datetime = departure_time.astimezone(departure_timezone)
converted_arrival_datetime = estimated_arrival_time.astimezone(arrival_timezone)


In [165]:
estimate_distance, time_t, estimated_arrival_time, time_difference, converted_departure_datetime, converted_arrival_datetime


(175.21575764813966,
 2.8891794825396606,
 datetime.datetime(2023, 8, 29, 18, 43, 21, 46137, tzinfo=<DstTzInfo 'Asia/Tehran' LMT+3:26:00 STD>),
 datetime.timedelta(seconds=10401, microseconds=46137),
 datetime.datetime(2023, 8, 29, 15, 50, tzinfo=<DstTzInfo 'Asia/Tehran' LMT+3:26:00 STD>),
 datetime.datetime(2023, 8, 29, 19, 17, 21, 46137, tzinfo=<DstTzInfo 'Asia/Dubai' +04+4:00:00 STD>))

In [166]:
def difference_drop(df, *args):
    """difference drop with column names that you will give on *args"""
    return df.drop(columns=df.columns.difference([*args]), axis=1)


In [167]:
df2 = difference_drop(df, "national_departure_code", "national_arrival_code")


In [169]:
def add_orthodromic_distance_col(row, airports_geometry_dict):
    departure_airport_code = row["national_departure_code"]
    arrival_airport_code = row["national_arrival_code"]

    departure_airport_coordinate = airports_geometry_dict[departure_airport_code]["geometry"]
    arrival_airport_coordinate = airports_geometry_dict[arrival_airport_code]["geometry"]

    return round(haversine(departure_airport_coordinate, 
                     arrival_airport_coordinate, 
                     Unit.KILOMETERS, 
                     
                     normalize=True))


df["distance_KM"] = df2.apply(func=add_orthodromic_distance_col,
                           args=(airports_geometry,),
                           axis=1)


In [170]:
df["distance_KM"].head(5)

0     538
1    1057
2    1057
3     570
4    1057
Name: distance_KM, dtype: int64

In [40]:
df.to_csv("/home/magnus9102/Mostafa/Py/Github/data-science/mostafa_vahdani_bachelor_project/data/processed/flight_tickets_dataset.csv")