## TODO: distinguish return journeys

In [129]:
import geopandas as gpd
import pandas as pd
from geographiclib.geodesic import Geodesic
from shapely.geometry import MultiLineString
from shapely.geometry import Point

import folium

In [130]:
# Airport table: orient='index' turns every top-level JSON key into a row label.
airline_routes = pd.read_json(
    './data/airline-route-data/airline_routes.json',
    orient='index',
)
airline_routes.head()

Unnamed: 0,city_name,continent,country,country_code,display_name,elevation,iata,icao,latitude,longitude,name,routes,timezone
AAA,Anaa,OC,French Polynesia,PF,"Anaa (AAA), French Polynesia",23.0,AAA,NTGA,-17.355648,-145.50913,Anaa Airport,"[{'carriers': [{'iata': 'VT', 'name': 'Air Tah...",Pacific/Tahiti
AAD,Adado,AF,Somalia,SO,"Adado (AAD), Somalia",1005.0,AAD,,6.095833,46.6375,Adado Airport,"[{'carriers': [], 'iata': 'MGQ', 'km': 474, 'm...",Africa/Mogadishu
AAE,Annaba,AF,Algeria,DZ,"Annaba (AAE), Algeria",16.0,AAE,DABB,36.821392,7.811857,Annaba,"[{'carriers': [{'iata': 'AH', 'name': 'Air Alg...",Africa/Algiers
AAK,Aranuka,OC,Kiribati,KI,"Aranuka (AAK), Kiribati",0.0,AAK,NGUK,0.166667,173.583333,Aranuka Airport,"[{'carriers': [{'iata': 'IK', 'name': 'Air Kir...",Pacific/Tarawa
AAL,Aalborg,EU,Denmark,DK,"Aalborg (AAL), Denmark",10.0,AAL,EKYT,57.086551,9.872241,Aalborg,"[{'carriers': [{'iata': 'D8', 'name': 'Norwegi...",Europe/Copenhagen


In [131]:
# Personal flight log, one row per flight.
my_flights_df = pd.read_excel(
    './data/Flight History.xlsx',
)
my_flights_df.head()

Unnamed: 0,Date,Flight Number,Departure,Arrival,Aircraft,Aircraft Registration,Class
0,2015-07-28,ZH9250,XIY,SZX,Airbus A320,B6740,Economy
1,2015-08-01,ZH9249,SZX,XIY,Airbus A320,B6570,Economy
2,2019-07-25,JD5343,XIY,DLC,Airbus A319,B6245,Economy
3,2019-07-30,JD5344,DLC,XIY,Airbus A320,B6958,Economy
4,2022-11-03,HO1212,XIY,SHA,Airbus A320,B1681,Economy


In [132]:
# Look up the coordinates of both endpoints in the airport table, using the
# airport code stored in the flight log as the row label.
coordinate_columns = [
    ('Departure Latitude', 'Departure', 'latitude'),
    ('Departure Longitude', 'Departure', 'longitude'),
    ('Arrival Latitude', 'Arrival', 'latitude'),
    ('Arrival Longitude', 'Arrival', 'longitude'),
]
for target_column, endpoint, coordinate in coordinate_columns:
    # .values drops the airport-code index so the assignment is positional.
    my_flights_df[target_column] = airline_routes.loc[my_flights_df[endpoint], coordinate].values

# Directed route label, e.g. "XIY->SZX".
my_flights_df['Route'] = my_flights_df['Departure'] + '->' + my_flights_df['Arrival']

In [133]:
def geodesic(lat1, lon1, lat2, lon2):
    """Sample the WGS84 geodesic between two points as a MultiLineString.

    The path from (lat1, lon1) to (lat2, lon2) is sampled at evenly spaced
    distances, one step per full 50 km of length (at least one step). When two
    consecutive samples jump between longitudes below -90 and above 90, the
    path is cut at that point so it is not drawn across the whole map.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the start point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the end point, in degrees.

    Returns
    -------
    MultiLineString
        One line per uninterrupted stretch of the path, as (lon, lat) tuples.
    """
    inverse = Geodesic.WGS84.Inverse(lat1, lon1, lat2, lon2)
    dist = inverse['s12'] / 1000  # metres -> kilometres
    # Hops shorter than 50 km would give steps == 0 and a ZeroDivisionError in
    # the interpolation below, so always take at least one step.
    steps = max(1, int(dist / 50))
    linestrings = []
    coordinates = []

    for i in range(steps + 1):
        direct = Geodesic.WGS84.Direct(
            inverse['lat1'],
            inverse['lon1'],
            inverse['azi1'],
            (i / float(steps)) * inverse['s12'],
        )
        if coordinates:
            previous_lon = coordinates[-1][0]
            # A jump between the far-west and far-east longitudes means the
            # path wrapped around the map edge: start a new line there.
            if (previous_lon < -90 and direct['lon2'] > 90) or (previous_lon > 90 and direct['lon2'] < -90):
                linestrings.append(coordinates)
                coordinates = []
        coordinates.append((direct['lon2'], direct['lat2']))

    linestrings.append(coordinates)
    # A line needs at least two points; a cut right after the first sample or
    # right before the last one leaves a lone point that cannot form a line.
    return MultiLineString([segment for segment in linestrings if len(segment) > 1])


def geodesic_with_perturbation(lat1, lon1, lat2, lon2, perturbation_factor=0.0001):
    """Sample the WGS84 geodesic between two points, nudging one sample.

    Works like a plain geodesic sampler (one step per full 50 km, at least one
    step, cut where consecutive longitudes jump between below -90 and above
    90), except that the sample at index ``steps // 2`` has its latitude
    shifted by ``perturbation_factor``: upwards when ``lat1 < lat2``,
    downwards otherwise. The two directions of a route therefore produce
    slightly different geometries.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the start point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the end point, in degrees.
    perturbation_factor : float, default 0.0001
        Size of the latitude shift applied to the middle sample.

    Returns
    -------
    MultiLineString
        One line per uninterrupted stretch of the path, as (lon, lat) tuples.
    """
    # Inverse solution: length (s12, metres) and starting azimuth of the path.
    inverse = Geodesic.WGS84.Inverse(lat1, lon1, lat2, lon2)
    dist = inverse['s12'] / 1000  # metres -> kilometres
    # Hops shorter than 50 km would give steps == 0 and a ZeroDivisionError in
    # the interpolation below, so always take at least one step.
    steps = max(1, int(dist / 50))
    shift = perturbation_factor if lat1 < lat2 else -perturbation_factor
    linestrings = []
    coordinates = []

    for i in range(steps + 1):
        # Point at the i-th fraction of the total length along the geodesic.
        direct = Geodesic.WGS84.Direct(
            inverse['lat1'],
            inverse['lon1'],
            inverse['azi1'],
            (i / float(steps)) * inverse['s12'],
        )

        # Nudge the middle sample so opposite directions differ.
        if i == steps // 2:
            direct['lat2'] += shift

        # A jump between the far-west and far-east longitudes means the path
        # wrapped around the map edge: start a new line there.
        if coordinates:
            previous_lon = coordinates[-1][0]
            if (previous_lon < -90 and direct['lon2'] > 90) or (previous_lon > 90 and direct['lon2'] < -90):
                linestrings.append(coordinates)
                coordinates = []

        coordinates.append((direct['lon2'], direct['lat2']))

    linestrings.append(coordinates)
    # A line needs at least two points; a cut right after the first sample or
    # right before the last one leaves a lone point that cannot form a line.
    return MultiLineString([segment for segment in linestrings if len(segment) > 1])


In [134]:
# One great-circle geometry per flight, built from its endpoint coordinates.
endpoint_columns = ['Departure Latitude', 'Departure Longitude', 'Arrival Latitude', 'Arrival Longitude']
geometry = my_flights_df.apply(lambda flight: geodesic(*flight[endpoint_columns]), axis=1)

In [135]:
# Geodesic length of every flight in kilometres (s12 is returned in metres).
distance_km = my_flights_df.apply(
    lambda flight: Geodesic.WGS84.Inverse(
        flight['Departure Latitude'],
        flight['Departure Longitude'],
        flight['Arrival Latitude'],
        flight['Arrival Longitude'],
    )['s12'] / 1000,
    axis=1,
)
# Truncate to whole kilometres and format as text, e.g. "1397 km".
my_flights_df['Distance'] = distance_km.astype(int).astype(str) + ' km'

In [136]:
# Visits per airport = departures + arrivals.
# size() counts rows per group; count().iloc[:, 0] would count the non-null
# values of whichever column happens to be first, silently skipping flights
# where that column is empty.
departures_per_airport = my_flights_df.groupby('Departure').size()
arrivals_per_airport = my_flights_df.groupby('Arrival').size()
# fill_value=0 keeps airports that appear only as a departure or only as an arrival.
n_airports = departures_per_airport.add(arrivals_per_airport, fill_value=0).astype(int)

In [137]:
# Keep only the airports that appear in the flight log, without the bulky
# per-airport 'routes' column.
visited_airport_rows = airline_routes.loc[n_airports.index]
my_airports = visited_airport_rows.drop(columns='routes')

In [138]:
# Attach the visit count to each airport (aligned on the airport-code index).
my_airports = my_airports.assign(**{'No. visit': n_airports})

In [139]:
def _airport_point(airport):
    """Return the airport row's location as a shapely Point(longitude, latitude)."""
    return Point(airport['longitude'], airport['latitude'])


geometry_airports = my_airports.apply(_airport_point, axis=1)

In [140]:
# Airports as a GeoDataFrame of points, CRS given as EPSG code 4326.
my_airports_gdf = gpd.GeoDataFrame(
    my_airports,
    geometry=geometry_airports,
    crs=4326,
)

In [141]:
# Flights per directed route. size() counts rows per group; count().iloc[:, 0]
# would count the non-null values of whichever column happens to be first,
# silently skipping flights where that column is empty.
n_route = my_flights_df.groupby('Route').size()

In [142]:
# Repeat each route's flight count on every flight of that route.
my_flights_df['No. flight'] = [n_route[route] for route in my_flights_df['Route']]

In [143]:
# Capitalised copy of the airport name column, used as the map tooltip field.
my_airports_gdf['Name'] = my_airports_gdf['name']

In [144]:
def _airport_style(feature):
    """Style one airport marker; its radius grows with the 'No. visit' count."""
    return {
        "fillColor": 'orange',
        "radius": (feature['properties']['No. visit'])*1 + 3,
    }


def _airport_highlight(feature):
    """Style passed to folium as the airport layer's highlight_function."""
    return {"fillOpacity": 1}


def _route_style(feature):
    """Style one route line; its weight grows with the 'No. flight' count."""
    return {
        "color": "black",
        "weight": (feature['properties']['No. flight']) *1.5,
    }


# Base map.
m = folium.Map(
    location=[40, 70],
    zoom_start=3,
    world_copy_jump=True,
    tiles='cartodb voyager',
)

# Airport layer: one circle marker per visited airport.
airport_marker = folium.CircleMarker(
    radius=20,
    fill_color="orangered",
    fill_opacity=0.6,
    color="black",
    weight=1,
)
airports_layer = folium.GeoJson(
    my_airports_gdf,
    name="Airports",
    marker=airport_marker,
    tooltip=folium.GeoJsonTooltip(fields=["Name", 'No. visit']),
    style_function=_airport_style,
    highlight_function=_airport_highlight,
)
airports_layer.add_to(m)

# Route layer: one geodesic line per flight.
my_flights_gdf = gpd.GeoDataFrame(data=my_flights_df, geometry=geometry, crs=4326)

# iloc[:, 1:] leaves out the first column of the flight table.
# NOTE(review): presumably this drops 'Date' so the frame can be serialised
# to GeoJSON - confirm.
routes_layer = folium.GeoJson(
    data=my_flights_gdf.iloc[:, 1:],
    tooltip=folium.GeoJsonTooltip(fields=["Route", "No. flight"]),
    style_function=_route_style,
)
routes_layer.add_to(m)

m

In [145]:
# m.save('my_flight_log.html')

In [146]:
# Show the flight table with the derived coordinate, route, distance and count columns.
my_flights_df

Unnamed: 0,Date,Flight Number,Departure,Arrival,Aircraft,Aircraft Registration,Class,Departure Latitude,Departure Longitude,Arrival Latitude,Arrival Longitude,Route,Distance,No. flight
0,2015-07-28,ZH9250,XIY,SZX,Airbus A320,B6740,Economy,34.438416,108.760946,22.639444,113.810833,XIY->SZX,1397 km,2
1,2015-08-01,ZH9249,SZX,XIY,Airbus A320,B6570,Economy,22.639444,113.810833,34.438416,108.760946,SZX->XIY,1397 km,1
2,2019-07-25,JD5343,XIY,DLC,Airbus A319,B6245,Economy,34.438416,108.760946,38.96102,121.539991,XIY->DLC,1245 km,2
3,2019-07-30,JD5344,DLC,XIY,Airbus A320,B6958,Economy,38.96102,121.539991,34.438416,108.760946,DLC->XIY,1245 km,1
4,2022-11-03,HO1212,XIY,SHA,Airbus A320,B1681,Economy,34.438416,108.760946,31.19779,121.333473,XIY->SHA,1229 km,2
5,2022-11-09,9C8947,SHA,XIY,Airbus A321,B30EU,Economy,31.19779,121.333473,34.438416,108.760946,SHA->XIY,1229 km,3
6,2023-04-13,CZ3762,XIY,ZUH,Boeing 737,B6067,Economy,34.438416,108.760946,22.011788,113.370601,XIY->ZUH,1449 km,1
7,2023-04-17,CZ6567,CAN,HGH,Airbus A350,B309W,Economy,23.387862,113.29734,30.236934,120.432356,CAN->HGH,1038 km,1
8,2023-04-24,CZ3548,SHA,CAN,Airbus A350,B32AV,Economy,31.19779,121.333473,23.387862,113.29734,SHA->CAN,1174 km,1
9,2023-04-24,CZ3215,CAN,XIY,Boeing 737,B2695,Economy,23.387862,113.29734,34.438416,108.760946,CAN->XIY,1301 km,1


In [147]:
# List the current column labels of the flight table.
my_flights_df.columns

Index(['Date', 'Flight Number', 'Departure', 'Arrival', 'Aircraft',
       'Aircraft Registration', 'Class', 'Departure Latitude',
       'Departure Longitude', 'Arrival Latitude', 'Arrival Longitude', 'Route',
       'Distance', 'No. flight'],
      dtype='object')

In [148]:
# Show the flight table again.
my_flights_df

Unnamed: 0,Date,Flight Number,Departure,Arrival,Aircraft,Aircraft Registration,Class,Departure Latitude,Departure Longitude,Arrival Latitude,Arrival Longitude,Route,Distance,No. flight
0,2015-07-28,ZH9250,XIY,SZX,Airbus A320,B6740,Economy,34.438416,108.760946,22.639444,113.810833,XIY->SZX,1397 km,2
1,2015-08-01,ZH9249,SZX,XIY,Airbus A320,B6570,Economy,22.639444,113.810833,34.438416,108.760946,SZX->XIY,1397 km,1
2,2019-07-25,JD5343,XIY,DLC,Airbus A319,B6245,Economy,34.438416,108.760946,38.96102,121.539991,XIY->DLC,1245 km,2
3,2019-07-30,JD5344,DLC,XIY,Airbus A320,B6958,Economy,38.96102,121.539991,34.438416,108.760946,DLC->XIY,1245 km,1
4,2022-11-03,HO1212,XIY,SHA,Airbus A320,B1681,Economy,34.438416,108.760946,31.19779,121.333473,XIY->SHA,1229 km,2
5,2022-11-09,9C8947,SHA,XIY,Airbus A321,B30EU,Economy,31.19779,121.333473,34.438416,108.760946,SHA->XIY,1229 km,3
6,2023-04-13,CZ3762,XIY,ZUH,Boeing 737,B6067,Economy,34.438416,108.760946,22.011788,113.370601,XIY->ZUH,1449 km,1
7,2023-04-17,CZ6567,CAN,HGH,Airbus A350,B309W,Economy,23.387862,113.29734,30.236934,120.432356,CAN->HGH,1038 km,1
8,2023-04-24,CZ3548,SHA,CAN,Airbus A350,B32AV,Economy,31.19779,121.333473,23.387862,113.29734,SHA->CAN,1174 km,1
9,2023-04-24,CZ3215,CAN,XIY,Boeing 737,B2695,Economy,23.387862,113.29734,34.438416,108.760946,CAN->XIY,1301 km,1


In [149]:
# Rename only the registration column. Assigning a full list of labels is
# positional: it mislabels columns if their order changes, and it raises a
# length-mismatch error when this cell is re-run after more columns have been
# added. rename() ignores labels that are already gone, so it is safe to re-run.
my_flights_df = my_flights_df.rename(columns={'Aircraft Registration': 'Tail Number'})

In [150]:
# Airline reference table (name, code, low-cost flag, logo URL).
airlines = pd.read_json(
    './data/Airlines/airlines.json',
)
airlines.head()

Unnamed: 0,name,code,is_lowcost,logo
0,Riyadh Air,RX,False,https://pics.avs.io/200/200/RX@2x.png
1,Lanmei Airlines,LQ,False,https://pics.avs.io/200/200/LQ@2x.png
2,Air Uganda,U7,False,https://pics.avs.io/200/200/U7@2x.png
3,Cubana de Aviación,CU,False,https://pics.avs.io/200/200/CU@2x.png
4,Jonika Airlines,JO,False,https://pics.avs.io/200/200/JO@2x.png


In [151]:
# Lookup tables from airline code to full name and to logo URL.
code2fullname = dict(zip(airlines['code'], airlines['name']))
code2logo = dict(zip(airlines['code'], airlines['logo']))

# The first two characters of the flight number are used as the airline code.
carrier_codes = my_flights_df['Flight Number'].str[:2]

# __getitem__ keeps the KeyError for a code missing from the airline table.
my_flights_df['Airlines'] = carrier_codes.map(code2fullname.__getitem__)
logo_urls = carrier_codes.map(code2logo.__getitem__)

# Wrap the logo URL in an <img> tag whose alt text names the airline.
my_flights_df['Airlines logo'] = (
    '<img src="' + logo_urls
    + '" alt="' + my_flights_df['Airlines']
    + ' Logo" width="50" height="50">'
)

In [152]:
# HTML table of airline names and logos; escape=False keeps the <img> tags as markup.
logo_preview = my_flights_df[['Airlines', 'Airlines logo']]
logo_preview.to_html(escape=False)

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Airlines</th>\n      <th>Airlines logo</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Shenzhen Airlines</td>\n      <td><img src="https://pics.avs.io/200/200/ZH@2x.png" alt="Shenzhen Airlines Logo" width="50" height="50"></td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Shenzhen Airlines</td>\n      <td><img src="https://pics.avs.io/200/200/ZH@2x.png" alt="Shenzhen Airlines Logo" width="50" height="50"></td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Beijing Capital Airlines</td>\n      <td><img src="https://pics.avs.io/200/200/JD@2x.png" alt="Beijing Capital Airlines Logo" width="50" height="50"></td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Beijing Capital Airlines</td>\n      <td><img src="https://pics.avs.io/200/200/JD@2x.png" alt="Beijing Capital Airlines Logo" width="50" height="50"></td>\n    </tr>\n    <tr>\n     

In [160]:
# HTML table of the flight log, restricted to the columns meant for display.
flight_table_columns = [
    'Date',
    'Airlines',
    'Flight Number',
    'Departure',
    'Arrival',
    'Aircraft',
    'Tail Number',
    'Distance',
]
my_flights_df[flight_table_columns].to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Date</th>\n      <th>Airlines</th>\n      <th>Flight Number</th>\n      <th>Departure</th>\n      <th>Arrival</th>\n      <th>Aircraft</th>\n      <th>Tail Number</th>\n      <th>Distance</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2015-07-28</td>\n      <td>Shenzhen Airlines</td>\n      <td>ZH9250</td>\n      <td>XIY</td>\n      <td>SZX</td>\n      <td>Airbus A320</td>\n      <td>B6740</td>\n      <td>1397 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2015-08-01</td>\n      <td>Shenzhen Airlines</td>\n      <td>ZH9249</td>\n      <td>SZX</td>\n      <td>XIY</td>\n      <td>Airbus A320</td>\n      <td>B6570</td>\n      <td>1397 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2019-07-25</td>\n      <td>Beijing Capital Airlines</td>\n      <td>JD5343</td>\n      <td>XIY</td>\n      <td>DLC</td>\n      <td>Airbus A319</td

In [154]:
# Flights per airline together with that airline's logo, computed in a single
# aggregation so each logo is tied to its own group instead of being matched
# up afterwards by row order.
airline_counts = (
    my_flights_df
    .groupby('Airlines', as_index=False)
    .agg(
        Flights=('Airlines logo', 'size'),   # rows per airline
        Airline=('Airlines logo', 'first'),  # the airline's logo <img> tag
    )
    # The airline-name column gets a blank header in the rendered table.
    .rename(columns={'Airlines': ' '})
)

In [155]:
# Most-flown airlines first; the logo column becomes the row label.
airline_counts = (
    airline_counts
    .sort_values(by='Flights', ascending=False)
    .set_index('Airline')
)

In [156]:
# Render the airline summary as an HTML string; escape=False leaves the <img> markup unescaped.
airline_counts.to_html(escape=False)

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th></th>\n      <th>Flights</th>\n    </tr>\n    <tr>\n      <th>Airline</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th><img src="https://pics.avs.io/200/200/CZ@2x.png" alt="China Southern Airlines Logo" width="50" height="50"></th>\n      <td>China Southern Airlines</td>\n      <td>10</td>\n    </tr>\n    <tr>\n      <th><img src="https://pics.avs.io/200/200/TK@2x.png" alt="Turkish Airlines Logo" width="50" height="50"></th>\n      <td>Turkish Airlines</td>\n      <td>6</td>\n    </tr>\n    <tr>\n      <th><img src="https://pics.avs.io/200/200/MU@2x.png" alt="China Eastern Airlines Logo" width="50" height="50"></th>\n      <td>China Eastern Airlines</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th><img src="https://pics.avs.io/200/200/U2@2x.png" alt="EasyJet Logo" width="50" height="50"></th>\n      <td>EasyJet</td>\n      

In [157]:
# Manufacturer name -> flag icon file stem.
com2con = {'Airbus':'eu', 'Boeing':'us', 'COMAC': 'cn'}

# Flights per aircraft type: first column is the type, second its count.
aircraft_counts = my_flights_df.groupby('Aircraft', as_index=False).count().iloc[:, [0, 1]]
aircraft_counts.columns = [' ', 'Flights']

# The first word of the aircraft type is the manufacturer; look up its flag.
flag_codes = aircraft_counts[' '].str.split().map(lambda words: com2con[words[0]])
aircraft_counts['Aircraft'] = (
    '<img src="./assets/images/flagicons/' + flag_codes
    + '.svg" width="30" height="30">'
)

In [158]:
# Most-flown aircraft types first; the flag icon column becomes the row label.
aircraft_counts = (
    aircraft_counts
    .sort_values(by='Flights', ascending=False)
    .set_index('Aircraft')
)

In [159]:
# Render the aircraft summary as an HTML string; escape=False leaves the <img> markup unescaped.
aircraft_counts.to_html(escape=False)

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th></th>\n      <th>Flights</th>\n    </tr>\n    <tr>\n      <th>Aircraft</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th><img src="./assets/images/flagicons/eu.svg" width="30" height="30"></th>\n      <td>Airbus A320</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th><img src="./assets/images/flagicons/eu.svg" width="30" height="30"></th>\n      <td>Airbus A321</td>\n      <td>9</td>\n    </tr>\n    <tr>\n      <th><img src="./assets/images/flagicons/eu.svg" width="30" height="30"></th>\n      <td>Airbus A330</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th><img src="./assets/images/flagicons/us.svg" width="30" height="30"></th>\n      <td>Boeing 737</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th><img src="./assets/images/flagicons/us.svg" width="30" height="30"></th>\n      <td>Boeing 777</td>\n      <td>3</td>\