In [1]:
import pandas as pd
from geopy import distance

from FlightRadar24.api import FlightRadar24API
fr_api = FlightRadar24API()

# ETL


In [4]:
df_airlines = pd.DataFrame(fr_api.get_airlines())[["Name","ICAO"]]
df_airlines.to_parquet("data/Airlines.parquet")

In [5]:
df_airlines = pd.read_parquet("data/Airlines.parquet")
df_airlines

Unnamed: 0,Name,ICAO
0,21 Air,CSB
1,25only Aviation,ONY
2,2Excel Aviation,BRO
3,40-Mile Air,MLA
4,748 Air Services,IHO
...,...,...
1802,Zil Air,SYZ
1803,Zimex Aviation,IMX
1804,Zimex Aviation Austria,AZD
1805,ZIPAIR,TZP


In [6]:
df_airports=pd.read_parquet("data/Airports.parquet")
df_airports

Unnamed: 0,name,country,Continent,iata,lat,lon
0,\tMasai Mara Keekorok Airport,Kenya,africa,KEU,-1.586377,35.257462
1,A Coruna Airport,Spain,europe,LCG,43.302059,-8.377250
2,Aachen Merzbruck Airport,Germany,europe,AAH,50.823051,6.186111
3,Aalborg Airport,Denmark,europe,AAL,57.092781,9.849164
4,Aarhus Airport,Denmark,europe,AAR,56.300011,10.619000
...,...,...,...,...,...,...
4936,Zunyi Xinzhou Airport,China,asia,ZYI,27.811167,107.245972
4937,Zurich Airport,Switzerland,europe,ZRH,47.464722,8.549167
4938,Zwara Airport,Libya,africa,WAX,32.933334,12.083333
4939,Zweibrucken Airport,Germany,europe,ZQW,49.209518,7.400647


# Current Flights

In [2]:
def active_airplane(a_icao):
    flight_list=[]
    flights = fr_api.get_flights(airline = a_icao)
    for flight in flights :
        try :
            dest_iata = flight.destination_airport_iata
        except :
            dest_iata = None
        try :
            org_iata = flight.origin_airport_iata
        except :
            org_iata = None
        try :
            model = flight.aircraft_code
        except :
            model = None 
        try :
            registration = flight.registration
        except :
            registration = None
        try :
            speed = flight.ground_speed
        except :
            speed = None

        f = {
            "id" : flight.id,
            "dest_iata" : dest_iata,
            "org_iata" : org_iata,
            "model" : model,
            "registration" : registration,
            "speed" : speed,
            "company" : a_icao
            }
        flight_list.append(f)
    return flight_list

In [7]:
active_flights = []
for airline in df_airlines["ICAO"]:
    active_flights += active_airplane(airline)

In [8]:
df_flights = pd.DataFrame(active_flights)
df_flights = df_flights[df_flights["dest_iata"] != 'N/A']
df_flights = df_flights[df_flights["org_iata"] != 'N/A']
df_flights

Unnamed: 0,id,dest_iata,org_iata,model,registration,speed,company
0,2df932a6,NAP,GLA,B733,G-SWRD,454,BRO
2,2df95f63,HAK,CSX,B738,B-1551,402,JYH
3,2df96342,BAR,CAN,B738,B-1592,324,JYH
5,2df8f78e,SAT,CVG,B762,N797AX,10,ABX
6,2df91368,VNY,HHR,CL30,N286JR,0,WSN
...,...,...,...,...,...,...,...
10205,2df8e1dd,SNA,FRG,F2TH,N215FH,431,XEN
10206,2df954b4,LAS,OAK,GLF5,N51BN,526,XEN
10207,2df8f388,BSL,PRN,AT45,HB-ALN,251,IMX
10208,2df858d5,LAX,NRT,B788,JA825J,535,TZP


In [9]:
df_flights.to_parquet("data/Flights.parquet")

# Q2

In [335]:
df= pd.read_parquet("data/Flights.parquet",columns=["dest_iata","org_iata","company"])
airports = pd.read_parquet("data/Airports.parquet", columns=["iata","Continent"])

def same_continent(x):
    dest = x["dest_iata"]
    org = x["org_iata"]
    try :
        if airports[airports["iata"] == org]["Continent"].iloc[0] == airports[airports["iata"] == dest]["Continent"].iloc[0]:
            return airports[airports["iata"] == org]["Continent"].iloc[0]
        else :
            return None
    except IndexError as e:
        return None

S = df.apply(same_continent, axis=1)
S.sort_values()

6531     africa
1058     africa
1057     africa
4262     africa
1056     africa
          ...  
10585      None
10586      None
10587      None
10590      None
10591      None
Length: 9989, dtype: object

In [336]:
df["Continent"] = df.apply(same_continent, axis=1)

In [337]:
df

Unnamed: 0,dest_iata,org_iata,company,Continent
1,NWI,SXB,BRO,europe
5,CAN,WUH,JYH,asia
6,YYA,CAN,JYH,asia
8,PHX,CVG,ABX,northamerica
9,CVG,YMX,ABX,northamerica
...,...,...,...,...
10586,THR,KER,IZG,
10587,NJF,IKA,IZG,
10588,AFW,TEB,XEN,northamerica
10590,LAX,NRT,TZP,


In [338]:
for continent in fr_api.get_zones().keys():
    count = df[df["Continent"] == continent]["company"].value_counts()
    if not count.empty:
        print(continent.upper(), ":\n", df_airlines[df_airlines["ICAO"] == count.index[0]].iloc[0]["Name"] , ":", count.max())

EUROPE :
 Ryanair : 269
NORTHAMERICA :
 American Airlines : 620
SOUTHAMERICA :
 LATAM Airlines : 107
OCEANIA :
 AirAsia : 48
ASIA :
 IndiGo : 123
AFRICA :
 Ethiopian Airlines : 32
NORTHATLANTIC :
 Air Greenland : 1


In [339]:
fr_api.get_zones().keys()

dict_keys(['europe', 'northamerica', 'southamerica', 'oceania', 'asia', 'africa', 'atlantic', 'maldives', 'northatlantic'])

# Q3

In [340]:
airports = pd.read_parquet("data/Airports.parquet", columns=["iata","lat","lon"])
airports

Unnamed: 0,iata,lat,lon
0,KEU,-1.586377,35.257462
1,LCG,43.302059,-8.377250
2,AAH,50.823051,6.186111
3,AAL,57.092781,9.849164
4,AAR,56.300011,10.619000
...,...,...,...
4936,ZYI,27.811167,107.245972
4937,ZRH,47.464722,8.549167
4938,WAX,32.933334,12.083333
4939,ZQW,49.209518,7.400647


In [341]:
df3 = pd.read_parquet("data/Flights.parquet",columns=["id","dest_iata","org_iata"])
df3

Unnamed: 0,id,dest_iata,org_iata
1,2da38c2b,NWI,SXB
5,2da391a3,CAN,WUH
6,2da39caa,YYA,CAN
8,2da33137,PHX,CVG
9,2da358be,CVG,YMX
...,...,...,...
10586,2da38d01,THR,KER
10587,2da3a59f,NJF,IKA
10588,2da36b3f,AFW,TEB
10590,2da2ac2c,LAX,NRT


In [342]:
def airport_dist(x):
    org = x["org_iata"]
    dest = x["dest_iata"]
    try :
        coord_org = (airports[airports["iata"] == org]["lat"].iloc[0], airports[airports["iata"] == org]["lon"].iloc[0])
        coord_dest = (airports[airports["iata"] == dest]["lat"].iloc[0], airports[airports["iata"] == dest]["lon"].iloc[0])
        return distance.distance(coord_org, coord_dest).km
    except IndexError as e:
        return None

In [343]:
S = df3.apply(airport_dist, axis=1)
S.sort_values()

8636     0.0
6609     0.0
7388     0.0
6696     0.0
6697     0.0
        ... 
6705     NaN
6929     NaN
7180     NaN
7700     NaN
10428    NaN
Length: 9989, dtype: float64

In [344]:
df3["travel_size"] = df3.apply(airport_dist, axis=1)
df3

Unnamed: 0,id,dest_iata,org_iata,travel_size
1,2da38c2b,NWI,SXB,642.661665
5,2da391a3,CAN,WUH,823.940460
6,2da39caa,YYA,CAN,655.941500
8,2da33137,PHX,CVG,2525.036282
9,2da358be,CVG,YMX,1144.694290
...,...,...,...,...
10586,2da38d01,THR,KER,798.612446
10587,2da3a59f,NJF,IKA,731.573506
10588,2da36b3f,AFW,TEB,2239.487656
10590,2da2ac2c,LAX,NRT,8772.806823


In [345]:
df3.max()

id                 2da3b22d
dest_iata               ZUH
org_iata                ZYL
travel_size    15348.640441
dtype: object

In [346]:
print("The flight with the longuest journey started at",end=" ")
print(df_airports[df_airports["iata"] == df3.max()["org_iata"]].iloc[0]["name"],end=" ")
print("and will end at",end=" ")
print(df_airports[df_airports["iata"] == df3.max()["dest_iata"]].iloc[0]["name"],end=" ")
print(f'for a journey of {df3.max()["travel_size"]} km')

The flight with the longuest journey started at Sylhet Osmani International Airport and will end at Zhuhai Jinwan Airport for a journey of 15348.64044117391 km


# Q4

In [405]:
airports = pd.read_parquet("data/Airports.parquet", columns=["iata","Continent","lon","lat"])
airports

Unnamed: 0,iata,Continent,lon,lat
0,KEU,africa,35.257462,-1.586377
1,LCG,europe,-8.377250,43.302059
2,AAH,europe,6.186111,50.823051
3,AAL,europe,9.849164,57.092781
4,AAR,europe,10.619000,56.300011
...,...,...,...,...
4936,ZYI,asia,107.245972,27.811167
4937,ZRH,europe,8.549167,47.464722
4938,WAX,africa,12.083333,32.933334
4939,ZQW,europe,7.400647,49.209518


In [348]:
def cont_org(x):
    airports = pd.read_parquet("data/Airports.parquet", columns=["iata","Continent","lon","lat"])
    try :
        return airports[airports["iata"] == x["org_iata"]]["Continent"].iloc[0]
    except IndexError:
        return None

In [406]:
df4 = pd.read_parquet("data/Flights.parquet",columns=["id","dest_iata","org_iata"])
df4["travel_size"] = df4.apply(airport_dist, axis=1)
df4["org_Continent"] = df4.apply(cont_org, axis=1)
print("The average route distance is :")
for continent in fr_api.get_zones().keys():
    print(continent.upper(), ":\n", df4[df4["org_Continent"] == continent]["travel_size"].mean(), "km")

The average route distance is :
EUROPE :
 2701.0356175863944 km
NORTHAMERICA :
 1938.0856360745843 km
SOUTHAMERICA :
 1892.514419650731 km
OCEANIA :
 3699.206982780523 km
ASIA :
 2945.727475950224 km
AFRICA :
 2732.9561730810556 km
ATLANTIC :
 nan km
MALDIVES :
 nan km
NORTHATLANTIC :
 3451.9474000484147 km


# Q5.1

In [423]:
print(df_flights["model"].value_counts().index[0], df_flights["model"].value_counts().iloc[0])

B738 1543


# Q5.2

In [433]:
df5 = pd.read_parquet("data/Flights.parquet",columns=["id","dest_iata","org_iata","model"])
df5["org_Continent"] = df5.apply(cont_org, axis=1)
print("The average route distance is :")
for continent in fr_api.get_zones().keys():
    count = df5[df5["org_Continent"] == continent]["model"].value_counts()
    if not count.empty:
        print(continent.upper(), ":\n", count.index[0], count.iloc[0],"planes")

The average route distance is :
EUROPE :
 B738 652 planes
NORTHAMERICA :
 B738 433 planes
SOUTHAMERICA :
 A320 66 planes
OCEANIA :
 A320 79 planes
ASIA :
 B738 292 planes
AFRICA :
 B738 83 planes
NORTHATLANTIC :
 B38M 4 planes


# Q6

In [109]:
def company_country(x):
    airline = pd.read_parquet("data/AirlineCountries.parquet", columns=["ICAO","Country"])
    try :
        return airline[airline["ICAO"] == x["company"]]["Country"].iloc[0]
    except IndexError:
        return None

In [110]:
df6 = pd.read_parquet("data/Flights.parquet", columns=["model","company"])
df6["company_country"] = df6.apply(company_country,axis=1)

In [108]:
df6

Unnamed: 0,model,company,company_country
0,B733,BRO,United Kingdom
2,B738,JYH,
3,B738,JYH,
5,B762,ABX,United States
6,CL30,WSN,United States
...,...,...,...
10205,F2TH,XEN,
10206,GLF5,XEN,
10207,AT45,IMX,Switzerland
10208,B788,TZP,


In [111]:
df6.dropna(inplace=True)
df6.reset_index(allow_duplicates=False, inplace=True)

In [112]:
countries = df6[["company_country"]].drop_duplicates()
countries.dropna(inplace=True)
countries = countries["company_country"].to_list()

In [114]:
dic = {"Company registration country":[],"model_1":[],"number_1":[],"model_2":[],"number_2":[],"model_3":[],"number_3":[]}

for country in countries:
    models = df6[df6["company_country"] == country].value_counts("model")
    
    if len(models) == 0:
        pass
    elif len(models) == 1:
        dic["Company registration country"].append(country)
        dic["model_1"].append(models.index[0])
        dic["number_1"].append(models[0])
        dic["model_2"].append("")
        dic["number_2"].append("")
        dic["model_3"].append("")
        dic["number_3"].append("")

    elif len(models) == 2:
        dic["Company registration country"].append(country)
        dic["model_1"].append(models.index[0])
        dic["number_1"].append(models[0])
        dic["model_2"].append(models.index[1])
        dic["number_2"].append(models[1])
        dic["model_3"].append("")
        dic["number_3"].append("")

    elif len(models) == 3:
        dic["Company registration country"].append(country)
        dic["model_1"].append(models.index[0])
        dic["number_1"].append(models[0])
        dic["model_2"].append(models.index[1])
        dic["number_2"].append(models[1])
        dic["model_3"].append(models.index[2])
        dic["number_3"].append(models[2])

df_answer6 = pd.DataFrame(dic)
df_answer6

Unnamed: 0,Company registration country,model_1,number_1,model_2,number_2,model_3,number_3
0,Moldova,A319,2,A320,2.0,B744,1.0
1,Albania,A320,1,,,,
2,Algeria,B738,9,A332,2.0,AT75,1.0
3,French Guiana,AT45,1,AT46,1.0,,
4,Latvia,BCS3,17,SF34,1.0,,
5,Democratic Republic of the Congo,B748,1,,,,
6,Ivory Coast,A319,2,A320,2.0,A20N,1.0
7,Greenland,DH8B,3,,,,
8,Madagascar,AT76,1,,,,
9,Mauritius,A339,2,A359,2.0,,


# Q7.1

In [408]:
airports = pd.read_parquet("data/Airports.parquet", columns=["iata","name","lon","lat"])

df4

Unnamed: 0,id,dest_iata,org_iata,travel_size,org_Continent
1,2da38c2b,NWI,SXB,642.661665,europe
5,2da391a3,CAN,WUH,823.940460,asia
6,2da39caa,YYA,CAN,655.941500,asia
8,2da33137,PHX,CVG,2525.036282,northamerica
9,2da358be,CVG,YMX,1144.694290,northamerica
...,...,...,...,...,...
10586,2da38d01,THR,KER,798.612446,asia
10587,2da3a59f,NJF,IKA,731.573506,europe
10588,2da36b3f,AFW,TEB,2239.487656,northamerica
10590,2da2ac2c,LAX,NRT,8772.806823,asia


In [409]:
for continent in fr_api.get_zones().keys():
    count = df4[df4["org_Continent"] == continent]["dest_iata"].value_counts()
    if not count.empty:
        print(continent.upper(), ":\n", "to", airports[airports["iata"] == count.index[0]].iloc[0]["name"] , ":", count.max(), "flights")

EUROPE :
 to Istanbul Airport : 65 flights
NORTHAMERICA :
 to Dallas Fort Worth International Airport : 161 flights
SOUTHAMERICA :
 to Sao Paulo Guarulhos International Airport : 22 flights
OCEANIA :
 to Singapore Changi Airport : 32 flights
ASIA :
 to Delhi Indira Gandhi International Airport : 54 flights
AFRICA :
 to Addis Ababa Bole Airport : 22 flights
NORTHATLANTIC :
 to New York John F. Kennedy International Airport : 2 flights


# Q7.2

In [352]:
df72 = pd.read_parquet("data/Flights.parquet",columns=["org_iata","dest_iata"])
df_inbounds = pd.read_parquet("data/Airports.parquet",columns=["iata","Continent"])
df_inbounds["inbounds"] = [0]*len(df_inbounds.index)

for i in range(len(df72.index)):
    flight = df72.iloc[i]
    try :
        df_inbounds.iat[df_inbounds[df_inbounds["iata"] == flight["org_iata"]]["inbounds"].index[0],2] -=1
        df_inbounds.iat[df_inbounds[df_inbounds["iata"] == flight["dest_iata"]]["inbounds"].index[0],2] +=1
    except :
        pass

In [353]:
ind = df_inbounds[df_inbounds["iata"] == "LCG"]["inbounds"].index[0]
df_inbounds.iat[ind,2] += 1

In [354]:
df_inbounds

Unnamed: 0,iata,Continent,inbounds
0,KEU,africa,0
1,LCG,europe,3
2,AAH,europe,0
3,AAL,europe,2
4,AAR,europe,-2
...,...,...,...
4936,ZYI,asia,-3
4937,ZRH,europe,4
4938,WAX,africa,0
4939,ZQW,europe,0


In [355]:
df_inbounds["inbounds"] = df_inbounds["inbounds"].apply(lambda x: abs(x))
df_inbounds.max()

  df_inbounds.max()


iata        ZZV
inbounds     83
dtype: object

In [356]:
print("greatest inbound/outbound flights difference at ", df_airports[df_airports["iata"] == df_inbounds["iata"].max()].iloc[0]["name"], end=" ")
print("with a difference of", df_inbounds["inbounds"].max())

greatest inbound/outbound flights difference at  Zanesville Municipal Airport with a difference of 83


# Q8  

- Q8: By continent, what is the average active flight speed ? (flight localization by airport of origin)

In [357]:
airports = pd.read_parquet("data/Airports.parquet", columns=["iata","name","Continent"])

df8 = pd.read_parquet("data/Flights.parquet",columns=["org_iata","speed"])
df8["org_Continent"] = df8.apply(cont_org, axis=1)
df8

Unnamed: 0,org_iata,speed,org_Continent
1,SXB,230,europe
5,WUH,382,asia
6,CAN,434,asia
8,CVG,322,northamerica
9,YMX,11,northamerica
...,...,...,...
10586,KER,452,asia
10587,IKA,482,europe
10588,TEB,375,northamerica
10590,NRT,517,asia


In [358]:
for continent in fr_api.get_zones().keys():
    count = df8[df8["org_Continent"] == continent]["speed"].mean()
    if count :
        print(continent.upper(), ":\n", "Avearage speed", count, "km/h")

EUROPE :
 Avearage speed 365.8327955412144 km/h
NORTHAMERICA :
 Avearage speed 309.2066390041494 km/h
SOUTHAMERICA :
 Avearage speed 339.17630057803467 km/h
OCEANIA :
 Avearage speed 376.7236286919831 km/h
ASIA :
 Avearage speed 392.45952380952383 km/h
AFRICA :
 Avearage speed 371.9095127610209 km/h
ATLANTIC :
 Avearage speed nan km/h
MALDIVES :
 Avearage speed nan km/h
NORTHATLANTIC :
 Avearage speed 399.9166666666667 km/h


# other

In [415]:
df_flights

Unnamed: 0,id,dest_icao,org_icao,dest_iata,org_iata,model,registration,speed,company
1,2da38c2b,,,NWI,SXB,,G-JASS,230,BRO
5,2da391a3,,,CAN,WUH,,B-1472,382,JYH
6,2da39caa,,,YYA,CAN,,B-1715,434,JYH
8,2da33137,,,PHX,CVG,,N317CM,322,ABX
9,2da358be,,,CVG,YMX,,N767AX,11,ABX
...,...,...,...,...,...,...,...,...,...
10586,2da38d01,,,THR,KER,,EP-ZAV,452,IZG
10587,2da3a59f,,,NJF,IKA,,EP-ZAX,482,IZG
10588,2da36b3f,,,AFW,TEB,,N789JC,375,XEN
10590,2da2ac2c,,,LAX,NRT,,JA824J,517,TZP
