# Collect Flight Data

In [171]:
import pandas as pd
from datetime import datetime, date, timedelta
import requests
from pytz import timezone

In [115]:
# Coordinates for Berlin (latitude / longitude in decimal degrees).
# The values previously stored here (50.110556, 8.682222) are the Frankfurt
# coordinates and did not match the Berlin search recorded for the request cell.
latitudes = [52.5200]
longitudes = [13.4050]

In [116]:
# Exploratory request: ask AeroDataBox for airports within 50 km of one
# coordinate pair and display the raw JSON answer.
# The RapidAPI key is imported instead of being pasted into the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be rotated; presumably `API_key_aerobox` holds its replacement.
from gans_keys import API_key_aerobox

url = "https://aerodatabox.p.rapidapi.com/airports/search/location"

# `latitudes` / `longitudes` are lists; send the first pair as scalars so the
# query string never contains repeated `lat` / `lon` parameters.
querystring = {"lat":latitudes[0],"lon":longitudes[0],"radiusKm":"50","limit":"2","withFlightInfoOnly":"true"}

headers = {
    "X-RapidAPI-Key": API_key_aerobox,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
# Fail loudly on 4xx/5xx instead of trying to interpret an error payload.
response.raise_for_status()

response.json()

{'searchBy': {'lat': 52.52, 'lon': 13.405},
 'count': 1,
 'items': [{'icao': 'EDDB',
   'iata': 'BER',
   'name': 'Berlin Brandenburg',
   'shortName': 'Brandenburg',
   'municipalityName': 'Berlin',
   'location': {'lat': 52.35139, 'lon': 13.493889},
   'countryCode': 'DE'}]}

## Create a Dataframe with ICAO Codes

In [124]:
def icao_airport_codes(latitudes, longitudes):
    """Look up airports near each coordinate pair with the AeroDataBox location search.

    For every (latitude, longitude) pair one request is sent, asking for at
    most two airports with flight information within a 50 km radius. The
    `items` of all responses are flattened with `pd.json_normalize` and
    stacked into a single frame.

    Parameters
    ----------
    latitudes, longitudes : sequences of numbers
        Positionally aligned coordinates; both must have the same length.

    Returns
    -------
    pandas.DataFrame
        One row per airport returned by the API, with a fresh 0..n-1 index.
        An empty DataFrame when no coordinates are passed.

    Raises
    ------
    ValueError
        If `latitudes` and `longitudes` differ in length.
    requests.HTTPError
        If the API answers with an error status.
    """
    if len(latitudes) != len(longitudes):
        raise ValueError(
            f"latitudes and longitudes must have the same length "
            f"(got {len(latitudes)} and {len(longitudes)})"
        )

    # pd.concat raises on an empty list, so answer the trivial case directly.
    if len(latitudes) == 0:
        return pd.DataFrame()

    # Imported only once a request is actually needed; keeps the RapidAPI key
    # out of the notebook source.
    from gans_keys import API_key_aerobox

    url = "https://aerodatabox.p.rapidapi.com/airports/search/location"
    headers = {
        "X-RapidAPI-Key": API_key_aerobox,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }

    list_for_df = []

    for latitude, longitude in zip(latitudes, longitudes):
        querystring = {"lat":latitude,"lon":longitude,"radiusKm":"50","limit":"2","withFlightInfoOnly":"true"}

        response = requests.get(url, headers=headers, params=querystring, timeout=30)
        # Surface HTTP errors here rather than as a confusing KeyError on 'items'.
        response.raise_for_status()
        list_for_df.append(pd.json_normalize(response.json()['items']))

    return pd.concat(list_for_df, ignore_index=True)

# City coordinates as (latitude, longitude) pairs, in the order
# Berlin, Frankfurt, Munich, Cologne, Stuttgart, Leipzig.
# zip() transposes the pairs into one latitude list and one longitude list.
latitudes, longitudes = (
    list(axis)
    for axis in zip(
        (52.5200, 13.4050),
        (50.110556, 8.682222),
        (48.1351, 11.5820),
        (50.936389, 6.952778),
        (48.777500, 9.180000),
        (51.340000, 12.375000),
    )
)

icao_airport_codes(latitudes, longitudes)

Unnamed: 0,icao,iata,name,shortName,municipalityName,countryCode,location.lat,location.lon
0,EDDB,BER,Berlin Brandenburg,Brandenburg,Berlin,DE,52.35139,13.493889
1,EDDF,FRA,Frankfurt-am-Main,Frankfurt-am-Main,Frankfurt-am-Main,DE,50.0264,8.543129
2,EDDK,CGN,Cologne Bonn,Bonn,Cologne,DE,50.8659,7.142739
3,EDDS,STR,Stuttgart,Stuttgart,Stuttgart,DE,48.6899,9.22196
4,EDDP,LEJ,Leipzig Halle,Halle,Leipzig,DE,51.4324,12.2416


## Setting up the ICAO dataframe

In [152]:
# City names and their airport ICAO codes, aligned by position.
cities = ['Berlin', 'Frankfurt', 'Munich', 'Cologne', 'Stuttgart', 'Leipzig']
icao_codes = ['EDDB', 'EDDF', 'EDDM', 'EDDK', 'EDDS', 'EDDP']

# One row per (city, code) pair.
icao_df = pd.DataFrame(list(zip(cities, icao_codes)), columns=['City', 'ICAO'])

icao_df

Unnamed: 0,City,ICAO
0,Berlin,EDDB
1,Frankfurt,EDDF
2,Munich,EDDM
3,Cologne,EDDK
4,Stuttgart,EDDS
5,Leipzig,EDDP


In [180]:
# Number the cities 1..n following the current row order.
icao_df['city_id'] = [row_number for row_number, _ in enumerate(icao_df.index, start=1)]
icao_df

Unnamed: 0,City,ICAO,city_id
0,Berlin,EDDB,1
1,Frankfurt,EDDF,2
2,Munich,EDDM,3
3,Cologne,EDDK,4
4,Stuttgart,EDDS,5
5,Leipzig,EDDP,6


## Create a Dataframe with Arrival information

In [133]:
# Fetch the flight board of airport EDDB for a fixed 12-hour window
# (2023-11-28 11:00 to 23:00) and keep the parsed JSON for inspection.
# The RapidAPI key is imported instead of being pasted into the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be rotated; presumably `API_key_aerobox` holds its replacement.
from gans_keys import API_key_aerobox

url = "https://aerodatabox.p.rapidapi.com/flights/airports/icao/EDDB/2023-11-28T11:00/2023-11-28T23:00"

querystring = {"withLeg":"false","withCancelled":"true","withCodeshared":"true","withLocation":"false"}

headers = {
    "X-RapidAPI-Key": API_key_aerobox,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
# Fail loudly on 4xx/5xx instead of storing an error payload as flight data.
response.raise_for_status()

arrival_information_berlin = response.json()

In [134]:
# We need the flight number, the status and the local revised arrival time.
# Optional: the departure airport.

In [135]:
pd.json_normalize(arrival_information_berlin['arrivals'])

Unnamed: 0,number,callSign,status,codeshareStatus,isCargo,movement.airport.icao,movement.airport.iata,movement.airport.name,movement.scheduledTime.utc,movement.scheduledTime.local,...,movement.baggageBelt,movement.quality,aircraft.reg,aircraft.modeS,aircraft.model,airline.name,airline.iata,airline.icao,movement.runwayTime.utc,movement.runwayTime.local
0,SN 2581,BEL8CE,Arrived,IsOperator,False,EBBR,BRU,Brussels,2023-11-28 10:05Z,2023-11-28 11:05+01:00,...,B4,"[Basic, Live]",OO-TCQ,44D071,Airbus A320,Brussels,SN,BEL,,
1,PC 977,PGT5ML,Arrived,IsOperator,False,LTFJ,SAW,Istanbul,2023-11-28 09:55Z,2023-11-28 10:55+01:00,...,A2,"[Basic, Live]",TC-RDR,4BC892,Airbus A321 NEO,Pegasus,PC,PGT,,
2,BA 992,BAW992G,Arrived,IsOperator,False,EGLL,LHR,London,2023-11-28 10:20Z,2023-11-28 11:20+01:00,...,A3,"[Basic, Live]",G-EUOF,400942,Airbus A319,British Airways,BA,BAW,2023-11-28 10:17Z,2023-11-28 11:17+01:00
3,KL 1823,KLM95E,Arrived,IsOperator,False,EHAM,AMS,Amsterdam,2023-11-28 10:05Z,2023-11-28 11:05+01:00,...,A1,"[Basic, Live]",PH-HSD,484EE4,Boeing 737,KLM,KL,KLM,2023-11-28 10:18Z,2023-11-28 11:18+01:00
4,EW 8001,,Arrived,IsOperator,False,EDDS,STR,Stuttgart,2023-11-28 09:00Z,2023-11-28 10:00+01:00,...,A4,"[Basic, Live]",D-AENG,,Airbus A320 NEO,Eurowings,EW,EWG,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,AF 1134,,Expected,IsOperator,False,LFPG,CDG,Paris,2023-11-28 21:40Z,2023-11-28 22:40+01:00,...,A4,"[Basic, Live]",,,Airbus A220-300,Air France,AF,AFR,,
152,EC 5106,,Expected,IsCodeshared,False,LEMG,AGP,Málaga,2023-11-28 21:40Z,2023-11-28 22:40+01:00,...,A3,"[Basic, Live]",OE-IWW,440B1E,Airbus A320,EasyJet Europe,EC,EJU,,
153,U2 5106,,Expected,IsOperator,False,LEMG,AGP,Málaga,2023-11-28 21:40Z,2023-11-28 22:40+01:00,...,A3,"[Basic, Live]",OE-IWW,440B1E,Airbus A320,easyJet,U2,EZY,,
154,FR 9165,,Expected,IsOperator,False,LIPH,TSF,Treviso,2023-11-28 21:45Z,2023-11-28 22:45+01:00,...,C2,"[Basic, Live]",9H-QBC,4D2274,Boeing 737-800,Ryanair,FR,RYR,,


In [136]:
arrival_information_berlin['arrivals'][0]

{'movement': {'airport': {'icao': 'EBBR', 'iata': 'BRU', 'name': 'Brussels'},
  'scheduledTime': {'utc': '2023-11-28 10:05Z',
   'local': '2023-11-28 11:05+01:00'},
  'revisedTime': {'utc': '2023-11-28 10:05Z',
   'local': '2023-11-28 11:05+01:00'},
  'terminal': '1',
  'gate': 'B20',
  'baggageBelt': 'B4',
  'quality': ['Basic', 'Live']},
 'number': 'SN 2581',
 'callSign': 'BEL8CE',
 'status': 'Arrived',
 'codeshareStatus': 'IsOperator',
 'isCargo': False,
 'aircraft': {'reg': 'OO-TCQ', 'modeS': '44D071', 'model': 'Airbus A320'},
 'airline': {'name': 'Brussels', 'iata': 'SN', 'icao': 'BEL'}}

In [137]:
arrival_information_berlin['departures'][0]

{'movement': {'airport': {'icao': 'EGLL', 'iata': 'LHR', 'name': 'London'},
  'scheduledTime': {'utc': '2023-11-28 10:00Z',
   'local': '2023-11-28 11:00+01:00'},
  'revisedTime': {'utc': '2023-11-28 10:00Z',
   'local': '2023-11-28 11:00+01:00'},
  'runwayTime': {'utc': '2023-11-28 10:17Z',
   'local': '2023-11-28 11:17+01:00'},
  'terminal': '1',
  'checkInDesk': '511-514',
  'gate': 'C10',
  'quality': ['Basic', 'Live']},
 'number': 'BA 991',
 'callSign': 'BAW91J',
 'status': 'Departed',
 'codeshareStatus': 'IsOperator',
 'isCargo': False,
 'aircraft': {'reg': 'G-EUYW',
  'modeS': '406AE3',
  'model': 'Airbus A320 (Sharklets)'},
 'airline': {'name': 'British Airways', 'iata': 'BA', 'icao': 'BAW'}}

In [138]:
#Flight Number
arrival_information_berlin['arrivals'][0]['number']

'SN 2581'

In [138]:
#Flight Number
arrival_information_berlin['arrivals'][0]['number']

'SN 2581'

In [139]:
#Status
arrival_information_berlin['arrivals'][0]['status']

'Arrived'

In [140]:
#Departure Airport
arrival_information_berlin['arrivals'][0]['movement']['airport']['name']

'Brussels'

In [141]:
#Revised arrival time
arrival_information_berlin['arrivals'][0]['movement']['revisedTime']['local']

'2023-11-28 11:05+01:00'

In [142]:
#Scheduled arrival time
arrival_information_berlin['arrivals'][0]['movement']['scheduledTime']['local']

'2023-11-28 11:05+01:00'

In [155]:
#Aircraft
arrival_information_berlin['arrivals'][0]['aircraft']['model']

'Airbus A320'

## Creating a For Loop for the different airports and adapting the time

In [32]:
# ICAO codes for Berlin, Frankfurt, Munich, Cologne, Stuttgart, Leipzig.
# Only Berlin (EDDB) is active in this cell; the codes of the other five
# airports are kept in the commented-out remainder of the list below.
list_of_airports = ['EDDB']
#, 'EDDF', 'EDDM', 'EDDK', 'EDDS', 'EDDP']

In [33]:
# Start of the query window: the current time (naive, as returned by
# datetime.now()), rendered with minute precision.
current_datetime = datetime.now()
time_from = f"{current_datetime:%Y-%m-%d %H:%M}"
time_from

'2023-11-28 13:49'

In [34]:
# End of the query window: twelve hours after `current_datetime`, in the same
# text format as `time_from`.
time_to = (current_datetime + timedelta(hours=12)).strftime("%Y-%m-%d %H:%M")
time_to

'2023-11-29 01:49'

In [144]:
# Collect the arrivals of every airport in `list_of_airports` for the window
# `time_from` .. `time_to` and turn them into one DataFrame.
# The RapidAPI key is imported instead of being pasted into the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be rotated; presumably `API_key_aerobox` holds its replacement.
from gans_keys import API_key_aerobox

# Request settings are identical for all airports, so build them once.
querystring = {"withLeg":"false","withCancelled":"true","withCodeshared":"true","withLocation":"false"}
headers = {
    "X-RapidAPI-Key": API_key_aerobox,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"}

flight_data = []

for airport in list_of_airports:

    url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{airport}/{time_from}/{time_to}"

    response = requests.get(url, headers=headers, params=querystring, timeout=30)
    # Fail loudly on 4xx/5xx instead of a KeyError on 'arrivals' further down.
    response.raise_for_status()
    flight_json = response.json()

    for entry in flight_json['arrivals']:
        movement = entry['movement']

        flight_data.append({
            "ICAO": airport,
            "Flight Number": entry['number'],
            "Departure Airport": movement['airport']['name'],
            "Status": entry['status'],
            "Scheduled arrival date_time": movement['scheduledTime']['local'],
            # Not every entry carries a revised time; mark those with 'N/A'.
            "Revised arrival date_time": movement.get('revisedTime', {}).get('local', 'N/A')
        })

flight_df = pd.DataFrame(flight_data)
flight_df

Unnamed: 0,ICAO,Flight Number,Departure Airport,Status,Scheduled arrival date_time,Revised arrival date_time
0,EDDB,BA 982,London,Arrived,2023-11-28 13:25+01:00,2023-11-28 13:52+01:00
1,EDDB,LX 966,Zurich,Arrived,2023-11-28 14:05+01:00,2023-11-28 14:06+01:00
2,EDDB,EC 5366,Prishtina,Arrived,2023-11-28 13:55+01:00,2023-11-28 14:22+01:00
3,EDDB,U2 5366,Prishtina,Arrived,2023-11-28 13:55+01:00,2023-11-28 14:22+01:00
4,EDDB,LH 184,Frankfurt-am-Main,Arrived,2023-11-28 13:55+01:00,2023-11-28 14:25+01:00
...,...,...,...,...,...,...
111,EDDB,U2 5106,Málaga,Expected,2023-11-28 22:40+01:00,2023-11-28 22:40+01:00
112,EDDB,FR 9165,Treviso,Expected,2023-11-28 22:45+01:00,2023-11-28 22:45+01:00
113,EDDB,FR 176,London,Expected,2023-11-28 22:55+01:00,2023-11-28 22:55+01:00
114,EDDB,I2 3674,Madrid,Expected,2023-11-28 23:05+01:00,2023-11-28 23:05+01:00


In [None]:
# ICAO Codes for Berlin, 'Frankfurt', 'Munich', 'Cologne', 'Stuttgart', 'Leipzig'
#list_of_airports = ['EDDB', 'EDDF', 'EDDM', 'EDDK', 'EDDS', 'EDDP']

# Final Version with function

In [9]:
import pandas as pd
from datetime import datetime, date, timedelta
import requests
from pytz import timezone

In [10]:
list_of_airports = ['EDDB', 'EDDF']

In [1]:
def get_flight_data(list_of_airports):
    """Download tomorrow's arrivals for the given airports from AeroDataBox.

    "Tomorrow" is the day after the current date in the Europe/Berlin time
    zone. Each airport is queried in two half-day windows (00:00-11:59 and
    12:00-23:59) and the arrivals of both answers are collected.

    Parameters
    ----------
    list_of_airports : iterable of str
        ICAO codes of the arrival airports, e.g. ``['EDDB', 'EDDF']``.

    Returns
    -------
    pandas.DataFrame
        One row per arrival with the columns ICAO, Flight_Number,
        Departure_Airport, Flight_Status, Scheduled_arrival_date,
        Revised_arrival_date, Aircraft and Data_retrieved_on. Revised time
        and aircraft model are the string 'N/A' when the API omits them.
        The columns are present even when no arrivals were found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # All dependencies are imported locally so the function does not rely on
    # which notebook cells have been executed before it.
    from datetime import datetime, timedelta

    import pandas as pd
    import requests
    from pytz import timezone

    from gans_keys import API_key_aerobox

    columns = [
        "ICAO",
        "Flight_Number",
        "Departure_Airport",
        "Flight_Status",
        "Scheduled_arrival_date",
        "Revised_arrival_date",
        "Aircraft",
        "Data_retrieved_on",
    ]

    # Request settings are identical for every call, so build them once.
    querystring = {"withLeg":"false","withCancelled":"true","withCodeshared":"true","withLocation":"false"}
    headers = {
        "X-RapidAPI-Key": API_key_aerobox,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"}
    # NOTE(review): the day is split in two, presumably because the endpoint
    # limits the length of a single query window - confirm against the API docs.
    time_windows = [["00:00","11:59"],["12:00","23:59"]]

    flight_data = []

    today = datetime.now(timezone('Europe/Berlin')).date()
    tomorrow = today + timedelta(days=1)

    for airport in list_of_airports:

        for window_start, window_end in time_windows:

            url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{airport}/{tomorrow}T{window_start}/{tomorrow}T{window_end}"

            response = requests.get(url, headers=headers, params=querystring, timeout=30)
            # Fail loudly on 4xx/5xx instead of a KeyError on 'arrivals' below.
            response.raise_for_status()
            # An empty body (e.g. HTTP 204) has no JSON to parse: nothing to add.
            if not response.content:
                continue
            flight_json = response.json()

            for entry in flight_json.get('arrivals', []):
                movement = entry['movement']

                flight_data.append({
                    "ICAO": airport,
                    "Flight_Number": entry['number'],
                    "Departure_Airport": movement['airport']['name'],
                    "Flight_Status": entry['status'],
                    "Scheduled_arrival_date": movement['scheduledTime']['local'],
                    "Revised_arrival_date": movement.get('revisedTime', {}).get('local', 'N/A'),
                    "Aircraft": entry.get('aircraft', {}).get('model', 'N/A'),
                    "Data_retrieved_on": today
                })

    # Passing `columns` keeps the schema stable even when no rows were collected.
    flight_df = pd.DataFrame(flight_data, columns=columns)
    return flight_df


In [14]:
flight_df = get_flight_data(list_of_airports)
flight_df

Unnamed: 0,ICAO,Flight_Number,Departure_Airport,Flight_Status,Scheduled_arrival_date,Revised_arrival_date,Aircraft,Data_retrieved_on
0,EDDB,QR 79,Doha,Expected,2023-12-02 06:50+01:00,2023-12-02 06:50+01:00,Boeing 787-9,2023-12-01
1,EDDB,A3 3219,Riga,Expected,2023-12-02 07:55+01:00,2023-12-02 07:55+01:00,Airbus A220-300,2023-12-01
2,EDDB,BT 211,Riga,Expected,2023-12-02 07:55+01:00,2023-12-02 07:55+01:00,Airbus A220-300,2023-12-01
3,EDDB,IV 680,Prishtina,Expected,2023-12-02 07:50+01:00,2023-12-02 07:50+01:00,Boeing 737-400,2023-12-01
4,EDDB,JU 7659,Riga,Expected,2023-12-02 07:55+01:00,2023-12-02 07:55+01:00,Airbus A220-300,2023-12-01
...,...,...,...,...,...,...,...,...
2114,EDDF,TK 1597,Istanbul,Expected,2023-12-02 22:05+01:00,2023-12-02 22:05+01:00,Airbus A321 (sharklets),2023-12-01
2115,EDDF,TP 574,Lisbon,Expected,2023-12-02 22:40+01:00,2023-12-02 22:40+01:00,Airbus A320,2023-12-01
2116,EDDF,UA 6902,Istanbul,Expected,2023-12-02 22:05+01:00,2023-12-02 22:05+01:00,Airbus A321 (sharklets),2023-12-01
2117,EDDF,VY 5520,Madrid,Expected,2023-12-02 22:45+01:00,2023-12-02 22:45+01:00,Bombardier CRJX,2023-12-01


In [15]:
def clean_flight_data(flight_df):
    """Return a copy of `flight_df` with its three date columns parsed as datetimes.

    The input frame is left untouched, so the raw API strings stay available
    and the cell that calls this function can be re-run safely.

    Parameters
    ----------
    flight_df : pandas.DataFrame
        Frame with the columns 'Scheduled_arrival_date',
        'Revised_arrival_date' and 'Data_retrieved_on'.

    Returns
    -------
    pandas.DataFrame
        New frame in which the three columns are converted with
        `pd.to_datetime`. Unparseable revised arrival values (such as the
        'N/A' placeholder) become NaT; unparseable values in the other two
        columns raise.
    """
    cleaned_df = flight_df.copy()

    for column in ('Scheduled_arrival_date', 'Data_retrieved_on'):
        cleaned_df[column] = pd.to_datetime(cleaned_df[column])

    # errors='coerce' turns placeholder text into NaT instead of raising.
    cleaned_df['Revised_arrival_date'] = pd.to_datetime(cleaned_df['Revised_arrival_date'], errors='coerce')

    return cleaned_df

In [16]:
flight_df_final = clean_flight_data(flight_df)
flight_df_final

Unnamed: 0,ICAO,Flight_Number,Departure_Airport,Flight_Status,Scheduled_arrival_date,Revised_arrival_date,Aircraft,Data_retrieved_on
0,EDDB,QR 79,Doha,Expected,2023-12-02 06:50:00+01:00,2023-12-02 06:50:00+01:00,Boeing 787-9,2023-12-01
1,EDDB,A3 3219,Riga,Expected,2023-12-02 07:55:00+01:00,2023-12-02 07:55:00+01:00,Airbus A220-300,2023-12-01
2,EDDB,BT 211,Riga,Expected,2023-12-02 07:55:00+01:00,2023-12-02 07:55:00+01:00,Airbus A220-300,2023-12-01
3,EDDB,IV 680,Prishtina,Expected,2023-12-02 07:50:00+01:00,2023-12-02 07:50:00+01:00,Boeing 737-400,2023-12-01
4,EDDB,JU 7659,Riga,Expected,2023-12-02 07:55:00+01:00,2023-12-02 07:55:00+01:00,Airbus A220-300,2023-12-01
...,...,...,...,...,...,...,...,...
2114,EDDF,TK 1597,Istanbul,Expected,2023-12-02 22:05:00+01:00,2023-12-02 22:05:00+01:00,Airbus A321 (sharklets),2023-12-01
2115,EDDF,TP 574,Lisbon,Expected,2023-12-02 22:40:00+01:00,2023-12-02 22:40:00+01:00,Airbus A320,2023-12-01
2116,EDDF,UA 6902,Istanbul,Expected,2023-12-02 22:05:00+01:00,2023-12-02 22:05:00+01:00,Airbus A321 (sharklets),2023-12-01
2117,EDDF,VY 5520,Madrid,Expected,2023-12-02 22:45:00+01:00,2023-12-02 22:45:00+01:00,Bombardier CRJX,2023-12-01


In [17]:
flight_df_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2119 entries, 0 to 2118
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype                    
---  ------                  --------------  -----                    
 0   ICAO                    2119 non-null   object                   
 1   Flight_Number           2119 non-null   object                   
 2   Departure_Airport       2119 non-null   object                   
 3   Flight_Status           2119 non-null   object                   
 4   Scheduled_arrival_date  2119 non-null   datetime64[ns, UTC+01:00]
 5   Revised_arrival_date    2112 non-null   datetime64[ns, UTC+01:00]
 6   Aircraft                2119 non-null   object                   
 7   Data_retrieved_on       2119 non-null   datetime64[ns]           
dtypes: datetime64[ns, UTC+01:00](2), datetime64[ns](1), object(5)
memory usage: 132.6+ KB


# Export Data to MySQL

In [18]:
import sqlalchemy
from gans_con import con

In [19]:
# Append the cleaned flights to the `flights` table through the connection
# imported from `gans_con`; the DataFrame index is not written.
flight_df_final.to_sql(name='flights', con=con, if_exists='append', index=False)

2119

# Export Data to MySQL on AWS cloud server

In [20]:
import sqlalchemy
from gans_con_aws import con

In [24]:
flight_df_2 = flight_df_final

In [25]:
flight_df_2['ICAO'].value_counts()

ICAO
EDDF    1918
EDDB     201
Name: count, dtype: int64

In [26]:
# Append the same rows to the `flights` table through the connection imported
# from `gans_con_aws`; the DataFrame index is not written.
flight_df_2.to_sql(name='flights', con=con, if_exists='append', index=False)

2119

# Old version

In [176]:
def get_flight_data(list_of_airports):
    """Earlier draft: fetch the arrivals of the next 12 hours for the given airports.

    The query window starts at the current time (naive `datetime.now()`) and
    ends twelve hours later.

    NOTE(review): this definition has the same name as the function in the
    "Final Version with function" section; whichever cell is executed last
    wins. Consider deleting or renaming this draft.

    Parameters
    ----------
    list_of_airports : iterable of str
        ICAO codes of the arrival airports.

    Returns
    -------
    pandas.DataFrame
        One row per arrival with the columns 'Arrival ICAO', 'Flight Number',
        'Departure Airport', 'Scheduled arrival date_time', 'Aircraft' and
        'Data retrieved on'. 'Aircraft' is 'N/A' when the API omits the model.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # Keeps the RapidAPI key out of the notebook source.
    # NOTE(review): the key that used to be hard-coded here has been exposed
    # and should be rotated.
    from gans_keys import API_key_aerobox

    flight_data = []

    current_datetime = datetime.now()
    time_from = current_datetime.strftime("%Y-%m-%d %H:%M")
    time_to = (current_datetime + timedelta(hours=12)).strftime("%Y-%m-%d %H:%M")

    # Request settings are identical for all airports, so build them once.
    querystring = {"withLeg":"false","withCancelled":"true","withCodeshared":"true","withLocation":"false"}
    headers = {
        "X-RapidAPI-Key": API_key_aerobox,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"}

    for airport in list_of_airports:

        url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{airport}/{time_from}/{time_to}"

        response = requests.get(url, headers=headers, params=querystring, timeout=30)
        # Fail loudly on 4xx/5xx instead of a KeyError on 'arrivals' below.
        response.raise_for_status()
        flight_json = response.json()

        for entry in flight_json['arrivals']:

            flight_data.append({
                "Arrival ICAO": airport,
                "Flight Number": entry['number'],
                "Departure Airport": entry['movement']['airport']['name'],
                "Scheduled arrival date_time": entry['movement']['scheduledTime']['local'],
                # Some entries carry no aircraft model; use the same 'N/A'
                # placeholder as the final version instead of raising KeyError.
                "Aircraft": entry.get('aircraft', {}).get('model', 'N/A'),
                "Data retrieved on": time_from
            })

    flight_df = pd.DataFrame(flight_data)
    return flight_df

## Change data types and split up dates and time

In [62]:
# Parse both arrival columns into datetimes. Scheduled values must all parse
# ('raise' is the pd.to_datetime default); unparseable revised values become NaT.
for datetime_column, on_error in (
    ('Scheduled arrival date_time', 'raise'),
    ('Revised arrival date_time', 'coerce'),
):
    flight_df[datetime_column] = pd.to_datetime(flight_df[datetime_column], errors=on_error)

In [63]:
flight_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113 entries, 0 to 112
Data columns (total 5 columns):
 #   Column                       Non-Null Count  Dtype                    
---  ------                       --------------  -----                    
 0   Flight Number                113 non-null    object                   
 1   Departure Airport            113 non-null    object                   
 2   Status                       113 non-null    object                   
 3   Scheduled arrival date_time  113 non-null    datetime64[ns, UTC+01:00]
 4   Revised arrival date_time    111 non-null    datetime64[ns, UTC+01:00]
dtypes: datetime64[ns, UTC+01:00](2), object(3)
memory usage: 4.5+ KB


In [64]:
# Split the scheduled arrival timestamp into separate date and time columns.
scheduled_arrival = flight_df['Scheduled arrival date_time'].dt
flight_df['Scheduled arrival date'] = scheduled_arrival.date
flight_df['Scheduled arrival time'] = scheduled_arrival.time

In [65]:
# Split the revised arrival timestamp into separate date and time columns.
revised_arrival = flight_df['Revised arrival date_time'].dt
flight_df['Revised arrival date'] = revised_arrival.date
flight_df['Revised arrival time'] = revised_arrival.time

In [66]:
flight_df

Unnamed: 0,Flight Number,Departure Airport,Status,Scheduled arrival date_time,Revised arrival date_time,Scheduled arrival date,Scheduled arrival time,Revised arrival date,Revised arrival time
0,EC 5148,Paris,Delayed,2023-11-28 13:25:00+01:00,2023-11-28 14:37:00+01:00,2023-11-28,13:25:00,2023-11-28,14:37:00
1,EC 5366,Prishtina,Delayed,2023-11-28 13:55:00+01:00,2023-11-28 14:22:00+01:00,2023-11-28,13:55:00,2023-11-28,14:22:00
2,LH 184,Frankfurt-am-Main,Delayed,2023-11-28 13:55:00+01:00,2023-11-28 14:28:00+01:00,2023-11-28,13:55:00,2023-11-28,14:28:00
3,BA 982,London,Arrived,2023-11-28 13:25:00+01:00,2023-11-28 13:51:00+01:00,2023-11-28,13:25:00,2023-11-28,13:51:00
4,U2 5148,Paris,Delayed,2023-11-28 13:25:00+01:00,2023-11-28 14:37:00+01:00,2023-11-28,13:25:00,2023-11-28,14:37:00
...,...,...,...,...,...,...,...,...,...
108,U2 5108,Barcelona,Expected,2023-11-28 22:30:00+01:00,2023-11-28 22:30:00+01:00,2023-11-28,22:30:00,2023-11-28,22:30:00
109,U2 5112,Gran Canaria Island,Expected,2023-11-28 22:30:00+01:00,2023-11-28 22:30:00+01:00,2023-11-28,22:30:00,2023-11-28,22:30:00
110,U2 8631,London,Expected,2023-11-28 22:35:00+01:00,2023-11-28 22:35:00+01:00,2023-11-28,22:35:00,2023-11-28,22:35:00
111,I2 3674,Madrid,Expected,2023-11-28 23:05:00+01:00,2023-11-28 23:05:00+01:00,2023-11-28,23:05:00,2023-11-28,23:05:00


In [67]:
# Remove the combined date_time columns now that date and time are stored
# separately. errors='ignore' makes the cell safe to re-run: `flight_df` is
# rebound here, so on a second run the columns are already gone.
flight_df = flight_df.drop(
    columns=['Scheduled arrival date_time', 'Revised arrival date_time'],
    errors='ignore',
)

In [68]:
flight_df

Unnamed: 0,Flight Number,Departure Airport,Status,Scheduled arrival date,Scheduled arrival time,Revised arrival date,Revised arrival time
0,EC 5148,Paris,Delayed,2023-11-28,13:25:00,2023-11-28,14:37:00
1,EC 5366,Prishtina,Delayed,2023-11-28,13:55:00,2023-11-28,14:22:00
2,LH 184,Frankfurt-am-Main,Delayed,2023-11-28,13:55:00,2023-11-28,14:28:00
3,BA 982,London,Arrived,2023-11-28,13:25:00,2023-11-28,13:51:00
4,U2 5148,Paris,Delayed,2023-11-28,13:25:00,2023-11-28,14:37:00
...,...,...,...,...,...,...,...
108,U2 5108,Barcelona,Expected,2023-11-28,22:30:00,2023-11-28,22:30:00
109,U2 5112,Gran Canaria Island,Expected,2023-11-28,22:30:00,2023-11-28,22:30:00
110,U2 8631,London,Expected,2023-11-28,22:35:00,2023-11-28,22:35:00
111,I2 3674,Madrid,Expected,2023-11-28,23:05:00,2023-11-28,23:05:00


In [73]:
# Convert the two date columns to pandas datetimes.
for date_column in ('Scheduled arrival date', 'Revised arrival date'):
    flight_df[date_column] = pd.to_datetime(flight_df[date_column])

In [74]:
flight_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113 entries, 0 to 112
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Flight Number           113 non-null    object        
 1   Departure Airport       113 non-null    object        
 2   Status                  113 non-null    object        
 3   Scheduled arrival date  113 non-null    datetime64[ns]
 4   Scheduled arrival time  113 non-null    object        
 5   Revised arrival date    111 non-null    datetime64[ns]
 6   Revised arrival time    111 non-null    object        
dtypes: datetime64[ns](2), object(5)
memory usage: 6.3+ KB
