# Ian's Data Discovery

## All the tables
Just to get all the tables first

In [1]:
import cadspy
import numpy as np
import pandas as pd
import datetime as dt

In [2]:
icw = cadspy.DatabaseConnection(system='ICW',user='u243696')

In [3]:
# diplay all rows and cols when using 'dataframe'.head() or 'dataframe'.tail()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Lounge Eligibility Data

In [4]:
query = """

sel * from LDB_SBOX_OR.HACKATHON_OPS_LOUNGE_ELIGIBILITY

"""

df_lounge_eligibility = icw.queryToDataframe(query)

In [5]:
df_lounge_eligibility.head(5)

Unnamed: 0,Skew_Id,OPERATING_AIRLINE_CD,OPERATING_FLT_NO,GMT_UPLIFT_DT,UPLIFT_STN_CD,DISCHARGE_STN_CD,BOOKED_CABIN_CD,TRAVEL_CABIN_CD,BA_PAX_TIER,ONEWORLD_TIER,Lounge_eligibility_tier,pax
0,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Gold,EMER,Tier 2,3
1,2023-05-17920,BA,920,2023-05-17,LHR,STR,C,C,,,Tier 3,13
2,2019-04-15770,BA,770,2019-04-15,LHR,OSL,M,M,,,Not eligible,75
3,2019-09-23348,BA,348,2019-09-23,LHR,NCE,M,M,,EMER,Tier 2,6
4,2019-09-16155,BA,155,2019-09-16,LHR,CAI,J,J,,,Tier 3,23


### Flight Info

In [6]:
query = """

select * from LDB_SBOX_OR.HACKATHON_OPS_FLIGHT_INFO

"""

df_flight_info = icw.queryToDataframe(query)

In [7]:
df_flight_info.head(5)

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE
0,2019-09-04 06:45:00,2019-09-04 06:43:00,BA,1434,LHR,5,EDI,EDI,32Q,N6,LHREDI
1,2019-05-18 12:15:00,2019-05-18 12:16:00,BA,632,LHR,5,ATH,ATH,320,R3,LHRATH
2,2019-07-19 15:10:00,2019-07-19 15:11:00,BA,718,LHR,5,ZRH,ZRH,319,A4,LHRZRH
3,2019-04-18 18:25:00,2019-04-18 18:40:00,BA,257,LHR,5,DEL,DEL,777,W7,LHRDEL
4,2019-08-11 12:20:00,2019-08-11 12:16:00,BA,460,LHR,5,MAD,MAD,777,V7,LHRMAD


### Station Code Decode

In [8]:
query = """

select * from LDB_SBOX_OR.HACKATHON_OPS_COUNTRY_DECODE

"""

df_country = icw.queryToDataframe(query)

In [9]:
df_country.head(5)

Unnamed: 0,ROUTE,COUNTRY_CD,COUNTRY_NM,CORP_GEOG_CTRY_GRP_NM,CORP_GEOG_CONTINENT_NM
0,LHRINV,GB,United Kingdom and Northern Ireland,UK,UK
1,LHRSVO,RU,Russia in Europe,EASTERN EUROPE,EUROPE EXC UK
2,LHREZE,AR,Argentina,SOUTH AMERICA,SOUTH AMERICA INC CARIBBEAN
3,LHRLUX,LU,Luxembourg,BENELUX,EUROPE EXC UK
4,LHRCAI,EG,Egypt,NORTH AFRICA,AFRICA


### Aircraft Type

In [10]:
query = """

select * from LDB_SBOX_OR.HACKATHON_OPS_AC_TYPE

"""

df_acft_typ = icw.queryToDataframe(query)

In [11]:
df_acft_typ

Unnamed: 0,IATA_AC_TYP_CD,ACT_AC_TYP_CD,WB_NB_CAT,FIRST_SEATS_QTY,CLUB_SEATS_QTY,PREM_ECONOMY_SEATS_QTY,ECONOMY_SEATS_QTY
0,320,A3,NB,0,24,0,132
1,777,K7,WB,0,48,24,203
2,32N,N3,NB,0,20,0,150
3,321,M6,NB,0,23,0,131
4,32Q,N6,NB,0,32,0,172
5,789,L8,WB,8,42,39,127
6,32A,H3,NB,0,20,0,150
7,319,A4,NB,0,16,0,119
8,788,B8,WB,0,35,25,154
9,77W,G7,WB,14,56,44,183


## Basic Pre-processing

In [12]:
def headers_and_first_row(df):
    '''
    print headers and first row of a df to deal with data types
    '''
    
    headers = df.columns
    first_row = []

    for col in headers:
        first_row.append(df[col][0])
    
    dictionary = dict( zip( headers, first_row) )

    return dictionary

# Define a function that fine all string fields and remove all blank spaces
def data_cleaning_string(df):
    # Get names of all fields in a dataframe
    fields = df.columns
    # loop for all fields, if data type is string then remove blank spaces
    for f in fields:
        if type(df[f][0]) == str:
            df[f] = df[f].str.strip()
    return df

In [13]:
tables = [df_lounge_eligibility,df_flight_info,df_country,df_acft_typ]

for i,t in enumerate(tables):
    tables[i] = data_cleaning_string(t)

tables = [df_lounge_eligibility,df_flight_info,df_country,df_acft_typ]

[headers_and_first_row(t) for t in tables]

[{'Skew_Id': '2019-08-101414',
  'OPERATING_AIRLINE_CD': 'BA',
  'OPERATING_FLT_NO': 1414,
  'GMT_UPLIFT_DT': datetime.date(2019, 8, 10),
  'UPLIFT_STN_CD': 'LHR',
  'DISCHARGE_STN_CD': 'BHD',
  'BOOKED_CABIN_CD': 'M',
  'TRAVEL_CABIN_CD': 'M',
  'BA_PAX_TIER': 'Gold',
  'ONEWORLD_TIER': 'EMER',
  'Lounge_eligibility_tier': 'Tier 2',
  'pax': 3},
 {'GMT_PLND_DEP_TS': Timestamp('2019-09-04 06:45:00'),
  'GMT_ACT_DEP_TS': Timestamp('2019-09-04 06:43:00'),
  'OPG_ALN_CD': 'BA',
  'OPG_FLT_NO': 1434,
  'ACT_DEP_STN_CD': 'LHR',
  'ACT_DEP_TML_CD': '5',
  'PLND_ARR_STN_CD': 'EDI',
  'ACT_ARR_STN_CD': 'EDI',
  'IATA_AC_TYP_CD': '32Q',
  'ACT_AC_TYP_CD': 'N6',
  'ROUTE': 'LHREDI'},
 {'ROUTE': 'LHRINV',
  'COUNTRY_CD': 'GB',
  'COUNTRY_NM': 'United Kingdom and Northern Ireland',
  'CORP_GEOG_CTRY_GRP_NM': 'UK',
  'CORP_GEOG_CONTINENT_NM': 'UK'},
 {'IATA_AC_TYP_CD': '320',
  'ACT_AC_TYP_CD': 'A3',
  'WB_NB_CAT': 'NB',
  'FIRST_SEATS_QTY': 0,
  'CLUB_SEATS_QTY': 24,
  'PREM_ECONOMY_SEATS_QTY': 0,

In [14]:
# Create date and time fields for the time stamp fields
df_flight_info['GMT_PLND_DEP_DATE'] = pd.to_datetime(df_flight_info['GMT_PLND_DEP_TS']).dt.date
df_flight_info['GMT_PLND_DEP_TIME'] = pd.to_datetime(df_flight_info['GMT_PLND_DEP_TS']).dt.time
df_flight_info['GMT_ACT_DEP_DATE'] = pd.to_datetime(df_flight_info['GMT_ACT_DEP_TS']).dt.date
df_flight_info['GMT_ACT_DEP_TIME'] = pd.to_datetime(df_flight_info['GMT_ACT_DEP_TS']).dt.time

# Remove timestaps due to redundancy
# df_flight_info = df_flight_info.drop(columns= ['GMT_PLND_DEP_TS','GMT_ACT_DEP_TS'])

# Check
headers_and_first_row(df_flight_info)

{'GMT_PLND_DEP_TS': Timestamp('2019-09-04 06:45:00'),
 'GMT_ACT_DEP_TS': Timestamp('2019-09-04 06:43:00'),
 'OPG_ALN_CD': 'BA',
 'OPG_FLT_NO': 1434,
 'ACT_DEP_STN_CD': 'LHR',
 'ACT_DEP_TML_CD': '5',
 'PLND_ARR_STN_CD': 'EDI',
 'ACT_ARR_STN_CD': 'EDI',
 'IATA_AC_TYP_CD': '32Q',
 'ACT_AC_TYP_CD': 'N6',
 'ROUTE': 'LHREDI',
 'GMT_PLND_DEP_DATE': datetime.date(2019, 9, 4),
 'GMT_PLND_DEP_TIME': datetime.time(6, 45),
 'GMT_ACT_DEP_DATE': datetime.date(2019, 9, 4),
 'GMT_ACT_DEP_TIME': datetime.time(6, 43)}

# Checkings
Basically just to check duplicated data

## 1. Loungue Eligibility
Norally each Skew ID should only have one destination

- We first group the lpungue eligibility by Skew ID and flight number, showing the discharge station code and total passenegr

In [15]:
df_lounge_by_skew = df_lounge_eligibility.groupby(['Skew_Id','OPERATING_FLT_NO','DISCHARGE_STN_CD'], as_index = False).agg(pax_count = ('pax','sum'))

df_lounge_by_skew.head(5)

Unnamed: 0,Skew_Id,OPERATING_FLT_NO,DISCHARGE_STN_CD,pax_count
0,2019-03-31103,103,YYC,197
1,2019-03-31105,105,DXB,209
2,2019-03-31107,107,DXB,201
3,2019-03-31109,109,DXB,217
4,2019-03-3111,11,SIN,443


- We then find all duplicated Skew ID's, and sorted by Skew ID for readability

In [16]:
duplicated_flag = df_lounge_by_skew.duplicated('Skew_Id',keep = False)

df_lounge_by_skew_duplicated = df_lounge_by_skew[duplicated_flag].sort_values(['Skew_Id','OPERATING_FLT_NO'])

df_lounge_by_skew_duplicated.head(20)

Unnamed: 0,Skew_Id,OPERATING_FLT_NO,DISCHARGE_STN_CD,pax_count
38,2019-03-311448,1448,ABZ,1
39,2019-03-311448,1448,EDI,139
55,2019-03-3115,15,SIN,165
56,2019-03-3115,15,SYD,114
165,2019-03-31462,462,MAD,173
166,2019-03-31462,462,PHX,15
348,2019-04-011314,1314,ABZ,119
349,2019-04-011314,1314,BHD,1
395,2019-04-0115,15,SIN,97
396,2019-04-0115,15,SYD,114


- Check Flight 1448 on 2019-3-31

In [17]:
df_flight_info[(df_flight_info['OPG_FLT_NO'] == 1448) & (df_flight_info['GMT_PLND_DEP_DATE'] == dt.date(2019,3,31))]

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME
62735,2019-03-31 14:20:00,2019-03-31 14:24:00,BA,1448,LHR,5,EDI,EDI,320,A3,LHREDI,2019-03-31,14:20:00,2019-03-31,14:24:00


Has the flight ever flew to Aberdeen?

- Check Flight 15 on 2019-3-31

In [18]:
df_flight_info[(df_flight_info['OPG_FLT_NO'] == 15) & (df_flight_info['GMT_PLND_DEP_DATE'] == dt.date(2019,3,31))]

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME
377,2019-03-31 20:30:00,2019-03-31 20:36:00,BA,15,LHR,5,SIN,SIN,77W,G7,LHRSIN,2019-03-31,20:30:00,2019-03-31,20:36:00


No record to Sydney? Let's check for any flight to Sydney

In [19]:
df_flight_info[(df_flight_info['PLND_ARR_STN_CD']=="SYD") | (df_flight_info['ACT_ARR_STN_CD']=="SYD")]

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME


No record to sydney, is it because already included into the LHR-SIN route?

## 2. Flight Info

Check if there is any duplicated flights

In [20]:
duplicated_flag = df_flight_info.duplicated(subset=['OPG_FLT_NO','GMT_PLND_DEP_TS'],keep=False)
df_flight_info_duplicated = df_flight_info[duplicated_flag].sort_values(['OPG_FLT_NO','GMT_PLND_DEP_TS','GMT_ACT_DEP_TS'])
df_flight_info_duplicated = df_flight_info_duplicated[['OPG_FLT_NO','GMT_PLND_DEP_TS','GMT_ACT_DEP_TS','ROUTE','PLND_ARR_STN_CD','ACT_ARR_STN_CD']]

df_flight_info_duplicated.head(10)

Unnamed: 0,OPG_FLT_NO,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,ROUTE,PLND_ARR_STN_CD,ACT_ARR_STN_CD
37730,119,2019-07-03 13:15:00,2019-07-03 14:01:00,LHRBLR,BLR,LHR
38306,119,2019-07-03 13:15:00,2019-07-03 20:28:00,LHRBLR,BLR,BLR
543,223,2019-04-29 13:45:00,2019-04-29 13:44:00,LHRBNA,BNA,LHR
1119,223,2019-04-29 13:45:00,2019-04-29 16:37:00,LHRBNA,BNA,BNA
58380,249,2019-07-17 11:20:00,2019-07-17 11:18:00,LHRGIG,GIG,LHR
58879,249,2019-07-17 11:20:00,2019-07-17 18:13:00,LHRGIG,GIG,GIG
28002,251,2019-07-05 21:00:00,2019-07-05 21:01:00,LHRSCL,SCL,LHR
27426,251,2019-07-05 21:00:00,2019-07-06 16:28:00,LHRSCL,SCL,SCL
32766,289,2019-09-29 13:45:00,2019-09-29 14:09:00,LHRPHX,PHX,LHR
33342,289,2019-09-29 13:45:00,2019-09-29 19:09:00,LHRPHX,PHX,PHX


It seesm like the flightd are just being rescheduled.

Let's also check if the LHRLHR route actually exists

In [21]:
df_country[df_country['ROUTE'] == 'LHRLHR']

Unnamed: 0,ROUTE,COUNTRY_CD,COUNTRY_NM,CORP_GEOG_CTRY_GRP_NM,CORP_GEOG_CONTINENT_NM


Of course why would it exist. BA 31 is due to Typhoon Hanna (Haikui 海葵).

## Data Cleaning

### Flight info
- First we sort out all flights by planned departure time and flight number

In [26]:
df_flight_info = df_flight_info.sort_values(['GMT_PLND_DEP_TS','OPG_FLT_NO','GMT_ACT_DEP_TS'])

df_flight_info.head(10)

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME
5726,2019-03-31 05:15:00,2019-03-31 05:08:00,BA,472,LHR,3,BCN,BCN,320,A3,LHRBCN,2019-03-31,05:15:00,2019-03-31,05:08:00
32201,2019-03-31 05:20:00,2019-03-31 05:16:00,BA,456,LHR,5,MAD,MAD,32Q,N6,LHRMAD,2019-03-31,05:20:00,2019-03-31,05:16:00
34780,2019-03-31 05:35:00,2019-03-31 05:36:00,BA,360,LHR,3,LYS,LYS,320,A3,LHRLYS,2019-03-31,05:35:00,2019-03-31,05:36:00
63777,2019-03-31 05:40:00,2019-03-31 05:40:00,BA,428,LHR,5,AMS,AMS,32A,H3,LHRAMS,2019-03-31,05:40:00,2019-03-31,05:40:00
8668,2019-03-31 05:45:00,2019-03-31 06:41:00,BA,638,LHR,5,ATH,ATH,321,V6,LHRATH,2019-03-31,05:45:00,2019-03-31,06:41:00
7516,2019-03-31 05:45:00,2019-03-31 05:44:00,BA,724,LHR,5,GVA,GVA,319,A4,LHRGVA,2019-03-31,05:45:00,2019-03-31,05:44:00
8092,2019-03-31 05:45:00,2019-03-31 05:48:00,BA,948,LHR,5,MUC,MUC,321,V6,LHRMUC,2019-03-31,05:45:00,2019-03-31,05:48:00
43242,2019-03-31 05:50:00,2019-03-31 05:47:00,BA,538,LHR,5,BLQ,BLQ,321,V6,LHRBLQ,2019-03-31,05:50:00,2019-03-31,05:47:00
19536,2019-03-31 05:55:00,2019-03-31 05:51:00,BA,812,LHR,5,CPH,CPH,32N,N3,LHRCPH,2019-03-31,05:55:00,2019-03-31,05:51:00
40906,2019-03-31 06:00:00,2019-03-31 05:56:00,BA,552,LHR,5,FCO,FCO,320,A3,LHRFCO,2019-03-31,06:00:00,2019-03-31,05:56:00


We would like to keep the first occourance of the duplicates

In [27]:
df_flight_info_unique = df_flight_info.drop_duplicates(subset = ['OPG_FLT_NO','GMT_PLND_DEP_TS'],keep = "first")
df_flight_info_unique.head(10)
# df_flight_info_unique.shape,df_flight_info.shape

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME
5726,2019-03-31 05:15:00,2019-03-31 05:08:00,BA,472,LHR,3,BCN,BCN,320,A3,LHRBCN,2019-03-31,05:15:00,2019-03-31,05:08:00
32201,2019-03-31 05:20:00,2019-03-31 05:16:00,BA,456,LHR,5,MAD,MAD,32Q,N6,LHRMAD,2019-03-31,05:20:00,2019-03-31,05:16:00
34780,2019-03-31 05:35:00,2019-03-31 05:36:00,BA,360,LHR,3,LYS,LYS,320,A3,LHRLYS,2019-03-31,05:35:00,2019-03-31,05:36:00
63777,2019-03-31 05:40:00,2019-03-31 05:40:00,BA,428,LHR,5,AMS,AMS,32A,H3,LHRAMS,2019-03-31,05:40:00,2019-03-31,05:40:00
8668,2019-03-31 05:45:00,2019-03-31 06:41:00,BA,638,LHR,5,ATH,ATH,321,V6,LHRATH,2019-03-31,05:45:00,2019-03-31,06:41:00
7516,2019-03-31 05:45:00,2019-03-31 05:44:00,BA,724,LHR,5,GVA,GVA,319,A4,LHRGVA,2019-03-31,05:45:00,2019-03-31,05:44:00
8092,2019-03-31 05:45:00,2019-03-31 05:48:00,BA,948,LHR,5,MUC,MUC,321,V6,LHRMUC,2019-03-31,05:45:00,2019-03-31,05:48:00
43242,2019-03-31 05:50:00,2019-03-31 05:47:00,BA,538,LHR,5,BLQ,BLQ,321,V6,LHRBLQ,2019-03-31,05:50:00,2019-03-31,05:47:00
19536,2019-03-31 05:55:00,2019-03-31 05:51:00,BA,812,LHR,5,CPH,CPH,32N,N3,LHRCPH,2019-03-31,05:55:00,2019-03-31,05:51:00
40906,2019-03-31 06:00:00,2019-03-31 05:56:00,BA,552,LHR,5,FCO,FCO,320,A3,LHRFCO,2019-03-31,06:00:00,2019-03-31,05:56:00


Check if there's duplicated, here we keep the LHRLHR route, but we are changeing the route to the original planned one

In [28]:
# Removed all duplicates which are not the first one, which means the non 'LHRLHR' will be removed
duplicated_flag = df_flight_info_unique.duplicated(subset=['OPG_FLT_NO','GMT_PLND_DEP_TS'],keep=False)

# df_flight_info_duplicated = df_flight_info_unique[duplicated_flag].sort_values(['OPG_FLT_NO','GMT_PLND_DEP_TS','GMT_ACT_DEP_TS'])
# df_flight_info_duplicated = df_flight_info_duplicated[['OPG_FLT_NO','GMT_PLND_DEP_TS','GMT_ACT_DEP_TS','ROUTE','PLND_ARR_STN_CD','ACT_ARR_STN_CD']]

df_flight_info_unique[df_flight_info_unique['ROUTE'] == "LHRLHR"].head(5)

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME


Merge set the new route to be 'LHR{'PLND_ARR_STN_CD'}'

In [29]:
df_flight_info_unique['ROUTE'] = df_flight_info_unique['ACT_DEP_STN_CD'] + df_flight_info_unique['PLND_ARR_STN_CD']

df_flight_info_unique.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME
5726,2019-03-31 05:15:00,2019-03-31 05:08:00,BA,472,LHR,3,BCN,BCN,320,A3,LHRBCN,2019-03-31,05:15:00,2019-03-31,05:08:00
32201,2019-03-31 05:20:00,2019-03-31 05:16:00,BA,456,LHR,5,MAD,MAD,32Q,N6,LHRMAD,2019-03-31,05:20:00,2019-03-31,05:16:00
34780,2019-03-31 05:35:00,2019-03-31 05:36:00,BA,360,LHR,3,LYS,LYS,320,A3,LHRLYS,2019-03-31,05:35:00,2019-03-31,05:36:00
63777,2019-03-31 05:40:00,2019-03-31 05:40:00,BA,428,LHR,5,AMS,AMS,32A,H3,LHRAMS,2019-03-31,05:40:00,2019-03-31,05:40:00
8668,2019-03-31 05:45:00,2019-03-31 06:41:00,BA,638,LHR,5,ATH,ATH,321,V6,LHRATH,2019-03-31,05:45:00,2019-03-31,06:41:00


In [30]:
df_flight_info_unique[df_flight_info_unique['ROUTE'] == "LHRLHR"].head(5)

Unnamed: 0,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME


In [31]:
df_lounge_eligibility[df_lounge_eligibility['Skew_Id'] == "2023-06-01800"]

Unnamed: 0,Skew_Id,OPERATING_AIRLINE_CD,OPERATING_FLT_NO,GMT_UPLIFT_DT,UPLIFT_STN_CD,DISCHARGE_STN_CD,BOOKED_CABIN_CD,TRAVEL_CABIN_CD,BA_PAX_TIER,ONEWORLD_TIER,Lounge_eligibility_tier,pax
470261,2023-06-01800,BA,800,2023-06-01,LHR,KEF,C,C,,SAPP,Tier 3,2
470837,2023-06-01800,BA,800,2023-06-01,LHR,KEF,C,C,Gold,EMER,Tier 2,2
471413,2023-06-01800,BA,800,2023-06-01,LHR,KEF,C,C,Premier,EMER,Tier 1,1
471989,2023-06-01800,BA,800,2023-06-01,LHR,KEF,C,C,Silver,SAPP,Tier 3,3
472565,2023-06-01800,BA,800,2023-06-01,LHR,KEF,M,M,,EMER,Tier 2,2
473141,2023-06-01800,BA,800,2023-06-01,LHR,KEF,C,C,,,Tier 3,19
473717,2023-06-01800,BA,800,2023-06-01,LHR,KEF,M,M,Gold,EMER,Tier 2,2
474293,2023-06-01800,BA,800,2023-06-01,LHR,KEF,C,C,,EMER,Tier 2,3
474869,2023-06-01800,BA,800,2023-06-01,LHR,KEF,M,M,,,Not eligible,74


Replace the table

In [32]:
df_flight_info = df_flight_info_unique

## Joining Tables

### `df_lounge_eligibility` and `df_flight_info`
- Get the info of the columns

In [33]:
[headers_and_first_row(t) for t in [df_lounge_eligibility,df_flight_info]]

[{'Skew_Id': '2019-08-101414',
  'OPERATING_AIRLINE_CD': 'BA',
  'OPERATING_FLT_NO': 1414,
  'GMT_UPLIFT_DT': datetime.date(2019, 8, 10),
  'UPLIFT_STN_CD': 'LHR',
  'DISCHARGE_STN_CD': 'BHD',
  'BOOKED_CABIN_CD': 'M',
  'TRAVEL_CABIN_CD': 'M',
  'BA_PAX_TIER': 'Gold',
  'ONEWORLD_TIER': 'EMER',
  'Lounge_eligibility_tier': 'Tier 2',
  'pax': 3},
 {'GMT_PLND_DEP_TS': Timestamp('2019-09-04 06:45:00'),
  'GMT_ACT_DEP_TS': Timestamp('2019-09-04 06:43:00'),
  'OPG_ALN_CD': 'BA',
  'OPG_FLT_NO': 1434,
  'ACT_DEP_STN_CD': 'LHR',
  'ACT_DEP_TML_CD': '5',
  'PLND_ARR_STN_CD': 'EDI',
  'ACT_ARR_STN_CD': 'EDI',
  'IATA_AC_TYP_CD': '32Q',
  'ACT_AC_TYP_CD': 'N6',
  'ROUTE': 'LHREDI',
  'GMT_PLND_DEP_DATE': datetime.date(2019, 9, 4),
  'GMT_PLND_DEP_TIME': datetime.time(6, 45),
  'GMT_ACT_DEP_DATE': datetime.date(2019, 9, 4),
  'GMT_ACT_DEP_TIME': datetime.time(6, 43)}]

Joints:
- `df_lounge_eligibility`: `'OPERATING_AIRLINE_CD','OPERATING_FLT_NO','GMT_UPLIFT_DT'`
- `df_flight_info`: `'OPG_ALN_CD','OPG_FLT_NO','GMT_PLND_DEP_DATE'`
-Inner join, as we want to make sure only valid flighs are joined

In [34]:
# joining df_lounge_eligibility and df_flight_info

df_lounge_elig_flight_info = pd.merge(df_lounge_eligibility,# left table
                                     df_flight_info, # right table
                                     left_on = ['OPERATING_AIRLINE_CD','OPERATING_FLT_NO','GMT_UPLIFT_DT'], # left on? e.g. which columns from the left table are you joining on to?
                                     right_on = ['OPG_ALN_CD','OPG_FLT_NO','GMT_PLND_DEP_DATE'] , # right on? # left on? e.g. which columns from the right table are you joining on to?
                                     how = "inner" # how? e.g. left, right, inner,etc
                                     )

# df_lounge_elig_flight_info = df_lounge_elig_flight_info.drop_duplicates()

df_lounge_elig_flight_info.head(5)

Unnamed: 0,Skew_Id,OPERATING_AIRLINE_CD,OPERATING_FLT_NO,GMT_UPLIFT_DT,UPLIFT_STN_CD,DISCHARGE_STN_CD,BOOKED_CABIN_CD,TRAVEL_CABIN_CD,BA_PAX_TIER,ONEWORLD_TIER,Lounge_eligibility_tier,pax,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME
0,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Gold,EMER,Tier 2,3,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00
1,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,,,Not eligible,114,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00
2,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,EMER,Tier 2,3,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00
3,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Silver,SAPP,Tier 3,7,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00
4,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,SAPP,Tier 3,1,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00


In [35]:
df_lounge_elig_flight_info.shape

(860516, 27)

### `df_country`

In [36]:
[headers_and_first_row(d) for d in [df_lounge_elig_flight_info,df_country]]

[{'Skew_Id': '2019-08-101414',
  'OPERATING_AIRLINE_CD': 'BA',
  'OPERATING_FLT_NO': 1414,
  'GMT_UPLIFT_DT': datetime.date(2019, 8, 10),
  'UPLIFT_STN_CD': 'LHR',
  'DISCHARGE_STN_CD': 'BHD',
  'BOOKED_CABIN_CD': 'M',
  'TRAVEL_CABIN_CD': 'M',
  'BA_PAX_TIER': 'Gold',
  'ONEWORLD_TIER': 'EMER',
  'Lounge_eligibility_tier': 'Tier 2',
  'pax': 3,
  'GMT_PLND_DEP_TS': Timestamp('2019-08-10 06:10:00'),
  'GMT_ACT_DEP_TS': Timestamp('2019-08-10 06:11:00'),
  'OPG_ALN_CD': 'BA',
  'OPG_FLT_NO': 1414,
  'ACT_DEP_STN_CD': 'LHR',
  'ACT_DEP_TML_CD': '5',
  'PLND_ARR_STN_CD': 'BHD',
  'ACT_ARR_STN_CD': 'BHD',
  'IATA_AC_TYP_CD': '320',
  'ACT_AC_TYP_CD': 'A3',
  'ROUTE': 'LHRBHD',
  'GMT_PLND_DEP_DATE': datetime.date(2019, 8, 10),
  'GMT_PLND_DEP_TIME': datetime.time(6, 10),
  'GMT_ACT_DEP_DATE': datetime.date(2019, 8, 10),
  'GMT_ACT_DEP_TIME': datetime.time(6, 11)},
 {'ROUTE': 'LHRINV',
  'COUNTRY_CD': 'GB',
  'COUNTRY_NM': 'United Kingdom and Northern Ireland',
  'CORP_GEOG_CTRY_GRP_NM': 'UK

Joints:
`df_lounge_elig_flight_info_country`:`'ROUTE'`
`df_df_country`:`ROUTE`

In [37]:
df_lounge_elig_flight_info_country = pd.merge(df_lounge_elig_flight_info,# left table
                                     df_country, # right table
                                     left_on = ['ROUTE'], # left on? e.g. which columns from the left table are you joining on to?
                                     right_on = ['ROUTE'] , # right on? # left on? e.g. which columns from the right table are you joining on to?
                                     how = "inner" # how? e.g. left, right, inner,etc
                                     )

# df_lounge_elig_flight_info = df_lounge_elig_flight_info.drop_duplicates()

df_lounge_elig_flight_info_country.head(5)

Unnamed: 0,Skew_Id,OPERATING_AIRLINE_CD,OPERATING_FLT_NO,GMT_UPLIFT_DT,UPLIFT_STN_CD,DISCHARGE_STN_CD,BOOKED_CABIN_CD,TRAVEL_CABIN_CD,BA_PAX_TIER,ONEWORLD_TIER,Lounge_eligibility_tier,pax,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME,COUNTRY_CD,COUNTRY_NM,CORP_GEOG_CTRY_GRP_NM,CORP_GEOG_CONTINENT_NM
0,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Gold,EMER,Tier 2,3,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK
1,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,,,Not eligible,114,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK
2,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,EMER,Tier 2,3,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK
3,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Silver,SAPP,Tier 3,7,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK
4,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,SAPP,Tier 3,1,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK


In [40]:
df_lounge_elig_flight_info_country.shape

(860516, 31)

### `df_acft_typ`

In [41]:
[headers_and_first_row(d) for d in [df_lounge_elig_flight_info_country,df_acft_typ]]

[{'Skew_Id': '2019-08-101414',
  'OPERATING_AIRLINE_CD': 'BA',
  'OPERATING_FLT_NO': 1414,
  'GMT_UPLIFT_DT': datetime.date(2019, 8, 10),
  'UPLIFT_STN_CD': 'LHR',
  'DISCHARGE_STN_CD': 'BHD',
  'BOOKED_CABIN_CD': 'M',
  'TRAVEL_CABIN_CD': 'M',
  'BA_PAX_TIER': 'Gold',
  'ONEWORLD_TIER': 'EMER',
  'Lounge_eligibility_tier': 'Tier 2',
  'pax': 3,
  'GMT_PLND_DEP_TS': Timestamp('2019-08-10 06:10:00'),
  'GMT_ACT_DEP_TS': Timestamp('2019-08-10 06:11:00'),
  'OPG_ALN_CD': 'BA',
  'OPG_FLT_NO': 1414,
  'ACT_DEP_STN_CD': 'LHR',
  'ACT_DEP_TML_CD': '5',
  'PLND_ARR_STN_CD': 'BHD',
  'ACT_ARR_STN_CD': 'BHD',
  'IATA_AC_TYP_CD': '320',
  'ACT_AC_TYP_CD': 'A3',
  'ROUTE': 'LHRBHD',
  'GMT_PLND_DEP_DATE': datetime.date(2019, 8, 10),
  'GMT_PLND_DEP_TIME': datetime.time(6, 10),
  'GMT_ACT_DEP_DATE': datetime.date(2019, 8, 10),
  'GMT_ACT_DEP_TIME': datetime.time(6, 11),
  'COUNTRY_CD': 'GB',
  'COUNTRY_NM': 'United Kingdom and Northern Ireland',
  'CORP_GEOG_CTRY_GRP_NM': 'UK',
  'CORP_GEOG_CONTIN

Joints:

`df_lounge_elig_flight_info_country`:`'IATA_AC_TYP_CD','ACT_AC_TYP_CD'`

`df_acft_typ`:`'IATA_AC_TYP_CD','ACT_AC_TYP_CD'`

In [44]:
df_lounge_elig_flight_info_country_acft_typ = pd.merge(
    df_lounge_elig_flight_info_country,
    df_acft_typ,
    left_on = ['IATA_AC_TYP_CD','ACT_AC_TYP_CD'],
    right_on = ['IATA_AC_TYP_CD','ACT_AC_TYP_CD'],
    how = 'left'
)

df_lounge_elig_flight_info_country_acft_typ.head(10)

Unnamed: 0,Skew_Id,OPERATING_AIRLINE_CD,OPERATING_FLT_NO,GMT_UPLIFT_DT,UPLIFT_STN_CD,DISCHARGE_STN_CD,BOOKED_CABIN_CD,TRAVEL_CABIN_CD,BA_PAX_TIER,ONEWORLD_TIER,Lounge_eligibility_tier,pax,GMT_PLND_DEP_TS,GMT_ACT_DEP_TS,OPG_ALN_CD,OPG_FLT_NO,ACT_DEP_STN_CD,ACT_DEP_TML_CD,PLND_ARR_STN_CD,ACT_ARR_STN_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,GMT_ACT_DEP_DATE,GMT_ACT_DEP_TIME,COUNTRY_CD,COUNTRY_NM,CORP_GEOG_CTRY_GRP_NM,CORP_GEOG_CONTINENT_NM,WB_NB_CAT,FIRST_SEATS_QTY,CLUB_SEATS_QTY,PREM_ECONOMY_SEATS_QTY,ECONOMY_SEATS_QTY
0,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Gold,EMER,Tier 2,3,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
1,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,,,Not eligible,114,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
2,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,EMER,Tier 2,3,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
3,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Silver,SAPP,Tier 3,7,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
4,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,SAPP,Tier 3,1,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
5,2019-08-101414,BA,1414,2019-08-10,LHR,MXP,M,M,,,Not eligible,7,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
6,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,Gold,EMER,Tier 2,4,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
7,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,M,Gold,EMER,Tier 2,1,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
8,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,Silver,SAPP,Tier 3,4,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
9,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,,Tier 3,10,2019-08-10 06:10:00,2019-08-10 06:11:00,BA,1414,LHR,5,BHD,BHD,320,A3,LHRBHD,2019-08-10,06:10:00,2019-08-10,06:11:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0


In [45]:
df_lounge_elig_flight_info_country_acft_typ.shape

(860516, 36)

## More Cleaning
- Let's check the headers and first row as some of them are noise(?)

In [46]:
headers_and_first_row(df_lounge_elig_flight_info_country_acft_typ)

{'Skew_Id': '2019-08-101414',
 'OPERATING_AIRLINE_CD': 'BA',
 'OPERATING_FLT_NO': 1414,
 'GMT_UPLIFT_DT': datetime.date(2019, 8, 10),
 'UPLIFT_STN_CD': 'LHR',
 'DISCHARGE_STN_CD': 'BHD',
 'BOOKED_CABIN_CD': 'M',
 'TRAVEL_CABIN_CD': 'M',
 'BA_PAX_TIER': 'Gold',
 'ONEWORLD_TIER': 'EMER',
 'Lounge_eligibility_tier': 'Tier 2',
 'pax': 3,
 'GMT_PLND_DEP_TS': Timestamp('2019-08-10 06:10:00'),
 'GMT_ACT_DEP_TS': Timestamp('2019-08-10 06:11:00'),
 'OPG_ALN_CD': 'BA',
 'OPG_FLT_NO': 1414,
 'ACT_DEP_STN_CD': 'LHR',
 'ACT_DEP_TML_CD': '5',
 'PLND_ARR_STN_CD': 'BHD',
 'ACT_ARR_STN_CD': 'BHD',
 'IATA_AC_TYP_CD': '320',
 'ACT_AC_TYP_CD': 'A3',
 'ROUTE': 'LHRBHD',
 'GMT_PLND_DEP_DATE': datetime.date(2019, 8, 10),
 'GMT_PLND_DEP_TIME': datetime.time(6, 10),
 'GMT_ACT_DEP_DATE': datetime.date(2019, 8, 10),
 'GMT_ACT_DEP_TIME': datetime.time(6, 11),
 'COUNTRY_CD': 'GB',
 'COUNTRY_NM': 'United Kingdom and Northern Ireland',
 'CORP_GEOG_CTRY_GRP_NM': 'UK',
 'CORP_GEOG_CONTINENT_NM': 'UK',
 'WB_NB_CAT': 'N

Cloumns to drop:
`'GMT_PLND_DEP_TS','GMT_ACT_DEP_TS','OPG_ALN_CD','OPG_FLT_NO','ACT_DEP_STN_CD','PLND_ARR_STN_CD','ACT_ARR_STN_CD','GMT_ACT_DEP_DATE','GMT_ACT_DEP_TIME'`

In [47]:
to_drop = ['GMT_PLND_DEP_TS','GMT_ACT_DEP_TS','OPG_ALN_CD','OPG_FLT_NO','ACT_DEP_STN_CD','PLND_ARR_STN_CD','ACT_ARR_STN_CD','GMT_ACT_DEP_DATE','GMT_ACT_DEP_TIME']

df_lounge_elig_flight_info_country_acft_typ = df_lounge_elig_flight_info_country_acft_typ.drop(columns = to_drop,)

df_lounge_elig_flight_info_country_acft_typ.head(5)

Unnamed: 0,Skew_Id,OPERATING_AIRLINE_CD,OPERATING_FLT_NO,GMT_UPLIFT_DT,UPLIFT_STN_CD,DISCHARGE_STN_CD,BOOKED_CABIN_CD,TRAVEL_CABIN_CD,BA_PAX_TIER,ONEWORLD_TIER,Lounge_eligibility_tier,pax,ACT_DEP_TML_CD,IATA_AC_TYP_CD,ACT_AC_TYP_CD,ROUTE,GMT_PLND_DEP_DATE,GMT_PLND_DEP_TIME,COUNTRY_CD,COUNTRY_NM,CORP_GEOG_CTRY_GRP_NM,CORP_GEOG_CONTINENT_NM,WB_NB_CAT,FIRST_SEATS_QTY,CLUB_SEATS_QTY,PREM_ECONOMY_SEATS_QTY,ECONOMY_SEATS_QTY
0,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Gold,EMER,Tier 2,3,5,320,A3,LHRBHD,2019-08-10,06:10:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
1,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,,,Not eligible,114,5,320,A3,LHRBHD,2019-08-10,06:10:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
2,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,EMER,Tier 2,3,5,320,A3,LHRBHD,2019-08-10,06:10:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
3,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,M,M,Silver,SAPP,Tier 3,7,5,320,A3,LHRBHD,2019-08-10,06:10:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
4,2019-08-101414,BA,1414,2019-08-10,LHR,BHD,C,C,,SAPP,Tier 3,1,5,320,A3,LHRBHD,2019-08-10,06:10:00,GB,United Kingdom and Northern Ireland,UK,UK,NB,0.0,24.0,0.0,132.0
