In [19]:
#Importing all libraries for later use. 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Apply the 'fivethirtyeight' style sheet first; the rcParams overrides further
# down are set after it so they are applied on top of the style.
plt.style.use('fivethirtyeight')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Increase default figure and font sizes for easier viewing.
plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['font.size'] = 14

In [20]:
# Read in the terminal/transit passenger data (same loading code as the previous session).
# The path is relative to the directory the notebook is run from.
tt_pax_data = '../data/terminal_transit_pax_data.csv'
df= pd.read_csv(tt_pax_data)

# Preview the first five rows to check the load worked.
df.head()


# NOTE: the *_pax_percent columns contain the literal string 'Null' in some rows; section 1 deals with these.

Unnamed: 0,rundate,this_period,last_period,reporting_airport_group_name,reporting_airport_name,total_pax_this_period,total_pax_last_period,total_pax_percent,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_percent,transit_pax_this_period,transit_pax_last_period,transit_pax_percent
0,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,ABERDEEN,32964,239815,-86.254404,32964,239815,-86.254404,0,0,Null
1,7/24/2020 11:08:59 AM,202004,201904,Non UK Reporting Airports,ALDERNEY,120,4578,-97.378768,120,4578,-97.378768,0,0,Null
2,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BARRA,69,1412,-95.113314,69,1412,-95.113314,0,0,Null
3,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST CITY (GEORGE BEST),2503,211050,-98.814025,2503,211050,-98.814025,0,0,Null
4,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST INTERNATIONAL,0,542166,Null,0,542166,Null,0,0,Null


## 1) Data prep/clean
We can clean the data in a similar way to what we saw yesterday. The cell below replaces all of the 'Null' values in the specified columns with -100.

In [21]:
# The three percentage columns that can hold the placeholder string 'Null'
null_cols = [
    'total_pax_percent',
    'terminal_pax_percent',
    'transit_pax_percent',
]
# Swap every 'Null' entry in just those columns for -100 and write the result back
df[null_cols] = df[null_cols].replace(to_replace='Null', value=-100)
df

Unnamed: 0,rundate,this_period,last_period,reporting_airport_group_name,reporting_airport_name,total_pax_this_period,total_pax_last_period,total_pax_percent,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_percent,transit_pax_this_period,transit_pax_last_period,transit_pax_percent
0,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,ABERDEEN,32964,239815,-86.254404,32964,239815,-86.254404,0,0,-100.0
1,7/24/2020 11:08:59 AM,202004,201904,Non UK Reporting Airports,ALDERNEY,120,4578,-97.378768,120,4578,-97.378768,0,0,-100.0
2,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BARRA,69,1412,-95.113314,69,1412,-95.113314,0,0,-100.0
3,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST CITY (GEORGE BEST),2503,211050,-98.814025,2503,211050,-98.814025,0,0,-100.0
4,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST INTERNATIONAL,0,542166,-100.0,0,542166,-100.0,0,0,-100.0
5,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BENBECULA,152,3030,-94.983498,42,3030,-98.613861,110,0,-100.0
6,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BIGGIN HILL,30,252,-88.095238,30,252,-88.095238,0,0,-100.0
7,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BIRMINGHAM,4768,1004085,-99.52514,4768,1003699,-99.524957,0,386,-100.0
8,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BLACKPOOL,650,1745,-62.750716,650,1745,-62.750716,0,0,-100.0
9,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BOURNEMOUTH,0,69079,-100.0,0,69079,-100.0,0,0,-100.0


### 2) Change the datatypes appropriately 

Change the datatypes of total_pax_percent, terminal_pax_percent, transit_pax_percent into floats 

In [22]:
# Inspect the column dtypes: the three *_pax_percent columns are still 'object' at this point.
df.dtypes

rundate                         object
this_period                      int64
last_period                      int64
reporting_airport_group_name    object
reporting_airport_name          object
total_pax_this_period            int64
total_pax_last_period            int64
total_pax_percent               object
terminal_pax_this_period         int64
terminal_pax_last_period         int64
terminal_pax_percent            object
transit_pax_this_period          int64
transit_pax_last_period          int64
transit_pax_percent             object
dtype: object

In [23]:
# Build a {column name: target dtype} dictionary: the two airport name columns
# become pandas strings and the three percentage columns become 64-bit floats.
df_newdatatypes = {
    **dict.fromkeys(['reporting_airport_group_name', 'reporting_airport_name'], 'string'),
    **dict.fromkeys(['total_pax_percent', 'terminal_pax_percent', 'transit_pax_percent'], 'float64'),
}
# astype accepts the dictionary and converts only the listed columns
df = df.astype(df_newdatatypes)
# Check the result: dtypes lists the type held by every column
df.dtypes

rundate                          object
this_period                       int64
last_period                       int64
reporting_airport_group_name     string
reporting_airport_name           string
total_pax_this_period             int64
total_pax_last_period             int64
total_pax_percent               float64
terminal_pax_this_period          int64
terminal_pax_last_period          int64
terminal_pax_percent            float64
transit_pax_this_period           int64
transit_pax_last_period           int64
transit_pax_percent             float64
dtype: object

### 2.1) [Optional] Round your values 
You can choose to round your values to 2 decimal places.

In [37]:
# create a list of the columns you want to round
columns_list =['total_pax_percent','terminal_pax_percent','transit_pax_percent']
# use df[columns_list].round(2) to round to 2 decimal places (round(3) would keep 3 instead)
df[columns_list] = df[columns_list].round(2)
df

Unnamed: 0,rundate,this_period,last_period,reporting_airport_group_name,reporting_airport_name,total_pax_this_period,total_pax_last_period,total_pax_percent,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_percent,transit_pax_this_period,transit_pax_last_period,transit_pax_percent
0,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,ABERDEEN,32964,239815,-86.25,32964,239815,-86.25,0,0,-100.0
1,7/24/2020 11:08:59 AM,202004,201904,Non UK Reporting Airports,ALDERNEY,120,4578,-97.38,120,4578,-97.38,0,0,-100.0
2,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BARRA,69,1412,-95.11,69,1412,-95.11,0,0,-100.0
3,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST CITY (GEORGE BEST),2503,211050,-98.81,2503,211050,-98.81,0,0,-100.0
4,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST INTERNATIONAL,0,542166,-100.0,0,542166,-100.0,0,0,-100.0
5,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BENBECULA,152,3030,-94.98,42,3030,-98.61,110,0,-100.0
6,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BIGGIN HILL,30,252,-88.1,30,252,-88.1,0,0,-100.0
7,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BIRMINGHAM,4768,1004085,-99.52,4768,1003699,-99.52,0,386,-100.0
8,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BLACKPOOL,650,1745,-62.75,650,1745,-62.75,0,0,-100.0
9,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BOURNEMOUTH,0,69079,-100.0,0,69079,-100.0,0,0,-100.0


### 3) Display the descriptive statistics
Use the 'describe()' function to describe the dataframe

In [36]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# Note: the -100 placeholders inserted in section 1 are included in these figures.
df.describe()

Unnamed: 0,this_period,last_period,total_pax_this_period,total_pax_last_period,total_pax_percent,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_percent,transit_pax_this_period,transit_pax_last_period,transit_pax_percent
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,202004.0,201904.0,6596.294118,489972.8,-96.421961,6584.470588,489778.3,-96.545098,11.823529,194.509804,-68.422353
std,0.0,0.0,29228.058859,1162485.0,8.849,29230.212867,1162475.0,8.842857,47.827066,560.432382,221.648525
min,202004.0,201904.0,0.0,2.0,-100.0,0.0,2.0,-100.0,0.0,0.0,-100.0
25%,202004.0,201904.0,4.5,8040.5,-99.99,4.0,8040.5,-99.99,0.0,0.0,-100.0
50%,202004.0,201904.0,166.0,69606.0,-99.36,166.0,69079.0,-99.36,0.0,0.0,-100.0
75%,202004.0,201904.0,1183.0,393357.0,-97.365,1044.0,393275.5,-97.675,0.0,29.5,-100.0
max,202004.0,201904.0,206600.0,6798206.0,-49.84,206600.0,6798206.0,-49.84,285.0,2916.0,1483.33


### 4) Displaying descriptive stats for reporting airports
Use Groupby to display descriptive statistics for the different reporting airport group names


In [42]:
pd.set_option("display.max_columns", None) # Remove the limit on how many columns are displayed (the option stays set for later cells too)

# describe() per airport group: one row per group, with the eight summary
# statistics repeated for each numeric column.
df.groupby('reporting_airport_group_name').describe()

Unnamed: 0_level_0,this_period,this_period,this_period,this_period,this_period,this_period,this_period,this_period,last_period,last_period,last_period,last_period,last_period,last_period,last_period,last_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent,terminal_pax_this_period,terminal_pax_this_period,terminal_pax_this_period,terminal_pax_this_period,terminal_pax_this_period,terminal_pax_this_period,terminal_pax_this_period,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_last_period,terminal_pax_last_period,terminal_pax_last_period,terminal_pax_last_period,terminal_pax_last_period,terminal_pax_last_period,terminal_pax_last_period,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,transit_pax_this_period,transit_pax_this_period,transit_pax_this_period,transit_pax_this_period,transit_pax_this_period,transit_pax_this_period,transit_pax_this_period,transit_pax_this_period,transit_pax_last_period,transit_pax_last_period,transit_pax_last_period,transit_pax_last_period,transit_pax_last_period,transit_pax_last_period,transit_pax_last_period,transit_pax_last_period,transit_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
reporting_airport_group_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2
London Area Airports,6.0,202004.0,0.0,202004.0,202004.0,202004.0,202004.0,202004.0,6.0,201904.0,0.0,201904.0,201904.0,201904.0,201904.0,201904.0,6.0,40680.0,81526.496351,0.0,2464.5,11578.0,14067.5,206600.0,6.0,2534240.0,2485142.0,187479.0,704361.0,1971916.5,3488536.5,6798206.0,6.0,-99.193333,1.144931,-100.0,-99.88,-99.595,-99.175,-96.96,6.0,40675.166667,81528.415119,0.0,2464.5,11563.5,14060.25,206600.0,6.0,2534240.0,2485142.0,187479.0,704361.0,1971916.5,3488536.5,6798206.0,6.0,-99.193333,1.144931,-100.0,-99.88,-99.595,-99.175,-96.96,6.0,4.833333,11.8392,0.0,0.0,0.0,0.0,29.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,-100.0,0.0,-100.0,-100.0,-100.0,-100.0,-100.0
Non UK Reporting Airports,4.0,202004.0,0.0,202004.0,202004.0,202004.0,202004.0,202004.0,4.0,201904.0,0.0,201904.0,201904.0,201904.0,201904.0,201904.0,4.0,444.0,317.757769,120.0,210.75,427.0,660.25,802.0,4.0,74625.75,61348.69,4578.0,53349.0,69842.5,91119.25,154240.0,4.0,-98.91,1.044414,-99.66,-99.525,-99.3,-98.685,-97.38,4.0,444.0,317.757769,120.0,210.75,427.0,660.25,802.0,4.0,73335.5,60155.63,4578.0,51665.25,68720.0,90390.25,151324.0,4.0,-98.9,1.040673,-99.66,-99.5175,-99.28,-98.6625,-97.38,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,1290.25,1514.826805,0.0,0.0,1122.5,2412.75,2916.0,4.0,-100.0,0.0,-100.0,-100.0,-100.0,-100.0,-100.0
Other UK Airports,41.0,202004.0,0.0,202004.0,202004.0,202004.0,202004.0,202004.0,41.0,201904.0,0.0,201904.0,201904.0,201904.0,201904.0,201904.0,41.0,2208.658537,6264.818771,0.0,0.0,135.0,916.0,32964.0,41.0,231333.4,451961.8,2.0,3243.0,38154.0,239815.0,2388591.0,41.0,-95.773659,9.769003,-100.0,-100.0,-99.2,-96.02,-49.84,41.0,2194.658537,6268.150902,0.0,0.0,135.0,909.0,32964.0,41.0,231217.4,451752.5,2.0,3202.0,38154.0,239815.0,2387051.0,41.0,-95.927805,9.772562,-100.0,-100.0,-99.2,-97.39,-49.84,41.0,14.0,53.062228,0.0,0.0,0.0,0.0,285.0,41.0,116.073171,297.4175,0.0,0.0,0.0,41.0,1540.0,41.0,-60.720488,247.184136,-100.0,-100.0,-100.0,-100.0,1483.33


In [47]:
# Select several columns from a groupby with a *list* of names (note the double brackets).
# Indexing with a bare tuple of names, e.g. gb['a', 'b'], is deprecated and
# raises an error in pandas 2.x.
df.groupby('reporting_airport_group_name')[['total_pax_this_period', 'total_pax_last_period']].describe()

  df.groupby('reporting_airport_group_name')['total_pax_this_period','total_pax_last_period'].describe()


Unnamed: 0_level_0,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_this_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period,total_pax_last_period
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
reporting_airport_group_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
London Area Airports,6.0,40680.0,81526.496351,0.0,2464.5,11578.0,14067.5,206600.0,6.0,2534240.0,2485142.0,187479.0,704361.0,1971916.5,3488536.5,6798206.0
Non UK Reporting Airports,4.0,444.0,317.757769,120.0,210.75,427.0,660.25,802.0,4.0,74625.75,61348.69,4578.0,53349.0,69842.5,91119.25,154240.0
Other UK Airports,41.0,2208.658537,6264.818771,0.0,0.0,135.0,916.0,32964.0,41.0,231333.4,451961.8,2.0,3243.0,38154.0,239815.0,2388591.0


In [None]:
# Keep the columns to summarise in a named list and pass it to the groupby selection
period_total_cols = ['total_pax_this_period', 'total_pax_last_period']
df.groupby('reporting_airport_group_name')[period_total_cols].describe()

### 4.1 ) Displaying descriptive stats for reporting airports only showing passenger data (Terminal percent, transit percent and total percent)

In [65]:
# Reminder: the general groupby pattern is
#     df.groupby(<group column>)[<feature columns>].aggregate(<functions>)
# The feature columns are easiest to pass as a list of column names.
l = ['terminal_pax_percent','transit_pax_percent','total_pax_percent']

# Name the aggregations as strings so pandas uses its own groupby implementations
# (for example 'std' and 'var' use pandas' default ddof=1). Passing NumPy callables
# such as np.mean or np.std is flagged with a FutureWarning in recent pandas
# releases, and their results may change once pandas calls them directly.
df.groupby('reporting_airport_group_name')[l].aggregate(['mean', 'sum', 'size', 'std', 'var'])

# Common groupby aggregations, for reference:
#mean()	Compute mean of groups
#sum()	Compute sum of group values
#size()	Compute group sizes
#count()	Compute count of group
#std()	Standard deviation of groups
#var()	Compute variance of groups
#sem()	Standard error of the mean of groups
#describe()	Generates descriptive statistics
#first()	Compute first of group values
#last()	Compute last of group values
#nth()	Take nth value, or a subset if n is a list
#min()	Compute min of group values
#max()	Compute max of group values

Unnamed: 0_level_0,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,terminal_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent,transit_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent,total_pax_percent
Unnamed: 0_level_1,mean,sum,size,std,var,mean,sum,size,std,var,mean,sum,size,std,var
reporting_airport_group_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
London Area Airports,-99.193333,-595.16,6.0,1.144931,1.310867,-100.0,-600.0,6.0,0.0,0.0,-99.193333,-595.16,6.0,1.144931,1.310867
Non UK Reporting Airports,-98.9,-395.6,4.0,1.040673,1.083,-100.0,-400.0,4.0,0.0,0.0,-98.91,-395.64,4.0,1.044414,1.0908
Other UK Airports,-95.927805,-3933.04,41.0,9.772562,95.502968,-60.720488,-2489.54,41.0,247.184136,61099.997335,-95.773659,-3926.72,41.0,9.769003,95.433424


### 5) Display the correlations across the dataframe

### 5.1) Show the correlation for pax data 

pax data is what I will use to refer to ['terminal_pax_percent','transit_pax_percent','total_pax_percent']

You may also find other correlations that exist in the dataframe.

In [73]:
# Select the three pax percentage columns by passing a list of column names (note the double brackets).
pax_data = df[['terminal_pax_percent','transit_pax_percent','total_pax_percent']]

### 5.2) Show the correlation for pax data grouped by different reporting airport groups
Pax data = ['terminal_pax_percent','transit_pax_percent','total_pax_percent']

In [74]:
# 5.1: correlation between the pax percentage columns across all airports.
overall_corr = pax_data.corr()

# 5.2: the same correlation computed separately for each reporting airport group.
# pax_data does not contain the group column, so group the full dataframe and
# select the pax columns (the same groupby pattern as in section 4.1).
# A column that is constant within a group (zero standard deviation) gives NaN.
grouped_corr = df.groupby('reporting_airport_group_name')[list(pax_data.columns)].corr()

# A cell only renders its last expression, so display both results explicitly.
display(overall_corr, grouped_corr)

Unnamed: 0,terminal_pax_percent,transit_pax_percent,total_pax_percent
terminal_pax_percent,1.0,0.022742,0.998021
transit_pax_percent,0.022742,1.0,0.045398
total_pax_percent,0.998021,0.045398,1.0


### 6) Find the Covariance of the dataframe only using the Pax data 

In [75]:
# Pairwise covariance matrix of the three pax percentage columns.
pax_data.cov()

Unnamed: 0,terminal_pax_percent,transit_pax_percent,total_pax_percent
terminal_pax_percent,78.196125,44.574024,78.095558
transit_pax_percent,44.574024,49128.068782,89.041943
total_pax_percent,78.095558,89.041943,78.304804


# Question to think about
What does the covariance between the total passenger data and the terminal passenger data tell you? 

Hint: The covariance is positive...

# Missing Data 
You'll have a chance to remove some data from the dataframe (Even though we aren't getting into the Machine Learning aspect on the course *yet*)

In [76]:
# Preview the first five rows of the cleaned dataframe before dropping anything.
df.head()


Unnamed: 0,rundate,this_period,last_period,reporting_airport_group_name,reporting_airport_name,total_pax_this_period,total_pax_last_period,total_pax_percent,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_percent,transit_pax_this_period,transit_pax_last_period,transit_pax_percent
0,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,ABERDEEN,32964,239815,-86.25,32964,239815,-86.25,0,0,-100.0
1,7/24/2020 11:08:59 AM,202004,201904,Non UK Reporting Airports,ALDERNEY,120,4578,-97.38,120,4578,-97.38,0,0,-100.0
2,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BARRA,69,1412,-95.11,69,1412,-95.11,0,0,-100.0
3,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST CITY (GEORGE BEST),2503,211050,-98.81,2503,211050,-98.81,0,0,-100.0
4,7/24/2020 11:08:59 AM,202004,201904,Other UK Airports,BELFAST INTERNATIONAL,0,542166,-100.0,0,542166,-100.0,0,0,-100.0


### 7) Drop off the dates for this_period and last_period 

In [85]:
# Names of the two period columns to remove
drop_cols = ['this_period', 'last_period']
# drop() hands back a new dataframe, so df itself keeps these columns
df_drop = df.drop(drop_cols, axis='columns')
df_drop

Unnamed: 0,rundate,reporting_airport_group_name,reporting_airport_name,total_pax_this_period,total_pax_last_period,total_pax_percent,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_percent,transit_pax_this_period,transit_pax_last_period,transit_pax_percent
0,7/24/2020 11:08:59 AM,Other UK Airports,ABERDEEN,32964,239815,-86.25,32964,239815,-86.25,0,0,-100.0
1,7/24/2020 11:08:59 AM,Non UK Reporting Airports,ALDERNEY,120,4578,-97.38,120,4578,-97.38,0,0,-100.0
2,7/24/2020 11:08:59 AM,Other UK Airports,BARRA,69,1412,-95.11,69,1412,-95.11,0,0,-100.0
3,7/24/2020 11:08:59 AM,Other UK Airports,BELFAST CITY (GEORGE BEST),2503,211050,-98.81,2503,211050,-98.81,0,0,-100.0
4,7/24/2020 11:08:59 AM,Other UK Airports,BELFAST INTERNATIONAL,0,542166,-100.0,0,542166,-100.0,0,0,-100.0
5,7/24/2020 11:08:59 AM,Other UK Airports,BENBECULA,152,3030,-94.98,42,3030,-98.61,110,0,-100.0
6,7/24/2020 11:08:59 AM,Other UK Airports,BIGGIN HILL,30,252,-88.1,30,252,-88.1,0,0,-100.0
7,7/24/2020 11:08:59 AM,Other UK Airports,BIRMINGHAM,4768,1004085,-99.52,4768,1003699,-99.52,0,386,-100.0
8,7/24/2020 11:08:59 AM,Other UK Airports,BLACKPOOL,650,1745,-62.75,650,1745,-62.75,0,0,-100.0
9,7/24/2020 11:08:59 AM,Other UK Airports,BOURNEMOUTH,0,69079,-100.0,0,69079,-100.0,0,0,-100.0


### 8) Remove first 3 rows from the dataframe 
This task is purely for practice, this dataset does not require any records removed from it. 

In [86]:
# Index labels of the rows to remove (the first three records)
drop_rows = [0, 1, 2]

# Drop those rows by index label; the result is stored as a new dataframe
df_drop2 = df_drop.drop(index=drop_rows)
df_drop2

Unnamed: 0,rundate,reporting_airport_group_name,reporting_airport_name,total_pax_this_period,total_pax_last_period,total_pax_percent,terminal_pax_this_period,terminal_pax_last_period,terminal_pax_percent,transit_pax_this_period,transit_pax_last_period,transit_pax_percent
3,7/24/2020 11:08:59 AM,Other UK Airports,BELFAST CITY (GEORGE BEST),2503,211050,-98.81,2503,211050,-98.81,0,0,-100.0
4,7/24/2020 11:08:59 AM,Other UK Airports,BELFAST INTERNATIONAL,0,542166,-100.0,0,542166,-100.0,0,0,-100.0
5,7/24/2020 11:08:59 AM,Other UK Airports,BENBECULA,152,3030,-94.98,42,3030,-98.61,110,0,-100.0
6,7/24/2020 11:08:59 AM,Other UK Airports,BIGGIN HILL,30,252,-88.1,30,252,-88.1,0,0,-100.0
7,7/24/2020 11:08:59 AM,Other UK Airports,BIRMINGHAM,4768,1004085,-99.52,4768,1003699,-99.52,0,386,-100.0
8,7/24/2020 11:08:59 AM,Other UK Airports,BLACKPOOL,650,1745,-62.75,650,1745,-62.75,0,0,-100.0
9,7/24/2020 11:08:59 AM,Other UK Airports,BOURNEMOUTH,0,69079,-100.0,0,69079,-100.0,0,0,-100.0
10,7/24/2020 11:08:59 AM,Other UK Airports,BRISTOL,357,714889,-99.95,357,714889,-99.95,0,0,-100.0
11,7/24/2020 11:08:59 AM,Other UK Airports,CAMPBELTOWN,8,646,-98.76,7,646,-98.92,1,0,-100.0
12,7/24/2020 11:08:59 AM,Other UK Airports,CARDIFF WALES,0,126274,-100.0,0,126274,-100.0,0,0,-100.0
