In [None]:
# Import all libraries for later use.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')
%matplotlib inline

# Increase default figure and font sizes for easier viewing.
plt.rcParams['figure.figsize'] = (8, 8)
plt.rcParams['font.size'] = 14

In [None]:
# Load the terminal/transit passenger data from its CSV file.
tt_pax_data = '../data/terminal_transit_pax_data.csv'
df = pd.read_csv(filepath_or_buffer=tt_pax_data)

# Preview the first five rows to confirm the load worked.
df.head(n=5)


#Using the same code as yesterday to read the data in 

## 1) Data prep/clean
We can clean the data in a similar way to what we saw yesterday. The cell below replaces every 'Null' value in the specified columns with -100.

In [None]:
# Percentage columns whose 'Null' placeholders are swapped for a sentinel value.
columns = ['total_pax_percent', 'terminal_pax_percent', 'transit_pax_percent']

# Assign the result back to the same columns; the original author noted that
# using 'inplace' gave memory warnings.
df[columns] = df[columns].replace(to_replace='Null', value=-100)

# NOTE: transit_pax_percent contains some values that should be null and some
# that are genuinely -100%, so after this step the two cases look identical.

df

### 2) Change the datatypes appropriately 

Change the datatypes of total_pax_percent, terminal_pax_percent, transit_pax_percent into floats 

In [None]:
# Map each passenger-percentage column to the float64 dtype.
columns_to_convert = dict.fromkeys(
    ['total_pax_percent', 'terminal_pax_percent', 'transit_pax_percent'],
    'float64',
)

# astype returns a new frame, so rebind df to the converted result.
df = df.astype(columns_to_convert)

# Show the resulting dtype of every column to confirm the conversion.
df.dtypes

### 2.1) [Optional] Round your values 
You can choose to round your values to 2 decimal places.

In [None]:
# Passenger-percentage columns to round.
cols = ['total_pax_percent', 'terminal_pax_percent', 'transit_pax_percent']

# Round to two decimal places and write the values back into the same columns.
df[cols] = df[cols].round(decimals=2)

df

### 3) Display the descriptive statistics
Use the 'describe()' function to describe the dataframe

In [None]:
# A notebook cell only renders its final expression, so a bare first
# describe() call would be computed and then thrown away. Passing each
# summary to display() makes both of them visible.
display(df.describe())               # default summary
display(df.describe(include='all'))  # summary covering every column

### 4) Displaying descriptive stats for reporting airports
Use Groupby to display descriptive statistics for the different reporting airport group names


In [None]:
# Remove the limit on how many columns pandas displays, so the wide
# per-group summary below is shown in full.
pd.options.display.max_columns = None

# Descriptive statistics for each reporting airport group.
df.groupby(by='reporting_airport_group_name').describe()


### 4.1 ) Displaying descriptive stats for reporting airports only showing passenger data (Terminal percent, transit percent and total percent)

In [2]:
# Restrict the per-group summary to the three passenger-percentage columns.
cols = ['terminal_pax_percent', 'transit_pax_percent', 'total_pax_percent']

df.groupby(by='reporting_airport_group_name')[cols].describe()

NameError: name 'df' is not defined

### 5) Display the correlations across the dataframe

In [None]:
# Look at the first five rows again before computing correlations.
df.head(n=5)

In [None]:
# Correlate the numeric columns only. The frame also appears to hold
# non-numeric columns (presumably reporting_airport_group_name, this_period
# and last_period - verify against the data). From pandas 2.0 onwards a bare
# df.corr() raises on such columns instead of silently skipping them, so the
# numeric columns are selected explicitly; this runs on old and new pandas.
df.select_dtypes(include='number').corr()

### 5.1) Show the correlation for pax data 

pax data is what I will use to refer to ['terminal_pax_percent','transit_pax_percent','total_pax_percent']

You may also find other correlations that exist in the database

In [None]:
# The three passenger-percentage ("pax data") columns.
cols = ['terminal_pax_percent', 'transit_pax_percent', 'total_pax_percent']

# Pairwise correlation matrix of the pax columns across the whole dataset.
df.loc[:, cols].corr()

### 5.2) Show the correlation for pax data grouped by different reporting airport groups
Pax data = ['terminal_pax_percent','transit_pax_percent','total_pax_percent']

In [None]:
# The three passenger-percentage ("pax data") columns.
cols = ['terminal_pax_percent', 'transit_pax_percent', 'total_pax_percent']

# One correlation matrix of the pax columns per reporting airport group.
df.groupby(by='reporting_airport_group_name')[cols].corr()

### 6) Find the Covariance of the dataframe only using the Pax data 

In [None]:
# The three passenger-percentage ("pax data") columns.
cols = ['terminal_pax_percent', 'transit_pax_percent', 'total_pax_percent']

# Pairwise covariance matrix of the pax columns.
df.loc[:, cols].cov()

# Question to think about
What does the covariance between the total passenger data and the terminal passenger data tell you? 

Hint: The covariance is positive...

# Missing Data 
You'll have a chance to remove some data from the dataframe (Even though we aren't getting into the Machine Learning aspect on the course *yet*)

In [None]:
# Remind ourselves of the available columns before dropping any data.
df.head(n=5)

### 7) Drop the date columns this_period and last_period

In [None]:
# Names of the columns to drop.
cols = ['this_period', 'last_period']

# df.drop returns a new frame and leaves df itself untouched, so the result is
# kept under its own name instead of being discarded once the cell has run.
df_without_periods = df.drop(columns=cols)

df_without_periods

### 8) Remove first 3 rows from the dataframe 
This task is purely for practice; this dataset does not require any records to be removed from it.

In [None]:
# Index labels of the rows you would like to remove.
rows = [0, 1, 2]

# df.drop returns a copy without those rows; df itself is left unchanged.
df.drop(index=rows)