### Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

### Importing Data

In [2]:
flights_df = pd.read_csv('flights.csv')
flights_df.drop(columns=['Unnamed: 0'], inplace=True)
flights_df.head()

Unnamed: 0,year,month,day,dep_time,dep_delay,arr_time,arr_delay,carrier,tailnum,flight,origin,dest,air_time,distance,hour,minute
0,2013,1,1,517.0,2.0,830.0,11.0,UA,N14228,1545,EWR,IAH,227.0,1400,5.0,17.0
1,2013,1,1,533.0,4.0,850.0,20.0,UA,N24211,1714,LGA,IAH,227.0,1416,5.0,33.0
2,2013,1,1,542.0,2.0,923.0,33.0,AA,N619AA,1141,JFK,MIA,160.0,1089,5.0,42.0
3,2013,1,1,544.0,-1.0,1004.0,-18.0,B6,N804JB,725,JFK,BQN,183.0,1576,5.0,44.0
4,2013,1,1,554.0,-6.0,812.0,-25.0,DL,N668DN,461,LGA,ATL,116.0,762,5.0,54.0


In [3]:
weather_df = pd.read_csv('weather.csv')
weather_df.drop(columns=['Unnamed: 0'], inplace=True)
weather_np = weather_df.to_numpy()
weather_df.head()

Unnamed: 0,origin,year,month,day,hour,temp,dewp,humid,wind_dir,wind_speed,wind_gust,precip,pressure,visib
0,EWR,2013,1.0,1.0,0.0,37.04,21.92,53.97,230.0,10.35702,11.918651,0.0,1013.9,10.0
1,EWR,2013,1.0,1.0,1.0,37.04,21.92,53.97,230.0,13.80936,15.891535,0.0,1013.0,10.0
2,EWR,2013,1.0,1.0,2.0,37.94,21.92,52.09,230.0,12.65858,14.567241,0.0,1012.6,10.0
3,EWR,2013,1.0,1.0,3.0,37.94,23.0,54.51,230.0,13.80936,15.891535,0.0,1012.7,10.0
4,EWR,2013,1.0,1.0,4.0,37.94,24.08,57.04,240.0,14.96014,17.21583,0.0,1012.8,10.0


### Questions

#### flights data

##### 1) How many flights were there from JFK to SLC? int

In [4]:
len(flights_df[(flights_df.origin == 'JFK') & (flights_df.dest == 'SLC')])

2113

##### 2) How many airlines fly to SLC? int

In [5]:
len(flights_df[flights_df.dest == 'SLC'])

2467

##### 3) What is the average arrival delay for flights to RDU? float

In [6]:
flights_df[flights_df.dest == 'RDU'].arr_delay.mean()

10.052380952380952

##### 4) What proportion of flights to SEA come from the two NYC airports (LGA and JFK)?  float

In [7]:
flightsToSEA = flights_df[flights_df.dest == 'SEA']
flightsFrom = flightsToSEA[(flightsToSEA.origin == 'LGA') | (flightsToSEA.origin == 'JFK')]

print(len(flightsFrom) / len(flightsToSEA))

0.5332653581442773


##### 5) Which date has the largest average depature delay? Pd slice with date and float. Please make date a column. Preferred format is 2013/1/1 (y/m/d)

In [48]:
flights_avg_by_date = (
    flights_df.copy()
    .assign(date=lambda row: pd.to_datetime(row[['year', 'month', 'day']]))
    .groupby('date')
    .mean()
)

In [49]:
flights_avg_by_date.nlargest(1, 'dep_delay')[['dep_delay']]

Unnamed: 0_level_0,dep_delay
date,Unnamed: 1_level_1
2013-03-08,83.536921


##### 6) Which date has the largest average arrival delay? pd slice with date and float

In [50]:
flights_avg_by_date_df.nlargest(1, 'arr_delay')[['arr_delay']]

Unnamed: 0_level_0,arr_delay
date,Unnamed: 1_level_1
2013-03-08,85.862155


##### 7) Which flight departing LGA or JFK in 2013 flew the fastest? pd slice with tailnumber and speed. speed = distance/airtime

In [51]:
print(
    flights_df[(flights_df.origin == 'LGA') | (flights_df.origin == 'JFK')]
    .assign(avg_speed=lambda row: row['distance'] / row['air_time'])
    .nlargest(1, 'avg_speed')
    [['tailnum', 'avg_speed']]
)

       tailnum  avg_speed
216447  N666DN  11.723077


##### 8) Replace all nans in the weather pd dataframe with 0s. Pd with no nans

In [15]:
weather_df.fillna(0)

Unnamed: 0,origin,year,month,day,hour,temp,dewp,humid,wind_dir,wind_speed,wind_gust,precip,pressure,visib
0,EWR,2013,1.0,1.0,0.0,37.04,21.92,53.97,230.0,10.35702,11.918651,0.0,1013.9,10.0
1,EWR,2013,1.0,1.0,1.0,37.04,21.92,53.97,230.0,13.80936,15.891535,0.0,1013.0,10.0
2,EWR,2013,1.0,1.0,2.0,37.94,21.92,52.09,230.0,12.65858,14.567241,0.0,1012.6,10.0
3,EWR,2013,1.0,1.0,3.0,37.94,23.00,54.51,230.0,13.80936,15.891535,0.0,1012.7,10.0
4,EWR,2013,1.0,1.0,4.0,37.94,24.08,57.04,240.0,14.96014,17.215830,0.0,1012.8,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8714,JFK,2013,9.0,2.0,20.0,75.20,73.40,94.14,200.0,4.60312,5.297178,0.0,0.0,4.0
8715,JFK,2013,10.0,23.0,10.0,48.92,39.02,68.51,60.0,4.60312,5.297178,0.0,1008.1,10.0
8716,JFK,2013,10.0,23.0,11.0,48.92,39.02,68.51,40.0,4.60312,5.297178,0.0,1008.5,10.0
8717,JFK,2013,12.0,17.0,5.0,26.96,10.94,50.34,40.0,4.60312,5.297178,0.0,1023.9,10.0


#### weather data
use weather_np

In [16]:
weather_cols = weather_df.columns
weather_header = {}
for x in range(len(weather_cols)):
    weather_header[weather_cols[x]] = x
weather_header

{'origin': 0,
 'year': 1,
 'month': 2,
 'day': 3,
 'hour': 4,
 'temp': 5,
 'dewp': 6,
 'humid': 7,
 'wind_dir': 8,
 'wind_speed': 9,
 'wind_gust': 10,
 'precip': 11,
 'pressure': 12,
 'visib': 13}

In [17]:
weather_np

array([['EWR', 2013, 1.0, ..., 0.0, 1013.9, 10.0],
       ['EWR', 2013, 1.0, ..., 0.0, 1013.0, 10.0],
       ['EWR', 2013, 1.0, ..., 0.0, 1012.6, 10.0],
       ...,
       ['JFK', 2013, 10.0, ..., 0.0, 1008.5, 10.0],
       ['JFK', 2013, 12.0, ..., 0.0, 1023.9, 10.0],
       ['LGA', 2013, 8.0, ..., 0.0, 1011.9, 10.0]], dtype=object)

##### 9) How many observations were made in Feburary? int

In [18]:
feb_observations = []
for obs in weather_np:
    if obs[weather_header['month']] == 2.0:
        feb_observations.append(obs)
        
feb_observations = np.array(feb_observations)
feb_observations[:5]

array([['EWR', 2013, 2.0, 1.0, 0.0, 33.08, 15.08, 47.1, 270.0, 20.71404,
        23.8373029512, 0.0, 1006.6, 10.0],
       ['EWR', 2013, 2.0, 1.0, 1.0, 32.0, 19.04, 58.37, 250.0, 17.2617,
        19.864419126, 0.0, 1007.3, 10.0],
       ['EWR', 2013, 2.0, 1.0, 2.0, 32.0, 10.04, 39.38, 260.0, 28.7695,
        33.10736521, 0.0, 1008.0, 10.0],
       ['EWR', 2013, 2.0, 1.0, 3.0, 30.92, 6.98, 35.84, 260.0, 27.61872,
        31.7830706016, 0.0, 1008.5, 10.0],
       ['EWR', 2013, 2.0, 1.0, 4.0, 30.02, 8.06, 39.03, 260.0, 14.96014,
        17.2158299092, 0.0, 1008.9, 10.0]], dtype=object)

In [19]:
len(feb_observations)

671

##### 10) What was the mean for humidity in February? float

In [20]:
feb_observations[:, [weather_header['humid']]].mean()

62.918152011922515

##### 11) What was the std for humidity in February? Float

In [21]:
feb_observations[:, [weather_header['humid']]].std()

20.33690087674334