## Import packages

In [2]:
import requests
import pprint
import pandas as pd
import json
from pandas.io.json import json_normalize
import shutil
import os

## Working with date and time objects

In [9]:
from datetime import datetime, date, timedelta
import time
print(time.time())
print(datetime.now())


1572338236.1975281
2019-10-29 01:37:16.197839


In [0]:
cur_time = datetime.now()

In [25]:
cur_time.hour

2

## Collect Data on an interval

In [26]:
today = date.today()
print(today)

2019-10-28


In [27]:
# math operation on days?
print(today.day - 5)

23


In [28]:
# day of the week
print(date.weekday(today))

0


In [29]:
# a date n days ago from today
diff = timedelta(days=7)
week_ago = today - diff
print(week_ago)

2019-10-21


In [30]:
def print_days(days_ago):
    """Print the current timestamp shifted back by 1, 2, ..., `days_ago` days.

    The reference timestamp is taken once, so every printed line carries the
    same time of day. Nothing is printed when `days_ago` is zero or negative.
    """
    reference = datetime.now()

    # offsets start at 1 so that "today" itself is never printed
    for offset in range(1, days_ago + 1):
        shifted = reference - timedelta(days=offset)
        print(shifted)
    
print_days(5)


2019-10-27 02:48:46.207293
2019-10-26 02:48:46.207293
2019-10-25 02:48:46.207293
2019-10-24 02:48:46.207293
2019-10-23 02:48:46.207293


In [10]:
# let's make some changes to our original url builder function to make it more dynamic

def build_fs_url(airpt_code, date, start_hour, departure):
    """Assemble the FlightStats airport-status URL for one airport and hour.

    Args:
        airpt_code: airport code placed in the URL path as-is.
        date: object exposing `.year`, `.month` and `.day` (a `date` or
            `datetime`); it is also echoed to stdout.
        start_hour: hour-of-day path segment.
        departure: `True` selects the 'dep' endpoint, anything that does not
            compare equal to `True` selects 'arr'.

    Returns:
        The URL as a string, without any query parameters.
    """
    print(date)

    # airport status endpoint
    base = 'https://api.flightstats.com/flex/flightstatus/rest/v2/json/airport/status'

    direction = 'arr'
    if departure == True:
        direction = 'dep'

    segments = (base, airpt_code, direction, date.year, date.month, date.day, start_hour)
    return '/'.join(f'{part}' for part in segments)

# Query parameters sent with the request: the FlightStats credentials are read
# from the FSID / FSKEY environment variables (os.environ.get yields None when
# a variable is unset), plus the 'utc' and 'numHours' options.
add_params = {
    'appId' : os.environ.get('FSID'),
    'appKey': os.environ.get('FSKEY'),
    'utc': 'true',
    'numHours': '6'
}  


# Live request: LAX departures for today's date, hour-of-day segment 13.
res = requests.get(build_fs_url('lax', datetime.now(), 13, True), params=add_params)

#print(res.url)
# Only the HTTP status is shown; the URL and body prints above/below are disabled.
print(res.status_code)
#print(res.json())


2019-10-29 01:39:58.437552
200


## Build Request URL 

In [12]:
# On second thought, let's build a class called req_url

class req_url:
    """Build the request URL and extra query parameters for one API call.

    After construction the instance exposes:
        credentials: dict of API credentials read from the FSID, FSKEY and
            DSKEY environment variables.
        url: the fully assembled endpoint URL (no query string).
        add_params: extra query parameters to send with the request.
    The 'weather' variant additionally sets `lat`, `long`, `time`, `source`
    and `ds_key`; the 'flight' variant sets `source` and `dep`.

    Note: constructing a 'weather' instance performs a FlightStats airport
    lookup over the network to obtain the airport coordinates.
    """

    # type_data is one of 'flight', 'weather'
    def __init__(self, type_data, airpt_code='lax', date=None, start_hour=24, departure=True):
        """
        Args:
            type_data: 'flight' (FlightStats airport status) or 'weather'
                (Dark Sky forecast / time-machine request).
            airpt_code: airport code used in the URL / airport lookup.
            date: object with `.year`, `.month`, `.day`. Defaults to today's
                date, resolved when the constructor is called. For 'weather',
                an explicit date selects that day; when omitted, the airport's
                current local time is used.
            start_hour: hour-of-day path segment for 'flight' requests.
            departure: `True` for departures, otherwise arrivals.

        Raises:
            ValueError: if `type_data` is not 'flight' or 'weather'.
            RuntimeError: if a 'weather' URL is requested but DSKEY is unset.
        """
        # A default of `date.today()` in the signature would be evaluated only
        # once, when the class is defined, and go stale in a long-lived kernel.
        # The parameter shadows the imported `date` class, hence `datetime`.
        date_given = date is not None
        if not date_given:
            date = datetime.now().date()

        # credentials for both apis
        self.credentials = {
            'flightstats': {
                'appId' : os.environ.get('FSID'),
                'appKey': os.environ.get('FSKEY')
            },
            'darksky': {
                'key': os.environ.get('DSKEY')
            }
        }

        # check to see if flight or airport info needed
        if type_data == 'flight':
            self.source = 'https://api.flightstats.com/flex/flightstatus/rest/v2/json/airport/status'
            self.dep = 'dep' if departure == True else 'arr'
            self.url = f'{self.source}/{airpt_code}/{self.dep}/{date.year}/{date.month}/{date.day}/{start_hour}'

            # additional parameters needed for request
            self.add_params = {
                'utc': 'true',
                'numHours': '6'
            }

        # dark sky api
        elif type_data == 'weather':
            # The key comes from the environment; it must never be committed
            # in the notebook source.
            self.ds_key = self.credentials['darksky']['key']
            if not self.ds_key:
                raise RuntimeError('Error: DSKEY environment variable is not set')

            # we get the lat and long of the airport requested using flightstats api
            res = requests.get(f'https://api.flightstats.com/flex/airports/rest/v1/json/iata/{airpt_code}',
                               params=self.credentials['flightstats'])
            res.raise_for_status()
            airport = res.json()['airports'][0]    # parse the body once

            self.lat = airport['latitude']
            self.long = airport['longitude']
            if date_given:
                # Honour the requested day. Without a timezone suffix Dark Sky
                # reads the time as local to the location.
                self.time = f'{date.year:04d}-{date.month:02d}-{date.day:02d}T00:00:00'
            else:
                # drop the trailing 4 characters of the airport's localTime
                self.time = airport['localTime'][:-4]
            self.source = 'https://api.darksky.net/forecast'
            self.url = f'{self.source}/{self.ds_key}/{self.lat},{self.long},{self.time}'

            # additional parameters for excluding components of the response such as minutely weather
            self.add_params = {
                'exclude': 'minutely'
            }

        else:
            # Fail loudly instead of returning an object with no `url`.
            raise ValueError("Error: type_data must be one of 'flight' or 'weather'")

# Smoke test of both URL builders for JFK. Constructing the 'weather' instance
# issues a live FlightStats airport lookup to obtain the coordinates.
fs_test = req_url('flight', 'jfk')
ds_test = req_url('weather', 'jfk')
print(fs_test.url)
print(ds_test.url)

https://api.flightstats.com/flex/flightstatus/rest/v2/json/airport/status/jfk/dep/2019/10/29/24
https://api.darksky.net/forecast/906506b4ec44e7ea8a9d51c2744d9959/40.642335,-73.78817,2019-10-29T04:41:12


## Collect Data and Store into Google Drive

In [0]:
# prints the departures URL for every 6-hour window (hourOfDay 0, 6, 12, 18)
def collect_daily(days_ago):
    """Print the LAX departures URL for each 6-hour window of past days.

    Covers the `days_ago` days before today (today itself is excluded) and
    prints four URLs per day, one per 6-hour window.

    Args:
        days_ago: number of past days to walk back through.
    """
    today = datetime.now()

    for i in range(days_ago):
        day = today - timedelta(days=i+1)

        for j in range(4):
            # The hour window follows the inner index; deriving it from the
            # day index would give every window of a day the same hour.
            time_of_day = j * 6
            # build_fs_url needs all four arguments: airport, date, hour, departure
            print(build_fs_url('lax', day, time_of_day, True))
    
    
#collect_daily(7)

def collect_data(airpt_code, days_ago):
    """Download flight-status and hourly weather data for recent days.

    For today and each of the previous `days_ago - 1` days this fetches
    departures and arrivals from FlightStats in 6-hour windows, plus the
    hourly weather from Dark Sky, and writes one JSON file per day into the
    `departures`, `arrivals` and `weather` sub-folders of the data folder.
    All files previously stored in those sub-folders are deleted first.

    Args:
        airpt_code: airport code passed on to `req_url`.
        days_ago: number of days to collect, counting today; at most 7.

    Returns:
        None. If `days_ago` exceeds 7 an error is printed and nothing is
        deleted or downloaded.

    Raises:
        requests.HTTPError: if any API call returns an error status.
    """
    # for now, maximum value for days_ago allowed is 7
    if days_ago > 7:
        print('Error: Cannot get flight history more than 7 days ago.')
        return

    folder = '/content/gdrive/My Drive/Colab Datasets/flights_data'

    # first remove all previously downloaded data:
    # every file in each of the folders present in the directory
    for each_fldr in os.listdir(folder):
        fldr_to_del = os.path.join(folder, each_fldr)

        # a stray file at the top level would make os.listdir() raise
        if not os.path.isdir(fldr_to_del):
            continue

        for each_file in os.listdir(fldr_to_del):
            file = os.path.join(fldr_to_del, each_file)

            try:
                if os.path.isfile(file):
                    os.unlink(file)
            except OSError as e:
                # best effort: report the failure and keep going
                print(e)

    now = datetime.now()

    # End of the 6-hour window containing the current hour (6, 12, 18 or 24);
    # windows after it have not started yet, so today begins from here.
    first_window_end = (now.hour // 6 + 1) * 6

    for i in range(days_ago):
        # Derived from `now` so the function does not depend on a `today`
        # variable left behind by another notebook cell.
        day = now.date() - timedelta(days=i)

        # past days are collected in full
        window_end = first_window_end if i == 0 else 24

        departures = []
        arrivals = []

        # flight status in 6-hour windows, latest window first
        for start_hour in range(window_end - 6, -1, -6):

            # flightstats api response
            dep_req = req_url('flight', airpt_code, day, start_hour)
            arr_req = req_url('flight', airpt_code, day, start_hour, False)

            # merge the credentials and additional params dictionaries
            deps = requests.get(dep_req.url, params={**dep_req.credentials['flightstats'], **dep_req.add_params})
            arrs = requests.get(arr_req.url, params={**arr_req.credentials['flightstats'], **arr_req.add_params})
            deps.raise_for_status()
            arrs.raise_for_status()

            # flight status info from response
            departures.extend(deps.json()['flightStatuses'])    # departing flight statuses
            arrivals.extend(arrs.json()['flightStatuses'])      # arriving flight statuses

        # Save one valid JSON array per day. Appending a separate array per
        # window would leave "[...][...]" in the file, which json.load rejects.
        with open(os.path.join(folder, 'departures', f'{day}.json'), 'w') as f:
            json.dump(departures, f)

        with open(os.path.join(folder, 'arrivals', f'{day}.json'), 'w') as f:
            json.dump(arrivals, f)

        # weather data in 1-day windows since api response already gives hourly weather cond
        weather_req = req_url('weather', airpt_code, day)

        # get weather data for airport and date; Dark Sky expects `exclude`
        # as a single comma-separated list
        weather = requests.get(weather_req.url, params={'exclude': 'minutely,daily,alerts'})
        weather.raise_for_status()

        # extract hourly data
        hourly_weather = weather.json()['hourly']['data']

        # store in google drive
        with open(os.path.join(folder, 'weather', f'{day}.json'), 'w') as f:
            json.dump(hourly_weather, f)
                        
            
collect_data('pdx', 3)

            

In [70]:
print(datetime.today())
print(datetime.now())
print(f'{date.today()}-{datetime.now().hour}')
print(int(time.time()))
print(datetime.fromtimestamp(1571554800))

2019-10-28 08:53:38.169901
2019-10-28 08:53:38.171746
2019-10-28-8
1572252818
2019-10-20 07:00:00
