<a href="https://colab.research.google.com/github/PharahMain/Flight-Delay-Predictor/blob/master/Utility_Functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import packages

In [0]:
import requests
import pprint
import pandas as pd
import json
from pandas.io.json import json_normalize
import shutil
import math
import os

## Mount Google Drive

In [2]:
# Import the Colab Drive helper under an alias: a later cell assigns the name `drive`
# to a PyDrive client, so keeping the two apart lets this cell be re-run safely
# without clobbering that client.
from google.colab import drive as colab_drive

# Mount the user's Google Drive at /content/gdrive.
colab_drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Working with date and datetime objects

In [3]:
from datetime import datetime, date, timedelta
import time
# Seconds since the Unix epoch, as a float.
print(time.time())
# Current local date and time as a naive datetime (no timezone attached).
print(datetime.now())


1579223646.2900648
2020-01-17 01:14:06.290451


In [0]:
# Snapshot the current local time so its individual fields can be inspected.
cur_time = datetime.now()

In [0]:
# Hour-of-day component (0-23) of the `cur_time` snapshot.
cur_time.hour

8

## Import FlightStats API credentials

In [0]:
# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
# Step 1: prompt the Colab user to sign in.
auth.authenticate_user()
# Step 2: hand the application-default credentials to PyDrive.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the PyDrive client the next cell uses to fetch the credentials file.
drive = GoogleDrive(gauth)


In [0]:
# Download a file based on its file ID.
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
file_id = '169lkP3e4Atlkm7spBrUfLON8644sPq_D'
# Fetch the Drive file and save it next to the notebook as credentials.py
# so that it can be imported as a module below.
your_module = drive.CreateFile({'id': file_id})
your_module.GetContentFile('credentials.py')
# Leave the next line commented out: it would print the downloaded file's
# contents (the API credentials) into the saved notebook output.
#print('Downloaded content "{}"'.format(your_module.GetContentString()))

# `credentials` is the object exported by the downloaded module; the req_url class
# reads credentials['flightstats'] and credentials['darksky'] from it.
from credentials import credentials


# Collect Data on an interval

In [0]:
# Today's calendar date (no time-of-day component); reused by the cells that follow.
today = date.today()
print(today)

2019-10-30


In [0]:
# math operation on days?
# `today.day` is the day-of-month as a plain int, so this is integer arithmetic rather
# than date arithmetic: the result is a number, not a date, and it reaches zero or a
# negative value during the first five days of a month. Subtract a `timedelta` from
# the date itself to step back across month boundaries.
print(today.day - 5)

25


In [0]:
# Weekday index of `today`, counted from Monday = 0 through Sunday = 6.
print(today.weekday())

2


In [0]:
# a date n days ago from today
# Subtracting a timedelta from a date yields another date, with month and year
# rollover handled by the datetime module.
diff = timedelta(days=7)
week_ago = today - diff
print(week_ago)

2019-10-23


In [22]:
def print_days(days_ago):
    """Print one timestamp for each of the previous `days_ago` days.

    The first line printed is exactly one day before the current local time,
    the next is two days before, and so on; nothing is returned.
    """
    reference = datetime.now()

    for offset in range(1, days_ago + 1):
        print(reference - timedelta(days=offset))
    
#print_days(5)

def print_hours(days):
    """Print the current local time, then a list of 6-hour look-back timestamps.

    Four timestamps are produced per requested day, each a further six hours
    before the same reference time (6 h, 12 h, 18 h, 24 h, ...). Nothing is returned.
    """
    reference = datetime.now()
    print(reference)

    lookbacks = []
    for step in range(1, days * 4 + 1):
        lookbacks.append(reference - timedelta(hours=6 * step))
    print(lookbacks)

# Demo: looking back one day yields four timestamps spaced six hours apart.
print_hours(1)

2020-01-17 01:39:28.362274
[datetime.datetime(2020, 1, 16, 19, 39, 28, 362274), datetime.datetime(2020, 1, 16, 13, 39, 28, 362274), datetime.datetime(2020, 1, 16, 7, 39, 28, 362274), datetime.datetime(2020, 1, 16, 1, 39, 28, 362274)]


In [12]:
# let's make some changes to our original url builder function to make it more dynamic

def build_fs_url(airpt_code, date, start_hour, departure):
    """Assemble the FlightStats airport-status URL for one airport, day and start hour.

    Args:
        airpt_code: Airport code placed in the URL path (e.g. 'lax').
        date: Object exposing `.year`, `.month` and `.day` (a `date` or `datetime`).
            It is also echoed to stdout.
        start_hour: Hour-of-day path segment at which the status window begins.
        departure: A value equal to `True` selects the departures feed ('dep');
            anything else selects arrivals ('arr').

    Returns:
        The URL as a string, without any query parameters.
    """
    print(date)

    # airport status endpoint
    endpoint = 'https://api.flightstats.com/flex/flightstatus/rest/v2/json/airport/status'

    if departure == True:
        direction = 'dep'
    else:
        direction = 'arr'

    return f'{endpoint}/{airpt_code}/{direction}/{date.year}/{date.month}/{date.day}/{start_hour}'

# Query-string parameters for the FlightStats request. The app id and key are read from
# the FSID / FSKEY environment variables; os.getenv returns None when a variable is unset.
add_params = {
    'appId' : os.getenv('FSID'),
    'appKey': os.getenv('FSKEY'),
    'utc': 'true',
    'numHours': '6'
}  


# Live request: departures from LAX for the current date, window starting at hour 13.
res = requests.get(build_fs_url('lax', datetime.now(), 13, True), params=add_params)

# NOTE(review): in the recorded run the final URL carries no appId/appKey, and the body
# is a 401 'You must specify an appId' error even though status_code prints 200.
# Presumably FSID/FSKEY were unset in that session - confirm before relying on this cell,
# and check the body for an 'error' key rather than trusting the HTTP status alone.
print(res.url)
print(res.status_code)
print(res.json())


2020-01-16 23:17:53.007453
https://api.flightstats.com/flex/flightstatus/rest/v2/json/airport/status/lax/dep/2020/1/16/13?utc=true&numHours=6
200
{'error': {'httpStatusCode': 401, 'errorId': 'd6e93a48-ba7b-4328-a735-93cd47277519', 'errorMessage': 'You must specify an appId', 'errorCode': 'UNAUTHORIZED'}}


In [0]:
# On second thought, let's build a class called req_url

class req_url:
    """Describe a single FlightStats or Dark Sky request: its URL plus extra parameters.

    Building an instance computes everything needed to issue the request but does
    not issue it (the 'weather' kind does make one FlightStats airport lookup to
    obtain coordinates). Callers combine `credentials`, `url` and `add_params`
    themselves when calling `requests.get`.

    Attributes set for every supported kind:
        credentials: dict with a 'flightstats' entry (`appId`, `appKey`) and a
            'darksky' entry (`key`), copied from the notebook-level `credentials`.
        source: base endpoint for the chosen API.
        url: full request URL, without query string.
        add_params: non-credential query parameters.

    Additional attributes:
        'flight'  -> dep ('dep' or 'arr')
        'weather' -> lat, long, time, ds_key
    """

    # type_data is one of 'flight' or 'weather'
    def __init__(self, type_data, airpt_code='lax', date=None, start_hour=24, departure=True):
        """Build the request description.

        Args:
            type_data: 'flight' for FlightStats airport status, 'weather' for Dark Sky.
            airpt_code: Airport code used in the FlightStats URLs.
            date: Object exposing `.year`, `.month`, `.day`. Defaults to today's date,
                resolved when the constructor is called. Only used for 'flight'.
            start_hour: Hour-of-day path segment for the status window ('flight' only).
            departure: Truthy for departures, falsy for arrivals ('flight' only).

        Raises:
            ValueError: if `type_data` is neither 'flight' nor 'weather'.
        """
        # A `date.today()` default in the signature would be evaluated once, when the
        # class is defined, and go stale in a long-lived kernel; resolve it per call.
        # (`date` the parameter shadows the `date` class here, hence `datetime`.)
        if date is None:
            date = datetime.now().date()

        # credentials for both apis
        self.credentials = {
            'flightstats': {
                'appId' : credentials['flightstats']['app_id'],
                'appKey': credentials['flightstats']['app_key']
            },
            'darksky': {
                'key': credentials['darksky']['secret_key']
            }
        }

        # Compare by value: `is` tests object identity, which only happens to work
        # for interned string literals.
        if type_data == 'flight':
            self.source = 'https://api.flightstats.com/flex/flightstatus/rest/v2/json/airport/status'
            self.dep = 'dep' if departure else 'arr'
            self.url = f'{self.source}/{airpt_code}/{self.dep}/{date.year}/{date.month}/{date.day}/{start_hour}'

            # additional parameters needed for request
            self.add_params = {
                'utc': 'false',
                'numHours': '6'
            }

        # dark sky api
        elif type_data == 'weather':

            # we get the lat and long of the airport requested using flightstats api
            res = requests.get(f'https://api.flightstats.com/flex/airports/rest/v1/json/iata/{airpt_code}',
                               params=self.credentials['flightstats'])

            # Parse the response body once and reuse the first airport record.
            airport = res.json()['airports'][0]

            self.lat = airport['latitude']
            self.long = airport['longitude']
            # Drops the last four characters of localTime - presumably a fractional-seconds
            # suffix the Dark Sky URL does not accept; TODO confirm against a live response.
            self.time = airport['localTime'][:-4]
            self.source = 'https://api.darksky.net/forecast'
            self.ds_key = self.credentials['darksky']['key']      # dark sky api key
            # NOTE(review): `date`, `start_hour` and `departure` are not used on this branch;
            # the timestamp in the URL always comes from the airport lookup above.
            self.url = f'{self.source}/{self.ds_key}/{self.lat},{self.long},{self.time}'

            # additional parameters for excluding components of the response such as minutely weather
            self.add_params = {
                'exclude': 'minutely'
            }

        else:
            # Fail loudly instead of returning an object that has no `url`/`add_params`.
            raise ValueError(f"type_data must be one of 'flight' or 'weather', got {type_data!r}")

#fs_test = req_url('flight', 'jfk', departure=False)
#ds_test = req_url('weather', 'jfk')
#print(fs_test.url)
#print(ds_test.url)

In [17]:
# modify to accommodate hourOfDay
def collect_daily(days_ago):
    """Print the FlightStats departure URL for each 6-hour window of the past days.

    For every one of the previous `days_ago` days (starting with yesterday) the
    LAX departures URL is built and printed for the windows starting at hours
    0, 6, 12 and 18. Note that `build_fs_url` also prints the date it is given.

    Args:
        days_ago: Number of past days to cover.
    """
    today = datetime.now()

    for i in range(days_ago):
        day = today - timedelta(days=i+1)

        # Four windows per day; pass the start hour and direction that
        # build_fs_url requires (it takes four positional arguments).
        for time_of_day in range(0, 24, 6):
            print(build_fs_url('lax', day, time_of_day, True))
    
    
#collect_daily(7)

def _clear_downloaded_files(folder):
    """Delete the regular files inside each immediate sub-folder of `folder`."""
    for each_fldr in os.listdir(folder):
        fldr_to_del = os.path.join(folder, each_fldr)

        # A stray file at the top level is not a data folder; listing it would raise.
        if not os.path.isdir(fldr_to_del):
            continue

        for each_file in os.listdir(fldr_to_del):
            file = os.path.join(fldr_to_del, each_file)

            # Best effort: report a file that cannot be removed and carry on.
            try:
                if os.path.isfile(file):
                    os.unlink(file)
            except OSError as e:
                print(e)


def collect_data(airpt_code, days_ago):
    """Download flight statuses and hourly weather for an airport and save them as JSON.

    Previously downloaded files under the flights_data sub-folders are deleted first.
    Then, for today and each earlier day up to `days_ago` days in total, departures
    and arrivals are requested from FlightStats in 6-hour windows (newest window
    first) and hourly weather is requested from Dark Sky. Each day produces one
    file per category (departures, arrivals, weather) holding a single JSON array.

    Args:
        airpt_code: Airport code passed to `req_url`.
        days_ago: Number of days to collect, counting today; values above 7 are
            rejected with a printed message.

    Returns:
        None.

    Raises:
        KeyError: if a response lacks the expected 'flightStatuses' or 'hourly' data.
        OSError: if the data folder is missing or a file cannot be written.
    """
    # for now, maximum value for days_ago allowed is 7
    if days_ago > 7:
        print('Error: Cannot get flight history more than 7 days ago.')
        return

    # first remove all previously downloaded data
    folder = '/content/gdrive/My Drive/Colab Datasets/flights_data'
    _clear_downloaded_files(folder)

    now = datetime.now()

    # Informational only: the 6-hour look-back points covered by this run.
    windows = days_ago * 4
    start_times = [now-timedelta(hours=6*(i+1)) for i in range(windows)]
    print(f"The start times are {start_times}")

    # Round the current hour up to the next 6-hour boundary (6, 12, 18 or 24) so the
    # first window requested for today is the one containing the current, partial hour.
    start_hour = (now.hour // 6 + 1) * 6

    for i in range(days_ago):
        day = now - timedelta(days=i)

        # earlier days are complete, so cover all four windows
        if i > 0:
            start_hour = 24

        # Gather every window of the day before writing, so each file holds exactly
        # one JSON array (appending one array per window yields invalid JSON).
        departures = []
        arrivals = []

        # flight status in 6-hour windows, newest first
        while start_hour > 0:
            start_hour = start_hour - 6

            # flightstats api request descriptions
            dep_req = req_url('flight', airpt_code, day, start_hour)
            arr_req = req_url('flight', airpt_code, day, start_hour, False)

            # merge the credentials and additional params dictionaries
            deps = requests.get(dep_req.url, params={**dep_req.credentials['flightstats'], **dep_req.add_params})
            dep_payload = deps.json()    # parse once; reused for the log and the data
            print(dep_payload)
            arrs = requests.get(arr_req.url, params={**arr_req.credentials['flightstats'], **arr_req.add_params})

            # flight status info from response
            departures.extend(dep_payload['flightStatuses'])    # departing flight statuses
            arrivals.extend(arrs.json()['flightStatuses'])      # arriving flight statuses

        # save json to google drive; the file name is the full timestamp of `day`
        with open(f'{folder}/departures/{day}.json', 'w') as f:
            json.dump(departures, f)

        with open(f'{folder}/arrivals/{day}.json', 'w') as f:
            json.dump(arrivals, f)

        # weather data in 1-day windows since api response already gives hourly weather cond
        weather_req = req_url('weather', airpt_code, day)

        # get weather data for airport and date
        # NOTE(review): `weather_req.add_params` is not used here; a separate exclude
        # list is sent instead - confirm which one is intended.
        weather = requests.get(weather_req.url, params={'exclude': ['minutely', 'daily', 'alerts']})

        # extract hourly data
        hourly_weather = weather.json()['hourly']['data']

        # store in google drive
        with open(f'{folder}/weather/{day}.json', 'a') as f:
            f.write(json.dumps(hourly_weather))
                        
            
# WARNING: running this cell first deletes the files already stored in the flights_data
# sub-folders on Drive, then issues live FlightStats and Dark Sky requests.
collect_data('lax', 1)

            

The start times are [datetime.datetime(2020, 1, 16, 19, 32, 16, 293395), datetime.datetime(2020, 1, 16, 13, 32, 16, 293395), datetime.datetime(2020, 1, 16, 7, 32, 16, 293395), datetime.datetime(2020, 1, 16, 1, 32, 16, 293395)]
{'request': {'hourOfDay': {'requested': '0', 'interpreted': 0}, 'utc': {'requested': 'false', 'interpreted': False}, 'numHours': {'requested': '6', 'interpreted': 6}, 'url': 'https://api.flightstats.com/flex/flightstatus/rest/v2/json/airport/status/lax/dep/2020/1/17/0?utc=false&numHours=6', 'nonstopOnly': {'interpreted': False}, 'airport': {'fsCode': 'LAX', 'requestedCode': 'lax'}, 'date': {'year': '2020', 'month': '1', 'day': '17', 'interpreted': '2020-01-17'}}, 'appendix': {'airlines': [{'fs': '3U', 'iata': '3U', 'icao': 'CSC', 'name': 'Sichuan Airlines', 'active': True}, {'fs': 'LR', 'iata': 'LR', 'icao': 'LRC', 'name': 'Avianca Costa Rica', 'active': True}, {'fs': 'DL', 'iata': 'DL', 'icao': 'DAL', 'name': 'Delta Air Lines', 'phoneNumber': '1-800-221-1212', '

In [0]:
# datetime.today() and datetime.now() (called without a timezone) both give the current local time.
print(datetime.today())
print(datetime.now())
# Today's date followed by the current hour, joined with a dash.
print(f'{date.today()}-{datetime.now().hour}')
# Current Unix timestamp truncated to whole seconds.
print(int(time.time()))
# Convert a Unix timestamp back into a local (naive) datetime.
print(datetime.fromtimestamp(1571554800))

2019-10-28 08:53:38.169901
2019-10-28 08:53:38.171746
2019-10-28-8
1572252818
2019-10-20 07:00:00
