In [2]:
import datetime

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine, engine, Table, Column, types, MetaData, exc, text

from api_keys import *


# SQL column types for the flight-arrivals frame, keyed by DataFrame column name.
# The mapping is passed as `dtype=` to `DataFrame.to_sql` when the frame is
# appended to the `arrivals_tomorrow` table.
arrivals_tomorrow_dtypes = dict(
    dep_airport=types.NVARCHAR(length=255),
    sched_arr_loc_time=types.DateTime(),
    arr_terminal=types.INTEGER(),
    status=types.NVARCHAR(length=255),
    airline=types.NVARCHAR(length=255),
    aircraft=types.NVARCHAR(length=255),
    arr_airport_icoa=types.NVARCHAR(length=255),
    city=types.NVARCHAR(length=255),
    country=types.NVARCHAR(length=255),
)

# SQL column types for the weather-forecast frame, keyed by DataFrame column name.
# The mapping is passed as `dtype=` to `DataFrame.to_sql` when the frame is
# appended to the `weather_forecast` table.
weather_data_dtypes = dict(
    # Sky conditions
    clouds=types.INTEGER(),
    visibility=types.INTEGER(),
    # Location
    city=types.NVARCHAR(length=255),
    country=types.NVARCHAR(length=255),
    latitude=types.Float(precision=4, asdecimal=True),
    longitude=types.Float(precision=4, asdecimal=True),
    # Wind
    wind_speed=types.Float(precision=4, asdecimal=True),
    wind_deg=types.INTEGER(),
    wind_gust=types.Float(precision=4, asdecimal=True),
    # Temperature
    temp=types.Float(precision=4, asdecimal=True),
    feels_like=types.Float(precision=4, asdecimal=True),
    temp_min=types.Float(precision=4, asdecimal=True),
    temp_max=types.Float(precision=4, asdecimal=True),
    # Pressure and humidity
    pressure=types.INTEGER(),
    sea_level=types.INTEGER(),
    grnd_level=types.INTEGER(),
    humidity=types.INTEGER(),
    # Summary and precipitation
    description=types.NVARCHAR(length=255),
    snow=types.Float(precision=4, asdecimal=True),
    rain=types.Float(precision=4, asdecimal=True),
    # Forecast timestamp
    datetime=types.DateTime(),
)

In [3]:
def get_weather_forecast_for_tomorrow(city, country_code, timeout=30):
    """Request the OpenWeatherMap forecast for a city through RapidAPI.

    Only the ``q`` query parameter (``"<city>,<country_code>"``) is sent; no date
    filter is applied here, so the response contains whatever forecast entries
    the endpoint returns for that location.

    Parameters
    ----------
    city : str
        City name, e.g. ``'Berlin'``.
    country_code : str
        Country code appended to the city name, e.g. ``'DE'``.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests``). Without it a stalled connection blocks the kernel forever.

    Returns
    -------
    requests.Response
        The raw HTTP response. The status code is not checked here.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    """
    owm_url = "https://community-open-weather-map.p.rapidapi.com/forecast"
    querystring = {"q": f"{city},{country_code}"}
    headers = {
        'x-rapidapi-host': "community-open-weather-map.p.rapidapi.com",
        # `rapid_api_key` is a module-level name — presumably provided by the
        # `api_keys` star import; confirm.
        'x-rapidapi-key': rapid_api_key
        }

    return requests.get(owm_url, headers=headers, params=querystring, timeout=timeout)

def extract_weather_data(weather_json):
    """Flatten a forecast response into a DataFrame with one row per forecast entry.

    Parameters
    ----------
    weather_json : response-like
        Object whose ``.json()`` returns a dict with a ``'city'`` section
        (``name``, ``country``, ``coord.lat``, ``coord.lon``) and a ``'list'`` of
        forecast entries (despite the name, this is the response object, not
        the parsed JSON).

    Returns
    -------
    pandas.DataFrame
        One row per entry in ``'list'``. Nested ``wind`` values become
        ``wind_<key>`` columns, ``main`` and ``weather[0]`` values become
        top-level columns, ``clouds`` is reduced to its ``'all'`` value,
        ``snow``/``rain`` hold the ``'3h'`` value or ``None``, the four
        temperature columns have 273.15 subtracted, and ``datetime`` is parsed
        from ``dt_txt``.

    Raises
    ------
    KeyError
        If a required section (``city``, ``list``, ``wind``, ``main``,
        ``weather``, ``clouds``, ``dt_txt`` or a temperature field) is missing.
    """
    # Parse the body once; the parsed dict is local to this call, so mutating
    # its entries below does not leak to the caller.
    payload = weather_json.json()
    city_info = payload['city']
    forecast_items = payload['list']

    # Keys that are either already flattened or not wanted in the table.
    # 'main', 'id' and 'icon' are (re)written by the weather[0] flattening below.
    unwanted_keys = ('wind', 'main', 'weather', 'dt', 'dt_txt',
                     'sys', 'icon', 'id', 'temp_kf', 'pop')

    for item in forecast_items:

        # Extract city data (identical for every row)
        item['city'] = city_info['name']
        item['country'] = city_info['country']
        item['latitude'] = city_info['coord']['lat']
        item['longitude'] = city_info['coord']['lon']

        # Extract data from nested dictionaries
        for k, v in item['wind'].items():
            item['wind_' + k] = v
        for k, v in item['main'].items():
            item[k] = v
        for k, v in item['weather'][0].items():
            item[k] = v
        item['clouds'] = item['clouds']['all']

        # Precipitation sections only exist when precipitation is forecast
        for precipitation in ('snow', 'rain'):
            try:
                item[precipitation] = item[precipitation]['3h']
            except (KeyError, TypeError):
                item[precipitation] = None

        # Transform units
        item['datetime'] = datetime.datetime.strptime(item['dt_txt'], "%Y-%m-%d %H:%M:%S")
        for temperature in ('temp', 'feels_like', 'temp_min', 'temp_max'):
            item[temperature] = item[temperature] - 273.15

        # Remove unnecessary data; tolerate entries that never had the key
        for key in unwanted_keys:
            item.pop(key, None)

    return pd.DataFrame(forecast_items)

def get_airport_arrivals_for_tomorrow(airport_code='EDDB', timeout=30):
    """Request tomorrow's arrivals for an airport from AeroDataBox through RapidAPI.

    The requested window runs from 00:00 to 11:59 of tomorrow's date (computed
    from ``datetime.date.today()``).
    NOTE(review): only the first half of the day is requested — confirm whether
    a second request for 12:00-23:59 is needed.

    Parameters
    ----------
    airport_code : str, optional
        Airport code placed in the ``/airports/icao/<code>/`` URL segment.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests``). Without it a stalled connection blocks the kernel forever.

    Returns
    -------
    requests.Response
        The raw HTTP response. The status code is not checked here.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    """
    tomorrow = datetime.date.today() + datetime.timedelta(days=1)
    day_stamp = tomorrow.isoformat()  # 'YYYY-MM-DD'

    aero_data_box_url = (
        f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{airport_code}"
        f"/{day_stamp}T00:00/{day_stamp}T11:59"
    )

    querystring = {"direction": "Arrival", "withCodeshared": "true", "withLocation": "false"}

    headers = {
        'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
        # `rapid_api_key` is a module-level name — presumably provided by the
        # `api_keys` star import; confirm.
        'x-rapidapi-key': rapid_api_key
        }

    return requests.get(aero_data_box_url, headers=headers, params=querystring, timeout=timeout)

def extract_flight_info(arrivals_response, city, country_code, airport_code='EDDB'):
    """Build a DataFrame with one row per arrival in an arrivals response.

    Parameters
    ----------
    arrivals_response : response-like
        Object whose ``.json()`` returns a dict with an ``'arrivals'`` list.
    city : str
        Value stored in the ``city`` column of every row.
    country_code : str
        Value stored in the ``country`` column of every row.
    airport_code : str, optional
        Value stored in the ``arr_airport_icoa`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns: ``dep_airport``, ``sched_arr_loc_time``, ``arr_terminal``,
        ``status``, ``airline``, ``aircraft``, ``arr_airport_icoa``, ``city``,
        ``country``. ``sched_arr_loc_time`` is a naive datetime holding the
        local wall-clock time. ``arr_terminal`` is ``None`` when the terminal is
        missing or not an integer. ``dep_airport``, ``airline`` and ``aircraft``
        are ``None`` when the corresponding sub-object is absent.

    Raises
    ------
    KeyError
        If ``'arrivals'``, ``'movement'``, ``'scheduledTimeLocal'`` or
        ``'status'`` is missing.
    ValueError
        If ``scheduledTimeLocal`` is not ``YYYY-MM-DD HH:MM`` followed by a UTC
        offset.
    """
    def _nested(mapping, outer_key, inner_key):
        # mapping[outer_key][inner_key], or None when the sub-object is missing.
        return (mapping.get(outer_key) or {}).get(inner_key)

    flight_data = []

    for flight in arrivals_response.json()['arrivals']:
        movement = flight['movement']

        # Terminal can be absent or a non-numeric label; store NULL in that case.
        try:
            terminal = int(movement['terminal'])
        except (KeyError, TypeError, ValueError):
            terminal = None

        # The timestamp ends in a UTC offset such as '+01:00'. Parse it with %z
        # (so negative offsets work and the offset is not misread as seconds),
        # then drop the tzinfo to keep the naive local time the column stores.
        scheduled_local = datetime.datetime.strptime(
            movement['scheduledTimeLocal'], "%Y-%m-%d %H:%M%z"
        ).replace(tzinfo=None)

        flight_data.append({
            'dep_airport': _nested(movement, 'airport', 'name'),
            'sched_arr_loc_time': scheduled_local,
            'arr_terminal': terminal,
            'status': flight['status'],
            'airline': _nested(flight, 'airline', 'name'),
            'aircraft': _nested(flight, 'aircraft', 'model'),
            'arr_airport_icoa': airport_code,
            'city': city,
            'country': country_code,
        })

    return pd.DataFrame(flight_data)

class DatabaseInterface:
    """Small helper that ensures a database exists and appends DataFrames to it.

    Attributes are set in ``__init__``:
    ``db_engine`` is the SQLAlchemy engine bound to ``db_name``;
    ``metadata`` is a ``MetaData`` object constructed with that engine.
    """

    def __init__(self,
                 db_name,
                 user,
                 password,
                 host='localhost',
                 port=3306,
                 driver='mysql+pymysql'):
        """Create ``db_name`` on the server if needed and open an engine on it.

        Parameters
        ----------
        db_name : str
            Database to create (``CREATE DATABASE IF NOT EXISTS``) and connect to.
            It is interpolated into the SQL statement unquoted.
        user, password : str
            Credentials, interpolated into the connection URL as-is.
        host : str, optional
            Server host name.
        port : int, optional
            Server port; used for both the bootstrap and the final connection.
        driver : str, optional
            SQLAlchemy ``dialect+driver`` prefix of the connection URL.
        """
        # Both engines must target the same server, so the port belongs in the
        # bootstrap URL as well (it used to fall back to the driver's default).
        server_url = f'{driver}://{user}:{password}@{host}:{port}'

        bootstrap_engine = create_engine(server_url)
        try:
            # Connection-level execute of a text() construct works on SQLAlchemy
            # 1.x and 2.x, unlike Engine.execute with a raw string.
            with bootstrap_engine.begin() as connection:
                connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {db_name}"))
        finally:
            # The bootstrap engine is only needed for the statement above;
            # release its connection pool instead of leaving it open.
            bootstrap_engine.dispose()

        self.db_engine = create_engine(f"{server_url}/{db_name}")
        # NOTE(review): passing an engine to MetaData is a SQLAlchemy 1.x feature
        # ("bound metadata") — confirm the installed version before upgrading.
        self.metadata = MetaData(self.db_engine)

    def insert_data(self, df, table_name, dtype):
        """Append ``df`` to ``table_name`` using ``DataFrame.to_sql``.

        Parameters
        ----------
        df : pandas.DataFrame
            Rows to append; the index is not written.
        table_name : str
            Target table (created by ``to_sql`` if it does not exist).
        dtype : dict
            Column-name -> SQLAlchemy type mapping forwarded to ``to_sql``.
        """
        df.to_sql(table_name, self.db_engine, if_exists='append', index=False, dtype=dtype)

In [4]:
airport_code = 'EDDB'
city = 'Berlin'
country_code = 'DE'

# Fetch inside this cell so it also runs on a fresh kernel; the responses are
# otherwise undefined unless they were left over from an earlier session.
weather_response = get_weather_forecast_for_tomorrow(city, country_code)
weather_data = extract_weather_data(weather_response)

arrivals_response = get_airport_arrivals_for_tomorrow(airport_code)
# Pass the configured airport through instead of repeating the literal.
arrivals_tomorrow = extract_flight_info(arrivals_response, city, country_code, airport_code=airport_code)

# Credentials are module-level names — presumably provided by the `api_keys`
# star import; confirm.
dbi = DatabaseInterface(db_name='wbs_gans_project',
                        user=aws_mysql_user,
                        password=aws_mysql_password,
                        host=aws_mysql_host)

dbi.insert_data(arrivals_tomorrow, 'arrivals_tomorrow', dtype=arrivals_tomorrow_dtypes)
dbi.insert_data(weather_data, 'weather_forecast', dtype=weather_data_dtypes)

print(weather_data.head(5))
print(arrivals_tomorrow.head(5))

   clouds  visibility  rain    city country  latitude  longitude  wind_speed  \
0      83       10000  0.10  Berlin      DE   52.5244    13.4105        8.07   
1      92       10000   NaN  Berlin      DE   52.5244    13.4105        8.48   
2     100       10000  0.13  Berlin      DE   52.5244    13.4105       10.19   
3      98       10000   NaN  Berlin      DE   52.5244    13.4105       10.84   
4      99        9887   NaN  Berlin      DE   52.5244    13.4105        9.09   

   wind_deg  wind_gust  ...  feels_like  temp_min  temp_max  pressure  \
0       221      16.00  ...        2.92      7.13      7.41       983   
1       223      17.36  ...        3.16      7.40      7.61       984   
2       242      19.58  ...        2.71      7.39      7.39       986   
3       254      21.75  ...       -0.41      5.16      5.16       987   
4       269      17.68  ...       -1.63      3.88      3.88       990   

   sea_level  grnd_level  humidity      description snow            datetime  
0

In [13]:
from IPython.display import JSON, display

# Only the last expression of a cell is rendered automatically, so the JSON
# view has to be displayed explicitly when something else follows it.
display(JSON(weather_response.json()))
print(weather_response.json()['cod'])

200


In [9]:
# Show the parsed arrivals payload with IPython's JSON display object; as the
# last expression of the cell it is rendered as the cell output.
# Relies on `JSON` and `arrivals_response` already existing in the kernel.
JSON(arrivals_response.json())

<IPython.core.display.JSON object>