# Weather data collection using the OpenWeatherMap forecast API (https://openweathermap.org/forecast5)

In [1]:
import pandas as pd
import requests
from datetime import datetime
import my_stuff

city = 'Berlin'
API_key = my_stuff.openweather_API_Key

# Forecast endpoint; see https://openweathermap.org/forecast5 for the
# available query parameters.
url = "https://api.openweathermap.org/data/2.5/forecast"

# Passing the query as `params` lets requests URL-encode the values, and HTTPS
# keeps the API key out of plaintext traffic.
response = requests.get(
    url,
    params={"q": city, "appid": API_key, "units": "metric"},
    timeout=30,  # do not hang the kernel on an unresponsive server
)
# Fail here rather than treating an error payload as if it were a forecast.
response.raise_for_status()

# NOTE: the name `json` is kept because the next cell displays it.
json = response.json()

In [2]:
# Show the parsed forecast through IPython's JSON display object (rendered as
# a collapsible tree by front ends that support it) instead of dumping the
# raw dictionary into the cell output.
from IPython.display import JSON
JSON(json)

<IPython.core.display.JSON object>

# Automating weather data collection and loading it into a local MySQL database

In [3]:
import pandas as pd
import requests
from pytz import timezone
from datetime import datetime
import my_stuff

def retreiving_and_sending_weather_data():
  """Run the weather pipeline end to end.

  Builds the database connection string, reads the cities from the database,
  collects their forecasts and appends the result to the database.

  Returns:
    The fixed confirmation message "Weather data has been updated".
  """
  db_url = connection()
  forecasts = get_weather_data(get_cities_data(db_url))
  send_weather_data(forecasts, db_url)
  return "Weather data has been updated"

def connection():
  """Build the SQLAlchemy connection URL for the local ``gans`` MySQL schema.

  The password is read from ``my_stuff.my_sql_password`` and percent-encoded
  before it is placed in the URL.

  Returns:
    A string of the form ``mysql+pymysql://user:password@host:port/schema``.
  """
  # Local import so this function works no matter which import cell has run.
  from urllib.parse import quote_plus

  schema = "gans"
  host = "127.0.0.1"
  user = "root"
  port = 3306
  # Characters such as "@", ":", "/" or "%" in a raw password would be read as
  # URL syntax and corrupt the connection string, so escape them. Passwords
  # made only of letters, digits and "_.-~" come out unchanged.
  password = quote_plus(str(my_stuff.my_sql_password))
  return f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

def get_cities_data(connection_string):
  """Read the ``city`` table into a DataFrame.

  Args:
    connection_string: Database connection handed to ``pd.read_sql`` as ``con``.

  Returns:
    Whatever ``pd.read_sql`` returns for the name ``city``.
  """
  cities = pd.read_sql(sql="city", con=connection_string)
  return cities
def get_weather_data(city_df):
    """Collect the first eight forecast entries for every city in ``city_df``.

    For each row the OpenWeatherMap forecast endpoint is queried in metric
    units, and the first eight entries of the response's ``list`` are
    flattened into one record each.

    Args:
        city_df: DataFrame with at least the columns ``city_id`` and ``city``.

    Returns:
        DataFrame with one row per (city, forecast entry) and the columns
        ``city_id``, ``city``, ``forecast_time``, ``temperature``,
        ``feels_like``, ``forecast``, ``rain_in_last_3h``, ``wind_speed``,
        ``wind_gust``, ``rain_probability``, ``part_of_day`` and
        ``data_retrieved_at``. The two time columns are converted with
        ``pd.to_datetime``; ``data_retrieved_at`` is Europe/Berlin wall-clock
        time. An empty ``city_df`` yields an empty frame with these columns.

    Raises:
        requests.HTTPError: if the API answers with an error status (for
            example an unknown city or an invalid key).
    """
    berlin_timezone = timezone('Europe/Berlin')
    API_key = my_stuff.openweather_API_Key
    endpoint = "https://api.openweathermap.org/data/2.5/forecast"
    # Declared up front so that an empty result still has every column.
    columns = [
        "city_id",
        "city",
        "forecast_time",
        "temperature",
        "feels_like",
        "forecast",
        "rain_in_last_3h",
        "wind_speed",
        "wind_gust",
        "rain_probability",
        "part_of_day",
        "data_retrieved_at",
    ]
    weather_items = []

    for _, row in city_df.iterrows():
        # `params` URL-encodes city names containing spaces or special
        # characters; HTTPS keeps the API key out of plaintext traffic.
        response = requests.get(
            endpoint,
            params={"q": row['city'], "appid": API_key, "units": "metric"},
            timeout=30,
        )
        # Surface API errors as HTTP errors instead of a KeyError on "list".
        response.raise_for_status()
        forecast = response.json()

        # One timestamp per response: all entries below were fetched together.
        retrieval_time = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

        for item in forecast["list"][:8]:
            main = item["main"]
            wind = item["wind"]
            weather_items.append({
                "city_id": row['city_id'],
                "city": row['city'],
                "forecast_time": item.get('dt_txt', None),
                "temperature": main.get("temp", None),
                "feels_like": main.get("feels_like", None),
                "forecast": item["weather"][0].get("main", None),
                # The "rain" object is absent when no rain is forecast.
                "rain_in_last_3h": item.get("rain", {}).get("3h", 0),
                "wind_speed": wind.get("speed", None),
                "wind_gust": wind.get("gust", None),
                "rain_probability": item.get("pop", None),
                # NOTE(review): the fallback 0 is kept for compatibility with
                # rows already stored; None may be the better default.
                "part_of_day": item.get("sys", {}).get("pod", 0),
                "data_retrieved_at": retrieval_time,
            })

    weather_df = pd.DataFrame(weather_items, columns=columns)
    weather_df["forecast_time"] = pd.to_datetime(weather_df["forecast_time"])
    weather_df["data_retrieved_at"] = pd.to_datetime(weather_df["data_retrieved_at"])

    return weather_df

def send_weather_data(weather_df, connection_string):
  """Append every row of ``weather_df`` to the ``weather`` SQL table.

  Args:
    weather_df: DataFrame to write; its index is not written.
    connection_string: Database connection handed to ``DataFrame.to_sql``.
  """
  write_options = dict(
      name='weather',
      con=connection_string,
      if_exists='append',
      index=False,
  )
  weather_df.to_sql(**write_options)

In [4]:
# Run the whole weather pipeline once; the confirmation string it returns is
# displayed as this cell's output.
retreiving_and_sending_weather_data()

'Weather data has been updated'

# Collecting flight data from the AeroDataBox API on RapidAPI (https://rapidapi.com/aedbx-aedbx/api/aerodatabox)
For this project, we settled on the GET FIDS/Schedules endpoint (airport departures and arrivals by local time range).

In [5]:
import requests

# Sample request used to explore the response shape: one airport (by ICAO
# code) and one fixed local-time window.
url = "https://aerodatabox.p.rapidapi.com/flights/airports/icao/EGLL/2024-05-08T00:00/2024-05-08T12:00"

querystring = {
    "withLeg": "true",
    "withCancelled": "true",
    "withCodeshared": "true",
    "withCargo": "true",
    "withPrivate": "true",
    "withLocation": "false",
}

headers = {
    "X-RapidAPI-Key": my_stuff.AeroDataBox_API_key,
    "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com",
}

# A timeout keeps the kernel from hanging if the API never answers. The
# status is deliberately not raised here; the next cell inspects it.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

In [6]:
# Inspect the HTTP status of the sample request before parsing its body.
response.status_code

200

In [7]:
# Parse the response body and show it through IPython's JSON display object
# so the nested structure can be explored.
flights_json = response.json()
from IPython.display import JSON
JSON(flights_json)

<IPython.core.display.JSON object>

# Automating flight data collection and loading it into a local MySQL database

In [8]:
import pandas as pd
import requests
import my_stuff
from datetime import timedelta, datetime

def retreiving_and_sending_flights_data():
    """Run the flights pipeline end to end.

    Builds the database connection string, reads the airports, collects
    their arrivals and appends the result to the database.

    Returns:
        The fixed confirmation message "Flights data has been updated".
    """
    db_url = get_connection_string()
    arrivals = get_and_store_flights(get_airports())
    send_flights_data(arrivals, db_url)
    return "Flights data has been updated"

def get_connection_string():
    """Build the SQLAlchemy connection URL for the local ``gans`` MySQL schema.

    The password is read from ``my_stuff.my_sql_password`` and percent-encoded
    before it is placed in the URL.

    Returns:
        A string of the form ``mysql+pymysql://user:password@host:port/schema``.
    """
    # Local import so this function works no matter which import cell has run.
    from urllib.parse import quote_plus

    schema = "gans"
    host = "127.0.0.1"
    user = "root"
    port = 3306
    # Characters such as "@", ":", "/" or "%" in a raw password would be read
    # as URL syntax and corrupt the connection string, so escape them.
    # Passwords made only of letters, digits and "_.-~" come out unchanged.
    password = quote_plus(str(my_stuff.my_sql_password))

    connection_string = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'
    return connection_string

def get_airports():
    """Read the ``airport`` table into a DataFrame.

    The connection string comes from ``get_connection_string()``.

    Returns:
        Whatever ``pd.read_sql`` returns for the name ``airport``.
    """
    return pd.read_sql('airport', con=get_connection_string())

def get_and_store_flights(airports_df):
    """Collect tomorrow's arrivals for every airport in ``airports_df``.

    Each airport is queried twice (00:00-11:59 and 12:00-23:59 of tomorrow,
    where "tomorrow" is derived from the machine's local clock) and every
    entry under the response's ``arrivals`` key becomes one row. Despite its
    name, this function only builds the DataFrame and does not write it to
    the database.

    Args:
        airports_df: DataFrame with at least the column ``ICAO``.

    Returns:
        DataFrame with the columns ``ICAO``, ``departure_airport``,
        ``arrival_time``, ``airline``, ``terminal``, ``flight_status``,
        ``flight_number`` and ``isCargo``. ``arrival_time`` is the local
        scheduled time as a string with its last six characters removed.
        With no airports or no arrivals the frame is empty but keeps these
        columns.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    tomorrow = (datetime.now() + timedelta(days=1)).strftime('%Y-%m-%d')

    # The day is fetched as two half-day windows.
    # NOTE(review): presumably the endpoint caps the range per request -
    # confirm against the API documentation.
    time_windows = [{'from': '00:00', 'to': '11:59'}, {'from': '12:00', 'to': '23:59'}]

    # Declared up front so that an empty result still has every column.
    columns = [
        'ICAO',
        'departure_airport',
        'arrival_time',
        'airline',
        'terminal',
        'flight_status',
        'flight_number',
        'isCargo',
    ]

    # Identical for every request, so built once outside the loops.
    querystring = {
        "withLeg": "true",
        "withCancelled": "true",
        "withCodeshared": "true",
        "withCargo": "true",
        "withPrivate": "true",
        "withLocation": "false"
    }
    headers = {
        "X-RapidAPI-Key": my_stuff.AeroDataBox_API_key,
        "X-RapidAPI-Host": "aerodatabox.p.rapidapi.com"
    }

    records = []

    for _, airport in airports_df.iterrows():
        icao = airport['ICAO']
        for window in time_windows:
            url = (
                "https://aerodatabox.p.rapidapi.com/flights/airports/icao/"
                f"{icao}/{tomorrow}T{window['from']}/{tomorrow}T{window['to']}"
            )

            response = requests.get(url, headers=headers, params=querystring, timeout=30)
            # Surface API errors as HTTP errors instead of a KeyError below.
            response.raise_for_status()
            if not response.content:
                # An empty body cannot be parsed as JSON; treat it as
                # "no flights in this window".
                continue

            arrivals = response.json().get('arrivals') or []
            for flight in arrivals:
                # Any of these sub-objects may be missing on sparse entries.
                departure = flight.get('departure') or {}
                arrival = flight.get('arrival') or {}
                airline = flight.get('airline') or {}
                records.append({
                    'ICAO': icao,
                    'departure_airport': departure.get('airport', {}).get('name', 'unknown'),
                    'arrival_time': arrival.get('scheduledTime', {}).get('local', None),
                    'airline': airline.get('name', None),
                    'terminal': arrival.get('terminal', 'unknown'),
                    'flight_status': flight.get('status', None),
                    'flight_number': flight.get("number", None),
                    'isCargo': flight.get('isCargo', None),
                })

    flight_df = pd.DataFrame(records, columns=columns)
    # Drop the trailing six characters of the local time string.
    # NOTE(review): this looks like the UTC offset (e.g. "+02:00") - confirm
    # against a real response.
    flight_df['arrival_time'] = flight_df['arrival_time'].str[:-6]
    return flight_df

def send_flights_data(flights_df, connection_string):
    """Append every row of ``flights_df`` to the ``flight`` SQL table.

    Args:
        flights_df: DataFrame to write; its index is not written.
        connection_string: Database connection handed to ``DataFrame.to_sql``.
    """
    write_options = {
        "name": 'flight',
        "con": connection_string,
        "if_exists": 'append',
        "index": False,
    }
    flights_df.to_sql(**write_options)


In [9]:
# Run the whole flights pipeline once; the confirmation string it returns is
# displayed as this cell's output.
retreiving_and_sending_flights_data()

'Flights data has been updated'