# Get Weather Forecast Based on Location

* Hot weather increases weekday ridership ([Lu et al., 2024](https://www.sciencedirect.com/science/article/pii/S2950105924000275))
* E-scooter usage was sensitive to rain, but even more so to snowfall and temperatures below freezing. ([Mathew et al., 2019](https://ieeexplore.ieee.org/document/8917121))


Retrieve forecasts from [OpenWeather](https://openweathermap.org/forecast5) for:
* Temperature
* Perceived temperature
* Wind speed
* Chance of precipitation
* Past precipitation

Please ensure you have set up the corresponding tables with `sql/create_database_data_pipeline_example.sql`.

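Save your MySQL password in `python/keys.env` as `MYSQL_KEY` (or provide your password by other means) and open your MySQL workbench. The code below also reads `HOST`, `USER`, `PORT` and `OPENWEATHER_KEY` from the environment, so define them in the same file.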

In [None]:
import pandas as pd
import requests
from pytz import timezone
from datetime import datetime
from dotenv import load_dotenv
import os

def retrieve_and_send_data():
    """
    Run the complete weather pipeline from database read to database write.

    Steps, in order:
        1. Build the MySQL connection string.
        2. Load the stored cities (including coordinates) from the database.
        3. Request the OpenWeather forecast for every city.
        4. Write the collected forecasts back to the database.

    Returns:
        str: The fixed confirmation message "Data has been updated".
    """
    db_url = create_connection_string()

    # Read side: cities -> forecasts
    forecasts = fetch_weather_data(fetch_cities_data(db_url))

    # Write side: persist what was just retrieved
    store_weather_data(forecasts, db_url)

    return "Data has been updated"


def create_connection_string():
    """
    Builds a SQLAlchemy-style connection string for a MySQL database.

    Environment:
        Loads 'keys.env' and then reads the following variables:
            MYSQL_KEY=<your_mysql_password>
            HOST=<database host>
            USER=<database user>
            PORT=<database port>

    Returns:
        str: A connection string of the form
             'mysql+pymysql://user:password@host:port/schema'.
             User and password are URL-encoded so that special characters
             such as '@', ':' or '/' do not corrupt the URL.

    Raises:
        RuntimeError: If one of the required variables is missing or empty.
    """
    from urllib.parse import quote_plus

    schema = "data_pipeline_example"
    load_dotenv('keys.env')  # Load credentials from .env file

    # NOTE(review): load_dotenv does not override variables that already exist
    # in the process environment by default. USER is commonly pre-set by the
    # shell, so the value from 'keys.env' may be ignored -- confirm.
    required = ("MYSQL_KEY", "HOST", "USER", "PORT")
    settings = {name: os.getenv(name) for name in required}

    # Fail early with a clear message instead of embedding the text "None"
    # in the URL and failing later inside the database driver.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise RuntimeError(
            "Missing database settings (environment or 'keys.env'): "
            + ", ".join(missing)
        )

    user = quote_plus(settings["USER"])
    password = quote_plus(settings["MYSQL_KEY"])
    host = settings["HOST"]
    port = settings["PORT"]
    return f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'


def fetch_cities_data(connection_string):
    """
    Load the complete 'cities' table from the database.

    Args:
        connection_string (str): Database connection string.

    Returns:
        pandas.DataFrame: One row per stored city.
                          Expected columns: ['city_id', 'latitude', 'longitude']
    """
    table_name = "cities"
    cities = pd.read_sql(table_name, con=connection_string)
    return cities


def _build_weather_frame(weather_items):
    """Turn collected forecast rows into a DataFrame with datetime columns."""
    columns = [
        "city_id",
        "forecast_time",
        "temperature",
        "feels_like",
        "forecast",
        "rain_prob",
        "rain_past3h",
        "wind_speed",
        "data_retrieved_at",
    ]
    # Passing the columns explicitly keeps the schema stable even when no
    # rows were collected.
    weather_df = pd.DataFrame(weather_items, columns=columns)

    # Ensure correct datetime types
    weather_df["forecast_time"] = pd.to_datetime(weather_df["forecast_time"])
    weather_df["data_retrieved_at"] = pd.to_datetime(weather_df["data_retrieved_at"])
    return weather_df


def fetch_weather_data(cities_df, timeout=10):
    """
    Retrieves weather forecast data for each city from the OpenWeather API.

    Args:
        cities_df (pandas.DataFrame): DataFrame containing city information
                                      with columns ['city_id', 'latitude', 'longitude'].
        timeout (float): Seconds to wait for each API request before giving
                         up. Without a timeout a stalled connection would
                         block the pipeline indefinitely.

    Environment:
        Expects a file named 'keys.env' containing:
            OPENWEATHER_KEY=<your_openweather_api_key>

    Returns:
        pandas.DataFrame: DataFrame with weather forecasts, containing:
            - city_id
            - forecast_time (datetime)
            - temperature
            - feels_like
            - forecast (main weather description)
            - rain_prob (probability of precipitation)
            - rain_past3h (value of the response's rain '3h' field, 0 if absent)
            - wind_speed
            - data_retrieved_at (timestamp of retrieval, Europe/Berlin wall time)
        An empty `cities_df` yields an empty DataFrame with the same columns.

    Raises:
        requests.HTTPError: If the API answers with an error status
                            (for example an invalid or missing API key).
        requests.Timeout: If a request exceeds `timeout` seconds.
    """
    # Nothing to look up: return the empty schema without touching the API.
    if cities_df.empty:
        return _build_weather_frame([])

    berlin_timezone = timezone('Europe/Berlin')
    load_dotenv('keys.env')
    api_key = os.getenv("OPENWEATHER_KEY")
    forecast_url = "https://api.openweathermap.org/data/2.5/forecast"
    weather_items = []

    # Iterating the columns directly (instead of iterrows) keeps each column's
    # own dtype, so an integer city_id is not upcast to float by the
    # floating-point coordinates sharing its row.
    city_rows = zip(
        cities_df["city_id"], cities_df["latitude"], cities_df["longitude"]
    )
    for city_id, latitude, longitude in city_rows:
        response = requests.get(
            forecast_url,
            params={
                "lat": latitude,
                "lon": longitude,
                "appid": api_key,
                "units": "metric",
            },
            timeout=timeout,
        )
        # Surface API errors here rather than as a KeyError on "list" below.
        response.raise_for_status()
        weather_data = response.json()

        retrieval_time = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

        # Extract forecast data from API response
        for item in weather_data["list"]:
            weather_items.append({
                "city_id": city_id,
                "forecast_time": item.get("dt_txt"),
                "temperature": item["main"].get("temp"),
                "feels_like": item["main"].get("feels_like"),
                "forecast": item["weather"][0].get("main"),
                "rain_prob": item.get('pop'),
                "rain_past3h": item.get("rain", {}).get("3h", 0),
                "wind_speed": item["wind"].get("speed"),
                "data_retrieved_at": retrieval_time,
            })

    return _build_weather_frame(weather_items)


def store_weather_data(weather_df, connection_string):
    """
    Write the retrieved weather forecasts to the database.

    Args:
        weather_df (pandas.DataFrame): DataFrame containing weather forecasts.
        connection_string (str): Database connection string.

    Behavior:
        Rows are appended to the 'weather_forecasts' table; the DataFrame
        index is not written.
    """
    target_table = 'weather_forecasts'
    weather_df.to_sql(target_table, con=connection_string, if_exists='append', index=False)

In [None]:
# Run the whole pipeline once: read cities, fetch forecasts, append them to the database.
retrieve_and_send_data()