In [27]:
import pandas as pd 
import requests 
from secrets_config import api_key
import jinja2 as j2

Extract data

In [28]:
# load the capital cities that weather data will be requested for
df_cities = pd.read_csv(filepath_or_buffer="data/australian_capital_cities.csv")
df_cities.head(n=5)

Unnamed: 0,city_name
0,canberra
1,sydney
2,darwin
3,brisbane
4,adelaide


In [29]:
# request current weather for each city (json) and collect the payloads in a list
weather_data = []
for city_name in df_cities["city_name"]:
    params = {
        "q": city_name,
        "units": "metric",
        "appid": api_key
    }
    # https keeps the API key (sent as a query parameter) out of clear text;
    # the timeout stops a stalled connection from hanging the cell forever
    # (requests never times out unless told to).
    response = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        params=params,
        timeout=10,
    )
    if response.status_code == 200:
        weather_data.append(response.json())
    else:
        # fail fast: a partial extract must not be loaded downstream.
        # the city and status code are included so the cause is visible.
        raise Exception(
            f"Extracting weather api data failed for city '{city_name}' "
            f"(HTTP status {response.status_code}). "
            "Please check if API limits have been reached."
        )

In [30]:
# flatten only the top level of each payload so every response shares one set of
# columns; nested objects (coord, main, wind, ...) remain as dict/list values
df = pd.json_normalize(data=weather_data, max_level=0)
df.head(n=5)

Unnamed: 0,coord,weather,base,main,visibility,wind,clouds,dt,sys,timezone,id,name,cod,rain
0,"{'lon': 149.1281, 'lat': -35.2835}","[{'id': 803, 'main': 'Clouds', 'description': ...",stations,"{'temp': 7.02, 'feels_like': 5.24, 'temp_min':...",10000,"{'speed': 2.57, 'deg': 130}",{'all': 75},1658496230,"{'type': 2, 'id': 2004200, 'country': 'AU', 's...",36000,2172517,Canberra,200,
1,"{'lon': 151.2073, 'lat': -33.8679}","[{'id': 500, 'main': 'Rain', 'description': 'l...",stations,"{'temp': 12.25, 'feels_like': 11.91, 'temp_min...",10000,"{'speed': 5.66, 'deg': 110}",{'all': 40},1658496121,"{'type': 2, 'id': 2002865, 'country': 'AU', 's...",36000,2147714,Sydney,200,{'1h': 0.16}
2,"{'lon': 130.8418, 'lat': -12.4611}","[{'id': 800, 'main': 'Clear', 'description': '...",stations,"{'temp': 19.43, 'feels_like': 19.33, 'temp_min...",10000,"{'speed': 2.06, 'deg': 190}",{'all': 0},1658495970,"{'type': 1, 'id': 9574, 'country': 'AU', 'sunr...",34200,2073124,Darwin,200,
3,"{'lon': 153.0281, 'lat': -27.4679}","[{'id': 803, 'main': 'Clouds', 'description': ...",stations,"{'temp': 15.29, 'feels_like': 15.12, 'temp_min...",10000,"{'speed': 5.66, 'deg': 190}",{'all': 75},1658495587,"{'type': 2, 'id': 2005393, 'country': 'AU', 's...",36000,2174003,Brisbane,200,
4,"{'lon': 138.6, 'lat': -34.9333}","[{'id': 500, 'main': 'Rain', 'description': 'l...",stations,"{'temp': 13.79, 'feels_like': 13.08, 'temp_min...",10000,"{'speed': 3.6, 'deg': 40}",{'all': 100},1658496052,"{'type': 2, 'id': 2001763, 'country': 'AU', 's...",34200,2078025,Adelaide,200,{'1h': 0.41}


Load data

In [31]:
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Float, JSON # https://www.tutorialspoint.com/sqlalchemy/sqlalchemy_core_creating_table.htm
from sqlalchemy.engine import URL
from sqlalchemy.dialects import postgresql
from secrets_config import db_user, db_password, db_server_name, db_database_name

In [32]:
# build the database URL from the values imported from secrets_config
# (PostgreSQL via the pg8000 driver, port 5432)
connection_url = URL.create(
    drivername="postgresql+pg8000",
    host=db_server_name,
    port=5432,
    database=db_database_name,
    username=db_user,
    password=db_password,
)

# engine shared by the load and transform steps below
engine = create_engine(connection_url)

In [33]:
# specify target table schema
raw_table = "raw_weather"
meta = MetaData()

# "dt" and "id" are both flagged primary_key, so together they form the
# composite key of the table; the remaining columns hold the payload fields.
weather_columns = [
    Column("dt", Integer, primary_key=True),
    Column("id", Integer, primary_key=True),
    Column("coord", JSON),
    Column("weather", JSON),
    Column("base", String),
    Column("main", JSON),
    Column("visibility", Integer),
    Column("rain", String),
    Column("wind", JSON),
    Column("clouds", JSON),
    Column("sys", JSON),
    Column("timezone", Integer),
    Column("name", String),
    Column("cod", Integer),
]
weather_table = Table(raw_table, meta, *weather_columns)

# creates table if it does not exist
meta.create_all(engine)

In [34]:
# upsert the extracted rows: insert new (id, dt) pairs, and on a key conflict
# overwrite every non-key column with the incoming ("excluded") value
insert_statement = postgresql.insert(weather_table).values(df.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['id', 'dt'],
    set_={c.key: c for c in insert_statement.excluded if c.key not in ['id', 'dt']})
# engine.begin() commits on success and rolls back on error. A bare
# engine.connect() only persists the write under SQLAlchemy 1.x implicit
# autocommit; with 2.0 semantics the upsert would be rolled back on close.
with engine.begin() as connection:
    connection.execute(upsert_statement)

Transform data (SQL)

In [52]:
import os 
import logging 

def build_model(model, engine, models_path="models")->bool:
    """
    Builds the model with a matching file name in the models_path folder.
    - `model`: the name of the model (without .sql)
    - `engine`: the SQLAlchemy engine the rendered sql is executed against
    - `models_path`: the path to the models directory containing the sql files. defaults to `models`

    The sql file is rendered as a jinja template with `target_table` (set to
    `model`) and `engine` available, then executed in a single transaction.

    Returns True when the model was built, and False when `<model>.sql` does not
    exist in `models_path` (an error is logged in that case).
    """
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s][%(asctime)s]: %(message)s")

    model_file = os.path.join(models_path, f"{model}.sql")
    if not os.path.isfile(model_file):
        # also covers a missing models_path directory, which os.listdir would raise on
        logging.error(f"Could not find model: {model}")
        return False

    logging.info(f"Building model: {model}")

    # read sql contents into a variable
    with open(model_file) as f:
        raw_sql = f.read()

    # parse sql using jinja
    parsed_sql = j2.Template(raw_sql).render(target_table = model, engine=engine)

    # execute parsed sql; engine.begin() commits on success and rolls back on error.
    # exec_driver_sql sends the raw string to the driver, replacing the
    # engine.execute(str) form that was removed in SQLAlchemy 2.0.
    with engine.begin() as connection:
        result = connection.exec_driver_sql(parsed_sql)
        rows_affected = result.rowcount
    logging.info(f"Successfully built model: {model}, rows inserted/updated: {rows_affected}")
    return True

In [55]:
# build the staging model from models/staging_weather.sql
build_model(models_path="models/", engine=engine, model="staging_weather")

[INFO][2022-07-22 22:03:36,419][1669441271.py]: Building model: staging_weather
[INFO][2022-07-22 22:03:36,443][1669441271.py]: Successfully built model: staging_weather


True

In [56]:
# build the serving model from models/serving_weather.sql
build_model(models_path="models/", engine=engine, model="serving_weather")

[INFO][2022-07-22 22:03:37,821][1669441271.py]: Building model: serving_weather
[INFO][2022-07-22 22:03:37,844][1669441271.py]: Successfully built model: serving_weather


True