In [66]:
import pandas as pd 
import requests 
from secrets_config import api_key

Extract data

In [67]:
# read list of cities
df_cities = pd.read_csv("data/australian_capital_cities.csv")
df_cities.head()

Unnamed: 0,city_name
0,canberra
1,sydney
2,darwin
3,brisbane
4,adelaide


In [68]:
# request data for each city (json) and push to a list 
weather_data = []
for city_name in df_cities["city_name"]:
    params = {
        "q": city_name,
        "units": "metric",
        "appid": api_key
    }
    response = requests.get(f"http://api.openweathermap.org/data/2.5/weather", params=params)
    if response.status_code == 200: 
        weather_data.append(response.json())
    else: 
        raise Exception("Extracting weather api data failed. Please check if API limits have been reached.")

In [69]:
# read data into a dataframe to create a consistent schema 
df = pd.json_normalize(weather_data, max_level=0)
df.head()

Unnamed: 0,coord,weather,base,main,visibility,wind,clouds,dt,sys,timezone,id,name,cod,rain
0,"{'lon': 149.1281, 'lat': -35.2835}","[{'id': 804, 'main': 'Clouds', 'description': ...",stations,"{'temp': 6.99, 'feels_like': 5.63, 'temp_min':...",10000,"{'speed': 2.06, 'deg': 150}",{'all': 100},1658405994,"{'type': 2, 'id': 2004200, 'country': 'AU', 's...",36000,2172517,Canberra,200,
1,"{'lon': 151.2073, 'lat': -33.8679}","[{'id': 803, 'main': 'Clouds', 'description': ...",stations,"{'temp': 13.87, 'feels_like': 13.35, 'temp_min...",10000,"{'speed': 7.72, 'deg': 100, 'gust': 13.89}",{'all': 75},1658405508,"{'type': 2, 'id': 2002865, 'country': 'AU', 's...",36000,2147714,Sydney,200,
2,"{'lon': 130.8418, 'lat': -12.4611}","[{'id': 800, 'main': 'Clear', 'description': '...",stations,"{'temp': 22.99, 'feels_like': 23.25, 'temp_min...",10000,"{'speed': 2.06, 'deg': 200}",{'all': 0},1658406153,"{'type': 1, 'id': 9574, 'country': 'AU', 'sunr...",34200,2073124,Darwin,200,
3,"{'lon': 153.0281, 'lat': -27.4679}","[{'id': 501, 'main': 'Rain', 'description': 'm...",stations,"{'temp': 15.15, 'feels_like': 15.18, 'temp_min...",10000,"{'speed': 4.12, 'deg': 130}",{'all': 75},1658405884,"{'type': 2, 'id': 2005393, 'country': 'AU', 's...",36000,2174003,Brisbane,200,{'1h': 3.62}
4,"{'lon': 138.6, 'lat': -34.9333}","[{'id': 801, 'main': 'Clouds', 'description': ...",stations,"{'temp': 13.94, 'feels_like': 13.19, 'temp_min...",10000,"{'speed': 4.63, 'deg': 40}",{'all': 20},1658406036,"{'type': 2, 'id': 2001763, 'country': 'AU', 's...",34200,2078025,Adelaide,200,


Load data

In [70]:
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, Float, JSON # https://www.tutorialspoint.com/sqlalchemy/sqlalchemy_core_creating_table.htm
from sqlalchemy.engine import URL
from sqlalchemy.dialects import postgresql
from secrets_config import db_user, db_password, db_server_name, db_database_name

In [71]:
# create connection to database 
connection_url = URL.create(
    drivername = "postgresql+pg8000", 
    username = db_user,
    password = db_password,
    host = db_server_name, 
    port = 5432,
    database = db_database_name, 
)

engine = create_engine(connection_url)

In [72]:
# specify target table schema 
meta = MetaData()
weather_table = Table(
    "raw_weather", meta, 
    Column("dt", Integer, primary_key=True),
    Column("id", Integer, primary_key=True),
    Column("coord", JSON),
    Column("weather", JSON),
    Column("base", String),
    Column("main", JSON),
    Column("visibility", Integer),
    Column("rain", String),
    Column("wind", JSON),
    Column("clouds", JSON),
    Column("sys", JSON),
    Column("timezone", Integer),
    Column("name", String),
    Column("cod", Integer)
)
meta.create_all(engine) # creates table if it does not exist 

In [73]:
insert_statement = postgresql.insert(weather_table).values(df.to_dict(orient='records'))
upsert_statement = insert_statement.on_conflict_do_update(
    index_elements=['id', 'dt'],
    set_={c.key: c for c in insert_statement.excluded if c.key not in ['id', 'dt']})
engine.execute(upsert_statement)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fd641e2ae20>

Transform data (SQL)

In [74]:
# json functions: https://www.postgresql.org/docs/9.4/functions-json.html 
staging_table = "stg_weather"

engine.execute(f"drop table if exists {staging_table}")

engine.execute(f"""
    create table {staging_table} as (
        select 
            to_timestamp(dt) as datetime, 
            id, 
            name, 
            cast(coord ->> 'lon' as numeric) as lon,
            cast(coord ->> 'lat' as numeric) as lat,
            weather -> 0 ->> 'main' as weather_main,
            weather -> 0 ->> 'description' as weather_description,
            base, 
            cast(main ->> 'temp' as numeric) as temperature,
            cast(main ->> 'pressure' as numeric) as pressure,
            cast(main ->> 'humidity' as numeric) as humidity,
            cast(clouds ->> 'all' as numeric) as clouds
        from public.raw_weather
    )
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fd629d9fbe0>