In [10]:
import pandas as pd
import sqlite3
from datetime import datetime
from contextlib import contextmanager

In [21]:
# SQL Database helpers

# Keep the connection created by an earlier run of this cell; a new in-memory
# database is opened only when no `db` name exists yet.
if 'db' not in locals():
    db = sqlite3.connect(':memory:')

def dict_factory(cursor, row):
    """Build a ``{column_name: value}`` dict for one result row.

    The column name is the first element of each entry in
    ``cursor.description``; the value is taken from ``row`` at the same
    position. If a name occurs more than once, the later column wins.
    """
    return {
        column[0]: row[position]
        for position, column in enumerate(cursor.description)
    }
    
# Install dict_factory on the shared connection so fetched rows are dicts
# keyed by column name instead of plain tuples.
db.row_factory = dict_factory

@contextmanager
def cursor():
    """Yield a cursor on the module-level ``db`` connection and close it on exit.

    The cursor is closed whether the ``with`` body finishes normally or raises.
    """
    handle = db.cursor()
    try:
        yield handle
    finally:
        handle.close()


def table_exists(table_name: str) -> bool:
    """Report whether ``table_name`` can be selected from on the shared connection.

    The check is a probe query: ``SELECT 1 FROM "<table_name>"`` is executed
    and the outcome decides the result.

    Args:
        table_name: Name of the table to probe. It is embedded as a
            double-quoted SQL identifier; embedded double quotes are escaped.

    Returns:
        True if the probe query executes, False if it raises
        ``sqlite3.OperationalError`` (which is what a missing table produces,
        but any other operational error is reported as False as well).
    """
    # Identifiers cannot be bound as `?` parameters, so quote by hand: inside a
    # double-quoted identifier a literal `"` is written as `""`. Without this a
    # name containing `"` would produce malformed SQL and a false negative.
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    with cursor() as c:
        try:
            c.execute(f'SELECT 1 FROM {quoted_name}')
        except sqlite3.OperationalError:
            return False
        return True


In [34]:
# Each dataset is read from disk only when its name is not defined yet, so
# re-running this cell keeps the objects that are already in memory.
if 'electricity' not in locals():
    electricity = pd.read_csv('data/electricity.csv')

if 'daily_weather' not in locals():
    daily_weather = pd.read_parquet('data/daily_weather.parquet')

if 'cities_in_countries' not in locals():
    cities_in_countries = pd.read_csv('data/cities.csv')

In [50]:
# Copy each DataFrame into the SQLite connection `db`, one table per frame.
# Every write is guarded by table_exists(), so a table that can already be
# queried is left as it is when this cell is run again.
if not table_exists('daily_weather_raw'):
    # Raw daily weather observations -> table `daily_weather_raw`.
    daily_weather.to_sql('daily_weather_raw', db, if_exists='replace')
    
if not table_exists('weather_cities'): 
    # City/country lookup -> table `weather_cities`.
    cities_in_countries.to_sql('weather_cities', db, if_exists='replace')

if not table_exists('electricity'): 
    # Electricity data -> table `electricity`.
    electricity.to_sql('electricity', db, if_exists='replace')

In [15]:
# Derive `year` and `month` columns on the raw weather table so it can be
# grouped by month (to join with electricity), then keep only 2010-2018.
# The cell is safe to re-run: columns are added only when they are missing,
# and the UPDATE/DELETE give the same result when repeated.
with cursor() as c:
    # Column names are read from cursor.description, which is filled in even
    # for a query that returns no rows and does not depend on row_factory.
    existing_columns = {
        col[0].lower()
        for col in c.execute('SELECT * FROM daily_weather_raw LIMIT 0;').description
    }
    # ALTER TABLE ... ADD COLUMN raises if the column already exists, which
    # previously made a second run of this cell fail.
    for new_column in ('year', 'month'):
        if new_column not in existing_columns:
            c.execute(f'ALTER TABLE daily_weather_raw ADD COLUMN {new_column} INT;')
    # Format strings use single quotes: in SQL, double quotes denote
    # identifiers and are accepted as string literals only as a SQLite
    # compatibility fallback.
    c.execute("UPDATE daily_weather_raw SET year=strftime('%Y', date), month=strftime('%m', date);")
    c.execute('DELETE FROM daily_weather_raw WHERE year < 2010 OR year > 2018;')


In [47]:
# Aggregates the daily weather rows, joined to their city/country record via
# station_id, into one row per (country, year, month).
# NOTE(review): `season` is selected without an aggregate and is not part of
# the GROUP BY, so its value comes from whichever row SQLite picks within the
# group - confirm it is constant per country and month.
complete_weather_query = """
    SELECT 
        country,
        year,
        month,
        season,
        AVG(avg_temp_c) AS avg_temp_c,
        MAX(max_temp_c) AS max_temp_c,
        MIN(min_temp_c) AS min_temp_c,
        AVG(avg_sea_level_pres_hpa) AS avg_sealevel_pressure_hpa,
        AVG(precipitation_mm) AS avg_daily_precipitation_mm,
        AVG(sunshine_total_min) AS avg_sunshine_min,
        SUM(sunshine_total_min) AS total_sunshine_min,
        AVG(snow_depth_mm) AS avg_snow_depth_mm
    FROM daily_weather_raw w
    JOIN weather_cities c ON c.station_id = w.station_id
    GROUP BY c.country, w.year, w.month
"""

# Run the query and load every result row into the `weather` DataFrame.
with cursor() as c:
    c.execute(complete_weather_query + ';')
    weather = pd.DataFrame(c.fetchall())

In [53]:
# Count the distinct (COUNTRY, YEAR, MONTH) groups in the electricity table.
with cursor() as c:
    c.execute(
        'SELECT COUNT(*) FROM '
        '(SELECT COUNTRY, YEAR, MONTH FROM electricity GROUP BY COUNTRY, YEAR, MONTH);'
    )
    print(pd.DataFrame(c.fetchall()))

   COUNT(*)
0      7332
