In [1]:
import pandas as pd
import sqlite3
from datetime import datetime
from contextlib import contextmanager

In [2]:
# SQL database helpers

# Connect to the SQLite file 'Database' only when no `db` name exists yet in the
# current namespace; an already defined connection is kept as it is.
if 'db' not in locals():
    db = sqlite3.connect('Database')

def dict_factory(cursor, row):
    """Row factory that converts a result row into a ``{column_name: value}`` dict.

    Args:
        cursor: Object whose ``description`` holds one entry per result column;
            the first item of each entry is used as the column name.
        row: Sequence with the values of the current row, indexed by position.

    Returns:
        dict mapping every column name to the value found at the same position
        in ``row``. If a name occurs twice, the later column wins.
    """
    return {
        column[0]: row[position]
        for position, column in enumerate(cursor.description)
    }
    
# Use dict_factory for rows fetched through this connection, so each row is a
# dict keyed by column name.
db.row_factory = dict_factory

@contextmanager
def cursor():
    """Context manager that yields a cursor on the module-level ``db`` connection.

    The cursor is closed when the ``with`` block is left, whether it finished
    normally or raised. Nothing is committed or rolled back here.

    Yields:
        A new cursor obtained from ``db.cursor()``.
    """
    active_cursor = db.cursor()
    try:
        yield active_cursor
    finally:
        active_cursor.close()


def table_exists(table_name: str) -> bool:
    """Return whether ``table_name`` can be selected from in the ``db`` database.

    The check runs a throw-away ``SELECT 1`` against the name and treats
    SQLite's "no such table" error as "does not exist".

    Args:
        table_name: Name to look for. It is quoted as an identifier with
            embedded double quotes doubled, so a name containing ``"`` is
            looked up literally instead of producing a syntax error.

    Returns:
        True if the SELECT executed, False if SQLite reported "no such table".

    Raises:
        sqlite3.OperationalError: For any other operational failure (for
            example a locked database). Reporting those as "missing" would make
            callers re-create, and thereby replace, a table that may exist.
    """
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    with cursor() as c:
        try:
            # LIMIT 1: only the existence of the table matters, not its rows.
            c.execute(f'SELECT 1 FROM {quoted_name} LIMIT 1')
        except sqlite3.OperationalError as error:
            if 'no such table' in str(error):
                return False
            raise
        return True


In [3]:
# Read each source dataset only when its name is not defined yet in the current
# namespace; an existing DataFrame is kept and the file is not read again.
if 'electricity' not in locals():
    electricity = pd.read_csv('data/electricity.csv')

if 'daily_weather' not in locals():
    daily_weather = pd.read_parquet('data/daily_weather.parquet')

if 'cities_in_countries' not in locals():
    cities_in_countries = pd.read_csv('data/cities.csv')

In [4]:
# Write each loaded DataFrame to its SQLite table, skipping tables that
# table_exists() already reports as present.
for table_name, frame in (
    ('daily_weather_raw', daily_weather),
    ('weather_cities', cities_in_countries),
    ('electricity', electricity),
):
    if table_exists(table_name):
        continue
    frame.to_sql(table_name, db, if_exists='replace')

In [5]:
# Grouping weather data by month - to join with electricity.
# Adds `year` / `month` columns derived from `date` to daily_weather_raw and
# removes the rows whose year is outside 2010-2018.
#
# `with db` commits the UPDATE/DELETE when the block succeeds and rolls them
# back when a statement raises. Without a commit these changes would be lost
# when the connection is closed.
with db, cursor() as c:
    # ALTER TABLE ... ADD COLUMN fails with "duplicate column name" when the
    # column already exists, so only the missing columns are added. This keeps
    # the cell re-runnable. Rows are dicts because `db` uses dict_factory.
    column_info = c.execute('PRAGMA table_info(daily_weather_raw);').fetchall()
    existing_columns = {info['name'] for info in column_info}
    for new_column in ('year', 'month'):
        if new_column not in existing_columns:
            c.execute(f'ALTER TABLE daily_weather_raw ADD COLUMN {new_column} INT;')

    # SQL string literals take single quotes; double quotes denote identifiers
    # and are accepted as strings only by SQLite's legacy compatibility mode.
    c.execute("UPDATE daily_weather_raw SET year=strftime('%Y', date), month=strftime('%m', date);")
    c.execute('DELETE FROM daily_weather_raw WHERE year < 2010 OR year > 2018;')


In [9]:
# Monthly weather aggregates per country: daily rows from daily_weather_raw are
# joined to weather_cities on station_id and grouped by (country, year, month).
# The statement deliberately has no trailing semicolon, so further clauses such
# as LIMIT can be appended to it.
# NOTE(review): `season` is neither aggregated nor listed in GROUP BY, so SQLite
# takes it from one row of each group - presumably it is constant within a
# country/month; confirm.
complete_weather_query = """
    SELECT 
        country,
        year,
        month,
        season,
        AVG(avg_temp_c) AS avg_temp_c,
        MAX(max_temp_c) AS max_temp_c,
        MIN(min_temp_c) AS min_temp_c,
        AVG(avg_sea_level_pres_hpa) AS avg_sealevel_pressure_hpa,
        AVG(precipitation_mm) AS avg_daily_precipitation_mm,
        AVG(sunshine_total_min) AS avg_sunshine_min,
        SUM(sunshine_total_min) AS total_sunshine_min,
        AVG(snow_depth_mm) AS avg_snow_depth_mm
    FROM daily_weather_raw w
    JOIN weather_cities c ON c.station_id = w.station_id
    GROUP BY c.country, w.year, w.month
"""

# Preview the first `limit` rows of the monthly weather aggregate.
limit = 5
with cursor() as c:
    preview_rows = c.execute(f'{complete_weather_query} LIMIT ?;', [limit]).fetchall()
print(pd.DataFrame(preview_rows))

       country  year  month  season  avg_temp_c  max_temp_c  min_temp_c  \
0  Afghanistan  2010      1  Winter    4.236301        22.8       -18.0   
1  Afghanistan  2010      2  Winter    3.833871        26.7       -19.5   
2  Afghanistan  2010      3  Spring   11.642581        34.8        -6.5   
3  Afghanistan  2010      4  Spring   17.192517        34.8        -5.3   
4  Afghanistan  2010      5  Spring   20.677703        39.8         2.5   

  avg_sealevel_pressure_hpa  avg_daily_precipitation_mm avg_sunshine_min  \
0                      None                    2.134545             None   
1                      None                    4.703774             None   
2                      None                    2.781538             None   
3                      None                    3.335593             None   
4                      None                    3.452727             None   

  total_sunshine_min  avg_snow_depth_mm  
0               None         106.631579  
1       

In [14]:
# Read the population CSV files and load each one into its own SQLite table.
# A file is only read when its table is not in the database yet.

def _load_csv_table(csv_path, table_name):
    """Read ``csv_path`` with pandas, write it to ``table_name`` in ``db``, return the frame."""
    frame = pd.read_csv(csv_path)
    frame.to_sql(table_name, db, if_exists='replace')
    return frame


if not table_exists('population'):
    population = _load_csv_table('data/population_total_long.csv', 'population')

if not table_exists('pop_female_perc'):
    pop_female_perc = _load_csv_table('data/population_female_percentage_long.csv', 'pop_female_perc')

if not table_exists('pop_density'):
    pop_density = _load_csv_table('data/population_density_long.csv', 'pop_density')

if not table_exists('pop_below_14'):
    pop_below_14 = _load_csv_table('data/population_below_age_14_percentage_long.csv', 'pop_below_14')

if not table_exists('pop_above_65'):
    pop_above_65 = _load_csv_table('data/population_above_age_65_percentage_long.csv', 'pop_above_65')

[{'index': 0, 'Country Name': 'Aruba', 'Year': 1960, 'Count': 50}, {'index': 1, 'Country Name': 'Afghanistan', 'Year': 1960, 'Count': 48}, {'index': 2, 'Country Name': 'Angola', 'Year': 1960, 'Count': 49}, {'index': 3, 'Country Name': 'Albania', 'Year': 1960, 'Count': 48}, {'index': 4, 'Country Name': 'United Arab Emirates', 'Year': 1960, 'Count': 49}]


In [23]:
# Derived columns: male_perc, population_working_age_perc
#
# Per-country, per-year population indicators. The five population tables are
# inner-joined on ("Country Name", Year), so a country/year that is missing
# from any of them does not appear in the result. male_perc is computed as
# 100 - female share; population_working_age_perc as 100 - below-14 share -
# above-65 share. No trailing semicolon, so clauses such as LIMIT can be
# appended.
# NOTE(review): `population` drives the join but its own Count (presumably the
# total population) is not selected - confirm whether that is intended.
population_complete = """
    SELECT 
        p."Country Name" AS country,
        p.Year AS year,
        female.Count AS female_perc,
        (100 - female.Count) AS male_perc,
        density.Count AS population_density,
        below14.Count AS population_below_14_perc,
        above65.Count AS population_above_65_perc,
        (100 - below14.Count - above65.Count) AS population_working_age_perc
    FROM population p
    JOIN pop_female_perc female ON (
        female."Country Name" = p."Country Name" AND female.Year = p.Year
    )
    JOIN pop_density density ON (
        density."Country Name" = p."Country Name" AND density.Year = p.Year
    )
    JOIN pop_below_14 below14 ON (
        below14."Country Name" = p."Country Name" AND below14.Year = p.Year
    )
    JOIN pop_above_65 above65 ON (
        above65."Country Name" = p."Country Name" AND above65.Year = p.Year
    )
"""

# Preview the first `limit` rows of the combined population query.
limit = 5
with cursor() as c:
    preview_rows = c.execute(f'{population_complete} LIMIT ?;', [limit]).fetchall()
print(pd.DataFrame(preview_rows))

                country  year  female_perc  male_perc  population_density  \
0                 Aruba  1961           50         50                 307   
1           Afghanistan  1961           48         52                  14   
2                Angola  1961           49         51                   4   
3               Albania  1961           48         52                  60   
4  United Arab Emirates  1961           48         52                   1   

   population_below_14_perc  population_above_65_perc  \
0                        43                         2   
1                        42                         2   
2                        42                         3   
3                        40                         5   
4                        43                         3   

   population_working_age_perc  
0                           55  
1                           56  
2                           55  
3                           55  
4                           5