# Header

<img src="http://reiner-lemoine-institut.de//wp-content/uploads/2015/09/rlilogo.png" width="100" style="float: right">

__copyright__ 	= "© Reiner Lemoine Institut" <br>
__license__ 	= "GNU Affero General Public License Version 3 (AGPL-3.0)" <br>
__url__ 		= "https://www.gnu.org/licenses/agpl-3.0.en.html" <br>
__author__ 		= "Ludwig Hülk" <br>

# Import

In [1]:
import sys
import os
import getpass
import pandas as pd
import numpy as np
from sqlalchemy import *
# plot
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import plotly.graph_objs as go
import plotly.offline as pltly
import colorlover as cl
import seaborn as sns
# notebook
from IPython.display import Image
from IPython.core.display import HTML 

pltly.init_notebook_mode(connected=True)
%matplotlib inline

# Labels identifying this notebook; neither name is referenced again in the
# cells visible here (presumably used by later plotting/export views — TODO confirm).
version = 'v0.1.3 (jupyter)'
project = 'REEEM'

# Database Connection

This function creates a database connection to the **reeem_db**.<br>
The default user is **reeem_vis**, a user that has only read rights.

In [2]:
def reeem_session():
    """Open a connection to the reeem database and return it.

    Host, port, database name and user are fixed inside this function; to
    change them, edit the assignments below (or re-enable the commented
    ``input`` prompts). Only the password is requested interactively.

    Returns:
        The connection object produced by ``create_engine(...).connect()``.

    Raises:
        Any exception raised by ``connect()`` propagates unchanged (for
        example when the password is rejected); the success message is only
        printed after the connection attempt returned.
    """
    # Local import keeps this cell runnable on its own.
    from urllib.parse import quote

    # NOTE: the "defaults" hint is a leftover from the interactive prompts
    # that are commented out below; only the password is actually asked for.
    print('Please provide connection parameters to database:\n' +
              'Hit [Enter] to take defaults')
    host = '130.226.55.43' # input('host (default 130.226.55.43): ')
    port = '5432' # input('port (default 5432): ')
    database = 'reeem' # input("database name (default 'reeem'): ")
    user = 'reeem_vis' # input('user (default postgres): ')
    # password = input('password: ')
    # No ``stream`` argument: it has no effect when running under ipykernel
    # and only triggers a warning there.
    password = getpass.getpass(prompt='password: ')

    # Percent-encode the credentials so that characters such as '@', ':',
    # '/' or '%' in the password cannot corrupt the database URL.
    url = 'postgresql://%s:%s@%s:%s/%s' % (quote(user, safe=''),
                                           quote(password, safe=''),
                                           host,
                                           port,
                                           database)
    con = create_engine(url).connect()
    print('Password correct! Database connection established.')
    return con

# Access Data

This section establishes the database connection and asks for the **password**.<br>
The username can be changed in the corresponding function in the section **Database Connection** above.<br>
If you don't have a username or forgot your password please contact your database admins.

In [3]:
# Ask for the password and open the database connection; `con` is passed to
# every pd.read_sql_query call in the cells below.
con = reeem_session()


The `stream` parameter of `getpass.getpass` will have no effect when using ipykernel



Please provide connection parameters to database:
Hit [Enter] to take defaults
password: ········
Password correct! Database connection established.


# View 0.1: Existing entries in one column

## Database Query

This section can be used to query one specific database **column** from one database table (**FROM**).<br>
The result is saved to a pandas.DataFrame (**df**) and printed.

In [14]:
# Database select (SQL): total number of rows in the output table
row_count_query = """
    SELECT  count(*) AS count
    FROM    model_draft.reeem_times_paneu_output"""
sql = text(row_count_query)
df = pd.read_sql_query(sql=sql, con=con)
df.head(25)

Unnamed: 0,count
0,4858080


In [4]:
# Database select (SQL): number of rows per distinct value of one column
# Table columns: id, nid, pathway, framework, version, region, year,
#                category, indicator, value, unit, aggregation, updated
column = 'pathway'
count_template = """
    SELECT  {0}, count(*) AS count
    FROM    model_draft.reeem_times_paneu_output 
    GROUP BY {0} 
    ORDER BY {0}; """
sql = text(count_template.format(column))
df = pd.read_sql_query(sql=sql, con=con)
df.head(25)

Unnamed: 0,pathway,count
0,Base,1802768
1,Base(withRen.Target),421984
2,HighRES,1367376
3,Pilot,421992
4,Pilot2,421976
5,StorageInnov,421984


In [5]:
# Database select (SQL): number of rows per distinct value of one column
# Table columns: id, nid, pathway, framework, version, region, year,
#                category, indicator, value, unit, aggregation, updated
column = 'year'
count_template = """
    SELECT  {0}, count(*) AS count
    FROM    model_draft.reeem_times_paneu_output 
    GROUP BY {0} 
    ORDER BY {0}; """
sql = text(count_template.format(column))
df = pd.read_sql_query(sql=sql, con=con)
df

Unnamed: 0,year,count
0,2015,607260
1,2020,607260
2,2025,607260
3,2030,607260
4,2035,607260
5,2040,607260
6,2045,607260
7,2050,607260


In [6]:
# Database select (SQL): number of rows per distinct value of one column
# Table columns: id, nid, pathway, framework, version, region, year,
#                category, indicator, value, unit, aggregation, updated
column = 'region'
count_template = """
    SELECT  {0}, count(*) AS count
    FROM    model_draft.reeem_times_paneu_output 
    GROUP BY {0} 
    ORDER BY {0}; """
sql = text(count_template.format(column))
df = pd.read_sql_query(sql=sql, con=con)
df

Unnamed: 0,region,count
0,AT,168416
1,BE,168144
2,BG,167264
3,CY,167248
4,CZ,168240
5,DE,168680
6,DK,168152
7,EE,168064
8,ES,168064
9,EU28,152208


In [10]:
# Database select (SQL): number of rows per distinct value of one column
# Table columns: id, nid, pathway, framework, version, region, year,
#                category, indicator, value, unit, aggregation, updated
column = 'category'
count_template = """
    SELECT  {0}, count(*) AS count
    FROM    model_draft.reeem_times_paneu_output 
    GROUP BY {0} 
    ORDER BY {0}; """
sql = text(count_template.format(column))
df = pd.read_sql_query(sql=sql, con=con)
df

Unnamed: 0,category,count
0,Activity Bus,58696
1,Activity Car,58696
2,Activity Motorcycles,5104
3,Activity rail,5104
4,Activity Truck Heavy,58696
5,Activity Truck Light,58696
6,Average electricity price,24552
7,Biomass production,15312
8,CO2 prices,1160
9,Distric Heat price per timeslice industry high...,39168


In [20]:
# Database select (SQL): number of rows per distinct combination of columns
# `column` is a comma-separated list here; it is inserted verbatim into the
# SELECT, GROUP BY and ORDER BY clauses.
# Table columns: id, nid, pathway, framework, version, region, year,
#                category, indicator, value, unit, aggregation, updated
column = 'category,indicator,nid'
count_template = """
    SELECT  {0}, count(*) AS count
    FROM    model_draft.reeem_times_paneu_output 
    GROUP BY {0} 
    ORDER BY {0}; """
sql = text(count_template.format(column))
df_ind = pd.read_sql_query(sql=sql, con=con)
df_ind

Unnamed: 0,category,indicator,nid,count
0,Activity Bus,Biodiesel,1036.0,8
1,Activity Bus,Biodiesel,1043.0,1392
2,Activity Bus,Biodiesel,1102.0,224
3,Activity Bus,Biodiesel,1124.0,32
4,Activity Bus,Biodiesel,1190.0,896
5,Activity Bus,Combined Combustion,1039.0,8
6,Activity Bus,Combined Combustion,1046.0,1392
7,Activity Bus,Combined Combustion,1105.0,224
8,Activity Bus,Combined Combustion,1127.0,32
9,Activity Bus,Combined Combustion,1193.0,896


In [21]:
# Save the indicator overview (df_ind) to a semicolon-separated CSV file
csvname = 'data/reeem_times_output_indicators.csv'
# Create the target folder first: to_csv raises an error when the directory
# is missing, which happens on a fresh checkout without a data/ folder.
os.makedirs(os.path.dirname(csvname), exist_ok=True)
df_ind.to_csv(csvname, sep=';')
print("Data saved to file:", csvname)

Data saved to file: data/reeem_times_output_indicators.csv


# View 0.3: Table metadata
## Database Query

This section can be used to get the metadata from one database table.<br>
The result is printed.<br>

In [7]:
# Database select (SQL): read the comment attached to the output table
sql = text("""SELECT obj_description('model_draft.reeem_times_paneu_output'::regclass);""")
meta_frame = pd.read_sql_query(sql=sql, con=con)
# Keep only the 'obj_description' column as a Series
df_meta = meta_frame['obj_description']
df_meta

0    {"title": "REEEM Times PanEU Output",\n    "de...
Name: obj_description, dtype: object

# View 5: All indicators for all regions in one pathway over time

## Database Query

This section can be used to query all **indicators** for all regions from one database table (_table_).<br>
It is possible to select one specific **pathway** (_filter 1_) and one specific data **version** (_filter 2_).<br>
To query additional columns from the database table, add their names to the **SELECT** statement (_column_).<br>
The rows are sorted by the **ORDER BY** clause (_sorting_).<br>
The result from the database is saved to a pandas.DataFrame (**df_5**) and can be printed.

In [8]:
# Database select (SQL): all indicators of all regions for one pathway/version
# NOTE(review): the ORDER BY stops at `year`, so the order of rows sharing the
# same pathway/version/region/year is left to the database. Code that picks
# "the first" value of a column of df_5 therefore depends on that order.
pathway_query = """
    SELECT  id, nid, pathway, framework, version, 
        region, year, category, indicator, value, 
        unit, aggregation, updated  -- column
    FROM    model_draft.reeem_times_paneu_output  -- table
    WHERE pathway = 'Base'                        -- filter 1
        AND version = 'DataV1'                    -- filter 2
    ORDER BY pathway, version, region, year;      -- sorting """
sql = text(pathway_query)
df_5 = pd.read_sql_query(sql=sql, con=con)
df_5.head(5)

Unnamed: 0,id,nid,pathway,framework,version,region,year,category,indicator,value,unit,aggregation,updated
0,1281633,287,Base,FrameworkV1,DataV1,AT,2015,NEW Capacities Public and Industrial Power Pla...,Lignite,0.0,MW,True,2017-11-15 00:00:00+01:00
1,2991169,1087,Base,FrameworkV2,DataV1,AT,2015,Vehicle Stock Bus,Gasoline,0.001,1000 veh,False,2018-07-10 00:00:00+02:00
2,1291897,1599,Base,FrameworkV1,DataV1,AT,2015,Variable O&M costs industry by subsector,Lime,1.214,Million Euro,False,2017-11-15 00:00:00+01:00
3,1291905,1600,Base,FrameworkV1,DataV1,AT,2015,Variable O&M costs industry by subsector,Other non metallic minerals,272.815,Million Euro,False,2017-11-15 00:00:00+01:00
4,2986801,521,Base,FrameworkV2,DataV1,AT,2015,Electricity Production from Public and Industr...,Fuel Cell,0.0,TWh,False,2018-07-10 00:00:00+02:00


## Metadata

The important information from the above select (**df_5**) is collected in a Dictionary (**info_dict_5**).

In [9]:
# Facts dict: summarise df_5 (insertion order below is the print order)
updated_days = [stamp.strftime("%Y-%m-%d") for stamp in df_5['updated'].unique()]
pathways = df_5['pathway'].unique()
frameworks = df_5['framework'].unique()
versions = df_5['version'].unique()

info_dict_5 = {
    'Updated': updated_days,
    # File name stem built from the first unique value of each field
    'Filename': ['{0}_{1}_TIMESPanEU_{2}_{3}_Output' .format(
        updated_days[0],
        pathways[0],
        frameworks[0],
        versions[0])],
    'Pathway': pathways,
    'Version': versions,
    'Year': df_5['year'].unique().tolist(),
    'Region': df_5['region'].unique(),
    'Metadata': df_meta,
}
# Entries that are currently disabled for this view:
#info_dict_5['Category'] = df_5.loc[:,'category'].unique()
#info_dict_5['Indicator'] = df_5.loc[:,'indicator'].unique()
#info_dict_5['Unit'] = df_5.loc[:,'unit'].unique()
#info_dict_5['Y-Axis'] = ['{} in {}'.format(*info_dict_5['Indicator'], *info_dict_5['Unit'])]
#info_dict_5['Title'] = ['{} in all regions'.format(*info_dict_5['Category'])]

# Print facts
for fact, content in info_dict_5.items():
    print(fact,':',content)

Updated : ['2017-11-15', '2018-07-10']
Filename : ['2017-11-15_Base_TIMESPanEU_FrameworkV1_DataV1_Output']
Pathway : ['Base']
Version : ['DataV1']
Year : [2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050]
Region : ['AT' 'BE' 'BG' 'CY' 'CZ' 'DE' 'DK' 'EE' 'ES' 'EU28' 'FI' 'FR' 'GR' 'HR'
 'HU' 'IE' 'IT' 'LT' 'LU' 'LV' 'MT' 'NL' 'PL' 'PT' 'RO' 'SE' 'SI' 'SK'
 'UK']
Metadata : 0    {"title": "REEEM Times PanEU Output",\n    "de...
Name: obj_description, dtype: object


## Save results to files

The results are saved to corresponding files in a folder named **data**.<br>
The **info_dict** is saved to a text file (txt).<br>
The **data** is saved to a table (csv).<br>

In [None]:
# Make sure the output folder exists; open() and to_csv both fail otherwise.
os.makedirs('data', exist_ok=True)

# Write facts to textfile: one "key: value, value, ..." line per dict entry
filename = ('data/{}.txt').format(*info_dict_5['Filename'])
# Explicit UTF-8 so non-ASCII characters in the facts are written the same way
# on every platform; the with-block closes the file, no extra close() needed.
with open(filename, 'w', encoding='utf-8') as tfile:
    for key, values in info_dict_5.items():
        tfile.write(key + ": " + ', '.join(str(value) for value in values) + "\n")
print("Facts saved to file:", filename)

# Save data to CSV (semicolon-separated, same file name stem as the facts file)
csvname = 'data/{}.csv' .format(*info_dict_5['Filename'])
df_5.to_csv(csvname, sep=';')
print("Data saved to file:", csvname)