# Header

<img src="http://reiner-lemoine-institut.de//wp-content/uploads/2015/09/rlilogo.png" width="100" style="float: right">

__copyright__ 	= "© Reiner Lemoine Institut" <br>
__license__ 	= "GNU Affero General Public License Version 3 (AGPL-3.0)" <br>
__url__ 		= "https://www.gnu.org/licenses/agpl-3.0.en.html" <br>
__author__ 		= "Ludwig Hülk" <br>

# Import

In [None]:
import sys
import os
import getpass
import pandas as pd
import numpy as np
from sqlalchemy import *
# plot
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import plotly.graph_objs as go
import plotly.offline as pltly
import colorlover as cl
import seaborn as sns
# notebook
from IPython.display import Image
from IPython.core.display import HTML 

# Initialise plotly's offline notebook mode (called with connected=True)
pltly.init_notebook_mode(connected=True)
# IPython magic: show matplotlib figures inline in the notebook output
%matplotlib inline

# Version and project labels of this notebook (plain strings)
version = 'v0.1.3 (jupyter)'
project = 'REEEM'

# Database Connection

This function creates a database connection to the **reeem_db**.<br>
The default user is **reeem_vis**, a user that has only read rights.

In [None]:
def reeem_session():
    """Open a connection to the reeem PostgreSQL database.

    Host, port, database name and user are the fixed defaults defined below;
    only the password is requested interactively (hidden input via getpass,
    with the prompt written to stderr).

    User name and password are percent-encoded before they are placed in the
    connection URL, so credentials containing URL-reserved characters such as
    ``@``, ``:``, ``/`` or ``%`` cannot corrupt the URL.

    Returns:
        The connection object returned by ``create_engine(...).connect()``.

    Raises:
        Whatever ``create_engine(...).connect()`` raises when the connection
        cannot be established (e.g. wrong password or unreachable host).
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote

    print('Please provide connection parameters to database:\n' +
              'Hit [Enter] to take defaults')
    # Fixed defaults; replace a value with input(...) to prompt for it instead.
    host = '130.226.55.43'
    port = '5432'
    database = 'reeem'
    user = 'reeem_vis'
    password = getpass.getpass(prompt='password: ',
                               stream=sys.stderr)

    # safe='' also encodes '/', which quote() would otherwise leave untouched.
    # quote() (not quote_plus()) is used so a space becomes %20 rather than
    # '+', which decodes correctly whichever unquote variant the URL parser
    # applies.
    url = 'postgresql://%s:%s@%s:%s/%s' % (quote(user, safe=''),
                                           quote(password, safe=''),
                                           host,
                                           port,
                                           database)
    con = create_engine(url).connect()
    print('Password correct! Database connection established.')
    return con

# Access Data

This section establishes the database connection and asks for the **password**.<br>
The username can be changed in the corresponding function in the section **Database Connection** above.<br>
If you don't have a username or forgot your password please contact your database admins.

In [None]:
# Open the database connection (asks for the password); `con` is reused by all queries below
con = reeem_session()

# Table Info

## Database Query

This section is used to query one specific database table (**db_table**).<br>
The result is saved to a pandas.DataFrame (**df**) and printed.

In [None]:
# Number of entries in table
# db_table is also read by the following query cells of this notebook.
db_table = 'model_draft.reeem_times_paneu_output'
# The table name is inserted into the statement with str.format.
sql = text("""
    SELECT  count(*) AS count
    FROM    {0}""".format(db_table))
df = pd.read_sql_query(sql, con)
# Bare expression: shown as the cell output
df

In [None]:
# Pathways: number of rows per distinct pathway
# Column to group by, e.g. id, pathway, version, region, year, indicator,
# category, value, unit (list not exhaustive: other cells also use nid,
# framework, aggregation, updated).
column = 'pathway'
# {0} = table name (db_table, set in the first "Table Info" cell), {1} = column
sql = text("""
    SELECT  {1}, count(*) AS count
    FROM    {0}
    GROUP BY {1} 
    ORDER BY {1}; """.format(db_table,column))
df = pd.read_sql_query(sql, con)
# Bare expression: shown as the cell output
df

In [None]:
# Years: number of rows per distinct year
# Column to group by, e.g. id, pathway, version, region, year, indicator,
# category, value, unit (list not exhaustive: other cells also use nid,
# framework, aggregation, updated).
column = 'year'
# {0} = table name (db_table, set in the first "Table Info" cell), {1} = column
sql = text("""
    SELECT  {1}, count(*) AS count
    FROM    {0} 
    GROUP BY {1} 
    ORDER BY {1}; """.format(db_table,column))
df = pd.read_sql_query(sql, con)
# Bare expression: shown as the cell output
df

In [None]:
# Regions: number of rows per distinct region
# Column to group by, e.g. id, pathway, version, region, year, indicator,
# category, value, unit (list not exhaustive: other cells also use nid,
# framework, aggregation, updated).
column = 'region'
# {0} = table name (db_table, set in the first "Table Info" cell), {1} = column
sql = text("""
    SELECT  {1}, count(*) AS count
    FROM    {0}
    GROUP BY {1} 
    ORDER BY {1}; """.format(db_table,column))
df = pd.read_sql_query(sql, con)
# Bare expression: shown as the cell output
df

In [None]:
# Categories: number of rows per distinct category
# Column to group by, e.g. id, pathway, version, region, year, indicator,
# category, value, unit (list not exhaustive: other cells also use nid,
# framework, aggregation, updated).
column = 'category'
# {0} = table name (db_table, set in the first "Table Info" cell), {1} = column
sql = text("""
    SELECT  {1}, count(*) AS count
    FROM    {0}
    GROUP BY {1} 
    ORDER BY {1}; """.format(db_table,column))
df = pd.read_sql_query(sql, con)
# Bare expression: shown as the cell output
df

In [None]:
# Indicators: number of rows per distinct (category, indicator, nid) combination
# Here `column` holds a comma-separated list of column names; the same text is
# inserted verbatim into the SELECT, GROUP BY and ORDER BY clauses.
column = 'category,indicator,nid'
# {0} = table name (db_table, set in the first "Table Info" cell), {1} = column list
sql = text("""
    SELECT  {1}, count(*) AS count
    FROM    {0}
    GROUP BY {1} 
    ORDER BY {1}; """.format(db_table,column))
# Kept under its own name (df_ind) because the next cell exports it to CSV
df_ind = pd.read_sql_query(sql, con)
# Bare expression: shown as the cell output
df_ind

In [None]:
# Save the indicator overview (df_ind) to a semicolon-separated CSV file
csvname = 'data/reeem_times_output_indicators.csv'
# Create the target folder first: to_csv does not create missing directories
# and would otherwise fail on a fresh checkout without a "data" folder.
os.makedirs(os.path.dirname(csvname), exist_ok=True)
df_ind.to_csv(csvname, sep=';')
print("Data saved to file:", csvname)

# View 0.3: Table metadata
## Database Query

This section can be used to get the metadata from one database table.<br>
The result is printed.<br>

In [None]:
# Database select (SQL)
# Calls PostgreSQL's obj_description() on the table named in db_table
# (db_table is set in the first "Table Info" cell).
sql = text("""SELECT obj_description('{0}'::regclass);""".format(db_table))
# Keep only the 'obj_description' column of the query result
df_meta = pd.read_sql_query(sql, con).loc[:,'obj_description']
# Bare expression: shown as the cell output
df_meta

# View 5: All indicators for all regions in one pathway over time

## Database Query

This section can be used to query all **indicators** for all regions from one database table (_table_).<br>
It is possible to select one specific **pathway** (_filter 1_) and one specific data **version** (_filter 2_).<br>
To query additional columns from the database table, add their names to the **SELECT** statement (_column_).<br>
The result is sorted by the **ORDER BY** clause (_sorting_).<br>
The result from the database is saved to a pandas.DataFrame (**df_5**) and can be printed.

In [None]:
# Database select (SQL)
# The "-- ..." markers inside the statement are SQL comments; they label the
# parts referred to in the description above (column, table, filter 1,
# filter 2, sorting).
# Note: the table name is written out literally here instead of using db_table.
sql = text("""
    SELECT  id, nid, pathway, framework, version, 
        region, year, category, indicator, value, 
        unit, aggregation, updated  -- column
    FROM    model_draft.reeem_times_paneu_output  -- table
    WHERE pathway = 'Base'                        -- filter 1
        AND version = 'DataV1'                    -- filter 2
    ORDER BY pathway, version, region, year;      -- sorting """)
# df_5 is used by the following "Metadata" and "Save results to files" cells
df_5 = pd.read_sql_query(sql, con)
# Show the first five rows as the cell output
df_5.head(5)

## Metadata

The important information from the above select (**df_5**) is collected in a Dictionary (**info_dict_5**).

In [None]:
# Facts dict: collect the distinct values of df_5 that describe this selection
# Fail early with a clear message; otherwise the [0] look-ups below would
# raise an opaque IndexError on an empty result.
if df_5.empty:
    raise ValueError("df_5 is empty: check the pathway/version filters of the query")

info_dict_5 = {}
# pd.Timestamp(...) normalises each unique value first: depending on the
# pandas version and the column dtype, unique() may yield numpy.datetime64
# scalars, which have no strftime() method.
info_dict_5['Updated'] = [pd.Timestamp(x).strftime("%Y-%m-%d")
                          for x in df_5.loc[:,'updated'].unique()]
# File name stem built from the first update date, pathway, framework and
# version; stored as a one-element list like the other entries.
info_dict_5['Filename'] = ['{0}_{1}_TIMESPanEU_{2}_{3}_Output' .format(
    info_dict_5['Updated'][0],
    df_5.loc[:,'pathway'].unique()[0],
    df_5.loc[:,'framework'].unique()[0],
    df_5.loc[:,'version'].unique()[0])]
#info_dict_5['Category'] = df_5.loc[:,'category'].unique()
#info_dict_5['Indicator'] = df_5.loc[:,'indicator'].unique()
#info_dict_5['Unit'] = df_5.loc[:,'unit'].unique()
info_dict_5['Pathway'] = df_5.loc[:,'pathway'].unique()
info_dict_5['Version'] = df_5.loc[:,'version'].unique()
info_dict_5['Year'] = df_5.loc[:,'year'].unique().tolist()
info_dict_5['Region'] = df_5.loc[:,'region'].unique()
#info_dict_5['Y-Axis'] = ['{} in {}'.format(*info_dict_5['Indicator'], *info_dict_5['Unit'])]
#info_dict_5['Title'] = ['{} in all regions'.format(*info_dict_5['Category'])]
# df_meta comes from the "Table metadata" cell
info_dict_5['Metadata'] = df_meta

# Print facts
for x in info_dict_5:
    print(x,':',info_dict_5[x])

## Save results to files

The results are saved to corresponding files in a folder named **data**.<br>
The **info_dict** is saved to a text file (txt).<br>
The **data** is saved to a table (csv).<br>

In [None]:
# Write facts to textfile
# Create the target folder first: neither open() nor to_csv creates it.
os.makedirs('data', exist_ok=True)
filename = ('data/{}.txt').format(*info_dict_5['Filename'])
# Explicit UTF-8 so that non-ASCII characters in the metadata are written the
# same way on every platform. The with-block closes the file, so no separate
# close() call is needed.
with open(filename, 'w', encoding='utf-8') as tfile:
    # One line per entry: "<key>: <value>, <value>, ..."
    for key, values in info_dict_5.items():
        tfile.write(key + ": " + ', '.join(str(x) for x in values) + "\n")
print("Facts saved to file:", filename)

# Save data to CSV (semicolon-separated), using the same file name stem
csvname = 'data/{}.csv' .format(*info_dict_5['Filename'])
df_5.to_csv(csvname, sep=';')
print("Data saved to file:", csvname)