In [3]:
import psycopg2
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone, date
from decouple import config
import string

In [4]:
# --- Data lake (source database) settings, read through decouple's config() ---
dluname = config("DBUNAME")
dlpwd = config("DBPWD")
dlhost = config("HOST")
dldbname = config("DBNAME")
# `port` is the value interpolated into the warehouse connection string
# built in fetch_load_data.
port = config("PORT")

# --- Data warehouse (target database) settings ---
dwhuname = config("DWHUNAME")
dwhpwd = config("DWHPWD")
dwhhost = config("DWHHOST")
dwhdbname = config("DWHDBNAME")

In [10]:
def fetch_load_data(cur, asset, method):
    """Copy the newest rows of one asset table from the data lake to the warehouse.

    Reads every row of ``table_yf_<asset>`` whose ``date`` equals the table's
    maximum ``date``, reshapes them to the columns
    ``date, asset, open, high, low, close, volume`` and writes them to the
    warehouse table ``finance``.

    If the date of the fetched rows is not yesterday (local clock), the
    ``date`` column is overwritten with yesterday's date before loading.

    Every failure is reported with ``print`` and ends the function early;
    nothing is raised to the caller for database or load errors.

    Args:
        cur: open DB-API cursor on the data lake (psycopg2 cursor).
        asset: asset suffix used to build the source table name.
        method: value forwarded to ``DataFrame.to_sql(if_exists=...)``.

    Returns:
        None
    """
    table_name = 'table_yf_' + asset
    dwhtblname = 'finance'

    # warehouse column -> data-lake column, in the output column order
    source_columns = {
        'date': 'date',
        'asset': 'Ins_label',
        'open': 'Open',
        'high': 'High',
        'low': 'Low',
        'close': 'Close',
        'volume': 'Volume',
    }

    # fetch data in data lake
    try:
        # NOTE(review): identifiers cannot be bound as query parameters, so the
        # table name is interpolated; `asset` must come from a trusted list.
        cur.execute("SELECT * FROM "+table_name+" WHERE date = (SELECT MAX(date) FROM "+table_name+");")
        result = cur.fetchall()
    except psycopg2.Error as e:
        print("Error: select *")
        print(e)
        return

    # Resolve column positions from the description of the query that was just
    # executed: it is guaranteed to match the order of the values in each row,
    # unlike an unordered lookup in information_schema.columns.
    try:
        column_names = [col[0] for col in cur.description]
        col_index = {target: column_names.index(source)
                     for target, source in source_columns.items()}
    except Exception as e:
        print("Column indexes could no be defined")
        print(e)
        return

    # An empty table yields no rows; there is nothing to load.
    if not result:
        print("No data found in " + table_name)
        return

    # build dataframe in one step (DataFrame.append was removed in pandas 2.0)
    try:
        records = [{target: row[idx] for target, idx in col_index.items()}
                   for row in result]
        df = pd.DataFrame(records, columns=list(col_index))
    except Exception as e:
        print("Dataframe could not builded")
        print(e)
        return

    # check if data of yesterday is available and correct it if necessary
    yesterday = (datetime.today() - timedelta(1)).date()
    # utc=True mirrors the UTC view that `.values` gives for tz-aware columns
    # and also accepts plain datetime.date values.
    latest = pd.to_datetime(df['date'].iloc[0], utc=True)
    if pd.isna(latest) or latest.date() != yesterday:
        df['date'] = yesterday

    # load data
    engine = None
    try:
        conn_string = 'postgresql://'+dwhuname+':'+dwhpwd+'@'+dwhhost+':'+port+'/'+dwhdbname
        engine = create_engine(conn_string)
        df.to_sql(dwhtblname, engine, if_exists=method, index=False)
        print(table_name+" loaded")
    except Exception as e:
        print(e)
        print("Data load failed: " + table_name)
    finally:
        # release pooled warehouse connections opened for this load
        if engine is not None:
            engine.dispose()
    

In [6]:
def close_conn_to_dl(cur, conn):
    """Close the data-lake cursor and connection.

    The connection is closed even when closing the cursor fails, so a cursor
    error can no longer leak the connection. A ``psycopg2.Error`` from either
    close is reported with ``print`` and not re-raised.

    Args:
        cur: cursor to close.
        conn: connection to close.
    """
    try:
        try:
            cur.close()
        finally:
            # always attempt to release the connection
            conn.close()
        print("connection closed")
    except psycopg2.Error as e:
        print("Error: Could not close")
        print(e)

In [7]:
def conn_to_dl():
    """Open an autocommit connection to the data lake.

    Connects with the module-level ``dlhost``, ``dldbname``, ``dluname`` and
    ``dlpwd`` settings, creates a cursor and switches the session to
    autocommit.

    Returns:
        tuple: ``(cur, conn)`` - the cursor and the connection it belongs to.

    Raises:
        psycopg2.Error: if the connection or the cursor cannot be created.
            The error is printed first and then re-raised, instead of falling
            through to an ``UnboundLocalError`` on the missing connection.
    """
    try:
        # Keyword arguments avoid DSN quoting problems when a credential
        # contains spaces or quotes.
        conn = psycopg2.connect(host=dlhost, dbname=dldbname, user=dluname, password=dlpwd)
    except psycopg2.Error as e:
        print("Error: Could not make connection to the Postgres database")
        print(e)
        raise

    try:
        cur = conn.cursor()
    except psycopg2.Error as e:
        print("Error: Could not get curser to the Database")
        print(e)
        # do not leak the connection that was already opened
        conn.close()
        raise

    # Auto commit
    conn.set_session(autocommit=True)
    print("connected")
    return cur, conn

In [8]:
# Asset suffixes; each one names a data-lake table "table_yf_<asset>".
assets = {
    "gold",
    "silver",
    "msci_world",
    "euro_stoxx",
    "smi",
    "nasdaq",
}

In [11]:
def main():
    """Run the load for every entry in ``assets`` over one data-lake connection.

    The connection is closed in a ``finally`` block so it is released even if
    processing an asset raises.
    """
    # open connection
    cur, conn = conn_to_dl()

    try:
        # process data; sorted() gives a stable order, since iterating a set
        # directly can differ between runs
        for asset in sorted(assets):
            fetch_load_data(cur, asset, 'append')
    finally:
        # close connection
        close_conn_to_dl(cur, conn)

if __name__ == "__main__":
    main()

connected
table_yf_silver loaded
table_yf_euro_stoxx loaded
table_yf_gold loaded
table_yf_smi loaded
table_yf_msci_world loaded
table_yf_nasdaq loaded
connection closed
