# Currency data APIs

## Live Data extraction

Lambda layer: group2_currency_api

In [None]:
import requests
import pandas as pd
import json
from datetime import date, datetime, timedelta
import psycopg2
from psycopg2.extras import execute_values
import boto3
import os

In [None]:
# Database connection settings, read from environment variables at import
# time. Any variable that is not set yields None for that key.

# Data lake (source) connection parameters.
DB_CONFIG_DATALAKE = {
    key: os.environ.get(env_name)
    for key, env_name in (
        ("host", "var_host"),
        ("port", "var_port"),
        ("database", "var_database"),
        ("user", "var_user"),
        ("password", "var_password"),
    )
}

# Data warehouse (target) connection parameters.
# NOTE(review): port and user reuse the data lake variables (var_port,
# var_user) while host/database/password use the "2" variants — confirm
# this is intentional.
DB_CONFIG_DWH = {
    key: os.environ.get(env_name)
    for key, env_name in (
        ("host", "var_host2"),
        ("port", "var_port"),
        ("database", "var_database2"),
        ("user", "var_user"),
        ("password", "var_password2"),
    )
}

In [None]:
def _to_db_value(value):
    """Return *value* as a native Python scalar, mapping NaN/NaT/None to None."""
    if hasattr(value, 'item'):
        # numpy scalar -> plain Python scalar so psycopg2 can adapt it
        value = value.item()
    return None if pd.isna(value) else value


def lambda_handler(event, context):
    """Load one day of cross-border energy trades into fact_energy_trade.

    The day processed is two days before today. Hourly energy flows for that
    day are aggregated per country from the data lake, joined with the daily
    CHF/EUR exchange rate, the hourly energy price and the warehouse
    dimensions (dim_countries, dim_time), and inserted into
    fact_energy_trade with freshly generated trade_ids.

    Args:
        event: Lambda event payload (not used).
        context: Lambda context object (not used).

    Returns:
        {'status': 'success', 'rows_inserted': <int>} on success, or
        {'statusCode': 500, 'body': <JSON-encoded error message>} if any
        step raises.
    """
    conn_src = None
    conn_tgt = None
    cur_tgt = None
    try:
        # Setup date window (2 days ago)
        # NOTE(review): date.today() is naive and follows the runtime's
        # timezone — confirm it matches the timezone of the source timestamps.
        trade_day = date.today() - timedelta(days=2)
        print(f'trade_day: {trade_day}')
        start_time = datetime.combine(trade_day, datetime.min.time())
        print(f'start_time: {start_time}')
        end_time = start_time + timedelta(days=1)
        print(f'end_time: {end_time}')

        # Connect to source and target databases
        conn_src = psycopg2.connect(**DB_CONFIG_DATALAKE)
        conn_tgt = psycopg2.connect(**DB_CONFIG_DWH)
        cur_tgt = conn_tgt.cursor()

        # Ensure fact_energy_trade table exists
        cur_tgt.execute("""
        CREATE TABLE IF NOT EXISTS fact_energy_trade (
            trade_id BIGINT PRIMARY KEY,
            time_id INT,
            location_id INT,
            neighbor_country_id INT,
            direction TEXT,
            energy_value_gw FLOAT,
            exchange_rate FLOAT,
            value_chf FLOAT
        );
        """)
        conn_tgt.commit()

        # 1) Load and aggregate hourly energy CBET data for the trade day only.
        # Restricting to [start_time, end_time) prevents every run from
        # re-inserting the full history under new trade_ids.
        sql_energy = """
        SELECT
        date_trunc('hour', timestamp) AS hour,
        country,
        SUM(value) AS energy_sum_gw
        FROM tbl_energy_cbet_data
        WHERE timestamp >= %(start_time)s AND timestamp < %(end_time)s
        GROUP BY 1,2
        ORDER BY 1,2;
        """
        df_energy = pd.read_sql_query(
            sql_energy, conn_src,
            params={'start_time': start_time, 'end_time': end_time}
        )
        # drop aggregate "sum" rows that aren't actual countries
        df_energy = df_energy[df_energy['country'].str.strip().str.lower() != 'sum']

        if df_energy.empty:
            # Nothing to load for this day; skip the remaining lookups.
            return { 'status':'success', 'rows_inserted': 0 }

        # 2) Load daily currency rates
        sql_currency = """
        SELECT
        DATE(timestamp) AS rate_date,
        exchange_rate
        FROM tbl_currency_data
        WHERE source_currency='CHF' AND target_currency='EUR'
        ;
        """
        df_curr = pd.read_sql_query(sql_currency, conn_src)
        # ensure one rate per day to avoid duplicate merges
        df_curr = df_curr.drop_duplicates(subset=['rate_date'])

        # 3) Prepare energy dataframe: the sign of the aggregated flow gives
        # the direction, the magnitude is stored separately.
        df_energy['direction'] = df_energy['energy_sum_gw'].apply(
            lambda x: 'import' if x>0 else ('export' if x<0 else 'none')
        )
        df_energy['energy_value_gw'] = df_energy['energy_sum_gw'].abs()
        df_energy['rate_date'] = pd.to_datetime(df_energy['hour']).dt.date

        # merge exchange rate
        df_energy = df_energy.merge(df_curr, on='rate_date', how='left')

        # 4) Map foreign keys
        # location_id always Switzerland = 6
        df_energy['location_id'] = 6

        # Fetch dim_countries for neighbor_country_id
        sql_country = "SELECT country_id, country_name_en FROM dim_countries;"
        df_country = pd.read_sql_query(sql_country, conn_tgt)
        df_energy = df_energy.merge(
            df_country,
            left_on='country', right_on='country_name_en',
            how='left'
        )
        df_energy.rename(columns={'country_id':'neighbor_country_id'}, inplace=True)

        # Fetch dim_time for time_id
        sql_time = "SELECT time_id, timestamp_utc FROM dim_time;"
        df_time = pd.read_sql_query(sql_time, conn_tgt)
        df_time['timestamp_utc'] = pd.to_datetime(df_time['timestamp_utc'])

        df_energy = df_energy.merge(
            df_time,
            left_on='hour', right_on='timestamp_utc',
            how='left'
        )

        # 5) Lookup hourly energy prices and calculate value in CHF
        sql_price = """
        SELECT
        date_trunc('hour', timestamp) AS hour,
        price
        FROM tbl_energy_price_data;
        """
        df_price = pd.read_sql_query(sql_price, conn_src)
        # ensure one price per hour to avoid duplicate merges
        df_price = df_price.drop_duplicates(subset=['hour'])
        df_price['hour'] = pd.to_datetime(df_price['hour']).dt.floor('H')
        df_price.rename(columns={'price':'price_eur_mwh'}, inplace=True)

        # merge hourly prices
        df_energy = df_energy.merge(
            df_price[['hour','price_eur_mwh']],
            on='hour', how='left'
        )

        # value in CHF: price (EUR/MWh) * 1000 (MWh per GWh) * energy_value_gw
        # gives EUR; dividing by the CHF->EUR exchange rate converts to CHF.
        df_energy['value_chf'] = (
            df_energy['price_eur_mwh'] * 1000 * df_energy['energy_value_gw']
            / df_energy['exchange_rate']
        ).round(2)

        # 6) Generate trade_id starting after current max
        cur_tgt.execute("SELECT COALESCE(MAX(trade_id),0) FROM fact_energy_trade;")
        max_id = cur_tgt.fetchone()[0]
        df_energy = df_energy.sort_values(['hour','neighbor_country_id'])
        df_energy['trade_id'] = range(max_id+1, max_id+1+len(df_energy))

        # 7) Build insert records as tuples of native Python values.
        # Missing lookups (NaN from the left merges) become NULL rather than
        # being sent as NaN to INT columns, which would fail the whole batch.
        fact_columns = ['trade_id','time_id','location_id',
                        'neighbor_country_id','direction',
                        'energy_value_gw','exchange_rate','value_chf']
        clean_records = [
            tuple(_to_db_value(v) for v in row)
            for row in df_energy[fact_columns].values.tolist()
        ]

        # Batch insert
        insert_sql = ("INSERT INTO fact_energy_trade "
            "(trade_id, time_id, location_id, neighbor_country_id, direction, energy_value_gw, exchange_rate, value_chf) "
            "VALUES %s"
        )
        execute_values(cur_tgt, insert_sql, clean_records)
        conn_tgt.commit()

        return { 'status':'success', 'rows_inserted': len(clean_records) }

    except Exception as e:
        # Top-level boundary: log the failure and report it to the caller.
        # Uncommitted work is discarded when the connection is closed below.
        print(f'Error: {e}')
        return {
            'statusCode': 500,
            'body': json.dumps(f'Error: {str(e)}')
        }

    finally:
        # Always release database resources, on success and on failure alike.
        for resource in (cur_tgt, conn_src, conn_tgt):
            if resource is not None:
                resource.close()
