# Code Documentation of Group2 | Datalake and Datawarehouse

## Lambda Functions for API

### group2_currency_api

In [None]:
import json
import pandas as pd
import datetime as dt
import requests
import psycopg2
import os
import boto3
from psycopg2.extras import execute_values

# Get Environment Variables

# Database and bucket settings are read from the process environment.
# A name evaluates to None when its variable is not set.
endpoint = os.environ.get('ENDPOINT')
db_name = os.environ.get('DB_NAME')
username = os.environ.get('USERNAME')
password = os.environ.get('PASSWORD')

# Credentials and target bucket used for the S3 client.
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID_INTERNAL')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY_INTERNAL')
bucket_name = os.environ.get('BUCKET_NAME')



def upload_dataframe_to_db(df, table_name, conn):
    """Create ``table_name`` if it does not exist and append all rows of ``df``.

    The DataFrame index is written as a ``timestamp`` column. Column names are
    lower-cased and spaces/hyphens are replaced by underscores. ``timestamp``
    becomes a SQL ``timestamp`` column, ``unit`` and ``country`` become
    ``text``, and every other column becomes ``float``.

    Args:
        df: DataFrame to upload; it is not modified.
        table_name: Target table name, interpolated into the SQL as given.
        conn: DB-API connection providing ``cursor()``. This function does not
            commit; the caller controls the transaction.

    Raises:
        Whatever the cursor raises. The cursor is closed in every case.
    """
    # Work on a copy so the caller's frame keeps its index and column names.
    frame = df.copy()
    frame.index.name = 'timestamp'
    frame.reset_index(inplace=True)
    frame.columns = [
        str(col).lower().replace(" ", "_").replace("-", "_")
        for col in frame.columns
    ]

    def quote_identifier(name):
        # Double-quoted identifiers keep unusual column names from breaking
        # the statement; for lower-case names the quoted and unquoted forms
        # refer to the same PostgreSQL column.
        return '"' + name.replace('"', '""') + '"'

    def sql_type(name):
        if name == 'timestamp':
            return 'timestamp'
        if name in ('unit', 'country'):
            return 'text'
        return 'float'

    quoted_columns = [quote_identifier(col) for col in frame.columns]
    column_defs = ', '.join(
        f"{quoted} {sql_type(col)}"
        for quoted, col in zip(quoted_columns, frame.columns)
    )
    create_query = f"CREATE TABLE IF NOT EXISTS {table_name} ({column_defs});"

    placeholders = ', '.join(['%s'] * len(quoted_columns))
    insert_query = (
        f"INSERT INTO {table_name} ({', '.join(quoted_columns)}) "
        f"VALUES ({placeholders})"
    )

    # Convert NaN to None so missing values are stored as SQL NULL.
    values = [
        tuple(None if pd.isna(cell) else cell for cell in row)
        for row in frame.values
    ]

    cur = conn.cursor()
    try:
        cur.execute(create_query)
        cur.executemany(insert_query, values)
    finally:
        # Release the cursor even when one of the statements fails.
        cur.close()

def upload_dataframe_to_bucket(df, foldername, s3, bucket_name):
    """Store ``df`` as a JSON array of records in an S3 bucket.

    The object key is ``<foldername>/<foldername>_<YYYY-MM-DD>.json`` using
    today's date, so a second upload on the same day writes to the same key.

    Args:
        df: DataFrame to serialise; its index is written as a regular column.
        foldername: Used both as key prefix and as file name stem.
        s3: Client object providing ``put_object``.
        bucket_name: Name of the target bucket.
    """
    today = str(dt.datetime.today().date())
    key = foldername + '/' + foldername + '_' + today + '.json'  # Key = path in the bucket

    # to_json() already returns serialised JSON text. Passing it through
    # json.dumps() again would store a single quoted string instead of the
    # record array, so the text is uploaded as-is.
    data = df.reset_index().to_json(orient="records", date_format="iso")

    s3.put_object(
        Bucket=bucket_name,
        Key=key,
        Body=data.encode('utf-8'),
        ContentType='application/json'
    )

def fetch_cbet_data(date: str, timeout: float = 30.0) -> dict:
    """Request cross-border electricity trading (CBET) data for Switzerland.

    Args:
        date: Start date passed to the API as the ``start`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response on HTTP 200. An empty dict when the request
        fails or the server answers with any other status code.
    """
    base_url = "https://api.energy-charts.info/cbet"
    params = {
        "country": "ch",
        "start": date
    }
    headers = {
        'accept': 'application/json'
    }

    try:
        # Without a timeout a stalled connection would block until the
        # Lambda itself is terminated.
        response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error during requesting CBET-Data for {date}: {exc}")
        return {}

    if response.status_code == 200:
        return response.json()

    print(f"Error during requesting CBET-Data for {date}: {response.status_code}")
    return {}
    

def extract_cbet_data(json_data: dict) -> pd.DataFrame:
    """Flatten a CBET API response into one row per timestamp and country.

    Args:
        json_data: Decoded response with ``unix_seconds`` (epoch seconds) and
            ``countries`` (list of dicts with ``name`` and ``data``). An empty
            dict, as returned by a failed fetch, is accepted.

    Returns:
        DataFrame with the columns ``timestamp`` (timezone-aware,
        Europe/Zurich), ``value`` and ``country``. It is empty when the
        response contains no timestamps or no countries.

    Raises:
        ValueError: If a country's value list differs in length from the
            timestamp list.
    """
    timestamps = json_data.get("unix_seconds") or []
    countries = json_data.get("countries") or []
    if not timestamps or not countries:
        # Nothing to reshape; return an empty frame with the usual columns so
        # callers can handle it like any other result.
        return pd.DataFrame(columns=["timestamp", "value", "country"])

    idx = (pd.to_datetime(timestamps, unit="s", utc=True)
             .tz_convert("Europe/Zurich"))
    idx.name = "timestamp"

    frames = []
    for country in countries:
        values = country.get("data", [])
        if len(values) != len(idx):
            # Guard against misaligned series instead of silently shifting data.
            raise ValueError(
                f"Length mismatch for {country.get('name')}: "
                f"{len(values)} values vs {len(idx)} timestamps"
            )
        frames.append(pd.DataFrame(
            {"value": values, "country": country.get("name")},
            index=idx
        ))

    return pd.concat(frames).reset_index()

def insert_cbet_data_to_db(df_cbet, table_name, conn):
    """Create the CBET table if needed and bulk-insert the rows of ``df_cbet``.

    Missing values are replaced by 0 before inserting. The insert is committed
    on success and rolled back on failure; insert errors are printed, not
    raised. An error in the CREATE TABLE statement propagates to the caller.

    Args:
        df_cbet: DataFrame with ``timestamp``, ``country`` and ``value`` columns.
        table_name: Target table name, interpolated into the SQL as given.
        conn: Open psycopg2 connection.
    """
    def sql_type(name):
        # timestamp -> timestamp, unit/country -> text, anything else -> float
        if name == 'timestamp':
            return 'timestamp'
        return 'text' if name in ['unit', 'country'] else 'float'

    cursor = conn.cursor()
    filled = df_cbet.fillna(0)

    column_defs = ', '.join(f"{name} {sql_type(name)}" for name in filled.columns)
    cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({column_defs});")

    insert_query = f"""
    INSERT INTO {table_name} (timestamp, country, value)
    VALUES %s
    """
    fields = ('timestamp', 'country', 'value')
    data_tuples = [
        tuple(record[field] for field in fields)
        for _, record in filled.iterrows()
    ]

    try:
        execute_values(cursor, insert_query, data_tuples)
        conn.commit()
        print("CBET-Data successfully implemented.")
    except Exception as e:
        print(f"Error in implementing CBET-Data: {e}")
        conn.rollback()
    finally:
        cursor.close()

def lambda_handler(event, context):
    """Fetch energy data starting yesterday and store it in the database and S3.

    Requests public power, price and cross-border trading (CBET) data from
    api.energy-charts.info, reshapes each response into a DataFrame with
    Europe/Zurich timestamps, then writes the frames to three database tables
    and three JSON objects in the bucket.

    Args:
        event: Lambda event payload (not used).
        context: Lambda context object (not used).

    Returns:
        dict with ``statusCode`` 200 after a successful upload, or 500 with the
        error text when the database/bucket stage raised.

    Raises:
        Errors from the HTTP requests or from reshaping the responses are not
        caught here and propagate to the Lambda runtime.
    """
    http_timeout = 30  # seconds; avoids hanging on a stalled API connection

    date = dt.datetime.today().date()
    start_date = str(date - dt.timedelta(days=1))

    api_result_power = requests.get(
        f'https://api.energy-charts.info/public_power?country=ch&start={start_date}',
        timeout=http_timeout,
    )
    api_result_prices = requests.get(
        f'https://api.energy-charts.info/price?bzn=CH&start={start_date}',
        timeout=http_timeout,
    )

    api_response_power = api_result_power.json()
    api_response_prices = api_result_prices.json()

    cbet_json = fetch_cbet_data(start_date)

    # One column per production type, one row per timestamp.
    api_df_power = pd.DataFrame(api_response_power["production_types"])
    api_df_power_t = pd.DataFrame(api_df_power['data'].tolist()).T
    api_df_power_t.columns = api_df_power['name']
    api_df_power_t.index = pd.to_datetime(api_response_power["unix_seconds"], unit='s', utc=True).tz_convert("Europe/Zurich")

    api_df_price = pd.DataFrame(api_response_prices["price"])
    api_df_price['Unit'] = api_response_prices["unit"]
    api_df_price.columns = ['Price', 'Unit']
    api_df_price.index = pd.to_datetime(api_response_prices["unix_seconds"], unit='s', utc=True).tz_convert("Europe/Zurich")

    api_df_cbet = extract_cbet_data(cbet_json)

    conn = None
    try:
        print("Connecting to DB & Bucket...")
        conn = psycopg2.connect(
            host=endpoint,
            dbname=db_name,
            user=username,
            password=password
        )
        # Each statement is committed on its own; there is no all-or-nothing
        # transaction across the three tables.
        conn.set_session(autocommit=True)

        print("Connection to DB successful.")

        s3 = boto3.client('s3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key
            )

        print("Connection to Bucket successful.")

        # Upload DataFrames
        upload_dataframe_to_db(api_df_power_t, "tbl_energy_production_data", conn)
        upload_dataframe_to_db(api_df_price, "tbl_energy_price_data", conn)
        insert_cbet_data_to_db(api_df_cbet, "tbl_energy_cbet_data", conn)

        upload_dataframe_to_bucket(api_df_power_t, "energy_production", s3, bucket_name)
        upload_dataframe_to_bucket(api_df_price, "energy_price", s3, bucket_name)
        upload_dataframe_to_bucket(api_df_cbet, "energy_cbet", s3, bucket_name)

    except Exception as e:
        print("Error:", e)
        # Report the failure instead of claiming success.
        return {
            'statusCode': 500,
            'body': json.dumps(f"Import failed: {e}")
        }
    finally:
        # Close the connection on the error path as well.
        if conn is not None:
            conn.close()

    print("Data uploaded and connection closed.")

    return {
        'statusCode': 200,
        'body': json.dumps("Inport succesfully")
    }


### group2_energy_api

In [None]:
import json
import pandas as pd
import datetime as dt
import requests
import psycopg2
import os
import boto3
from psycopg2.extras import execute_values

# Get Environment Variables

# Database and bucket settings are read from the process environment.
# A name evaluates to None when its variable is not set.
endpoint = os.environ.get('ENDPOINT')
db_name = os.environ.get('DB_NAME')
username = os.environ.get('USERNAME')
password = os.environ.get('PASSWORD')

# Credentials and target bucket used for the S3 client.
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID_INTERNAL')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY_INTERNAL')
bucket_name = os.environ.get('BUCKET_NAME')



def upload_dataframe_to_db(df, table_name, conn):
    """Create ``table_name`` if it does not exist and append all rows of ``df``.

    The DataFrame index is written as a ``timestamp`` column. Column names are
    lower-cased and spaces/hyphens are replaced by underscores. ``timestamp``
    becomes a SQL ``timestamp`` column, ``unit`` becomes ``text``, and every
    other column becomes ``float``.

    Args:
        df: DataFrame to upload; it is not modified.
        table_name: Target table name, interpolated into the SQL as given.
        conn: DB-API connection providing ``cursor()``. This function does not
            commit; the caller controls the transaction.

    Raises:
        Whatever the cursor raises. The cursor is closed in every case.
    """
    # Work on a copy so the caller's frame keeps its index and column names.
    frame = df.copy()
    frame.index.name = 'timestamp'
    frame.reset_index(inplace=True)
    frame.columns = [
        str(col).lower().replace(" ", "_").replace("-", "_")
        for col in frame.columns
    ]

    def quote_identifier(name):
        # Double-quoted identifiers keep unusual column names from breaking
        # the statement; for lower-case names the quoted and unquoted forms
        # refer to the same PostgreSQL column.
        return '"' + name.replace('"', '""') + '"'

    def sql_type(name):
        if name == 'timestamp':
            return 'timestamp'
        if name == 'unit':
            return 'text'
        return 'float'

    quoted_columns = [quote_identifier(col) for col in frame.columns]
    column_defs = ', '.join(
        f"{quoted} {sql_type(col)}"
        for quoted, col in zip(quoted_columns, frame.columns)
    )
    create_query = f"CREATE TABLE IF NOT EXISTS {table_name} ({column_defs});"

    placeholders = ', '.join(['%s'] * len(quoted_columns))
    insert_query = (
        f"INSERT INTO {table_name} ({', '.join(quoted_columns)}) "
        f"VALUES ({placeholders})"
    )

    # Convert NaN to None so missing values are stored as SQL NULL.
    values = [
        tuple(None if pd.isna(cell) else cell for cell in row)
        for row in frame.values
    ]

    cur = conn.cursor()
    try:
        cur.execute(create_query)
        cur.executemany(insert_query, values)
    finally:
        # Release the cursor even when one of the statements fails.
        cur.close()

def upload_dataframe_to_bucket(df, foldername, s3, bucket_name):
    """Store ``df`` as a JSON array of records in an S3 bucket.

    The object key is ``<foldername>/<foldername>_<YYYY-MM-DD>.json`` using
    today's date, so a second upload on the same day writes to the same key.

    Args:
        df: DataFrame to serialise; its index is written as a regular column.
        foldername: Used both as key prefix and as file name stem.
        s3: Client object providing ``put_object``.
        bucket_name: Name of the target bucket.
    """
    today = str(dt.datetime.today().date())
    key = foldername + '/' + foldername + '_' + today + '.json'  # Key = path in the bucket

    # to_json() already returns serialised JSON text. Passing it through
    # json.dumps() again would store a single quoted string instead of the
    # record array, so the text is uploaded as-is.
    data = df.reset_index().to_json(orient="records", date_format="iso")

    s3.put_object(
        Bucket=bucket_name,
        Key=key,
        Body=data.encode('utf-8'),
        ContentType='application/json'
    )

def fetch_cbet_data(date: str, timeout: float = 30.0) -> dict:
    """Request cross-border electricity trading (CBET) data for Switzerland.

    Args:
        date: Start date passed to the API as the ``start`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response on HTTP 200. An empty dict when the request
        fails or the server answers with any other status code.
    """
    base_url = "https://api.energy-charts.info/cbet"
    params = {
        "country": "ch",
        "start": date
    }
    headers = {
        'accept': 'application/json'
    }

    try:
        # Without a timeout a stalled connection would block until the
        # Lambda itself is terminated.
        response = requests.get(base_url, params=params, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error during requesting CBET-Data for {date}: {exc}")
        return {}

    if response.status_code == 200:
        return response.json()

    print(f"Error during requesting CBET-Data for {date}: {response.status_code}")
    return {}


def extract_cbet_data(json_data: dict) -> pd.DataFrame:
    """Flatten a CBET API response into one row per timestamp and country.

    Args:
        json_data: Decoded response with ``unix_seconds`` (epoch seconds) and
            ``countries`` (list of dicts with ``name`` and ``data``). An empty
            dict, as returned by a failed fetch, is accepted.

    Returns:
        DataFrame with the columns ``timestamp`` (naive datetime in UTC),
        ``country`` and ``value``. It is empty when the response holds no data.
        When a country's value list and the timestamp list differ in length,
        the surplus entries of the longer one are ignored.
    """
    timestamps = json_data.get('unix_seconds', [])

    # Convert each epoch value once instead of once per country.
    moments = [
        dt.datetime.fromtimestamp(ts, dt.timezone.utc).replace(tzinfo=None)
        for ts in timestamps
    ]

    records = [
        {'timestamp': moment, 'country': country.get('name'), 'value': value}
        for country in json_data.get('countries', [])
        for moment, value in zip(moments, country.get('data', []))
    ]

    if not records:
        # Keep the column layout stable even when there is nothing to report.
        return pd.DataFrame(columns=['timestamp', 'country', 'value'])

    # Build the frame once, after all records have been collected.
    return pd.DataFrame(records)

def insert_cbet_data_to_db(df_cbet, table_name, conn):
    """Create the CBET table if needed and bulk-insert the rows of ``df_cbet``.

    Missing values are replaced by 0 before inserting. The insert is committed
    on success and rolled back on failure; insert errors are printed, not
    raised. An error in the CREATE TABLE statement propagates to the caller.

    Args:
        df_cbet: DataFrame with ``timestamp``, ``country`` and ``value`` columns.
        table_name: Target table name, interpolated into the SQL as given.
        conn: Open psycopg2 connection.
    """
    def sql_type(name):
        # timestamp -> timestamp, country -> text, anything else -> float
        if name == 'timestamp':
            return 'timestamp'
        return 'text' if name == 'country' else 'float'

    cursor = conn.cursor()
    filled = df_cbet.fillna(0)

    column_defs = ', '.join(f"{name} {sql_type(name)}" for name in filled.columns)
    cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({column_defs});")

    insert_query = f"""
    INSERT INTO {table_name} (timestamp, country, value)
    VALUES %s
    """
    fields = ('timestamp', 'country', 'value')
    data_tuples = [
        tuple(record[field] for field in fields)
        for _, record in filled.iterrows()
    ]

    try:
        execute_values(cursor, insert_query, data_tuples)
        conn.commit()
        print("CBET-Data successfully implemented.")
    except Exception as e:
        print(f"Error in implementing CBET-Data: {e}")
        conn.rollback()
    finally:
        cursor.close()

def lambda_handler(event, context):
    """Fetch energy data starting yesterday and store it in the database and S3.

    Requests public power, price and cross-border trading (CBET) data from
    api.energy-charts.info, reshapes each response into a DataFrame with naive
    timestamps derived from the epoch seconds, then writes the frames to three
    database tables and three JSON objects in the bucket.

    Args:
        event: Lambda event payload (not used).
        context: Lambda context object (not used).

    Returns:
        dict with ``statusCode`` 200 after a successful upload, or 500 with the
        error text when the database/bucket stage raised.

    Raises:
        Errors from the HTTP requests or from reshaping the responses are not
        caught here and propagate to the Lambda runtime.
    """
    http_timeout = 30  # seconds; avoids hanging on a stalled API connection

    date = dt.datetime.today().date()
    start_date = str(date - dt.timedelta(days=1))

    api_result_power = requests.get(
        f'https://api.energy-charts.info/public_power?country=ch&start={start_date}',
        timeout=http_timeout,
    )
    api_result_prices = requests.get(
        f'https://api.energy-charts.info/price?bzn=CH&start={start_date}',
        timeout=http_timeout,
    )

    api_response_power = api_result_power.json()
    api_response_prices = api_result_prices.json()

    cbet_json = fetch_cbet_data(start_date)

    # One column per production type, one row per timestamp.
    api_df_power = pd.DataFrame(api_response_power["production_types"])
    api_df_power_t = pd.DataFrame(api_df_power['data'].tolist()).T
    api_df_power_t.columns = api_df_power['name']
    api_df_power_t.index = pd.to_datetime(api_response_power["unix_seconds"], unit='s')

    api_df_price = pd.DataFrame(api_response_prices["price"])
    api_df_price['Unit'] = api_response_prices["unit"]
    api_df_price.columns = ['Price', 'Unit']
    api_df_price.index = pd.to_datetime(api_response_prices["unix_seconds"], unit='s')

    api_df_cbet = extract_cbet_data(cbet_json)

    conn = None
    try:
        print("Connecting to DB & Bucket...")
        conn = psycopg2.connect(
            host=endpoint,
            dbname=db_name,
            user=username,
            password=password
        )
        # Each statement is committed on its own; there is no all-or-nothing
        # transaction across the three tables.
        conn.set_session(autocommit=True)

        print("Connection to DB successful.")

        s3 = boto3.client('s3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key
            )

        print("Connection to Bucket successful.")

        # Upload DataFrames
        upload_dataframe_to_db(api_df_power_t, "tbl_energy_production_data", conn)
        upload_dataframe_to_db(api_df_price, "tbl_energy_price_data", conn)
        insert_cbet_data_to_db(api_df_cbet, "tbl_energy_cbet_data", conn)

        upload_dataframe_to_bucket(api_df_power_t, "energy_production", s3, bucket_name)
        upload_dataframe_to_bucket(api_df_price, "energy_price", s3, bucket_name)
        upload_dataframe_to_bucket(api_df_cbet, "energy_cbet", s3, bucket_name)

    except Exception as e:
        print("Error:", e)
        # Report the failure instead of claiming success.
        return {
            'statusCode': 500,
            'body': json.dumps(f"Import failed: {e}")
        }
    finally:
        # Close the connection on the error path as well.
        if conn is not None:
            conn.close()

    print("Data uploaded and connection closed.")

    return {
        'statusCode': 200,
        'body': json.dumps("Inport succesfully")
    }


### group2_weather_api

In [None]:
import requests
import pandas as pd
import json
from datetime import datetime, timedelta
import psycopg2
from psycopg2.extras import execute_values
import boto3
import os

# Settings read from the process environment (None when a variable is unset).
var_apikey = os.environ.get("var_apikey")
var_database = os.environ.get("var_database")
var_host = os.environ.get("var_host")
var_password = os.environ.get("var_password")
var_port = os.environ.get("var_port")
var_user = os.environ.get("var_user")
var_bucketname = os.environ.get("var_bucketname")
var_aws_access_key_id = os.environ.get("var_aws_access_key_id")
var_aws_secret_access_key = os.environ.get("var_aws_secret_access_key")


# === PARAMETER ============================================
DAYS_BACK = 1  # number of days to fetch, counted backwards from yesterday

# Constants used by the functions below for bucket, database and API access.
BUCKET_NAME = var_bucketname
DB_CONFIG = dict(
    host=var_host,
    port=var_port,
    database=var_database,
    user=var_user,
    password=var_password,
)
API_KEY = var_apikey
AWS_ACCESS_KEY_ID = var_aws_access_key_id
AWS_SECRET_ACCESS_KEY = var_aws_secret_access_key

# ==========================================================

# Definition of weather station location
def get_city_data():
    """Return the weather locations as a list of dicts.

    Each dict has the keys ``City``, ``Latitude``, ``Longitude`` and
    ``Canton``. A new list is built on every call.
    """
    fields = ("City", "Latitude", "Longitude", "Canton")
    stations = (
        ("Aarau", 47.392715, 8.044445, "Aargau"),
        ("Baden", 47.473683, 8.308682, "Aargau"),
        ("Basel", 47.558108, 7.587826, "Basel-City"),
        ("Bern", 46.948474, 7.452175, "Bern"),
        ("Chur", 46.854747, 9.526490, "Grisons"),
        ("Frauenfeld", 47.556191, 8.896335, "Thurgau"),
        ("Genf", 46.201756, 6.146601, "Geneva"),
        ("Lausanne", 46.521827, 6.632702, "Vaud"),
        ("Lugano", 46.005010, 8.952028, "Ticino"),
        ("Luzern", 47.050545, 8.305468, "Lucerne"),
        ("Neuenburg", 46.989583, 6.929264, "Neuchâtel"),
        ("Schaffhausen", 47.696049, 8.634513, "Schaffhausen"),
        ("Sion", 46.231175, 7.358879, "Wallis"),
        ("Solothurn", 47.208135, 7.538405, "Solothurn"),
        ("St. Gallen", 47.425059, 9.376588, "St. Gallen"),
        ("Winterthur", 47.499172, 8.729150, "Zurich"),
        ("Zug", 47.167990, 8.517365, "Zug"),
        ("Zürich", 47.374449, 8.541042, "Zurich"),
    )
    return [dict(zip(fields, station)) for station in stations]

# Sending request to weather API
def fetch_weather_data(city, lat, lon, start, end, api_key, timeout=30):
    """Request hourly weather history for one location.

    Args:
        city: Location name, used only in error messages.
        lat: Latitude sent to the API.
        lon: Longitude sent to the API.
        start: Start of the period, sent as the ``start`` query parameter.
        end: End of the period, sent as the ``end`` query parameter.
        api_key: API key sent as ``appid``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response, or None when the request fails or the
        status code is not 200.
    """
    url = f"https://history.openweathermap.org/data/2.5/history/city?lat={lat}&lon={lon}&type=hour&start={start}&end={end}&appid={api_key}"
    try:
        # Without a timeout one stalled request would block the whole run.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Only the exception type is printed: the message may contain the
        # request URL, which includes the API key.
        print(f"Error on {city}: {type(exc).__name__}")
        return None
    if response.status_code != 200:
        print(f"Error on {city}: {response.status_code}")
        return None
    return response.json()

def upload_to_s3(s3_client, city, date_str, data):
    """Write one API response as UTF-8 JSON into the configured bucket.

    The object is stored under ``weather_data/`` with the lower-cased city
    name and ``date_str`` in its file name.
    """
    object_key = f"weather_data/weatherdata_{city.lower()}_{date_str}.json"
    # ensure_ascii=False keeps non-ASCII characters unescaped in the JSON text.
    payload = json.dumps(data, ensure_ascii=False).encode('utf-8')
    request = {
        "Bucket": BUCKET_NAME,
        "Key": object_key,
        "Body": payload,
        "ContentType": 'application/json',
    }
    s3_client.put_object(**request)

# Extracting information from json into a list -> for dataframe
def extract_weather_data(city, weather_json):
    """Turn one weather API response into a list of flat row dicts.

    Args:
        city: Name stored in the ``City`` field of every row.
        weather_json: Decoded response; its ``list`` entries are converted.

    Returns:
        One dict per entry. ``wind_speed``, ``rain_1h`` and ``clouds_all``
        default to 0 when the entry does not provide them. ``datetime`` is a
        naive datetime in UTC.
    """
    def to_row(item):
        moment = datetime.utcfromtimestamp(item["dt"])
        readings = item["main"]
        wind_info = item.get("wind", {})
        condition = item["weather"][0]  # only the first weather entry is used
        rain_last_hour = item.get("rain", {}).get("1h", 0.0)
        cloud_cover = item.get("clouds", {}).get("all", 0)
        return {
            'City': city,
            'datetime': moment,
            'temp': readings['temp'],
            'pressure': readings['pressure'],
            'humidity': readings['humidity'],
            'temp_min': readings['temp_min'],
            'temp_max': readings['temp_max'],
            'wind_speed': wind_info.get('speed', 0.0),
            'weather_main': condition['main'],
            'weather_description': condition['description'],
            'rain_1h': rain_last_hour,
            'clouds_all': cloud_cover,
        }

    return [to_row(item) for item in weather_json.get("list", [])]

# Connecting to db
def connect_to_db():
    """Open and return a new psycopg2 connection built from ``DB_CONFIG``."""
    connection = psycopg2.connect(**DB_CONFIG)
    return connection

# Inserting data into db
def insert_weather_data(conn, df_weather):
    """Bulk-insert the rows of ``df_weather`` into ``tbl_weather_data``.

    Missing values are replaced by 0 before inserting. The insert is committed
    on success and rolled back on failure; errors are printed, not raised.

    Args:
        conn: Open psycopg2 connection.
        df_weather: DataFrame containing the columns listed in ``fields``.
    """
    # DataFrame columns in the order of the INSERT column list below.
    fields = (
        'City', 'datetime', 'temp', 'pressure', 'humidity',
        'temp_min', 'temp_max', 'wind_speed',
        'weather_main', 'weather_description', 'rain_1h', 'clouds_all',
    )

    cursor = conn.cursor()
    filled = df_weather.fillna(0)
    data_tuples = [
        tuple(record[field] for field in fields)
        for _, record in filled.iterrows()
    ]
    insert_query = """
    INSERT INTO tbl_weather_data (
        city, datetime, temp, pressure, humidity,
        temp_min, temp_max, wind_speed,
        weather_main, weather_description, rain_1h, clouds_all
    ) VALUES %s
    """
    try:
        execute_values(cursor, insert_query, data_tuples)
        conn.commit()
        print("Data successfully inserted.")
    except Exception as e:
        print(f"Error during inserting data: {e}")
        conn.rollback()
    finally:
        cursor.close()

# Running API request and data processing
def lambda_handler(event, context):
    """Download weather history for all configured cities and store it.

    For each of the ``DAYS_BACK`` days ending yesterday and for each city, the
    raw API response is uploaded to S3 and its entries are collected. All
    collected rows are then inserted into the database in one batch.

    Args:
        event: Lambda event payload (not used).
        context: Lambda context object (not used).

    Returns:
        None.
    """
    df_coords = pd.DataFrame(get_city_data())
    s3 = boto3.client('s3',
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY
    )
    END_DATE = datetime.utcnow() - timedelta(days=1)  # yesterday
    all_data = []
    for days_ago in range(DAYS_BACK):
        target_day = END_DATE - timedelta(days=days_ago)
        # NOTE(review): these naive datetimes are converted with the runtime's
        # local timezone - presumably UTC on Lambda; confirm.
        start = int(datetime(target_day.year, target_day.month, target_day.day, 0, 0).timestamp())
        end = int(datetime(target_day.year, target_day.month, target_day.day, 23, 59).timestamp())
        date_str = target_day.strftime('%Y%m%d')
        for _, row in df_coords.iterrows():
            city = row["City"]
            lat = row["Latitude"]
            lon = row["Longitude"]
            weather_json = fetch_weather_data(city, lat, lon, start, end, API_KEY)
            if not weather_json:
                # Skip cities whose request failed; the others are still stored.
                continue
            upload_to_s3(s3, city, date_str, weather_json)
            city_data = extract_weather_data(city, weather_json)
            all_data.extend(city_data)

    # Writing processed data into db
    df_weather = pd.DataFrame(all_data)
    if df_weather.empty:
        print("No weather data available.")
        return

    # Initialised up front so the finally clause can run safely even when
    # connect_to_db() itself raises.
    conn = None
    try:
        conn = connect_to_db()
        insert_weather_data(conn, df_weather)
    except Exception as e:
        print(f"Connection error: {e}")
    finally:
        if conn is not None:
            conn.close()


## Lambda Functions for Data Processing in Datawarehouse

### group2_dwh_fact_energy_production

In [None]:
import requests
import pandas as pd
import json
from datetime import date, datetime, timedelta
import psycopg2
from psycopg2.extras import execute_values
import boto3
import os

# Connection settings for two databases, read from the environment.
# Both configurations share the same port and user variables; host, database
# and password come from separate variables.
DB_CONFIG = dict(
    host=os.environ.get("var_host"),
    port=os.environ.get("var_port"),
    database=os.environ.get("var_database"),
    user=os.environ.get("var_user"),
    password=os.environ.get("var_password"),
)

DB_CONFIG2 = dict(
    host=os.environ.get("var_host2"),
    port=os.environ.get("var_port"),
    database=os.environ.get("var_database2"),
    user=os.environ.get("var_user"),
    password=os.environ.get("var_password2"),
)


def lambda_handler(event, context):
    """Load one day of energy production data into ``fact_energy_production``.

    Reads the rows of ``tbl_energy_production_data`` for the day two days
    before today from the first database, resolves ``time_id`` via ``dim_time``
    and the Swiss ``country_id`` via ``dim_countries`` in the second database,
    and inserts the rows whose ``time_id`` is not yet present in the fact table.

    Args:
        event: Lambda event payload (not used).
        context: Lambda context object (not used).

    Returns:
        dict with ``statusCode`` 200 and a message telling whether rows were
        inserted, or ``statusCode`` 500 with the error text on failure.
    """
    conn1 = conn2 = cur2 = None
    try:
        # Establish database connections
        conn1 = psycopg2.connect(**DB_CONFIG)
        conn2 = psycopg2.connect(**DB_CONFIG2)
        cur2 = conn2.cursor()

        # Day to load: two days before today.
        target_day = date.today() - timedelta(days=2)

        # Query data from the data lake (half-open interval covering one day).
        query_energy = f"""
            SELECT *
            FROM tbl_energy_production_data
            WHERE timestamp >= '{target_day}'::date
            AND timestamp < '{target_day + timedelta(days=1)}'::date
        """

        df_energy = pd.read_sql_query(query_energy, conn1)

        # Load dimension tables from the data warehouse
        df_countries = pd.read_sql_query("SELECT * FROM dim_countries", conn2)
        df_time = pd.read_sql_query("SELECT * FROM dim_time", conn2)

        # Merge with time dimension to get time_id. With a left join, rows
        # without a matching dim_time entry keep a missing time_id.
        df_merged = df_energy.merge(
            df_time[['time_id', 'timestamp_utc']],
            left_on='timestamp',
            right_on='timestamp_utc',
            how='left'
        )

        # Select and rename relevant columns
        df_fact = df_merged[[
            'time_id', 'timestamp', 'solar', 'wind_onshore', 'load',
            'renewable_share_of_load', 'residual_load'
        ]].copy()
        df_fact.rename(columns={
            'solar': 'solar_output',
            'wind_onshore': 'wind_output'
        }, inplace=True)

        # Add country_id for Switzerland
        switzerland_id = df_countries.loc[
            df_countries['iso_code'] == 'CH', 'country_id'
        ].values[0]
        df_fact.insert(
            loc=df_fact.columns.get_loc('time_id') + 1,
            column='country_id',
            value=switzerland_id
        )

        # Fetch existing time_ids from the target table to avoid duplicates
        cur2.execute("SELECT time_id FROM fact_energy_production")
        existing_time_ids = {row[0] for row in cur2.fetchall()}

        # Keep only records with time_ids not already in the table
        df_fact = df_fact[~df_fact['time_id'].isin(existing_time_ids)]

        # If no new data to insert, exit early
        if df_fact.empty:
            return {
                'statusCode': 200,
                'body': json.dumps('No new records to insert.')
            }

        # Get the current maximum production_id to generate unique keys
        cur2.execute("SELECT COALESCE(MAX(production_id), 0) FROM fact_energy_production")
        max_production_id = cur2.fetchone()[0]

        # Add production_id as a running index starting from the max + 1
        df_fact.insert(0, 'production_id', range(max_production_id + 1, max_production_id + 1 + len(df_fact)))

        # Prepare columns and values for insertion
        columns = [
            "production_id", "time_id", "country_id", "timestamp", "solar_output",
            "wind_output", "load", "renewable_share_of_load", "residual_load"
        ]
        values = [tuple(x) for x in df_fact[columns].to_numpy()]

        # Create SQL INSERT statement
        insert_sql = f"""
        INSERT INTO fact_energy_production ({', '.join(columns)})
        VALUES %s
        """

        # Execute batch insert using execute_values for efficiency
        execute_values(cur2, insert_sql, values)
        conn2.commit()

        return {
            'statusCode': 200,
            'body': json.dumps('New records successfully inserted.')
        }

    except Exception as e:
        return {
            'statusCode': 500,
            'body': json.dumps(f'Error during processing: {str(e)}')
        }

    finally:
        # Runs on every path (success, early exit, error) so that neither the
        # cursor nor a connection stays open.
        for resource in (cur2, conn1, conn2):
            if resource is not None:
                resource.close()

### group2_dwh_fact_......

### group2_dwh_fact_....

## SQL Queries for DataLake and Datawarehouse tables

### DataLake tables

tbl_currency_data

In [None]:
-- Data-lake table for currency exchange rates.
-- The composite primary key allows exactly one row per timestamp and
-- source/target currency pair; the rate itself may be NULL.
CREATE TABLE public.tbl_currency_data (
	"timestamp" timestamp NOT NULL,
	source_currency varchar(10) NOT NULL,
	target_currency varchar(10) NOT NULL,
	exchange_rate float8 NULL,
	CONSTRAINT tbl_currency_data_pkey PRIMARY KEY ("timestamp", source_currency, target_currency)
);

tbl_energy_cbet_data

In [None]:
-- Data-lake table for cross-border electricity trading (CBET) values.
-- One row per timestamp and country; no key or uniqueness constraint.
-- "country" holds the country name delivered by the API, so it must be a
-- text column (a float8 column would reject the inserted names).
CREATE TABLE public.tbl_energy_cbet_data (
	"timestamp" timestamp NULL,
	country text NULL,
	value float8 NULL
);

tbl_energy_price_data

In [None]:
-- Data-lake table for energy prices: one price and its unit per timestamp.
-- All columns are nullable and no key or uniqueness constraint is defined.
CREATE TABLE public.tbl_energy_price_data (
	"timestamp" timestamp NULL,
	price float8 NULL,
	unit text NULL
);

tbl_energy_production_data

In [None]:
-- Data-lake table for energy production time series.
-- Every measure is a nullable float8 column; no key or uniqueness constraint
-- is defined.
CREATE TABLE public.tbl_energy_production_data (
	"timestamp" timestamp NULL,
	cross_border_electricity_trading float8 NULL,
	nuclear float8 NULL,
	hydro_run_of_river float8 NULL,
	hydro_water_reservoir float8 NULL,
	hydro_pumped_storage float8 NULL,
	"others" float8 NULL,
	wind_onshore float8 NULL,
	solar float8 NULL,
	"load" float8 NULL,
	residual_load float8 NULL,
	renewable_share_of_load float8 NULL,
	renewable_share_of_generation float8 NULL
);

tbl_weather_data

In [None]:
-- Data-lake table for weather observations, one row per city and datetime.
-- "id" is an auto-incrementing surrogate key; there is no uniqueness
-- constraint on (city, datetime).
CREATE TABLE public.tbl_weather_data (
	id serial4 NOT NULL,
	city varchar(100) NULL,
	datetime timestamp NULL,
	"temp" float8 NULL,
	pressure int4 NULL,
	humidity int4 NULL,
	temp_min float8 NULL,
	temp_max float8 NULL,
	wind_speed float8 NULL,
	weather_main varchar(50) NULL,
	weather_description text NULL,
	rain_1h float8 NULL,
	clouds_all float8 NULL,
	CONSTRAINT tbl_weather_data_pkey PRIMARY KEY (id)
);

### Datawarehouse tables

dim_countries

In [None]:
-- Country dimension with English and German names and a short ISO code.
CREATE TABLE dim_countries (
    country_id SERIAL PRIMARY KEY,
    country_name_en VARCHAR(100),
    country_name_de VARCHAR(100),
    iso_code VARCHAR(5)
);

-- Seed rows: six countries; country_id is assigned by the sequence.
INSERT INTO dim_countries (country_name_en, country_name_de, iso_code)
VALUES
    ('Germany',      'Deutschland',   'DE'),
    ('France',       'Frankreich',    'FR'),
    ('Austria',      'Österreich',    'AT'),
    ('Italy',        'Italien',       'IT'),
    ('Liechtenstein','Liechtenstein', 'LI'),
    ('Switzerland',    'Schweiz',     'CH');

dim_currency

In [None]:
-- Currency dimension: surrogate key plus a three-character currency code.
-- No uniqueness constraint is defined on currency_code.
CREATE TABLE public.dim_currency (
	currency_id serial4 NOT NULL,
	currency_code varchar(3) NOT NULL,
	CONSTRAINT dim_currency_pkey PRIMARY KEY (currency_id)
);

dim_locations

In [None]:
-- Location dimension: one row per city with its coordinates.
-- "country" defaults to 'Switzerland' when it is not supplied.
CREATE TABLE dim_locations (
    location_id SERIAL PRIMARY KEY,
    country VARCHAR(100) DEFAULT 'Switzerland',
    city VARCHAR(100) NOT NULL,
    lat DOUBLE PRECISION NOT NULL,
    long DOUBLE PRECISION NOT NULL
);

-- Insert data (country is omitted, so the default applies to every row)
INSERT INTO dim_locations (city, lat, long)
VALUES
    ('Aarau', 47.392715, 8.044445),
    ('Baden', 47.473683, 8.308682),
    ('Basel', 47.558108, 7.587826),
    ('Bern', 46.948474, 7.452175),
    ('Chur', 46.854747, 9.526490),
    ('Frauenfeld', 47.556191, 8.896335),
    ('Genf', 46.201756, 6.146601),
    ('Lausanne', 46.521827, 6.632702),
    ('Lugano', 46.005010, 8.952028),
    ('Luzern', 47.050545, 8.305468),
    ('Neuenburg', 46.989583, 6.929264),
    ('Schaffhausen', 47.696049, 8.634513),
    ('Sion', 46.231175, 7.358879),
    ('Solothurn', 47.208135, 7.538405),
    ('St. Gallen', 47.425059, 9.376588),
    ('Winterthur', 47.499172, 8.729150),
    ('Zug', 47.167990, 8.517365),
    ('Zürich', 47.374449, 8.541042);

dim_time

In [None]:
-- Time dimension keyed by time_id, with the timestamp split into date parts.
-- CHECK constraints restrict day_of_week and month to three-letter English
-- abbreviations, hour to 0-23 and year to 1900 or later.
-- No uniqueness constraint is defined on timestamp_utc.
CREATE TABLE public.dim_time (
	time_id serial4 NOT NULL,
	timestamp_utc timestamp NOT NULL,
	"date" date NOT NULL,
	"hour" int4 NULL,
	day_of_week varchar(3) NULL,
	"month" varchar(3) NULL,
	"year" int4 NULL,
	CONSTRAINT dim_time_day_of_week_check CHECK (((day_of_week)::text = ANY ((ARRAY['Mon'::character varying, 'Tue'::character varying, 'Wed'::character varying, 'Thu'::character varying, 'Fri'::character varying, 'Sat'::character varying, 'Sun'::character varying])::text[]))),
	CONSTRAINT dim_time_hour_check CHECK (((hour >= 0) AND (hour <= 23))),
	CONSTRAINT dim_time_month_check CHECK (((month)::text = ANY ((ARRAY['Jan'::character varying, 'Feb'::character varying, 'Mar'::character varying, 'Apr'::character varying, 'May'::character varying, 'Jun'::character varying, 'Jul'::character varying, 'Aug'::character varying, 'Sep'::character varying, 'Oct'::character varying, 'Nov'::character varying, 'Dec'::character varying])::text[]))),
	CONSTRAINT dim_time_pkey PRIMARY KEY (time_id),
	CONSTRAINT dim_time_year_check CHECK ((year >= 1900))
);

fact_energy_production

In [None]:
-- Fact table for energy production measures per time and country.
-- time_id, country_id and timestamp are mandatory; the measures are nullable.
CREATE TABLE fact_energy_production (
    production_id SERIAL PRIMARY KEY,
    time_id INTEGER NOT NULL,
    country_id INTEGER NOT NULL,
    timestamp TIMESTAMP NOT NULL,
    solar_output FLOAT,
    wind_output FLOAT,
    load FLOAT,
    renewable_share_of_load FLOAT,
    residual_load FLOAT
);


-- Foreign Keys
-- Every fact row must reference an existing dim_time entry.
ALTER TABLE fact_energy_production
ADD CONSTRAINT fk_time
FOREIGN KEY (time_id) REFERENCES dim_time(time_id);

-- Every fact row must reference an existing dim_countries entry.
ALTER TABLE fact_energy_production
ADD CONSTRAINT fk_country
FOREIGN KEY (country_id) REFERENCES dim_countries(country_id);