# Exchange electricity price and consumption
Combines hourly electricity consumption data, downloaded manually from Fingrid Datahub, with the price history of Nord Pool exchange electricity retrieved from the Entso-e Transparency Platform SFTP service.

Before use:

* The user must download the hourly consumption data themselves from https://oma.datahub.fi/.
* The user must register for the Entso-e Transparency Platform service at https://transparency.entsoe.eu/.

## Configuration

The folder where the Entso-e price files are stored is set by the variable `entso_e_data_local_folder`. It may be the same folder as the one holding the consumption data files (variable `consumption_data_local_folder`) and the one where the final output is written (variable `output_data_local_folder`).

In [None]:
from getpass import getpass
from pathlib import Path
from glob import glob
import pandas as pd
import paramiko
import os
from datetime import datetime, timedelta, timezone

# Local folders and files used by the notebook. The values are plain strings;
# later cells build file paths from them with f-strings.
entso_e_data_local_folder = f"{Path.home()}/electricity_data" # User configurable: Entso-e price files
consumption_data_local_folder = f"{Path.home()}/electricity_data" # User configurable: consumption data CSV files
output_data_local_folder = f"{Path.home()}/electricity_data" # User configurable: combined output CSV file
local_host_keys_filename = f"{Path.home()}/known_hosts" # User configurable: SSH host keys accepted by the user

# Create the folders that the notebook writes to if they do not exist. The
# output folder is included so that writing the combined CSV file does not fail
# when it is configured to be a different folder than the Entso-e data folder.
os.makedirs(entso_e_data_local_folder, exist_ok=True)
os.makedirs(output_data_local_folder, exist_ok=True)

def get_entso_e_data_filename(year, month):
    """Return the name of the Entso-e day-ahead price file of one month.

    The name has the form ``<year>_<month>_DayAheadPrices_12.1.D.csv`` where
    the month is zero-padded to at least two characters.
    """
    padded_month = format(month, "02")
    name_parts = (format(year, ""), padded_month, "DayAheadPrices_12.1.D.csv")
    return "_".join(name_parts)

## Reading of consumption data
All `*.csv` files in the consumption data folder are read and filtered to contain only actual hourly kWh consumption data. The consumption data from the accepted files is combined.

In [None]:
# Column names of the consumption data CSV files
consumption_data_datetime_col = "Start Time"  # Start time of the measured period; used as the index
consumption_data_consumption_col = "Quantity"  # Consumed amount
consumption_data_resolution_col = "Resolution"  # Only rows with "PT1H" are used
consumption_data_unit_col = "Unit Type"  # Only rows with "kWh" are used
consumption_data_quality_col = "Quality"  # Only rows with "OK" are used

def is_consumption_data(filename):
    """Tell whether a CSV file looks like a consumption data file.

    Only the first line of the file is inspected: it must contain both the
    start time and the consumption column names.

    A file that cannot be opened or decoded as UTF-8 is reported as not being
    consumption data, so that one unrelated file in the folder does not abort
    the scan of all the other files.
    """
    try:
        with open(filename, encoding="utf-8") as file:
            first_line = file.readline()
    except (OSError, UnicodeDecodeError):
        return False
    return consumption_data_datetime_col in first_line and consumption_data_consumption_col in first_line

# Consumption of each hour, keyed by the UTC start time of the hour
consumption_dict = {}
# The keys are the (year, month) pairs, in UTC, for which consumption data was
# read. Only the keys are used; the values are always True.
year_month_dict = {}

print(f"Reading consumption data from {consumption_data_local_folder}/*.csv:")
for filename in filter(is_consumption_data, glob(f"{consumption_data_local_folder}/*.csv")):
    df = pd.read_csv(filename, encoding="utf-8", sep=";", decimal=",", index_col=consumption_data_datetime_col)
    # Keep only the rows that hold hourly kWh values of quality "OK"
    df = df[(df[consumption_data_quality_col] == "OK") & (df[consumption_data_resolution_col] == "PT1H") & (df[consumption_data_unit_col] == "kWh")]
    df.index = pd.to_datetime(df.index, utc=True)
    hourly_consumption = df[consumption_data_consumption_col]
    for index, val in hourly_consumption.items():
        if index in consumption_dict:
            consumption_dict[index] = max(consumption_dict[index], val) # A larger consumption is probably more up to date
        else:
            consumption_dict[index] = val
        year_month_dict[(index.year, index.month)] = True
    num_hours = len(hourly_consumption)
    print(f"Read {num_hours} hours from {filename}")

print()
print("Consumption data read for the following (year, month) combinations:")
print(sorted(year_month_dict.keys()))

## Entso-e login data entry

In [None]:
# Ask for the Entso-e SFTP credentials; the password is read with getpass
# instead of input.
user, password = input("Entso-e e-mail:"), getpass("Entso-e password:")
print("Entso-e SFTP username and password set")

## Price history download from Entso-e SFTP server

The price history is downloaded into the folder specified by the variable `entso_e_data_local_folder` for the months corresponding to the consumption data. A file is downloaded from the Entso-e SFTP server only if its modification time differs from that of the local copy.

In [None]:
# Address of the Entso-e SFTP server
entso_e_host, entso_e_port = "sftp-transparency.entsoe.eu", 22
# Remote folder from which the monthly day-ahead price files are fetched
entso_e_data_remote_folder = "/TP_export/DayAheadPrices_12.1.D"

class PromptPolicy(paramiko.client.MissingHostKeyPolicy):
    """Host key policy that asks the user whether an unknown server key is trusted."""

    def missing_host_key(self, client, hostname, key):
        """Ask the user to accept, and optionally save, an unknown host key.

        Returning normally accepts the key for the current connection. If the
        user also chooses to save it, the key is added to the host keys of
        ``client`` and written to the file ``local_host_keys_filename``.

        Args:
            client: The SSH client that is connecting.
            hostname: Name of the server whose key is unknown.
            key: The key presented by the server.

        Raises:
            paramiko.SSHException: If the user does not accept the key.
        """
        answer = input(f"Accept new key {key.get_base64()} (y/n)?")
        if answer.strip().lower() != "y":
            raise paramiko.SSHException("Unknown host key, not accepted by user")
        answer = input(f"Save key in {local_host_keys_filename} (y/n)?")
        if answer.strip().lower() == "y":
            # Use the public accessor and the configured file name, so that the
            # key is saved to the file named in the prompt even if no host key
            # file has been loaded into the client beforehand.
            client.get_host_keys().add(hostname, key.get_name(), key)
            # Make sure the file exists before saving.
            # NOTE(review): paramiko appears to re-read a previously loaded
            # host key file before writing - confirm against the paramiko docs.
            Path(local_host_keys_filename).touch()
            client.save_host_keys(local_host_keys_filename)
            print(f"Saved key in {local_host_keys_filename}")

def _download_price_file(sftp, entso_e_data_filename):
    """Download one price file unless the local copy has the same modification time.

    A file that cannot be found on the server is reported and skipped. Other
    SFTP or file system errors of the download itself are raised to the caller.
    """
    remote_path = f"{entso_e_data_remote_folder}/{entso_e_data_filename}"
    local_path = f"{entso_e_data_local_folder}/{entso_e_data_filename}"
    try:
        st_mtime = sftp.stat(remote_path).st_mtime
    except OSError as error:
        print(f"{entso_e_data_filename} not available from Entso-e ({error})")
        return
    try:
        local_st_mtime = os.stat(local_path).st_mtime
    except OSError:
        local_st_mtime = 0 # No usable local copy, so the file is downloaded
    if st_mtime == local_st_mtime:
        print(f"{entso_e_data_filename} already up to date")
        return
    print(f"{entso_e_data_filename} downloading...")
    sftp.get(remote_path, local_path)
    try:
        # Give the local copy the modification time of the remote file so that
        # the comparison above finds the copy up to date the next time
        os.utime(local_path, (st_mtime, st_mtime))
    except Exception:
        print("ERROR: Could not update modified time")

ssh = paramiko.client.SSHClient()
ssh.load_system_host_keys()
try:
    ssh.load_host_keys(local_host_keys_filename)
except FileNotFoundError:
    pass # No host keys have been saved yet
except Exception as error:
    print(f"WARNING: Could not load host keys from {local_host_keys_filename}: {error}")
promptPolicy = PromptPolicy()
ssh.set_missing_host_key_policy(promptPolicy)
try:
    print("Opening Entso-e SSH connection")
    ssh.connect(entso_e_host, port=entso_e_port, username=user, password=password)
except Exception as error:
    print(f"ERROR opening SSH connection: {error}")
    ssh.close() # Release whatever the failed connection attempt left open
else:
    try:
        print("Opening Entso-e SFTP connection using the SSH connection")
        sftp = ssh.open_sftp()
    except Exception as error:
        print(f"ERROR opening SFTP connection: {error}")
    else:
        try:
            print(f"Downloading data to {entso_e_data_local_folder}:")
            for year, month in sorted(year_month_dict.keys()):
                entso_e_data_filename = get_entso_e_data_filename(year, month)
                try:
                    _download_price_file(sftp, entso_e_data_filename)
                except Exception as error:
                    # Report the failure and carry on with the remaining months
                    print(f"ERROR downloading {entso_e_data_filename}: {error}")
            print("Finished downloading data")
        except Exception as error:
            print(f"ERROR downloading data: {error}")
        finally:
            sftp.close()
            print("Closed SFTP connection")
    finally:
        ssh.close()
        print("Closed SSH connection")


## Loading price information

In [None]:
# Column names of the Entso-e day-ahead price files
entso_e_data_datetime_col = "DateTime"  # Start time of the priced period; used as the index
entso_e_data_area_col = "AreaCode"  # Only rows of area "10YFI-1--------U" are used
entso_e_data_resolution_col = "ResolutionCode"  # Only rows with "PT60M" are used
entso_e_data_currency_col = "Currency"  # Only rows with "EUR" are used
entso_e_data_price_col = "Price"

# Price of each hour, keyed by the UTC start time of the hour
price_dict = {}

print("Reading price data...")
for year, month in sorted(year_month_dict.keys()):
    # Resolved outside the try block so that the error message can always name the file
    filename = get_entso_e_data_filename(year, month)
    try:
        df = pd.read_csv(f"{entso_e_data_local_folder}/{filename}", encoding="utf-8", sep="\t", decimal=".", index_col=entso_e_data_datetime_col)
        df = df[(df[entso_e_data_area_col] == "10YFI-1--------U") & (df[entso_e_data_resolution_col] == "PT60M") & (df[entso_e_data_currency_col] == "EUR")]
        df.index = pd.to_datetime(df.index, utc=True)
        price_dict.update(df[entso_e_data_price_col].items())
    except Exception as error:
        # A missing or malformed file of one month must not stop the other months
        print(f"ERROR reading/parsing {entso_e_data_local_folder}/{filename}: {error}")

print("Finished reading price data")

## Combining price and consumption data

Enter the first hour of the period to be considered in the variable `start_datetime_string`, and the hour following the last hour in the variable `end_datetime_string`, both in ISO format. The times may include a time zone, e.g. `2022-11-04T00:00:00+02:00` for midnight in Finnish standard time (during summer time the offset would be `+03:00`). If no time zone is specified, the computer's time zone is used, which in Finland is usually, conveniently, Finnish time.

The combined consumption and price data is written to the folder given by the previously defined variable `output_data_local_folder`, in a file named `Consumption and spot price START TIME to END TIME.csv`, where `START TIME` and `END TIME` are the values entered above (the `+` of a possible time zone is replaced by `p`). The `DateTime` values written to the CSV file are in ISO format in the UTC time zone.

In [None]:
start_datetime_string = "2022-11-01" # User configurable. Must be the start of an hour
end_datetime_string = "2022-12-01" # User configurable. Note that this datetime is excluded but the previous hour will be included

# Times without a time zone are interpreted in the time zone of the computer
start_datetime = datetime.fromisoformat(start_datetime_string).astimezone(timezone.utc)
end_datetime = datetime.fromisoformat(end_datetime_string).astimezone(timezone.utc)
hour_timedelta = timedelta(hours=1)

# Every hour from the start time up to, but not including, the end time
datetimes = []
t = start_datetime
while t < end_datetime:
    datetimes.append(t)
    t += hour_timedelta

# An hour without data gets an empty string, which leaves its CSV cell blank
consumptions = [consumption_dict.get(hour, "") for hour in datetimes]
prices = [price_dict.get(hour, "") for hour in datetimes]

# DateTime is kept as an ordinary column and the file is written without the
# index, so the times end up in the first column of the CSV file
output = pd.DataFrame({"DateTime": datetimes, "Consumption (kWh)": consumptions, "Price (€/MWh)": prices})
output_filename = f"{output_data_local_folder}/Consumption and spot price {start_datetime_string.replace('+','p')} to {end_datetime_string.replace('+','p')}.csv"
output.to_csv(output_filename, index=False)
print(f"Wrote output file {output_filename}")