In [6]:
import pandas as pd
import os
import datetime as dt

In [7]:
def read_and_clean(country, lng_path, area_suffix, coal_path = "./Bloomberg/Rotterdam Coal.xlsx"):
        os.chdir(os.path.join("/Users","ethan","Dev","Thesis Work"))
        # start by iterating and reading in all of the files
        price_data = []
        #os.chdir("../..")
        os.chdir(os.path.join("Data", country, "Price"))
        for file in os.listdir():
                price_data.append(pd.read_csv(file))

        prices = pd.concat(price_data)

        # general clean up
        prices = prices.dropna(subset=["Day-ahead Price [EUR/MWh]"])
        prices = prices[prices["Day-ahead Price [EUR/MWh]"] != "n/e"]
        prices = prices[prices["Day-ahead Price [EUR/MWh]"] != "-"]
        prices["Price"] = prices["Day-ahead Price [EUR/MWh]"].apply(float)
        prices = prices[["MTU (CET/CEST)","Price"]]

        # convert the time range into a start + end time
        prices["Start"] = prices["MTU (CET/CEST)"].apply(lambda x: x.split("-")[0])
        prices["End"] = prices["MTU (CET/CEST)"].apply(lambda x: x.split("-")[1])


        ## READ AND CLEAN LOAD DATA
        # work through the same process for the load data
        load_data = []
        os.chdir(os.path.join("..", "Load"))
        for load_file in os.listdir():
                df = pd.read_csv(load_file)
                load_data.append(df)
        os.chdir(os.path.join("..",".."))
        loads = pd.concat(load_data)

        # special step for Germany due to some quirks of reporting
        if(country == "Germany"):
                loads["Forecast"] = loads["Day-ahead Total Load Forecast [MW] - BZN|DE-AT-LU"].combine_first(loads["Day-ahead Total Load Forecast [MW] - BZN|DE-LU"])
                loads["Actual"] = loads["Actual Total Load [MW] - BZN|DE-AT-LU"].combine_first(loads["Actual Total Load [MW] - BZN|DE-LU"])
        elif(country == "Austria"):
                loads["Forecast"] = loads["Day-ahead Total Load Forecast [MW] - BZN|DE-AT-LU"].combine_first(loads["Day-ahead Total Load Forecast [MW] - BZN|AT"])
                loads["Actual"] = loads["Actual Total Load [MW] - BZN|DE-AT-LU"].combine_first(loads["Actual Total Load [MW] - BZN|AT"])
        else:
                loads["Forecast"] = loads["Day-ahead Total Load Forecast [MW] - {}".format(area_suffix)]
                loads["Actual"] = loads["Actual Total Load [MW] - {}".format(area_suffix)]
        loads = loads.dropna(subset=["Forecast", "Actual"])
        loads = loads[loads["Actual"] != "n/e"]
        loads = loads[loads["Actual"] != "-"]
        loads["Actual"] = loads["Actual"].apply(float)
        loads["Forecast"] = loads["Forecast"].apply(float)

        loads = loads[["Time (CET/CEST)", "Forecast", "Actual"]]
        # make the time range effective
        loads["Start"] = loads["Time (CET/CEST)"].apply(lambda x: x.split("-")[0])
        loads["End"] = loads["Time (CET/CEST)"].apply(lambda x: x.split("-")[1])

        ## MERGING THE TWO DATASETS
        # merge the two sets of data
        elec_merged = pd.merge(left = prices, right = loads, on = "Start", how="left")
        elec_merged = elec_merged[["Start", "Price", "Actual", "Forecast"]]
        elec_merged["Date"] = pd.to_datetime(pd.to_datetime(elec_merged["Start"]).dt.date)
        ## READ IN LNG DATA FROM BLOOMBERG
        lng_data = pd.read_excel(lng_path)
        # filter columns
        lng_data = lng_data[["Date", "Last Price"]]

        ## READ IN COAL DATA FROM BLOOMBERG
        coal_data = pd.read_excel(coal_path)
        coal_data["Coal Price"] = coal_data["Last Price"]
        coal_data = coal_data[["Date", "Coal Price"]]
        # add the additional day to match up with the day ahead data
        #lng_data["Date"] = pd.to_datetime(lng_data["Date"] + dt.timedelta(days=1))
        combined = elec_merged.merge(lng_data, on="Date")
        combined = combined.merge(coal_data, on = "Date");
        
        combined.to_csv("./{}/combined_data.csv".format(country))

        return combined


In [8]:
read_and_clean("Spain", "/Users/ethan/Dev/Thesis Work/Data/Bloomberg/PVB_data.xlsx", "BZN|ES")

Unnamed: 0,Start,Price,Actual,Forecast,Date,Last Price,Coal Price
0,02.01.2017 00:00,54.99,23755.0,24004.0,2017-02-01,26.40,-104.75
1,02.01.2017 01:00,50.18,21649.0,21881.0,2017-02-01,26.40,-104.75
2,02.01.2017 02:00,47.10,20422.0,20252.0,2017-02-01,26.40,-104.75
3,02.01.2017 03:00,45.78,19846.0,19687.0,2017-02-01,26.40,-104.75
4,02.01.2017 04:00,44.99,19749.0,19442.0,2017-02-01,26.40,-104.75
...,...,...,...,...,...,...,...
43339,29.12.2015 19:00,58.35,31532.0,33014.0,2015-12-29,19.25,-138.25
43340,29.12.2015 20:00,58.78,31722.0,32873.0,2015-12-29,19.25,-138.25
43341,29.12.2015 21:00,56.70,31268.0,32627.0,2015-12-29,19.25,-138.25
43342,29.12.2015 22:00,50.85,29125.0,30389.0,2015-12-29,19.25,-138.25


In [9]:
read_and_clean("Netherlands", "/Users/ethan/Dev/Thesis Work/Data/Bloomberg/TTF_data.xlsx", "BZN|NL")

Unnamed: 0,Start,Price,Actual,Forecast,Date,Last Price,Coal Price
0,02.01.2017 00:00,30.54,10725.0,11403.0,2017-02-01,22.3,-104.75
1,02.01.2017 01:00,30.00,10240.0,11042.0,2017-02-01,22.3,-104.75
2,02.01.2017 02:00,33.21,9931.0,10438.0,2017-02-01,22.3,-104.75
3,02.01.2017 03:00,32.05,9777.0,10354.0,2017-02-01,22.3,-104.75
4,02.01.2017 04:00,31.28,9772.0,10342.0,2017-02-01,22.3,-104.75
...,...,...,...,...,...,...,...
49483,31.12.2015 19:00,36.79,12257.0,14404.0,2015-12-31,14.1,-138.20
49484,31.12.2015 20:00,28.81,11377.0,13696.0,2015-12-31,14.1,-138.20
49485,31.12.2015 21:00,26.27,10507.0,12816.0,2015-12-31,14.1,-138.20
49486,31.12.2015 22:00,29.99,9900.0,11794.0,2015-12-31,14.1,-138.20


In [10]:
read_and_clean("Germany", "/Users/Ethan/Dev/Thesis Work/Data/Bloomberg/THE_data.xlsx", "BZN|DE")

Unnamed: 0,Start,Price,Actual,Forecast,Date,Last Price,Coal Price
0,02.01.2017 00:00,30.54,53537.0,48745.0,2017-02-01,22.325,-104.75
1,02.01.2017 01:00,28.91,51050.0,47780.0,2017-02-01,22.325,-104.75
2,02.01.2017 02:00,28.11,49575.0,46353.0,2017-02-01,22.325,-104.75
3,02.01.2017 03:00,27.89,48941.0,46196.0,2017-02-01,22.325,-104.75
4,02.01.2017 04:00,27.73,49992.0,46979.0,2017-02-01,22.325,-104.75
...,...,...,...,...,...,...,...
50083,30.12.2022 22:45,-29.94,48868.0,48004.0,2022-12-30,72.750,183.45
50084,30.12.2022 23:00,36.16,48631.0,47268.0,2022-12-30,72.750,183.45
50085,30.12.2022 23:15,4.95,47834.0,46852.0,2022-12-30,72.750,183.45
50086,30.12.2022 23:30,-0.10,47024.0,46418.0,2022-12-30,72.750,183.45


In [11]:
read_and_clean("France", "/Users/Ethan/Dev/Thesis Work/Data/Bloomberg/TRF_data.xlsx", "BZN|FR")

Unnamed: 0,Start,Price,Actual,Forecast,Date,Last Price,Coal Price
0,02.01.2017 00:00,54.99,69366.0,71300.0,2017-02-01,192.52,-104.75
1,02.01.2017 01:00,50.18,66988.0,68550.0,2017-02-01,192.52,-104.75
2,02.01.2017 02:00,49.11,65675.0,67600.0,2017-02-01,192.52,-104.75
3,02.01.2017 03:00,45.78,63066.0,65100.0,2017-02-01,192.52,-104.75
4,02.01.2017 04:00,44.99,62297.0,64150.0,2017-02-01,192.52,-104.75
...,...,...,...,...,...,...,...
48835,31.12.2015 19:00,36.79,60772.0,62650.0,2015-12-31,189.95,-138.20
48836,31.12.2015 20:00,28.81,57492.0,59400.0,2015-12-31,189.95,-138.20
48837,31.12.2015 21:00,26.27,55217.0,56550.0,2015-12-31,189.95,-138.20
48838,31.12.2015 22:00,29.99,57153.0,56450.0,2015-12-31,189.95,-138.20


In [12]:
read_and_clean("Austria", "/Users/Ethan/Dev/Thesis Work/Data/Bloomberg/VTP_data.xlsx", "BZN|AT")

Unnamed: 0,Start,Price,Actual,Forecast,Date,Last Price,Coal Price
0,02.01.2017 00:00,30.54,53537.0,48745.0,2017-02-01,24.0,-104.75
1,02.01.2017 01:00,28.91,51050.0,47780.0,2017-02-01,24.0,-104.75
2,02.01.2017 02:00,28.11,49575.0,46353.0,2017-02-01,24.0,-104.75
3,02.01.2017 03:00,27.89,48941.0,46196.0,2017-02-01,24.0,-104.75
4,02.01.2017 04:00,27.73,49992.0,46979.0,2017-02-01,24.0,-104.75
...,...,...,...,...,...,...,...
112675,21.02.2023 22:45,125.00,,,2023-02-21,50.3,135.85
112676,21.02.2023 23:00,192.46,6325.0,6664.0,2023-02-21,50.3,135.85
112677,21.02.2023 23:15,149.92,,,2023-02-21,50.3,135.85
112678,21.02.2023 23:30,120.00,,,2023-02-21,50.3,135.85
