# Harnessing Weather Insights for Accurate Energy Load Forecasting

by Florian Schulze, Raffaela Länger, Johanna Kronfuß and Julian Janisch

------------

# Downloading the Data

In this project, we download data from two different sources, the ENTSO-E Transparency Platform (energy load) and the GeoSphere Austria Data Hub (weather observations), to predict energy load based on weather data. Since frequent API requests are inefficient and may be subject to usage limits or costs, we store the downloaded data locally. This allows us to reuse the data for analysis and model optimization without overloading the APIs, ensuring a more efficient and sustainable workflow.

In [None]:
# Install the project's dependencies listed in requirements.txt
%pip install -r requirements.txt

In [None]:
# Imports
import requests
import datetime
import os
import time

# API Settings

# Transparency Portal
# The security token is a personal credential and must not be committed with
# the notebook. It is read from the TRANSPARENCY_API_KEY environment variable;
# when the variable is unset the "DEMO_KEY" placeholder is used, which the
# download cell treats as "no key configured".
TRANSPARENCY_API_KEY = os.environ.get("TRANSPARENCY_API_KEY", "DEMO_KEY")
TRANSPARENCY_PERIOD_START = "2024-01-01"  # First day to download (inclusive), YYYY-MM-DD
TRANSPARENCY_PERIOD_END = "2024-12-31"  # Last day to download (inclusive), YYYY-MM-DD
TRANSPARENCY_BIDDING_ZONE = "10YAT-APG------L" # Load Austria

# GeoSphere
GEOSPHERE_PERIOD_START = "2024-01-01"  # First day to download, YYYY-MM-DD
GEOSPHERE_PERIOD_END = "2024-12-31"  # Last day to download, YYYY-MM-DD
GEOSPHERE_STATIONS = "1, 105"  # Comma-separated station ids sent as "station_ids"

In [None]:
# Transparency Portal
# Restful API Guide: https://documenter.getpostman.com/view/7009892/2s93JtP3F6
#
# Requests the system total load for TRANSPARENCY_BIDDING_ZONE one calendar
# month at a time between TRANSPARENCY_PERIOD_START and TRANSPARENCY_PERIOD_END
# and writes each XML response unchanged to
# ./data/transparency/<year>/<month>.xml. A failed month is reported and
# skipped so the remaining months are still downloaded.


if TRANSPARENCY_API_KEY == "DEMO_KEY":
    print("Error: Missing Transparency API key.")
else:
    api_url = "https://web-api.tp.entsoe.eu/api"

    start = datetime.datetime.strptime(TRANSPARENCY_PERIOD_START, "%Y-%m-%d")
    end = datetime.datetime.strptime(TRANSPARENCY_PERIOD_END, "%Y-%m-%d")

    # Go through each month of the span
    current = start
    while current <= end:
        print(f"Requesting data for {current:%Y-%m}")

        # First day of the following month: day 1 plus 32 days always lands
        # in the next month, which is then snapped back to its first day.
        next_month = (current.replace(day=1) + datetime.timedelta(days=32)).replace(day=1)
        # The configured end day is inclusive, so the final request stops at
        # midnight of the day after it.
        period_end = min(next_month, end + datetime.timedelta(days=1))

        api_params = {
            "documentType": "A65", # System Total Load
            "processType": "A16", # Actual Realized Load
            "outBiddingZone_Domain": TRANSPARENCY_BIDDING_ZONE,
            "periodStart": current.strftime("%Y%m%d0000"), # Format: YYYYMMDDHHMM
            "periodEnd": period_end.strftime("%Y%m%d0000"), # Format: YYYYMMDDHHMM
            "securityToken": TRANSPARENCY_API_KEY
        }

        try:
            # Send the request => Get XML response.
            # Without a timeout requests waits forever on a stalled connection.
            response = requests.get(api_url, params=api_params, timeout=60)
        except requests.exceptions.RequestException as error:
            # Only the exception type is printed: the exception message can
            # contain the full request URL, which includes the security token.
            print("Error: request failed (" + type(error).__name__ + ")")
        else:
            if response.status_code == 200:
                # Save to file and create directory if it doesn't exist
                year_folder = current.strftime("%Y")
                month_file = current.strftime("%m")
                target_dir = f"./data/transparency/{year_folder}"
                os.makedirs(target_dir, exist_ok=True)

                with open(f"{target_dir}/{month_file}.xml", "w", encoding="utf-8") as file:
                    file.write(response.text)

                print(f"Data saved to ./data/transparency/{year_folder}/{month_file}.xml")
            else:
                print("Error: " + str(response.status_code))
                print("Response: " + response.text)

        current = next_month


In [None]:
# GeoSphere Historical Data
#
# Requests hourly station data (dataset "klima-v2-1h") for GEOSPHERE_STATIONS
# one calendar month at a time between GEOSPHERE_PERIOD_START and
# GEOSPHERE_PERIOD_END and writes each CSV response unchanged to
# ./data/geosphere/<year>/<month>.csv. A failed month is reported and skipped
# so the remaining months are still downloaded.
#
# The request carries no API key, so this cell is not gated on the
# Transparency key.
api_url = "https://dataset.api.hub.geosphere.at/v1/station/historical/"
api_dataset = "klima-v2-1h"

# Use the GeoSphere period settings (not the Transparency ones) so both
# downloads can be configured independently.
start = datetime.datetime.strptime(GEOSPHERE_PERIOD_START, "%Y-%m-%d")
end = datetime.datetime.strptime(GEOSPHERE_PERIOD_END, "%Y-%m-%d")

# Go through each month of the span
current = start
while current <= end:
    # First day of the following month: day 1 plus 32 days always lands in
    # the next month, which is then snapped back to its first day.
    next_month = (current.replace(day=1) + datetime.timedelta(days=32)).replace(day=1)
    period_end = min(next_month, end + datetime.timedelta(days=1))
    # NOTE(review): "end" is sent as the first day of the next month; if the
    # API treats it as inclusive, that midnight row also appears in the next
    # month's file - confirm and de-duplicate when loading.

    print(f"Requesting data for {current:%Y-%m}")

    api_params = {
        # Data to get
        "parameters": "rr,tl,p,so_h,ff", # Rainfall, Temperature, Pressure, Sunshine Duration, Wind speed
        "start": current.strftime("%Y-%m-%d"),
        "end": period_end.strftime("%Y-%m-%d"),
        "station_ids": GEOSPHERE_STATIONS,
        "output_format": "csv",
    }

    try:
        # Send the request => Get CSV response.
        # Without a timeout requests waits forever on a stalled connection.
        response = requests.get(api_url + api_dataset, params=api_params, timeout=60)
    except requests.exceptions.RequestException as error:
        print(f"Error: request failed: {error}")
    else:
        if response.status_code == 200:
            # The rate-limit header is informational only; do not fail the
            # download if the server omits it.
            remaining = response.headers.get("x-ratelimit-remaining-hour", "unknown")
            print("Remaining requests for this hour: " + remaining)

            # Save to file and create directory if it doesn't exist
            year_folder = current.strftime("%Y")
            month_file = current.strftime("%m")
            target_dir = f"./data/geosphere/{year_folder}"
            os.makedirs(target_dir, exist_ok=True)

            with open(f"{target_dir}/{month_file}.csv", "w", encoding="utf-8") as file:
                file.write(response.text)

            print(f"Data saved to ./data/geosphere/{year_folder}/{month_file}.csv")
        else:
            print("Error: " + str(response.status_code))
            print("Response: " + response.text)

    # Sleep to avoid rate limiting (5 requests per second)
    time.sleep(0.2)
    current = next_month
    

Requesting data for 2024-01
Remaining requests for this hour: 221
Data saved to ./data/geosphere/2024/01.csv
Requesting data for 2024-02
Remaining requests for this hour: 219
Data saved to ./data/geosphere/2024/02.csv
Requesting data for 2024-03
Remaining requests for this hour: 218
Data saved to ./data/geosphere/2024/03.csv
Requesting data for 2024-04
Remaining requests for this hour: 217
Data saved to ./data/geosphere/2024/04.csv
Requesting data for 2024-05
Remaining requests for this hour: 216
Data saved to ./data/geosphere/2024/05.csv
Requesting data for 2024-06
Remaining requests for this hour: 215
Data saved to ./data/geosphere/2024/06.csv
Requesting data for 2024-07
Remaining requests for this hour: 214
Data saved to ./data/geosphere/2024/07.csv
Requesting data for 2024-08
Remaining requests for this hour: 213
Data saved to ./data/geosphere/2024/08.csv
Requesting data for 2024-09
