In [1]:
import urllib.request, xml.etree.ElementTree as ET

# Tradinghub (THE) XML interface: PricesEnergyImbalance report for
# 01-10-2021 .. 31-10-2021 (dates are written dd-mm-yyyy in the query string).
url = (
    "https://datenservice.tradinghub.eu/XmlInterface/getXML.ashx"
    "?ReportId=PricesEnergyImbalance&Start=01-10-2021&End=31-10-2021"
)
# The request identifies itself through an explicit User-Agent header.
req = urllib.request.Request(url)
req.add_header("User-Agent", "YourOrg-YourApp/1.0")
with urllib.request.urlopen(req, timeout=60) as r:
    xml = r.read()

root = ET.fromstring(xml)
# Peek at the first five direct children of the root: tag plus a copy of the attributes.
for child in root[:5]:
    print(child.tag, dict(child.attrib))

{http://www.w3.org/2001/XMLSchema}schema {'targetNamespace': 'urn:schemas-microsoft-com:sql:SqlRowSet1', 'elementFormDefault': 'qualified'}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesEnergyImbalance {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesEnergyImbalance {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesEnergyImbalance {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesEnergyImbalance {}


In [2]:
import urllib.request, xml.etree.ElementTree as ET

# Tradinghub (THE) XML interface: PricesFlexibilityChargeIntraday report,
# requested without a Start/End range.
url = (
    "https://datenservice.tradinghub.eu/XmlInterface/getXML.ashx"
    "?ReportId=PricesFlexibilityChargeIntraday"
)
# The request identifies itself through an explicit User-Agent header.
req = urllib.request.Request(url)
req.add_header("User-Agent", "YourOrg-YourApp/1.0")
with urllib.request.urlopen(req, timeout=60) as r:
    xml = r.read()

root = ET.fromstring(xml)
# List every direct child of the root (tag and attributes) to see what the
# report returns today.
for row in root:
    print(row.tag, row.attrib)

{http://www.w3.org/2001/XMLSchema}schema {'targetNamespace': 'urn:schemas-microsoft-com:sql:SqlRowSet1', 'elementFormDefault': 'qualified'}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesFlexibilityChargeIntraday {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesFlexibilityChargeIntraday {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesFlexibilityChargeIntraday {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesFlexibilityChargeIntraday {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesFlexibilityChargeIntraday {}
{urn:schemas-microsoft-com:sql:SqlRowSet1}PricesFlexibilityChargeIntraday {}


In [1]:
import urllib.request, xml.etree.ElementTree as ET
import csv, sys

# Endpoint of the PricesFlexibilityChargeIntraday report (no date range in the query).
URL = (
    "https://datenservice.tradinghub.eu/XmlInterface/getXML.ashx"
    "?ReportId=PricesFlexibilityChargeIntraday"
)
# Placeholder User-Agent: replace it with a real identifier, as THE's rules require.
UA = "YourOrg-YourApp/1.0 (you@example.com)"

def strip_ns(tag):
    """Return ``tag`` without a leading ``{namespace}`` part.

    Everything up to and including the first ``}`` is removed; a tag that
    contains no ``}`` is returned unchanged.
    """
    if '}' not in tag:
        return tag
    # maxsplit=1 with a '}' present always yields exactly two pieces.
    return tag.split('}', 1)[1]

def fetch_xml(url):
    """Download ``url`` and return the raw response body as bytes.

    The request carries the module-level ``UA`` string as its User-Agent and
    uses a 60-second timeout. Errors raised by ``urllib`` propagate to the
    caller.
    """
    request = urllib.request.Request(url)
    request.add_header("User-Agent", UA)
    with urllib.request.urlopen(request, timeout=60) as response:
        body = response.read()
    return body

def find_records(root, record_tag="PricesFlexibilityChargeIntraday"):
    """Collect every element whose namespace-stripped tag equals ``record_tag``.

    The whole tree is searched in document order, ``root`` included. Elements
    named ``schema`` are never returned, so asking for ``record_tag="schema"``
    yields an empty list.
    """
    named_nodes = ((node, strip_ns(node.tag)) for node in root.iter())
    return [
        node
        for node, local in named_nodes
        if local != "schema" and local == record_tag
    ]

def record_to_dict(el):
    """Flatten one record element into a ``{column: text}`` dict.

    The element's own attributes seed the row. Each direct child then adds a
    column named after its namespace-stripped tag, holding the child's
    stripped text (``""`` when the child has no text). Grandchildren are not
    descended into.

    Name collisions never destroy a value that is already in the row: when a
    child's name is taken (by an attribute or by an earlier child), non-empty
    text is stored under ``<name>_val`` and empty text is ignored. If several
    colliding children carry text, the last one wins the ``<name>_val`` slot.

    Args:
        el: the record element to flatten.

    Returns:
        dict mapping column names to string values.
    """
    row = dict(el.attrib)
    for child in el:
        key = strip_ns(child.tag)
        text = (child.text or "").strip()
        if key not in row:
            row[key] = text
        elif text:
            # Name already used by an attribute or an earlier child: keep both values.
            row[key + "_val"] = text
        # An empty child whose name is already present carries no information;
        # writing "" here would wipe out the value stored earlier.
    return row

def write_csv(rows, path="flex_intraday.csv"):
    """Write ``rows`` (dicts) to a UTF-8 CSV file and return ``(path, fieldnames)``.

    The header is the union of all keys found in ``rows``. Names from the
    preferred list come first, in that list's order; all remaining names
    follow in sorted order. Keys missing from a row are written as empty
    cells by ``csv.DictWriter``.
    """
    # Union of every key that occurs in any row.
    all_keys = set()
    for record in rows:
        all_keys.update(record.keys())

    preferred = ["GasDay","From","To","Position","Timestamp","Charge","Quantity","Cost","Currency"]
    leading = [name for name in preferred if name in all_keys]
    trailing = [name for name in sorted(all_keys) if name not in preferred]
    fieldnames = leading + trailing

    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    return path, fieldnames

def main():
    """Fetch the intraday flexibility-charge report, preview it and save it as CSV.

    Each failure mode ends the run early with a message instead of raising:
    download error, malformed XML, no record elements, or only empty records.
    On success the first five rows are printed and all rows are written via
    ``write_csv`` (default output path).
    """
    # 1) Download.
    try:
        payload = fetch_xml(URL)
    except Exception as exc:
        print("HTTP error:", exc, file=sys.stderr)
        return

    # 2) Parse; on failure show the start of the payload to help diagnose it.
    try:
        tree_root = ET.fromstring(payload)
    except ET.ParseError as exc:
        print("XML parse error:", exc, file=sys.stderr)
        print("First 400 bytes:", payload[:400])
        return

    # 3) Locate the data elements (the schema header is not a record).
    elements = find_records(tree_root, "PricesFlexibilityChargeIntraday")
    if not elements:
        print("No data records found. (Possible reasons: outside gas day, or structure different today.)")
        return

    # 4) Flatten only elements that have children or attributes.
    rows = []
    for element in elements:
        if len(element) > 0 or element.attrib:
            rows.append(record_to_dict(element))
    if not rows:
        print("Records were found, but contained neither attributes nor child text.")
        return

    # 5) Preview and persist.
    print(f"Found {len(rows)} records. Preview:")
    for position, row in enumerate(rows[:5], start=1):
        print(f"{position:>2}. {row}")

    saved_path, columns = write_csv(rows)
    print(f"\nSaved {len(rows)} rows to {saved_path}")
    print("Columns:", columns)


In [8]:
from urllib.request import Request, urlopen
from urllib.parse import urlencode
import xml.etree.ElementTree as ET
import csv, re

def fetch_xml(report_id="PricesEnergyImbalance", start=None, end=None, ua="my-org-data-puller/1.0"):
    """Download one report from the THE XML interface and return the raw bytes.

    Args:
        report_id: value of the ``ReportId`` query parameter.
        start: optional ``Start`` date, formatted dd-mm-yyyy; left out when falsy.
        end: optional ``End`` date, same format; left out when falsy.
        ua: User-Agent header sent with the request.

    Returns:
        The response body as bytes. The request uses a 60-second timeout and
        ``urllib`` errors propagate to the caller.
    """
    query = {"ReportId": report_id}
    for name, value in (("Start", start), ("End", end)):
        if value:
            query[name] = value
    endpoint = "https://datenservice.tradinghub.eu/XmlInterface/getXML.ashx?"
    request = Request(endpoint + urlencode(query), headers={"User-Agent": ua})
    with urlopen(request, timeout=60) as response:
        return response.read()

def flatten(elem):
    """Flatten one element into a single-level ``{column: value}`` dict.

    Columns are produced in this order:
      * the element's own attributes, under their own names;
      * for each direct child without children: its stripped text under the
        namespace-stripped tag (skipped when empty; a later child with the
        same tag overwrites an earlier one);
      * for each direct child with children: every descendant contributes its
        stripped text under its own tag (the first value stored under a name
        is kept) and its attributes as ``<tag>_<attr>``; the child's own text
        is not used;
      * finally the child's own attributes as ``<tag>_<attr>``.
    """
    def local_name(node):
        # Drop the "{namespace}" prefix ElementTree puts in front of tags.
        return re.sub(r"{.*}", "", node.tag)

    def stripped_text(node):
        return (node.text or "").strip()

    record = dict(elem.attrib)
    for child in elem:
        child_name = local_name(child)
        if len(child):
            # Container child: harvest text and attributes from its descendants.
            for node in child.iter():
                if node is child:
                    continue
                node_name = local_name(node)
                text = stripped_text(node)
                if text:
                    record.setdefault(node_name, text)
                record.update(
                    (f"{node_name}_{attr}", value) for attr, value in node.attrib.items()
                )
        else:
            # Leaf child: its text becomes the column value.
            text = stripped_text(child)
            if text:
                record[child_name] = text
        record.update(
            (f"{child_name}_{attr}", value) for attr, value in child.attrib.items()
        )
    return record

def xml_to_csv(xml_bytes, csv_path, return_df=False, preview_rows=0):
    """Write the repeating records of an XML payload to a CSV file.

    The record element is chosen heuristically: tags are ranked by how often
    they occur in the document, and the most frequent tag that has at least
    one element (other than the root) with children or attributes wins.
    Elements in the XML Schema namespace are left out of the ranking, so the
    ``<xs:element>`` declarations of an inline schema cannot be mistaken for
    data when a response holds fewer records than it has columns.

    Every element carrying the chosen tag is flattened with ``flatten`` and
    written as one CSV row; the header is the sorted union of all row keys.
    If no record tag can be identified, a CSV with an empty header and no
    rows is written.

    Args:
        xml_bytes: XML document accepted by ``ET.fromstring``.
        csv_path: destination file; an existing file is overwritten.
        return_df: when true, a fourth item is appended to the result: a
            pandas DataFrame of the rows, or the raw list of row dicts if
            pandas is not installed.
        preview_rows: with ``return_df`` and pandas available, print this
            many leading rows of the DataFrame (0 prints nothing).

    Returns:
        ``(csv_path, row_count, first_ten_headers)``, extended by the fourth
        item described above when ``return_df`` is true.

    Raises:
        xml.etree.ElementTree.ParseError: if ``xml_bytes`` is not well-formed.
    """
    xsd_prefix = "{http://www.w3.org/2001/XMLSchema}"

    root = ET.fromstring(xml_bytes)

    # Tally tag frequencies, ignoring inline-schema elements.
    counts = {}
    for e in root.iter():
        if e.tag.startswith(xsd_prefix):
            continue
        counts[e.tag] = counts.get(e.tag, 0) + 1

    # Most frequent first; sorted() is stable, so ties keep document order and
    # a record tag is tried before the field tags nested inside it.
    repeating_tag = None
    for tag, _ in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
        if any(e is not root and (len(e) or e.attrib) for e in root.iter(tag)):
            repeating_tag = tag
            break

    # root.iter(None) would match every element in the tree, so an
    # unidentified record tag has to mean "no rows".
    if repeating_tag is None:
        rows = []
    else:
        rows = [flatten(e) for e in root.iter(repeating_tag)]

    # Header = sorted union of keys, so the column order is deterministic.
    headers = sorted({k for r in rows for k in r.keys()})

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=headers)
        w.writeheader()
        w.writerows(rows)

    # Three-item result kept for callers that do not ask for a DataFrame.
    result = (csv_path, len(rows), headers[:10])
    if not return_df:
        return result

    try:
        import pandas as pd  # optional dependency, only needed for return_df
    except ImportError:
        # pandas not installed; still return something usable.
        return result + (rows,)

    df = pd.DataFrame(rows)[headers]  # same column order as the CSV
    if preview_rows:
        print(df.head(preview_rows))
    return result + (df,)

In [11]:
# Download the PricesEnergyImbalance report for October 2021 (dates are dd-mm-yyyy).
xml_bytes = fetch_xml(start="01-10-2021", end="31-10-2021")

# Three-value form: writes the CSV and returns (path, row count, leading column names).
csv_path, nrows, sample_cols = xml_to_csv(xml_bytes, csv_path="THE_PricesEnergyImbalance.csv")

# Four-value form: writes the same CSV again and also returns a DataFrame
# (needs pandas); preview_rows=5 additionally prints df.head(5).
csv_path, nrows, sample_cols, df = xml_to_csv(
    xml_bytes, "THE_PricesEnergyImbalance.csv", return_df=True, preview_rows=5
)

# Cell output: the first ten rows.
df.head(10)

       Gasday MarginalPriceSystemBuy MarginalPriceSystemSell  \
0  2021-10-01                96.0000                 57.1250   
1  2021-10-02                    NaN                 55.0000   
2  2021-10-03                    NaN                 63.6750   
3  2021-10-04                92.0000                 77.0250   
4  2021-10-05               106.0000                 85.0000   

  NegativeEnergyImbalancePrice PositiveEnergyImbalancePrice     Unit  \
0                      96.0000                      57.1250  EUR/MWh   
1                      90.9920                      55.0000  EUR/MWh   
2                      90.1350                      63.6750  EUR/MWh   
3                      92.5690                      77.0250  EUR/MWh   
4                     106.0000                      85.0000  EUR/MWh   

  VTPDailyAveragePrice VTPDailyAveragePriceSmallAdjMinus  \
0              90.4030                         88.594940   
1              89.2080                         87.423840   
2 

Unnamed: 0,Gasday,MarginalPriceSystemBuy,MarginalPriceSystemSell,NegativeEnergyImbalancePrice,PositiveEnergyImbalancePrice,Unit,VTPDailyAveragePrice,VTPDailyAveragePriceSmallAdjMinus,VTPDailyAveragePriceSmallAdjPlus
0,2021-10-01,96.0,57.125,96.0,57.125,EUR/MWh,90.403,88.59494,92.21106
1,2021-10-02,,55.0,90.992,55.0,EUR/MWh,89.208,87.42384,90.99216
2,2021-10-03,,63.675,90.135,63.675,EUR/MWh,88.368,86.60064,90.13536
3,2021-10-04,92.0,77.025,92.569,77.025,EUR/MWh,90.754,88.93892,92.56908
4,2021-10-05,106.0,85.0,106.0,85.0,EUR/MWh,93.379,91.51142,95.24658
5,2021-10-06,137.0,,137.0,109.485,EUR/MWh,111.719,109.48462,113.95338
6,2021-10-07,117.0,,117.0,111.458,EUR/MWh,113.733,111.45834,116.00766
7,2021-10-08,106.2013,,106.201,93.925,EUR/MWh,95.842,93.92516,97.75884
8,2021-10-09,86.7263,,89.534,86.022,EUR/MWh,87.778,86.02244,89.53356
9,2021-10-10,89.5,84.5,89.964,84.5,EUR/MWh,88.2,86.436,89.964
