In [None]:
# === DOWNLOAD FILE FROM WEB IF AVAILABLE ===

import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Page that lists the published files; also the base for resolving links.
base_url = "https://www.arera.it/area-operatori/prezzi-e-tariffe"

# Seconds to wait on the server. Without a timeout requests.get() can block
# indefinitely on a stalled connection.
request_timeout = 30

# Fetch the page content
response = requests.get(base_url, timeout=request_timeout)
response.raise_for_status()  # Raise error if request fails

# Parse HTML
soup = BeautifulSoup(response.text, "html.parser")

# Find the first <a> tag whose href mentions the Excel file ('smt.xlsx')
link_tag = soup.find("a", href=lambda x: x and "smt.xlsx" in x)

if link_tag:
    # urljoin resolves a relative href against the page URL and leaves an
    # absolute href untouched, so both link styles produce a valid URL.
    file_url = urljoin(base_url, link_tag["href"])
    # Use only the URL path for the local name so a query string or fragment
    # never ends up in the file name.
    file_name = os.path.basename(urlsplit(file_url).path)

    # Download the Excel file
    r = requests.get(file_url, timeout=request_timeout)
    r.raise_for_status()

    # Save locally (binary mode: the payload is written as received)
    with open(file_name, "wb") as f:
        f.write(r.content)

    print(f"Downloaded file: {file_name}")
else:
    print("No Excel file found containing 'smt.xlsx'")

In [22]:
# === EXTRACT DATA FROM LOCAL XLSX FILE ===

import pandas as pd
import json
from numbers import Number

# === CONSTANTS ===
EXCEL_FILE = "E2025-3_smt.xlsx"
SHEET_NAME = 0  # sheet selected by index (a sheet name is accepted too)
HEADER_ROW = 16  # header row (zero-based)
COLS = [*range(2, 21)]  # columns C to U (zero-based positions 2..20)

# Labels shared by both row mappings below.
_ROW_LABELS = ("EN €/kWh", "FIX €/Y", "POT €/kW/Y")

# Mapping label -> row position of the values (used with DataFrame.iloc).
RESIDENTIAL_ROWS = dict(zip(_ROW_LABELS, (19, 20, 21)))
NON_RESIDENTIAL_ROWS = dict(zip(_ROW_LABELS, (28, 29, 30)))

# Fixed keys used for the two nested column groups.
PE_KEYS = ["F0", "F1", "F23"]
ME_KEYS = ["Monorario", "F1", "F23"]


def load_raw_data(file_path, sheet, header_row, cols):
    """Read a worksheet without header parsing and pull out the column labels.

    Args:
        file_path: Path of the Excel workbook.
        sheet: Sheet index or name, forwarded to ``pandas.read_excel``.
        header_row: Zero-based position of the row that holds the labels.
        cols: Zero-based column positions whose labels are wanted.

    Returns:
        A ``(frame, labels)`` tuple: the frame exactly as read, and the cells
        of ``header_row`` at ``cols`` cast with ``astype(str)`` and stripped
        of surrounding whitespace.
    """
    frame = pd.read_excel(file_path, header=None, sheet_name=sheet)
    label_cells = frame.iloc[header_row, cols]
    labels = label_cells.astype(str).str.strip().tolist()
    return frame, labels


def extract_grouped_data(df, rows_map, cols, headers):
    """Turn the selected rows of ``df`` into a list of grouped dictionaries.

    For every ``label -> row position`` pair in ``rows_map`` the cells at
    ``cols`` are read and split by position:

    * cells 0-2 are nested under ``"PE"`` and keyed by ``PE_KEYS``;
    * cells 3-7 are keyed by ``headers[3:8]`` and placed at the top level;
    * cells 8-10 are nested under ``"Materia energia"`` and keyed by
      ``ME_KEYS``;
    * cells from 11 onwards are keyed by ``headers[11:]`` at the top level.

    Any top-level key equal to ``"TOTALE"`` is dropped; the comparison
    ignores case and surrounding whitespace.

    Args:
        df: Frame read without header parsing.
        rows_map: Mapping of description label to zero-based row position.
        cols: Zero-based column positions to read from each row.
        headers: Column labels aligned with ``cols``.

    Returns:
        A list of ``{"descrizione": label, "valori": {...}}`` dictionaries,
        in the iteration order of ``rows_map``.
    """
    middle_labels = headers[3:8]
    trailing_labels = headers[11:]

    def _build_entry(label, row_position):
        cells = df.iloc[row_position, cols].tolist()
        # Later entries win on a key clash, exactly like successive
        # dict.update() calls would.
        merged = {
            "PE": dict(zip(PE_KEYS, cells[0:3])),
            "Materia energia": dict(zip(ME_KEYS, cells[8:11])),
            **dict(zip(middle_labels, cells[3:8])),
            **dict(zip(trailing_labels, cells[11:])),
        }
        kept = {
            key: value
            for key, value in merged.items()
            if str(key).strip().upper() != "TOTALE"
        }
        return {"descrizione": label, "valori": kept}

    return [_build_entry(label, position) for label, position in rows_map.items()]


def round_values(obj, decimals=5):
    """Recursively round every numeric value found in ``obj``.

    Dictionaries and lists are rebuilt with their contents processed in the
    same way; any other non-numeric object is returned unchanged.

    Args:
        obj: A dict, list, number, or any other object.
        decimals: Number of decimal places passed to ``round``.

    Returns:
        A new structure with the same shape as ``obj`` whose numeric leaves
        are rounded. Booleans and complex numbers are returned untouched.
    """
    if isinstance(obj, dict):
        return {k: round_values(v, decimals) for k, v in obj.items()}
    if isinstance(obj, list):
        return [round_values(elem, decimals) for elem in obj]
    # bool is a subclass of int, so round(True, 5) would return the int 1 and
    # silently turn a flag into a number. complex is a Number as well but does
    # not implement __round__, so round() would raise TypeError.
    if isinstance(obj, (bool, complex)):
        return obj
    if isinstance(obj, Number):
        return round(obj, decimals)
    return obj


def save_json(data, filename):
    """Write ``data`` to ``filename`` as human-readable JSON.

    The file is opened for writing as UTF-8, the output is indented by four
    spaces and non-ASCII characters are written as-is rather than escaped.
    A confirmation line is printed once the file has been written.

    Args:
        data: JSON-serialisable object.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, ensure_ascii=False, indent=4)
    print(f"File JSON salvato in {filename}")


def main():
    """Read the workbook, extract both tables, round them and save the JSON.

    The two row mappings are extracted from the same raw frame, stored under
    their section titles, rounded to 5 decimal places and written to
    ``output.json``.
    """
    frame, column_labels = load_raw_data(EXCEL_FILE, SHEET_NAME, HEADER_ROW, COLS)

    # Section title -> row mapping; insertion order fixes the output order.
    sections = {
        "Abitazioni di residenza anagrafica": RESIDENTIAL_ROWS,
        "Abitazioni diverse dalla residenza anagrafica": NON_RESIDENTIAL_ROWS,
    }
    payload = {
        title: extract_grouped_data(frame, row_map, COLS, column_labels)
        for title, row_map in sections.items()
    }

    # Round every numeric value to 5 decimals before saving.
    save_json(round_values(payload, decimals=5), "output.json")


if __name__ == "__main__":
    main()

File JSON salvato in output.json


In [None]:
# === RAW FILE WITH NO FUNCTIONS FOR TESTING "ON THE FLY" ONLY ===


import pandas as pd
import json

# === PARAMETERS ===
excel_file = "E2025-3_smt.xlsx"
sheet_name = 0   # sheet at position 0 (noted as "apr-giu 2025")
header_row = 16  # Excel row 17 (zero-based 16)
cols = [*range(2, 21)]  # C (2) up to U (20), zero-based

# Label -> row position of the values
res_rows = {
    "EN €/kWh": 19,
    "FIX €/Y": 20,
    "POT €/kW/Y": 21,
}
nonres_rows = {
    "EN €/kWh": 28,
    "FIX €/Y": 29,
    "POT €/kW/Y": 30,
}

# === RAW READ ===
df_raw = pd.read_excel(excel_file, header=None, sheet_name=sheet_name)

# Labels of columns C:U, converted to stripped strings
headers = [str(h).strip() for h in df_raw.iloc[header_row, cols].tolist()]

# Column groups (positions are relative to `cols`, which starts at column C)
PE_cols = headers[:3]           # positions 0-2  -> columns C-E
PD_to_PPE = headers[3:8]        # positions 3-7  -> columns F-J
ME_cols = headers[8:11]         # positions 8-10 -> columns K-M
rest_cols = headers[11:]        # positions 11+  -> columns N-U

def extract_table(df, rows_map):
    """Build one ``{"descrizione", "valori"}`` entry per row in ``rows_map``.

    Reads the module-level ``cols``, ``PD_to_PPE`` and ``rest_cols``. Cells
    0-2 go under ``"PE"``, cells 8-10 under ``"Materia energia"``; cells 3-7
    and 11+ are stored at the top level under their header labels. A
    top-level key equal to ``"TOTALE"`` (ignoring case) is dropped.
    """
    def _entry(label, row_position):
        cells = df.iloc[row_position, cols].tolist()

        # Merge the nested groups and the flat header-keyed cells; later
        # entries win on a key clash, as with successive dict.update() calls.
        merged = {
            "PE": dict(zip(["F0", "F1", "F23"], cells[0:3])),
            "Materia energia": dict(zip(["Monorario", "F1", "F23"], cells[8:11])),
            **dict(zip(PD_to_PPE, cells[3:8])),
            **dict(zip(rest_cols, cells[11:])),
        }

        # Drop the "TOTALE" column if present.
        kept = {key: value for key, value in merged.items() if str(key).upper() != "TOTALE"}
        return {"descrizione": label, "valori": kept}

    return [_entry(label, position) for label, position in rows_map.items()]

# Extract both tables (residential first, then non-residential)
tab_res, tab_nonres = (
    extract_table(df_raw, mapping) for mapping in (res_rows, nonres_rows)
)

# Final JSON document, keyed by section title
result = {}
result["Abitazioni di residenza anagrafica"] = tab_res
result["Abitazioni diverse dalla residenza anagrafica"] = tab_nonres

# Indented UTF-8 output with non-ASCII characters left unescaped
with open("output.json", mode="w", encoding="utf-8") as f:
    json.dump(result, f, ensure_ascii=False, indent=4)

print("File JSON salvato in output.json")