## Preparation of final dataset
1. Loading files for Transport and Industry
2. Process industry data per subsector (Steel, Basic Chemicals, NMM)
3. Process transport data per subsectors and fuels (Electricity, Hydrogen, Gases, Liquids)
4. Prepare final excel file with one per each year and country merging all values

In [76]:
import pandas as pd
from collections import defaultdict
import os

1. Loading files for Transport and Industry

In [77]:
transport_file = os.path.join('..', 'REMIND', 'Results_REMIND_JRC.csv')
industry_path = os.path.join('Industry', 'Results_per_Country')

In [78]:
country_code = "AT"
# ['EU27', 'AT', 'BE', 'BG', 'CY', 
# 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES', 'FI', 'FR', 'HR', 
# 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'MT', 'NL', 'PL', 
# 'PT', 'RO', 'SE', 'SI', 'SK']

years = [2030, 2040, 2050]

In [79]:
output_path = os.path.join('..', 'Outputs', f'PtX_demand_{country_code}_2.xlsx')

In [80]:
def load_transport_data(filepath):
    df = pd.read_csv(filepath)
    df['Year'] = df['Year'].astype(int)
    return df

In [81]:
def load_industry_data(filepath):
    industry_data = []
    industry_files = [f for f in os.listdir(filepath) if f.endswith(".xlsx")]

    for file_name in industry_files:
        year, country = file_name.replace(".xlsx", "").split("_")
        file_path = os.path.join(filepath, file_name)
        df = pd.read_excel(file_path, index_col=0)
        df = df.apply(pd.to_numeric, errors='coerce').fillna(0)

        for material in df.index:
            for sector in df.columns:
                industry_data.append({
                    "Year": int(year),
                    "Country": country,
                    "Category": sector,
                    "Material": material.strip(),
                    "Value": df.loc[material, sector] * 3.6 * 0.000001
                })

    return pd.DataFrame(industry_data)

In [82]:
transport_df = load_transport_data(transport_file)
industry_df = load_industry_data(industry_path)

2. Process industry data per subsector (Steel, Basic Chemicals, NMM)

In [83]:
industry_mapping = {
    'Overall Demand': 'Overall Demand',
    'Hydrogen': 'Hydrogen',
    'Ammonia': 'Ammonia',
    'Methanol': 'Methanol',
    'Biomass': 'Biomass [Solid]',
    'Biogas': 'Biogenic Gases',
    'Other': 'Renewable Energy Carrier',
}

In [84]:
industry_output = defaultdict(lambda: defaultdict(float))

In [85]:
industry_filtered = industry_df[industry_df["Country"] == country_code]

In [86]:

for _, row in industry_filtered.iterrows():
    year = row["Year"]
    mat = row["Material"]
    fuel = industry_mapping.get(mat)
    if fuel:
        key = (fuel, year)
        industry_output[key][row["Category"]] += row["Value"]

In [87]:
industry_rows = []

for (fuel_group, year), sectors in industry_output.items():
    for sector, value in sectors.items():
        industry_rows.append({
            "FuelGroup": fuel_group,
            "Year": year,
            "IndustrySector": sector,
            "Value": value if value != 0 else None 
        })

industry_df = pd.DataFrame(industry_rows)

In [88]:
pivot_industry = industry_df.pivot_table(
    index=["FuelGroup", "Year"],
    columns="IndustrySector",
    values="Value",
    aggfunc="sum"
).reset_index()
pivot_industry

IndustrySector,FuelGroup,Year,Chemicals,Iron & steel,Non-metallic minerals
0,Ammonia,2030,0.00873,0.0,0.0
1,Ammonia,2040,0.009971,0.0,0.0
2,Ammonia,2050,0.011166,0.0,0.0
3,Biogenic Gases,2030,0.0,0.0,0.001482
4,Biogenic Gases,2040,0.0,0.0,0.006237
5,Biogenic Gases,2050,0.0,0.0,0.016343
6,Biomass [Solid],2030,0.0,0.0,0.012366
7,Biomass [Solid],2040,0.0,0.0,0.013906
8,Biomass [Solid],2050,0.0,0.0,0.015485
9,Hydrogen,2030,0.0,0.005276,0.0


3. Process transport data per subsectors and fuels (Electricity, Hydrogen, Gases, Liquids)

In [89]:
fuel_map = {
    'Liquids|Biomass': 'Biogenic Liquids',
    'Gases|Biomass': 'Biogenic Gases',
    'Liquids|Hydrogen': 'Synthetic Liquids',
    'Gases|Hydrogen': 'Synthetic Gases',
    'Hydrogen': 'Hydrogen',
}


In [90]:
transport_output = defaultdict(lambda: defaultdict(float))

In [91]:
categories = [
    "FE|Transport|Freight|Road|Heavy",
    "FE|Transport|Freight|Road|Light",
    "FE|Transport|Pass|Road|Bus",
    "FE|Transport|Pass|Road|LDV|Four Wheelers",
    "FE|Transport|Pass|Road|LDV|Two Wheelers",
    "FE|Transport|Pass|Domestic Aviation",
    "FE|Transport|Pass|Aviation",
    "FE|Transport|Pass|Rail",
    "FE|Transport|Freight|Rail",
    "FE|Transport|Bunkers|Freight|International Shipping",
    "FE|Transport|Freight|Domestic Shipping"               
]

transport_fuel_paths = [
    # Freight Road Heavy
    "FE|Transport|Freight|Road|Heavy|Electricity",
    "FE|Transport|Freight|Road|Heavy|Hydrogen",
    "FE|Transport|Freight|Road|Heavy|Gases",
    "FE|Transport|Freight|Road|Heavy|Gases|Biomass",
    "FE|Transport|Freight|Road|Heavy|Gases|Fossil",
    "FE|Transport|Freight|Road|Heavy|Gases|Hydrogen",
    "FE|Transport|Freight|Road|Heavy|Liquids",
    "FE|Transport|Freight|Road|Heavy|Liquids|Biomass",
    "FE|Transport|Freight|Road|Heavy|Liquids|Fossil",
    "FE|Transport|Freight|Road|Heavy|Liquids|Hydrogen",

    # Freight Road Light
    "FE|Transport|Freight|Road|Light|Electricity",
    "FE|Transport|Freight|Road|Light|Hydrogen",
    "FE|Transport|Freight|Road|Light|Gases",
    "FE|Transport|Freight|Road|Light|Gases|Biomass",
    "FE|Transport|Freight|Road|Light|Gases|Fossil",
    "FE|Transport|Freight|Road|Light|Gases|Hydrogen",
    "FE|Transport|Freight|Road|Light|Liquids",
    "FE|Transport|Freight|Road|Light|Liquids|Biomass",
    "FE|Transport|Freight|Road|Light|Liquids|Fossil",
    "FE|Transport|Freight|Road|Light|Liquids|Hydrogen",

    # Passenger Road Bus
    "FE|Transport|Pass|Road|Bus|Electricity",
    "FE|Transport|Pass|Road|Bus|Hydrogen",
    "FE|Transport|Pass|Road|Bus|Gases",
    "FE|Transport|Pass|Road|Bus|Gases|Biomass",
    "FE|Transport|Pass|Road|Bus|Gases|Fossil",
    "FE|Transport|Pass|Road|Bus|Gases|Hydrogen",
    "FE|Transport|Pass|Road|Bus|Liquids",
    "FE|Transport|Pass|Road|Bus|Liquids|Biomass",
    "FE|Transport|Pass|Road|Bus|Liquids|Fossil",
    "FE|Transport|Pass|Road|Bus|Liquids|Hydrogen",

    # Passenger Road LDV Four Wheelers
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Electricity",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Hydrogen",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Gases",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Gases|Biomass",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Gases|Fossil",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Gases|Hydrogen",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Liquids",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Liquids|Biomass",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Liquids|Fossil",
    "FE|Transport|Pass|Road|LDV|Four Wheelers|Liquids|Hydrogen",

    # Passenger Road LDV Two Wheelers
    "FE|Transport|Pass|Road|LDV|Two Wheelers|Electricity",
    "FE|Transport|Pass|Road|LDV|Two Wheelers|Liquids",
    "FE|Transport|Pass|Road|LDV|Two Wheelers|Liquids|Biomass",
    "FE|Transport|Pass|Road|LDV|Two Wheelers|Liquids|Fossil",
    "FE|Transport|Pass|Road|LDV|Two Wheelers|Liquids|Hydrogen",

    # Bunkers Freight International Shipping
    "FE|Transport|Bunkers|Freight|International Shipping|Liquids",

    # Freight Domestic Shipping
    "FE|Transport|Freight|Domestic Shipping|Liquids",
    "FE|Transport|Freight|Domestic Shipping|Liquids|Biomass",
    "FE|Transport|Freight|Domestic Shipping|Liquids|Fossil",
    "FE|Transport|Freight|Domestic Shipping|Liquids|Hydrogen",

    # Bunkers Pass International Aviation
    "FE|Transport|Bunkers|Pass|International Aviation|Liquids",

    # Passenger Domestic Aviation
    "FE|Transport|Pass|Domestic Aviation|Hydrogen",
    "FE|Transport|Pass|Domestic Aviation|Liquids",
    "FE|Transport|Pass|Domestic Aviation|Liquids|Biomass",
    "FE|Transport|Pass|Domestic Aviation|Liquids|Fossil",
    "FE|Transport|Pass|Domestic Aviation|Liquids|Hydrogen",

    # Passenger Aviation
    "FE|Transport|Pass|Aviation|Hydrogen",
    "FE|Transport|Pass|Aviation|Liquids",
    "FE|Transport|Pass|Aviation|Liquids|Biomass",
    "FE|Transport|Pass|Aviation|Liquids|Fossil",
    "FE|Transport|Pass|Aviation|Liquids|Hydrogen",

    # Passenger Rail
    "FE|Transport|Pass|Rail|Hydrogen",
    "FE|Transport|Pass|Rail|Liquids",
    "FE|Transport|Pass|Rail|Liquids|Biomass",
    "FE|Transport|Pass|Rail|Liquids|Fossil",
    "FE|Transport|Pass|Rail|Liquids|Hydrogen",

    # Freight Rail
    "FE|Transport|Freight|Rail|Hydrogen",
    "FE|Transport|Freight|Rail|Liquids", 
    "FE|Transport|Freight|Rail|Liquids|Biomass",
    "FE|Transport|Freight|Rail|Liquids|Fossil",
    "FE|Transport|Freight|Rail|Liquids|Hydrogen",
]

In [92]:
def extract_main_and_fuel(category_str, categories):
    categories_sorted = sorted(categories, key=len, reverse=True)
    
    for cat_prefix in categories_sorted:
        if category_str.startswith(cat_prefix):
            # Fuel is whatever comes after the prefix (skip the '|')
            remainder = category_str[len(cat_prefix):]
            if remainder.startswith("|"):
                remainder = remainder[1:]  # remove leading '|'
            return cat_prefix, remainder
    # If no prefix matched, return None and full string as fuel
    return None, category_str

In [93]:
transport_filtered = transport_df[(transport_df["Country"] == country_code) & 
                                  (transport_df["Year"].isin(years))]

In [94]:
transport_filtered[["MainCategory", "Fuel"]] = transport_filtered["Category"].apply(lambda x: pd.Series(extract_main_and_fuel(x, categories)))
fuel_transport = transport_filtered[(transport_filtered["Fuel"].notna()) & 
                              (transport_filtered["Country"] == country_code)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  transport_filtered[["MainCategory", "Fuel"]] = transport_filtered["Category"].apply(lambda x: pd.Series(extract_main_and_fuel(x, categories)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  transport_filtered[["MainCategory", "Fuel"]] = transport_filtered["Category"].apply(lambda x: pd.Series(extract_main_and_fuel(x, categories)))


In [95]:
main_category_mapping = {
    "FE|Transport|Freight|Road|Heavy": "Freight Road",
    "FE|Transport|Freight|Road|Light": "Freight Road",
    "FE|Transport|Pass|Road|Bus": "Pass Road",
    "FE|Transport|Pass|Road|LDV|Four Wheelers": "Pass Road",
    "FE|Transport|Pass|Road|LDV|Two Wheelers": "Pass Road",
    "FE|Transport|Pass|Domestic Aviation": "Pass Aviation",
    "FE|Transport|Pass|Aviation": "Pass Aviation",
    "FE|Transport|Pass|Rail": "Pass Rail",
    "FE|Transport|Freight|Rail": "Freight Rail",
    "FE|Transport|Bunkers|Freight|International Shipping": "Maritime",
    "FE|Transport|Freight|Domestic Shipping": "Maritime",
}

In [96]:
fuel_transport["TransportSector"] = fuel_transport["MainCategory"].map(main_category_mapping)

In [97]:
fuel_transport["FuelGroup"] = fuel_transport["Fuel"].map(fuel_map)

In [98]:
fuel_transport = fuel_transport[fuel_transport["FuelGroup"].notna()]

In [99]:
agg_transport = fuel_transport.groupby(
    ["Year", "FuelGroup", "TransportSector"]
)["Value"].sum().reset_index()

In [100]:
pivot_transport = agg_transport.pivot_table(
    index=["FuelGroup", "Year"],
    columns="TransportSector",
    values="Value",
    aggfunc="sum"
).reset_index()
pivot_transport

TransportSector,FuelGroup,Year,Freight Rail,Freight Road,Maritime,Pass Aviation,Pass Rail,Pass Road
0,Biogenic Gases,2030,,5.035199e-05,,,,0.0001468597
1,Biogenic Gases,2040,,7.116808e-05,,,,0.000181119
2,Biogenic Gases,2050,,1.931652e-05,,,,3.081962e-05
3,Biogenic Liquids,2030,0.0004216652,0.007502952,0.0001424132,1.93582e-05,0.0002490546,0.008898802
4,Biogenic Liquids,2040,0.0005122548,0.005149383,0.0001870184,3.005616e-05,0.0004060001,0.003933003
5,Biogenic Liquids,2050,0.0006588408,0.001343317,0.0002539892,4.748745e-05,0.0007254509,0.000504343
6,Hydrogen,2030,,0.0001827231,,1.11523e-09,,0.0004855638
7,Hydrogen,2040,,0.0002938141,,9.335706e-08,,0.0023341
8,Hydrogen,2050,,0.0004748991,,1.582327e-07,,0.003237152
9,Synthetic Gases,2030,,0.0,,,,0.0


4. Prepare final excel file with one per each year and country merging all values

In [101]:
pivot_transport = pivot_transport.rename(columns={"TransportSector": "SectorGroup"})
pivot_industry = pivot_industry.rename(columns={"FuelGroup": "FuelGroup", "Year": "Year"})

In [102]:
pivot_industry["SectorGroup"] = "Industry"
pivot_transport["SectorGroup"] = "Transport"

In [103]:
column_order = [
    "Iron & steel", "Chemicals", "Non-metallic minerals",  # Industry
    "Pass Road", "Pass Rail", "Pass Aviation",             # Transport (passenger)
    "Freight Road", "Freight Rail",                        # Transport (freight)
    "Maritime"                                             # Maritime (bunkers)
]

In [104]:
all_columns = set(column_order + ["FuelGroup", "Year", "SectorGroup"])

# Add missing columns as None
for col in all_columns:
    if col not in pivot_transport.columns:
        pivot_transport[col] = None
    if col not in pivot_industry.columns:
        pivot_industry[col] = None

In [105]:
combined_df = pd.concat([pivot_transport, pivot_industry], ignore_index=True)

  combined_df = pd.concat([pivot_transport, pivot_industry], ignore_index=True)


In [106]:
# Ignore Overall Demand in this step
fuel_order = [
    "Hydrogen", "Methanol", "Ammonia", "Synthetic Gases",
    "Biogenic Gases", "Synthetic Liquids", "Biogenic Liquids",
    "Biomass [Solid]", "Renewable Energy Carrier"
]

In [107]:
for year in [2030, 2040, 2050]:
    # Calculate the sum for EVERY column (Industry AND Transport) for that year
    sector_totals = combined_df[(combined_df["Year"] == year) & 
                             (combined_df["FuelGroup"].isin(fuel_order))][column_order].sum()
    
    # Place that sum into the "Overall Demand" row for that year
    combined_df.loc[(combined_df["Year"] == year) & 
                 (combined_df["FuelGroup"] == "Overall Demand"), column_order] = sector_totals.values

In [108]:
fuel_order_map = {fuel: i for i, fuel in enumerate(fuel_order)}
combined_df["FuelSortOrder"] = combined_df["FuelGroup"].map(fuel_order_map)

In [109]:
final_df = combined_df.drop(columns=["SectorGroup", "FuelSortOrder"])  # drop SectorGroup
final_df = final_df.groupby(["FuelGroup", "Year"], as_index=False).sum()

In [110]:
def clean_zeros_and_nans(df):
    return df.applymap(lambda x: "" if pd.isna(x) or x == 0 else x)

In [111]:
clean_df = clean_zeros_and_nans(final_df)

  return df.applymap(lambda x: "" if pd.isna(x) or x == 0 else x)


In [112]:
ordered_cols = ["FuelGroup", "Year"] + column_order
final_correct_order = clean_df[ordered_cols]
final_correct_order["FuelSortOrder"] = final_correct_order["FuelGroup"].map(fuel_order_map)
final_correct_order = final_correct_order.sort_values(by=["Year", "FuelSortOrder"]).drop(columns=["FuelSortOrder"])
final_correct_order

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_correct_order["FuelSortOrder"] = final_correct_order["FuelGroup"].map(fuel_order_map)


Unnamed: 0,FuelGroup,Year,Iron & steel,Chemicals,Non-metallic minerals,Pass Road,Pass Rail,Pass Aviation,Freight Road,Freight Rail,Maritime
12,Hydrogen,2030,0.005276,,,0.000486,,0.0,0.000183,,
15,Methanol,2030,,0.000122,,,,,,,
0,Ammonia,2030,,0.00873,,,,,,,
24,Synthetic Gases,2030,,,,,,,,,
3,Biogenic Gases,2030,,,0.001482,0.000147,,,5e-05,,
27,Synthetic Liquids,2030,,,,,,,,,
6,Biogenic Liquids,2030,,,,0.008899,0.000249,1.9e-05,0.007503,0.000422,0.000142
9,Biomass [Solid],2030,,,0.012366,,,,,,
21,Renewable Energy Carrier,2030,,,0.000457,,,,,,
18,Overall Demand,2030,0.005276,0.008852,0.014305,0.009531,0.000249,1.9e-05,0.007736,0.000422,0.000142


In [113]:
final_correct_order.to_excel(output_path, index=False, na_rep="")