
# Legacy inputs from a GDX file
Run the cells below to convert a legacy `.gdx` input file into the CSV directory tree expected by the old workflow.



## Step 1 · Load dependencies
Import the required libraries. Ensure `gams.transfer` is available in your environment.


In [9]:

from pathlib import Path
from typing import Optional

import pandas as pd

try:
    import gams.transfer as gt
except ImportError as err:
    raise ImportError("Install the GAMS Python API before running this notebook.") from err



## Step 2 · Configure paths
Keep all exports inside the local `./output` folder. Update the placeholders before continuing.


In [10]:

# --- User-editable settings --------------------------------------------------
# TODO: set this to the location of the legacy GDX file to convert
GDX_PATH = Path("input/input.gdx")  # sample location used by default

# TODO: pick the subfolder of ./output that receives the generated CSV files
OUTPUT_BASE = Path("output")
TARGET_FOLDER = "data_test_from_gdx"  # rename freely

# CSV table translating legacy GDX symbol names into the expected CSV names
MAPPING_PATH = Path("input/symbol_mapping.csv")

OVERWRITE = True  # switch to False to keep CSV files that already exist

# --- Validation --------------------------------------------------------------
# The mapping table is checked first, then the GDX file, then the export location.
if not MAPPING_PATH.exists():
    raise FileNotFoundError(f"Missing mapping table: {MAPPING_PATH}. Populate it before continuing.")

# Resolve both locations to absolute paths so they can be compared reliably.
EXPORT_ROOT = OUTPUT_BASE.joinpath(TARGET_FOLDER).resolve()
EXPECTED_ROOT = Path.cwd().joinpath("output").resolve()

if not GDX_PATH.exists():
    raise FileNotFoundError(f"Update GDX_PATH to point to your legacy file. Missing: {GDX_PATH}")

# relative_to() raises ValueError when EXPORT_ROOT is not located under EXPECTED_ROOT.
try:
    EXPORT_ROOT.relative_to(EXPECTED_ROOT)
except ValueError as err:
    raise ValueError("Choose TARGET_FOLDER inside the ./output directory.") from err

# --- Output folders ----------------------------------------------------------
EXTRAS_ROOT = EXPORT_ROOT / "extras"
EXPORT_ROOT.mkdir(parents=True, exist_ok=True)
EXTRAS_ROOT.mkdir(parents=True, exist_ok=True)



## Step 3 · Legacy CSV layout (hard-coded)
The structure below mirrors the historical `input_readers.gms` file. No parsing is performed at runtime.


In [11]:
def _years(first, last):
    """Return the years ``first``..``last`` (inclusive) as column-label strings."""
    return [str(year) for year in range(first, last + 1)]


def _hours(first=1, last=24, width=0):
    """Return the labels 't<first>'..'t<last>' (inclusive), zero-padded to ``width`` digits."""
    return ['t' + str(hour).zfill(width) for hour in range(first, last + 1)]


def _layout_entry(primary_symbol, relative_path, column_names, index_columns,
                  header=(), values=(), kind='par', symbols=None):
    """Build one layout record.

    The dict always carries the same eight keys, inserted in the same order,
    and every list stored in it is a fresh copy so entries never share state.
    ``symbols`` defaults to ``[primary_symbol]``.
    """
    return {
        'column_names': list(column_names),
        'header': list(header),
        'indexColumns': list(index_columns),
        'primary_symbol': primary_symbol,
        'relative_path': relative_path,
        'symbols': [primary_symbol] if symbols is None else list(symbols),
        'type': kind,
        'valueColumns': list(values),
    }


# One record per legacy CSV file, grouped by output subfolder.
CSV_LAYOUT = [
    # --- constraint/ ---------------------------------------------------------
    _layout_entry('pCarbonPrice', 'constraint/pCarbonPrice.csv',
                  ['year', 'value'], [1], values=[2]),
    _layout_entry('pEmissionsCountry', 'constraint/pEmissionsCountry.csv',
                  [''] + _years(2025, 2050), [1], header=[1]),
    _layout_entry('pEmissionsTotal', 'constraint/pEmissionsTotal.csv',
                  ['y', 'value'], [1], values=[2]),
    _layout_entry('pMaxFuellimit', 'constraint/pMaxFuellimit.csv',
                  ['zone', 'fuel'] + _years(2025, 2050), [1, 2], header=[1]),

    # --- h2/ -----------------------------------------------------------------
    _layout_entry('pAvailabilityH2', 'h2/pAvailabilityH2.csv',
                  ['', 'dry', 'wet'], [1], header=[1]),
    _layout_entry('pCapexTrajectoryH2', 'h2/pCapexTrajectoryH2.csv',
                  [''] + _years(2024, 2050), [1], header=[1]),
    _layout_entry('pExternalH2', 'h2/pExternalH2.csv',
                  ['ZONE', 'Season'] + _years(2020, 2040), [1, 2], header=[1]),
    _layout_entry('pFuelDataH2', 'h2/pFuelDataH2.csv',
                  ['Type of fuel', 'Hydrogen index'], [1], values=[2]),
    _layout_entry('pH2DataExcel', 'h2/pH2DataExcel.csv',
                  ['gen', 'StYr', 'RetrYr', 'Zone', 'Capacity', 'MinLimitShare',
                   'HeatRate', 'RampUpRate', 'RampDnRate', 'OverLoadFactor',
                   'ResLimShare', 'FOMperMW', 'VOM', 'ReserveCost', 'Capex',
                   'Life', 'BuildLimitperYear', 'MaxTotalBuild', 'DescreteCap',
                   'Type', 'fuel1', 'fuel2', 'Status', 'HeatRate2', 'VRETech',
                   'UnitSize', 'pvwsto', 'stowpv', 'RE_Flag', 'CapacityCredit',
                   'Error'],
                  [1], header=[1]),

    # --- load/ ---------------------------------------------------------------
    _layout_entry('pDemandData', 'load/pDemandData.csv',
                  ['zone', 'q', 'd', 'y'] + _hours(4, 24, width=2),
                  [1, 2, 3, 4], header=[1]),
    _layout_entry('pDemandForecast', 'load/pDemandForecast.csv',
                  ['zone', 'type', '2025', '2030', '2035', '2040', '2045', '2050'],
                  [1, 2], header=[1]),
    _layout_entry('pDemandProfile', 'load/pDemandProfile.csv',
                  ['zone', 'season', 'daytype'] + _hours(), [1, 2, 3], header=[1]),
    _layout_entry('pEnergyEfficiencyFactor', 'load/pEnergyEfficiencyFactor.csv',
                  _years(2025, 2050), [1], header=[1]),
    _layout_entry('sRelevant', 'load/sRelevant.csv', ['d'], [1], kind='set'),

    # --- export root ---------------------------------------------------------
    _layout_entry('pHours', 'pHours.csv',
                  ['season', 'daytype'] + _hours(), [1, 2], header=[1]),
    _layout_entry('pSettings', 'pSettings.csv',
                  ['Parameter', 'Abbreviation', 'Value'], [2], values=[3]),

    # --- reserve/ ------------------------------------------------------------
    _layout_entry('pPlanningReserveMargin', 'reserve/pPlanningReserveMargin.csv',
                  [''] + _years(2025, 2050), [1], values=[2]),
    _layout_entry('pSpinningReserveReqCountry', 'reserve/pSpinningReserveReqCountry.csv',
                  [''] + _years(2025, 2050), [1], header=[1]),
    _layout_entry('pSpinningReserveReqSystem', 'reserve/pSpinningReserveReqSystem.csv',
                  ['Year', 'MW'], [1], values=[2]),

    # --- resources/ ----------------------------------------------------------
    _layout_entry('ftfindex', 'resources/ftfindex.csv',
                  ['Fuel', 'Index'], [1], values=[2]),
    _layout_entry('pFuelCarbonContent', 'resources/pFuelCarbonContent.csv',
                  ['Fuel', 'value'], [1], values=[2]),
    _layout_entry('pGenDataInputHeader', 'resources/pGenDataInputHeader.csv',
                  ['pGenDataInputHeader'], [1], kind='set'),
    _layout_entry('pH2Header', 'resources/pH2Header.csv',
                  ['pH2Header'], [1], kind='set'),
    _layout_entry('pSettingsHeader', 'resources/pSettingsHeader.csv',
                  ['Abbreviation'], [1], kind='set'),
    _layout_entry('pStoreDataHeader', 'resources/pStoreDataHeader.csv',
                  ['pStoreDataHeader'], [1], kind='set'),
    _layout_entry('pTechData', 'resources/pTechData.csv',
                  ['Technology', 'Hourly Variation', 'RE Technology'], [1], header=[1]),

    # --- supply/ -------------------------------------------------------------
    _layout_entry('pAvailability', 'supply/pAvailabilityCustom.csv',
                  ['gen', 'Q1', 'Q2'], [1], header=[1]),
    _layout_entry('pAvailabilityDefault', 'supply/pAvailabilityDefault.csv',
                  ['zone', 'tech', 'fuel', 'Q1', 'Q2'], [1, 2, 3], header=[1]),
    _layout_entry('pCSPData', 'supply/pCSPData.csv',
                  ['gen', 'attribute', 'Capacity', 'Capex', 'FixedOM', 'VOM',
                   'Availability', 'Efficiency'],
                  [1, 2], header=[1]),
    _layout_entry('pCapexTrajectories', 'supply/pCapexTrajectoriesCustom.csv',
                  ['gen'] + _years(2023, 2050), [1], header=[1]),
    _layout_entry('pCapexTrajectoriesDefault', 'supply/pCapexTrajectoriesDefault.csv',
                  ['zone', 'tech', 'fuel'] + _years(2023, 2050), [1, 2, 3], header=[1]),
    _layout_entry('pFuelPrice', 'supply/pFuelPrice.csv',
                  ['zone', 'fuel'] + _years(2019, 2030) + ['2035', '2040'],
                  [1, 2], header=[1]),
    _layout_entry('pGenDataInput', 'supply/pGenDataInput.csv',
                  ['gen', 'zone', 'tech', 'fuel', 'Status', 'StYr', 'RetrYr',
                   'Capacity', 'DescreteCap', 'fuel2', 'HeatRate2',
                   'BuildLimitperYear', 'Life', 'MinLimitShare', 'HeatRate',
                   'RampUpRate', 'RampDnRate', 'OverLoadFactor', 'ResLimShare',
                   'Capex', 'FOMperMW', 'VOM', 'ReserveCost'],
                  [1, 2, 3, 4], header=[1], symbols=['gmap', 'pGenDataInput']),
    _layout_entry('pGenDataInputDefault', 'supply/pGenDataInputDefault.csv',
                  ['zone', 'tech', 'fuel', 'MinLimitShare', 'HeatRate', 'RampUpRate',
                   'RampDnRate', 'OverLoadFactor', 'ResLimShare', 'FOMperMW', 'VOM',
                   'ReserveCost', 'Capex', 'Life'],
                  [1, 2, 3], header=[1]),
    _layout_entry('pStorDataExcel', 'supply/pStorDataExcel.csv',
                  ['gen', 'Linked plant', 'CapacityMWh', 'CapexMWh', 'VOMMWh',
                   'FixedOMMWh', 'Efficiency'],
                  [1, 2], header=[1]),
    _layout_entry('pVREProfile', 'supply/pVREProfile.csv',
                  ['zone', 'tech', 'season', 'daytype'] + _hours(),
                  [1, 2, 3, 4], header=[1]),
    _layout_entry('pVREgenProfile', 'supply/pVREgenProfile.csv',
                  ['gen', 'q', 'd'] + _hours(), [1, 2, 3], header=[1]),

    # --- trade/ --------------------------------------------------------------
    _layout_entry('pExtTransferLimit', 'trade/pExtTransferLimit.csv',
                  ['Internal zone', 'External zone', 'Seasons', 'Import-Export']
                  + _years(2024, 2040),
                  [1, 2, 3, 4], header=[1]),
    _layout_entry('pLossFactorInternal', 'trade/pLossFactorInternal.csv',
                  ['zone1', 'zone2'] + _years(2023, 2050), [1, 2], header=[1]),
    _layout_entry('pMaxAnnualExternalTradeShare', 'trade/pMaxAnnualExternalTradeShare.csv',
                  ['y', 'Angola', 'Burundi', 'Cameroon', 'CAR', 'Chad', 'Congo', 'DRC',
                   'EquatorialGuinea', 'Gabon', 'Rwanda', 'STP'],
                  [1], header=[1]),
    _layout_entry('pMaxPriceImportShare', 'trade/pMaxPriceImportShare.csv',
                  ['y', 'Angola', 'Burundi', 'Cameroon', 'CAR', 'Chad', 'Congo', 'DRC',
                   'EquatorialGuinea', 'Gabon', 'Rwanda', 'STP'],
                  [1], header=[1]),
    _layout_entry('pMinImport', 'trade/pMinImport.csv',
                  ['zone1', 'zone2'] + _years(2025, 2050), [1, 2], header=[1]),
    _layout_entry('pNewTransmission', 'trade/pNewTransmission.csv',
                  ['From', 'To', 'EarliestEntry', 'MaximumNumOfLines', 'CapacityPerLine',
                   'CostPerLine', 'Life', 'Status'],
                  [1, 2], header=[1]),
    _layout_entry('pTradePrice', 'trade/pTradePrice.csv',
                  ['zext', 'q', 'daytype', 'y'] + _hours(), [1, 2, 3, 4], header=[1]),
    _layout_entry('pTransferLimit', 'trade/pTransferLimit.csv',
                  ['From', 'To', 'q'] + _years(2023, 2050), [1, 2, 3], header=[1]),
    _layout_entry('zext', 'trade/zext.csv', ['zone'], [1], kind='set'),

    # --- export root (sets) --------------------------------------------------
    _layout_entry('y', 'y.csv', ['y'], [1], kind='set'),
    _layout_entry('zcmap', 'zcmap.csv', ['zone', 'country'], [1, 2], kind='set'),
]

# Index every layout entry under each GDX symbol it lists; if two entries list
# the same symbol, the one appearing later in CSV_LAYOUT wins.
LAYOUT_BY_SYMBOL = {
    symbol: entry
    for entry in CSV_LAYOUT
    for symbol in entry['symbols']
}

# The primary symbol of every CSV file described by the layout.
EXPECTED_SYMBOLS = set(entry['primary_symbol'] for entry in CSV_LAYOUT)

# NOTE(review): presumably the symbols a legacy GDX is allowed to omit —
# confirm against the export cell that consumes this set.
OPTIONAL_SYMBOLS = {
    'ftfindex',
    'pAvailability', 'pAvailabilityDefault', 'pAvailabilityH2',
    'pCSPData',
    'pCapexTrajectories', 'pCapexTrajectoriesDefault', 'pCapexTrajectoryH2',
    'pCarbonPrice',
    'pDemandData', 'pDemandForecast', 'pDemandProfile',
    'pEmissionsCountry', 'pEmissionsTotal',
    'pEnergyEfficiencyFactor',
    'pExtTransferLimit', 'pExternalH2',
    'pFuelCarbonContent', 'pFuelDataH2', 'pFuelPrice',
    'pGenDataInputDefault',
    'pH2DataExcel',
    'pHours',
    'pLossFactorInternal',
    'pMaxFuellimit', 'pMaxPriceImportShare',
    'pMinImport',
    'pNewTransmission',
    'pPlanningReserveMargin',
    'pSpinningReserveReqCountry', 'pSpinningReserveReqSystem',
    'pStorDataExcel',
    'pTechData',
    'pTradePrice', 'pTransferLimit',
    'pVREProfile', 'pVREgenProfile',
    'y', 'zcmap', 'zext',
}

# NOTE(review): the name suggests these symbols need their column set checked —
# confirm against the cell that reads this constant.
CRITICAL_SYMBOLS_WITH_COLUMNS = {'pGenDataInput', 'pGenDataInputDefault'}



## Step 4 · Load the symbol mapping
The mapping table lives in `input/symbol_mapping.csv`. Edit it when a legacy GDX uses different symbol names; a blank `gdx_symbol` cell means the CSV expects the same-named symbol. Optional inputs such as `pCarbonPrice` or `pEmissionsCountry` still need a CSV file that contains only the correct headers, so keep their rows in the mapping even if the legacy GDX omits them.


In [12]:

# Load the hand-maintained table that maps each CSV name to its legacy GDX symbol.
symbol_mapping_df = pd.read_csv(MAPPING_PATH)
if {'csv_symbol', 'gdx_symbol'} - set(symbol_mapping_df.columns):
    raise ValueError("symbol_mapping.csv must contain 'csv_symbol' and 'gdx_symbol' columns")

# Normalise the two key columns before using them:
#   * blank cells arrive as NaN -> '' so the fallback below can rely on truthiness;
#   * stray whitespace from manual editing is stripped, so "pHours " still matches
#     "pHours" and a whitespace-only gdx_symbol counts as blank;
#   * rows without a csv_symbol are dropped instead of adding a '' key;
#   * when a csv_symbol is listed more than once, the last row wins.
symbol_mapping_df = (
    symbol_mapping_df
    .fillna('')
    .assign(
        csv_symbol=lambda frame: frame['csv_symbol'].astype(str).str.strip(),
        gdx_symbol=lambda frame: frame['gdx_symbol'].astype(str).str.strip(),
    )
    .loc[lambda frame: frame['csv_symbol'] != '']
    .drop_duplicates(subset='csv_symbol', keep='last')
)

# CSV symbol -> GDX symbol; a blank gdx_symbol means "same name as the CSV symbol".
symbol_mapping = {
    row.csv_symbol: (row.gdx_symbol or row.csv_symbol)
    for row in symbol_mapping_df.itertuples(index=False)
}

# Report (without failing) any layout entry that has no row in the mapping table.
missing_mapping_rows = [
    entry['primary_symbol']
    for entry in CSV_LAYOUT
    if entry['primary_symbol'] not in symbol_mapping
]
if missing_mapping_rows:
    print("Mapping table missing entries for:", ', '.join(missing_mapping_rows))



## Step 5 · Read the GDX file
Load the legacy GDX once so the symbols are available for export.


In [13]:

# Read the legacy GDX (given as an absolute path) into a fresh gams.transfer container.
gdx_file = str(GDX_PATH.resolve())
container = gt.Container()
container.read(gdx_file)

# Names of the symbols held by the container; the cell displays how many there are.
loaded_symbols = set(container.data)
len(loaded_symbols)


49


## Step 6 · Helper functions
These utilities reshape each symbol so it follows the legacy CSV conventions.


In [14]:

def find_value_column(df: pd.DataFrame) -> Optional[str]:
    """Locate the value column of a records frame.

    A column labelled ``"value"`` is preferred; ``"Value"`` is accepted as a
    second choice.

    Returns:
        The matching column label, or ``None`` when neither label is present.
    """
    labels = df.columns
    return next((name for name in ("value", "Value") if name in labels), None)

def format_set(df: pd.DataFrame) -> pd.DataFrame:
    """Deduplicate set records and order them by every column.

    A frame without columns cannot be sorted, so it is only copied. In both
    cases the result carries a fresh 0..n-1 index.
    """
    sort_keys = list(df.columns)
    if sort_keys:
        unique_rows = df.drop_duplicates()
        result = unique_rows.sort_values(sort_keys)
    else:
        result = df.copy()
    return result.reset_index(drop=True)

def format_header_table(df: pd.DataFrame, spec: dict) -> pd.DataFrame:
    """Pivot a parameter that contains header columns into the expected layout.

    The non-value columns of ``df`` are split positionally: the first
    ``len(spec['indexColumns'])`` become row keys and the next
    ``len(spec['header'])`` become the header columns whose labels turn into
    output column names. Only the lengths of those two spec lists are used here.

    Args:
        df: Long-format records for one symbol.
        spec: Layout entry providing ``'indexColumns'`` and ``'header'``.

    Returns:
        * With header columns and a value column: a wide frame with one row per
          index combination (first value wins on duplicates). Labels from
          several header columns are joined with ``'_'``; every output column
          name is a string.
        * With a value column but no header columns: the selected columns with
          the value column renamed to ``'value'``.
        * Without a value column: the selected columns unchanged.
    """
    value_col = find_value_column(df)
    domain_cols = [col for col in df.columns if col != value_col]
    index_cols = domain_cols[: len(spec['indexColumns'])]
    # Header columns start right after the index columns actually available.
    header_cols = domain_cols[len(index_cols): len(index_cols) + len(spec['header'])]
    base_cols = index_cols + header_cols + ([value_col] if value_col else [])
    data = df[base_cols] if base_cols else df.copy()

    if header_cols and value_col:
        pivot = data.pivot_table(
            index=index_cols,
            columns=header_cols,
            values=value_col,
            aggfunc='first',
            observed=False,
        )
        # Flatten to plain string names. Only tuples (several header columns)
        # are joined; a scalar label that is not already a string (e.g. an
        # integer year) is stringified instead of being iterated, which used
        # to raise TypeError.
        pivot.columns = [
            '_'.join(map(str, label)) if isinstance(label, tuple) else str(label)
            for label in pivot.columns
        ]
        return pivot.reset_index().reset_index(drop=True)
    if value_col:
        return data.rename(columns={value_col: 'value'}).reset_index(drop=True)
    return data.reset_index(drop=True)

def format_value_table(df: pd.DataFrame, spec: dict, csv_symbol: str, gdx_symbol: str) -> pd.DataFrame:
    """Keep the leading domain columns and the value column of a parameter.

    Args:
        df: Records for one symbol.
        spec: Layout entry; only ``len(spec['indexColumns'])`` is used, to
            decide how many non-value columns to keep (taken in frame order).
        csv_symbol: CSV-side name, used in the error message only.
        gdx_symbol: GDX-side name, used in the error message only.

    Returns:
        The selected columns with a fresh 0..n-1 index.

    Raises:
        KeyError: If ``df`` has neither a ``value`` nor a ``Value`` column.
    """
    value_col = find_value_column(df)
    if value_col is None:
        raise KeyError(f"No value column found for '{gdx_symbol}' mapped to '{csv_symbol}'")

    # NOTE(review): columns keep the names they have in ``df`` -- nothing is
    # renamed here, including a capitalised ``Value`` column. Confirm the
    # caller's expected column names line up.
    n_index = len(spec['indexColumns'])
    leading_domains = [name for name in df.columns if name != value_col][:n_index]
    selected = df[[*leading_domains, value_col]]
    return selected.reset_index(drop=True)

def build_frame(gdx_symbol: str, csv_symbol: str, spec: dict) -> Optional[pd.DataFrame]:
    """Look up one symbol in the loaded container and reshape it per ``spec``.

    Dispatch:
        * ``spec['type'] == 'set'``      -> ``format_set``
        * truthy ``spec['valueColumns']`` -> ``format_value_table``
        * anything else                  -> ``format_header_table``

    Returns:
        The reshaped frame, or ``None`` when the symbol is not in the
        container or its ``records`` attribute is ``None``.
    """
    records = container[gdx_symbol].records if gdx_symbol in container else None
    if records is None:
        return None

    # Work on a copy so the container's own records are never touched.
    data = records.copy()
    if spec['type'] == 'set':
        shaped = format_set(data)
    elif spec['valueColumns']:
        shaped = format_value_table(data, spec, csv_symbol, gdx_symbol)
    else:
        shaped = format_header_table(data, spec)
    return shaped

def fallback_frame(gdx_symbol: str) -> Optional[pd.DataFrame]:
    """Export a symbol as-is when no legacy layout describes it.

    Returns:
        A copy of the symbol's records with a fresh 0..n-1 index, or ``None``
        when the symbol is not in the container or has no records.
    """
    if gdx_symbol in container:
        records = container[gdx_symbol].records
        if records is not None:
            return records.copy().reset_index(drop=True)
    return None

def empty_frame_from_spec(entry: dict) -> pd.DataFrame:
    """Build a zero-row frame whose columns are ``entry['column_names']``.

    An entry without ``'column_names'`` yields a frame with no columns.
    """
    header = list(entry.get('column_names', []))
    return pd.DataFrame(columns=header)



## Step 7 · Write the legacy CSV tree
Iterate over the hard-coded layout first, then place any extra symbols under `extras/`.


In [15]:

# Bookkeeping filled in while exporting; the review cell reports on these.
summary = []            # one record per CSV written from the layout
extras_written = []     # one record per CSV written under extras/
skipped = []            # existing files left untouched because OVERWRITE is False
missing_in_gdx = []     # required symbols that could not be read
optional_stubbed = []   # optional symbols replaced by a header-only CSV
empty_in_gdx = []       # symbols that were read but contain no rows
column_warnings = []    # (symbol, missing columns) for critical symbols
used_gdx_symbols = set()

for entry in CSV_LAYOUT:
    csv_symbol = entry['primary_symbol']
    gdx_symbol = symbol_mapping.get(csv_symbol, csv_symbol)
    expected_cols = list(entry.get('column_names', []))

    frame = build_frame(gdx_symbol, csv_symbol, entry)
    stubbed_optional = frame is None
    if stubbed_optional:
        if csv_symbol not in OPTIONAL_SYMBOLS:
            missing_in_gdx.append((csv_symbol, gdx_symbol))
            continue
        frame = empty_frame_from_spec(entry)
        optional_stubbed.append((csv_symbol, gdx_symbol))
    else:
        # Claim the symbol (and its aliases) as soon as it is found. Doing this
        # only after a successful write meant that a file skipped because
        # OVERWRITE is False was exported a second time under extras/.
        used_gdx_symbols.add(gdx_symbol)
        used_gdx_symbols.update(symbol_mapping.get(alias, alias) for alias in entry['symbols'])
        if frame.empty:
            empty_in_gdx.append((csv_symbol, gdx_symbol))

    if expected_cols:
        missing_cols = [col for col in expected_cols if col not in frame.columns]
        if missing_cols and csv_symbol in CRITICAL_SYMBOLS_WITH_COLUMNS:
            column_warnings.append((csv_symbol, missing_cols))
        # reindex orders the columns as expected, drops unexpected ones and
        # adds the missing ones as all-NaN columns (written as blanks below).
        frame = frame.reindex(columns=expected_cols)

    target_path = EXPORT_ROOT / Path(entry['relative_path'])
    target_path.parent.mkdir(parents=True, exist_ok=True)
    if not OVERWRITE and target_path.exists():
        skipped.append(target_path)
        continue

    frame.to_csv(target_path, index=False, na_rep='')
    summary.append({
        'csv_symbol': csv_symbol,
        'gdx_symbol': gdx_symbol,
        'rows': len(frame),
        'path': target_path.relative_to(EXPORT_ROOT).as_posix(),
    })

# Everything the layout did not consume goes to extras/. GAMS symbol names are
# case-insensitive, so compare case-folded names: a mapping entry that differs
# from the GDX spelling only by case must not be exported twice.
used_folded = {name.casefold() for name in used_gdx_symbols}
extras_candidates = sorted(name for name in loaded_symbols if name.casefold() not in used_folded)
for gdx_symbol in extras_candidates:
    frame = fallback_frame(gdx_symbol)
    if frame is None:
        continue
    target_path = EXTRAS_ROOT / f"{gdx_symbol}.csv"
    if not OVERWRITE and target_path.exists():
        skipped.append(target_path)
        continue
    frame.to_csv(target_path, index=False, na_rep='')
    extras_written.append({
        'csv_symbol': '',
        'gdx_symbol': gdx_symbol,
        'rows': len(frame),
        'path': target_path.relative_to(EXPORT_ROOT).as_posix(),
    })

# Explicit columns keep the sort from raising KeyError when nothing was written
# (for example when every target already exists and OVERWRITE is False).
pd.DataFrame(
    summary + extras_written,
    columns=['csv_symbol', 'gdx_symbol', 'rows', 'path'],
).sort_values('path')


Unnamed: 0,csv_symbol,gdx_symbol,rows,path
0,pCarbonPrice,pCarbonPrice,0,constraint/pCarbonPrice.csv
1,pEmissionsCountry,pEmissionsCountry,0,constraint/pEmissionsCountry.csv
2,pEmissionsTotal,pEmissionsTotal,0,constraint/pEmissionsTotal.csv
3,pMaxFuellimit,pMaxFuellimit,0,constraint/pMaxFuellimit.csv
4,pAvailabilityH2,pAvailabilityH2,3,h2/pAvailabilityH2.csv
5,pCapexTrajectoryH2,pCapexTrajectoryH2,3,h2/pCapexTrajectoryH2.csv
6,pExternalH2,pExternalH2,2,h2/pExternalH2.csv
7,pFuelDataH2,pFuelDataH2,1,h2/pFuelDataH2.csv
8,pH2DataExcel,pH2DataExcel,0,h2/pH2DataExcel.csv
9,pDemandData,pDemandData,0,load/pDemandData.csv



## Step 8 · Review any messages
Understand which symbols were missing, skipped, or written to the fallback `extras/` folder.


In [16]:

# Layout entries that have no row in the mapping table.
if missing_mapping_rows:
    print("Mapping rows missing for:", ', '.join(sorted(missing_mapping_rows)))

# Messages that list (csv symbol, gdx symbol) pairs share one shape:
# a heading followed by the sorted, comma-separated pairs.
pair_reports = (
    ("Symbols missing in GDX:", "{csv} (expected '{gdx}')", missing_in_gdx),
    (
        "Optional symbols absent in GDX; wrote empty CSV with headers:",
        "{csv} (stubbed as '{gdx}')",
        optional_stubbed,
    ),
    ("Symbols present in GDX but empty:", "{csv} (mapped to '{gdx}')", empty_in_gdx),
)
for heading, template, pairs in pair_reports:
    if pairs:
        print(heading, ', '.join(template.format(csv=csv, gdx=gdx) for csv, gdx in sorted(pairs)))

# One line per critical symbol that lacked expected columns.
for symbol_name, absent_cols in column_warnings:
    print(f"Missing columns in {symbol_name}: {', '.join(absent_cols)}")

# Symbols exported to extras/ because no layout entry used them.
if extras_written:
    print(
        "Extras written:",
        *(f"  - {record['gdx_symbol']} -> {record['path']}" for record in extras_written),
        sep='\n',
    )

# Files that already existed and were kept.
if skipped:
    print(
        "Skipped existing files (set OVERWRITE = True to replace them):",
        *(f"  - {kept.relative_to(EXPORT_ROOT)}" for kept in skipped),
        sep='\n',
    )


Symbols missing in GDX: sRelevant (expected 'sRelevant')
Optional symbols absent in GDX; wrote empty CSV with headers: pCSPData (stubbed as 'pCSPData'), pCapexTrajectories (stubbed as 'pCapexTrajectories'), pCarbonPrice (stubbed as 'pCarbonPrice'), pDemandData (stubbed as 'pDemandData'), pEmissionsCountry (stubbed as 'pEmissionsCountry'), pEmissionsTotal (stubbed as 'pEmissionsTotal'), pEnergyEfficiencyFactor (stubbed as 'pEnergyEfficiencyFactor'), pH2DataExcel (stubbed as 'pH2DataExcel'), pMaxFuellimit (stubbed as 'pMaxFuellimit'), pMinImport (stubbed as 'pMinImport'), pSpinningReserveReqCountry (stubbed as 'pSpinningReserveReqCountry'), pSpinningReserveReqSystem (stubbed as 'pSpinningReserveReqSystem')
Missing columns in pGenDataInput: DescreteCap, fuel2, HeatRate2, MinLimitShare, HeatRate, RampUpRate, RampDnRate, OverLoadFactor, ResLimShare, VOM, ReserveCost
