In [None]:
import pandas as pd
from datetime import datetime
from itertools import product

# Load the raw observations from the CSV export.
df = pd.read_csv("life-expectancy.csv")

# Keep only the three dimension columns and the measure. The explicit
# .copy() makes df_simple an independent frame, so the column assignment
# below no longer triggers pandas' SettingWithCopyWarning.
df_simple = df[["DateCode", "GeographyCode", "Sex", "Value"]].copy()

# Coerce the measure to numbers; entries that cannot be parsed become NaN.
df_simple["Value"] = pd.to_numeric(df_simple["Value"], errors="coerce")

# Collapse repeated (Sex, DateCode, GeographyCode) rows to their mean.
# This must happen before the reindex below: reindexing a frame whose
# MultiIndex contains duplicates raises
# "ValueError: cannot handle a non-unique multi-index!".
df_simple = df_simple.groupby(
    ["Sex", "DateCode", "GeographyCode"], as_index=False
).mean()

# Sorted distinct levels of each dimension (missing keys excluded).
sex_vals = sorted(df_simple["Sex"].dropna().unique())
date_vals = sorted(df_simple["DateCode"].dropna().unique())
geo_vals = sorted(df_simple["GeographyCode"].dropna().unique())

# Build the full cartesian product of the three dimensions and align the
# data to it, so that combinations absent from the CSV appear as NaN rows.
# The product order makes Sex vary slowest and GeographyCode fastest.
full_index = pd.MultiIndex.from_product(
    [sex_vals, date_vals, geo_vals],
    names=["Sex", "DateCode", "GeographyCode"]
)
indexed_df = df_simple.set_index(["Sex", "DateCode", "GeographyCode"]).reindex(full_index)

# Render each cell as text: missing cells become the quoted ".." marker,
# everything else is rounded to one decimal place.
data_values = [
    '".."' if pd.isna(v) else str(round(v, 1))
    for v in indexed_df["Value"]
]

# File-level PX keywords, one per line, each terminated by ";".
# CREATION-DATE carries the current local time formatted as "YYYYMMDD HH:MM".
creation_date = f"{datetime.today():%Y%m%d %H:%M}"
header = "".join(
    keyword_line + "\n"
    for keyword_line in (
        'CHARSET="UTF-16";',
        'AXIS-VERSION="2013";',
        'LANGUAGE="en";',
        f'CREATION-DATE="{creation_date}";',
        'MATRIX="LIFEEXP_SIMPLE_V2";',
        'DECIMALS=1;',
        'SUBJECT-AREA="Life Expectancy";',
        'SUBJECT-CODE="LE01";',
        'CONTENTS="Life Expectancy";',
        'TITLE="Life Expectancy by Year, Region, and Sex";',
        'UNITS="Years";',
        'STUB="Sex";',
        'HEADING="DateCode","GeographyCode";',
        'SOURCE="National Records of Scotland";',
    )
)

# VALUES + CODES keyword lines for a single dimension
def px_values_and_codes(name, values):
    """Return the PX ``VALUES`` and ``CODES`` lines for one dimension.

    Args:
        name: Dimension name placed inside ``VALUES("...")``/``CODES("...")``.
        values: Sized sequence of dimension levels; each is rendered with
            ``str()`` formatting and wrapped in double quotes.

    Returns:
        Two newline-terminated lines. The codes are the 1-based positions
        of the levels, zero-padded to at least two digits.
    """
    value_items = [f'"{level}"' for level in values]
    code_items = [f'"{position:02d}"' for position in range(1, len(values) + 1)]
    values_line = f'VALUES("{name}")=' + ",".join(value_items) + ";\n"
    codes_line = f'CODES("{name}")=' + ",".join(code_items) + ";\n"
    return values_line + codes_line

# Concatenate the VALUES/CODES lines of every dimension, in the order
# Sex, DateCode, GeographyCode.
meta_parts = "".join(
    px_values_and_codes(dimension, levels)
    for dimension, levels in (
        ("Sex", sex_vals),
        ("DateCode", date_vals),
        ("GeographyCode", geo_vals),
    )
)

# Write the PX document: keyword header, VALUES/CODES metadata, then the
# DATA section with all cells space-separated and terminated by ";".
with open("life-expectancy-output.px", "w", encoding="utf-16") as px_file:
    px_file.writelines(
        (header, meta_parts, "DATA=\n", " ".join(data_values), ";")
    )

print("✅ PX file saved as: life-expectancy-output.px")


ValueError: cannot handle a non-unique multi-index!