In [None]:
from pathlib import Path
import pandas as pd

# Load the processed 50-country panel into `df_50`.
# Later cells read its "Indicator" and "Value" columns, among others.

# --- Locations ---
PROJECT_DIR = Path("/files", "sustainability-economic-performance")
DATA_PATH = PROJECT_DIR.joinpath("data", "processed", "panel_50_countries.csv")

print("Reading:", DATA_PATH)

# --- Read ---
df_50 = pd.read_csv(DATA_PATH)

# --- Sanity checks: dimensions, column names and a preview of the first rows ---
print("Shape:", df_50.shape)
print("\nColumns:", list(df_50.columns))
df_50.head(16)

In [None]:
# ESG_MAP: indicator name -> {"category", "direction"}
#   category : the ESG pillar the indicator belongs to ("E", "S" or "G")
#   direction: +1 = higher value is better, -1 = higher value is worse
#
# The table below is written pillar by pillar (indicator -> direction) and then
# flattened into ESG_MAP, preserving the listed order.

_DIRECTION_BY_CATEGORY = {
    # --- Environmental (E) ---
    "E": {
        "CO2 emissions (metric tons per capita)": -1,
        "Methane emissions (metric tons of CO2 equivalent per capita)": -1,
        "Nitrous oxide emissions (metric tons of CO2 equivalent per capita)": -1,
        "Fossil fuel energy consumption (% of total)": -1,
        "Renewable energy consumption (% of total final energy consumption)": +1,
        "Renewable electricity output (% of total electricity output)": +1,
    },
    # --- Social (S) ---
    "S": {
        "Unemployment, total (% of total labor force) (modeled ILO estimate)": -1,
        "Gini index": -1,
        "Economic and Social Rights Performance Score": +1,
    },
    # --- Governance (G) ---
    "G": {
        "Control of Corruption: Estimate": +1,
        "Political Stability and Absence of Violence/Terrorism: Estimate": +1,
    },
}

ESG_MAP = {
    indicator: {"category": category, "direction": direction}
    for category, indicators in _DIRECTION_BY_CATEGORY.items()
    for indicator, direction in indicators.items()
}

# Sanity check: number of indicators, then one line per indicator with pillar and sign
print("Indicators in ESG_MAP:", len(ESG_MAP))
for indicator, spec in ESG_MAP.items():
    print(f"- {spec['category']} ({spec['direction']:+d}): {indicator}")

In [None]:
import numpy as np
import pandas as pd

# ----- Standardize the indicators -----

def _zscore(values):
    """Centre a Series on its mean and divide by its standard deviation (pandas default, ddof=1)."""
    return (values - values.mean()) / values.std()


# Indicator name -> +1 / -1, taken from ESG_MAP
_direction_of = {name: spec["direction"] for name, spec in ESG_MAP.items()}

# Temporarily keep only the ESG indicators; work on a copy so df_50 stays untouched
_is_esg_row = df_50["Indicator"].isin(list(ESG_MAP))
df_esg = df_50.loc[_is_esg_row].copy()

# Flip the sign of "higher is worse" indicators so that higher = better everywhere
df_esg["direction"] = df_esg["Indicator"].map(_direction_of)
df_esg["value_signed"] = df_esg["Value"] * df_esg["direction"]

# z-score within each indicator, pooling all of that indicator's rows
_signed_by_indicator = df_esg.groupby("Indicator")["value_signed"]
df_esg["value_z"] = _signed_by_indicator.transform(_zscore)

# Sanity checks: row count and a preview of the relevant columns
print("Rows in ESG working dataset:", df_esg.shape[0])
_preview_cols = ["Country Name", "Year", "Indicator", "Value", "direction", "value_z"]
df_esg.loc[:, _preview_cols].head(11)

In [None]:
# ----- Build the ESG indices -----

# Tag every row with its ESG pillar (E, S or G) as declared in ESG_MAP
_category_of = {name: spec["category"] for name, spec in ESG_MAP.items()}
df_esg["ESG_category"] = df_esg["Indicator"].map(_category_of)

# One score per country-year-pillar: the mean of the standardized indicators
_index_keys = ["Country Name", "Country Code", "Year"]
_pillar_means = df_esg.groupby(_index_keys + ["ESG_category"])["value_z"].mean()
esg_indices = _pillar_means.reset_index()

# Reshape to wide format: one row per country–year, one column per pillar
_wide = (
    esg_indices
    .set_index(_index_keys + ["ESG_category"])["value_z"]
    .unstack("ESG_category")
    # Drop the column-index name ("ESG_category") so it doesn't appear above the table
    .rename_axis(None, axis="columns")
    .reset_index()
)

# Clearer column names, then a consistent order (Country Code, then Year)
# with a clean integer index
_PILLAR_COLUMNS = {"E": "ENV_index", "S": "SOC_index", "G": "GOV_index"}
esg_indices_wide = (
    _wide
    .rename(columns=_PILLAR_COLUMNS)
    .sort_values(["Country Code", "Year"])
    .reset_index(drop=True)
)

# Sanity checks
print("Rows in ESG index dataset:", esg_indices_wide.shape[0])
esg_indices_wide.head(10)

In [None]:
import pandas as pd

# ----- Merge the economic indicators with the ESG indices to get the full dataset for the regression -----

# Long indicator name -> short column name for the regression table.
# NOTE(review): "R&D_expenditure" is not a valid Python identifier, so it would
# need quoting (e.g. Q("R&D_expenditure")) if ever used in a formula.
_ECON_SHORT_NAMES = {
    "GDP growth (annual %)": "gdp_growth",
    "GDP per capita (constant 2015 US$)": "gdp_per_capita",
    "Inflation, consumer prices (annual %)": "inflation",
    "Foreign direct investment, net inflows (% of GDP)": "fdi_inflows",
    "Research and development expenditure (% of GDP)": "R&D_expenditure",
}
ECON_INDICATORS = list(_ECON_SHORT_NAMES)

_country_year_keys = ["Country Name", "Country Code", "Year"]
_country_attrs = ["Region", "Income Group"]

# Keep only the economic indicators and the columns needed downstream
_is_econ_row = df_50["Indicator"].isin(ECON_INDICATORS)
df_econ = df_50.loc[
    _is_econ_row,
    _country_year_keys + ["Indicator", "Value"] + _country_attrs,
].copy()

# Wide economic table: one row per country-year, one column per indicator,
# with the pivot's column-index name removed (cosmetic) and short column names
econ_wide = (
    df_econ
    .pivot_table(
        values="Value",
        index=_country_year_keys + _country_attrs,
        columns="Indicator",
        aggfunc="mean",
    )
    .rename_axis(None, axis="columns")
    .reset_index()
    .rename(columns=_ECON_SHORT_NAMES)
)

# Left-join the ESG indices (ENV_index, SOC_index, GOV_index) onto the economic
# table, then order by country code and year with a clean integer index
reg_df = (
    pd.merge(econ_wide, esg_indices_wide, how="left", on=_country_year_keys)
    .sort_values(["Country Code", "Year"])
    .reset_index(drop=True)
)

# Sanity checks
print("Regression dataset shape:", reg_df.shape)
print("\nColumns:", list(reg_df.columns))
reg_df.head(5)

In [None]:
# The regression cell refers to the country identifier as `country_code`,
# so the "Country Code" column is renamed to match.
_COUNTRY_CODE_RENAME = {"Country Code": "country_code"}
reg_df = reg_df.rename(_COUNTRY_CODE_RENAME, axis="columns")

reg_df.head()

In [None]:
from pathlib import Path

# ----- Save the final dataset for the fixed effects regression -----

# Project root and the processed-data folder beneath it (created if missing)
PROJECT_ROOT = Path("/files", "sustainability-economic-performance")
processed_dir = PROJECT_ROOT.joinpath("data", "processed")
processed_dir.mkdir(parents=True, exist_ok=True)

# Write the regression table as CSV, without the integer row index
output_path = processed_dir.joinpath("panel_FE_regression.csv")
reg_df.to_csv(output_path, index=False)

print(f"FE regression dataset saved to: {output_path}")

In [None]:
import statsmodels.formula.api as smf

# ----- Country + Year Fixed-Effects Regression -----

# Regression sample: only the columns the model uses, complete cases only
_FE_COLUMNS = ["country_code", "Year", "gdp_growth", "ENV_index", "SOC_index", "GOV_index"]
fe_df = reg_df.loc[:, _FE_COLUMNS].dropna()

# GDP growth on the three ESG indices, with country and year entered as
# categorical terms (the fixed effects)
_FE_FORMULA = """
        gdp_growth ~ ENV_index + SOC_index + GOV_index
        + C(country_code)
        + C(Year)
    """

# OLS fit with standard errors clustered by country
_fe_ols = smf.ols(formula=_FE_FORMULA, data=fe_df)
country_year_fe_model = _fe_ols.fit(
    cov_type="cluster",
    cov_kwds=dict(groups=fe_df["country_code"]),
)

print(country_year_fe_model.summary())

In [None]:
# ----- Save the regression table as LaTeX -----

# Render the fitted model's summary as LaTeX source
latex_table = country_year_fe_model.summary().as_latex()

# Anchor the output under PROJECT_ROOT (like the CSV export) instead of the
# current working directory, and create the folder if it is missing so the
# write does not fail with FileNotFoundError on a fresh project tree.
regression_dir = PROJECT_ROOT / "results" / "regression"
regression_dir.mkdir(parents=True, exist_ok=True)

output_path = regression_dir / "fixed_effects_regression.tex"
output_path.write_text(latex_table, encoding="utf-8")

print(f"Regression table saved to {output_path}")