In [1]:
# ------------------------------
# 1. Import datasets
# ------------------------------
import pandas as pd
# Read both inputs as Latin-1 text.
# NOTE(review): the rendered df.head() output further down shows a garbled
# company name, so the files may not actually be Latin-1 encoded -- confirm
# against the source CSVs before relying on text columns.
esg = pd.read_csv("esg_metrics.csv", encoding="latin1")
thresholds = pd.read_csv("thresholds_ESG.csv", encoding="latin1")

# Bring headers to one convention (trimmed, lower-case, spaces -> underscores)
# so both frames can be joined and addressed with the same column names.
for frame in (esg, thresholds):
    frame.columns = frame.columns.str.strip().str.lower().str.replace(" ", "_")

In [2]:
# ------------------------------
# 2. Join esg_metrics with thresholds by sector
# ------------------------------
# Left join keeps every company row. Columns that exist in both frames keep
# their name for the company value and get a "_threshold" suffix for the
# sector limit. A sector with no row in `thresholds` ends up with NaN limits.
# validate="many_to_one" makes pandas raise MergeError if `thresholds` lists a
# sector more than once, instead of silently duplicating company rows.
df = esg.merge(
    thresholds,
    on="sector",
    how="left",
    suffixes=("", "_threshold"),
    validate="many_to_one",
)

In [3]:
# ------------------------------
# 3. Metrics list to evaluate
# ------------------------------
metrics = [
    "co2_emissions",
    "energy_consumption_mwh",
    "%_renewable_energy",
    "waste_generated_tons",
    "water_withdrawal_m3",
]

# ------------------------------
# 4. Compare metrics with their thresholds
# ------------------------------
# Direction of each rule: "lower" means the value must not exceed the
# threshold, "higher" means it must reach at least the threshold.
# The key order is significant: it is the order in which the *_compliance
# columns are appended to df, and a later cell selects columns by position.
metrics_rules = {
    "co2_emissions": "lower",
    "water_withdrawal_m3": "lower",
    "waste_generated_tons": "lower",
    "energy_consumption_mwh": "lower",
    "%_renewable_energy": "higher",
}

for metric_name, direction in metrics_rules.items():
    observed = df[metric_name]
    limit = df[f"{metric_name}_threshold"]

    # Element-wise comparison; the boolean result is stored as a 0/1 flag.
    meets_rule = observed.ge(limit) if direction == "higher" else observed.le(limit)
    df[f"{metric_name}_compliance"] = meets_rule.astype(int)

In [4]:
# ------------------------------
# 5. Calculate score & global state
# ------------------------------
# Score = share of evaluated metrics whose 0/1 compliance flag is set.
compliance_columns = [f"{metric}_compliance" for metric in metrics]
df["compliance_score"] = df[compliance_columns].mean(axis="columns")

def classify(score):
    """Translate a compliance score into a status label.

    Returns "Fully compliant" when score equals 1, "Non compliant" when it
    equals 0, and "Partially compliant" for every other value.
    """
    label = "Partially compliant"
    if score == 1:
        label = "Fully compliant"
    elif score == 0:
        label = "Non compliant"
    return label

# Label every row from its score using classify().
df["compliance_status"] = df["compliance_score"].map(classify)

In [5]:
# Build the report layout by column name rather than by position: identifying
# columns first, then value / threshold / compliance flag for each metric,
# then the overall score and status. Name-based selection does not depend on
# the column order of the input files, and the cell can be re-run safely
# (the positional version asked for positions up to 24, which no longer exist
# once the frame has been reduced to these 21 columns).
id_columns = ["company_id", "name", "sector", "country"]
metric_columns = [
    column
    for metric in metrics
    for column in (metric, f"{metric}_threshold", f"{metric}_compliance")
]
report_columns = id_columns + metric_columns + ["compliance_score", "compliance_status"]

df = df[report_columns]
df.head()

Unnamed: 0,company_id,name,sector,country,co2_emissions,co2_emissions_threshold,co2_emissions_compliance,energy_consumption_mwh,energy_consumption_mwh_threshold,energy_consumption_mwh_compliance,...,%_renewable_energy_threshold,%_renewable_energy_compliance,waste_generated_tons,waste_generated_tons_threshold,waste_generated_tons_compliance,water_withdrawal_m3,water_withdrawal_m3_threshold,water_withdrawal_m3_compliance,compliance_score,compliance_status
0,1,SAP,tech,Germany,40887.07,50000,1,106777.47,120000,1,...,20,0,4900.01,5000,1,131487.56,100000,0,0.6,Partially compliant
1,2,ASML,tech,Netherlands,35312.12,50000,1,101772.33,120000,1,...,20,0,5824.95,5000,0,80305.08,100000,1,0.6,Partially compliant
2,3,LVMH,retail,France,77199.17,70000,0,121700.68,150000,1,...,25,0,11680.81,10000,0,199909.73,200000,1,0.4,Partially compliant
3,4,Prosus,financial,Netherlands,19647.29,20000,1,72151.85,60000,0,...,20,1,1925.51,2000,1,49818.82,50000,1,0.8,Partially compliant
4,5,Hermès,retail,France,65378.71,70000,1,127047.35,150000,1,...,25,0,8580.4,10000,1,216997.01,200000,0,0.6,Partially compliant


In [6]:
# ------------------------------
# 6. Export results
# ------------------------------
# Write the report as UTF-8 without the row index, then confirm on stdout.
output_path = "compliance.csv"
df.to_csv(output_path, encoding="utf-8", index=False)

print("✅ Dataset compliance.csv generated correctly")

✅ Dataset compliance.csv generated correctly
