<a href="https://colab.research.google.com/github/DankoFox/ESG/blob/main/ESG_on_performance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

1. IMPORT DATA

In [None]:
# Names of the tracked variables, in the order their values appear in each
# yearly list below.
variable_names = [
    "ESG scores", "E scores", "S scores", "G scores",
    "ROA", "ROE", "Profit margins", "Z index", "NPL/TA", "LLP/TA",
    "Liquidity coverage ratio", "Net stable funding ratio",
    "Capital Adequacy Ratio", "Leverage Ratio",
]

# One list per year; position i holds the value of variable_names[i].
yearly_values = {
    2014: [91, 92, 86, 99, 0.0094, 0.1478, 0.2463, 45.0, 0.0194, 0.0013, 111, 100, 0.1206, 0.0441],
    2015: [87, 81, 85, 99, 0.0084, 0.1309, 0.2453, 44.7, 0.0175, 0.0013, 122, 100, 0.1347, 0.0490],
    2016: [90, 84, 90, 99, 0.0062, 0.0988, 0.1907, 42.7, 0.0183, 0.0021, 125, 105, 0.1421, 0.0510],
    2017: [89, 85, 90, 98, 0.0071, 0.1086, 0.2201, 44.8, 0.0193, 0.0013, 135, 110, 0.1476, 0.0550],
    2018: [88, 84, 87, 99, 0.0068, 0.1079, 0.2110, 42.8, 0.0184, 0.0007, 138, 115, 0.1493, 0.0530],
    2019: [87, 66, 88, 95, 0.0061, 0.0979, 0.1914, 41.8, 0.0126, 0.0008, 140, 116, 0.1502, 0.0530],
    2020: [86, 64, 88, 92, 0.0034, 0.0584, 0.1465, 38.3, 0.0067, 0.0026, 139, 124, 0.1671, 0.0540],
    2021: [86, 67, 86, 93, 0.0063, 0.0967, 0.3154, 43.9, 0.0054, -0.0006, 137, 124, 0.1778, 0.0510],
    2022: [84, 67, 83, 92, 0.0066, 0.1080, 0.3016, 41.3, 0.0041, -0.0002, 131, 119, 0.1794, 0.0530],
    2023: [86, 66, 87, 94, 0.0064, 0.1022, 0.1423, 42.6, 0.0000, 0.0002, 130, 116, 0.1794, 0.0510],
    2024: [87, 88, 84, 89, 0.0053, 0.0936, 0.1078, 38.2, 0.0055, 0.0003, 133, 116, 0.1945, 0.0440],
}

# Wide layout: a "Variable" label column followed by one column per year.
data = {"Variable": variable_names, **yearly_values}
df_raw = pd.DataFrame(data)

# Flip to one row per year and one column per variable, which is the shape
# the later cells index by column name.
df = df_raw.set_index("Variable").transpose()
df.head()

Variable,ESG scores,E scores,S scores,G scores,ROA,ROE,Profit margins,Z index,NPL/TA,LLP/TA,Liquidity coverage ratio,Net stable funding ratio,Capital Adequacy Ratio,Leverage Ratio
2014,91.0,92.0,86.0,99.0,0.0094,0.1478,0.2463,45.0,0.0194,0.0013,111.0,100.0,0.1206,0.0441
2015,87.0,81.0,85.0,99.0,0.0084,0.1309,0.2453,44.7,0.0175,0.0013,122.0,100.0,0.1347,0.049
2016,90.0,84.0,90.0,99.0,0.0062,0.0988,0.1907,42.7,0.0183,0.0021,125.0,105.0,0.1421,0.051
2017,89.0,85.0,90.0,98.0,0.0071,0.1086,0.2201,44.8,0.0193,0.0013,135.0,110.0,0.1476,0.055
2018,88.0,84.0,87.0,99.0,0.0068,0.1079,0.211,42.8,0.0184,0.0007,138.0,115.0,0.1493,0.053


### 3. Create Lagged Variables

To check for delayed effects (e.g., whether ESG scores from year t-1 affect ROA in year t), we need to lag the independent variables by one, two, and three years.

- ESG scores → current year → short-term impact
- ESG_Lag1 → 1 year lag → short to mid-term impact
- ESG_Lag2 → 2 years lag → mid-term impact
- ESG_Lag3 → 3 years lag → long-term impact

In [None]:
def _with_lags(frame, column, prefix, n_lags=3):
    """Return a copy of ``frame`` extended with lagged versions of ``column``.

    Adds columns ``{prefix}_Lag1`` ... ``{prefix}_Lag{n_lags}``, where lag k is
    ``frame[column]`` shifted down by k rows. With one row per consecutive
    year in ascending order (as the import cell builds it), lag k in year t
    is the value from year t-k.

    Rows containing any missing value are then dropped. This removes the first
    ``n_lags`` rows, whose lag history is incomplete.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source data; it is not modified.
    column : str
        Name of the column to lag.
    prefix : str
        Prefix used to name the new lag columns.
    n_lags : int, default 3
        Number of lag columns to create.
    """
    lagged = frame.copy()
    for k in range(1, n_lags + 1):
        lagged[f"{prefix}_Lag{k}"] = lagged[column].shift(k)
    return lagged.dropna()


# Unlagged baseline. NOTE: this rebinds `df_raw` to a copy of the year-indexed
# frame `df`; it no longer refers to the wide frame built in the import cell.
df_raw = df.copy()

# One lagged frame per score, each with current value plus lags 1-3.
df_esg = _with_lags(df_raw, "ESG scores", "ESG")  # combined ESG score
df_E = _with_lags(df_raw, "E scores", "E")        # E-only model
df_S = _with_lags(df_raw, "S scores", "S")        # S-only model
df_G = _with_lags(df_raw, "G scores", "G")        # G-only model


In [None]:
# Display the ESG frame with its three lag columns. Only the years with a
# complete three-year lag history remain after the dropna() in the cell above.
df_esg

Variable,ESG scores,E scores,S scores,G scores,ROA,ROE,Profit margins,Z index,NPL/TA,LLP/TA,Liquidity coverage ratio,Net stable funding ratio,Capital Adequacy Ratio,Leverage Ratio,ESG_Lag1,ESG_Lag2,ESG_Lag3
2017,89.0,85.0,90.0,98.0,0.0071,0.1086,0.2201,44.8,0.0193,0.0013,135.0,110.0,0.1476,0.055,90.0,87.0,91.0
2018,88.0,84.0,87.0,99.0,0.0068,0.1079,0.211,42.8,0.0184,0.0007,138.0,115.0,0.1493,0.053,89.0,90.0,87.0
2019,87.0,66.0,88.0,95.0,0.0061,0.0979,0.1914,41.8,0.0126,0.0008,140.0,116.0,0.1502,0.053,88.0,89.0,90.0
2020,86.0,64.0,88.0,92.0,0.0034,0.0584,0.1465,38.3,0.0067,0.0026,139.0,124.0,0.1671,0.054,87.0,88.0,89.0
2021,86.0,67.0,86.0,93.0,0.0063,0.0967,0.3154,43.9,0.0054,-0.0006,137.0,124.0,0.1778,0.051,86.0,87.0,88.0
2022,84.0,67.0,83.0,92.0,0.0066,0.108,0.3016,41.3,0.0041,-0.0002,131.0,119.0,0.1794,0.053,86.0,86.0,87.0
2023,86.0,66.0,87.0,94.0,0.0064,0.1022,0.1423,42.6,0.0,0.0002,130.0,116.0,0.1794,0.051,84.0,86.0,86.0
2024,87.0,88.0,84.0,89.0,0.0053,0.0936,0.1078,38.2,0.0055,0.0003,133.0,116.0,0.1945,0.044,86.0,84.0,86.0


In [None]:
import statsmodels.api as sm

def run_regression(y_var, x_vars, data):
    """Fit an ordinary least squares model with an intercept.

    Parameters
    ----------
    y_var : str
        Column of ``data`` used as the dependent variable.
    x_vars : list of str
        Columns of ``data`` used as regressors.
    data : pandas.DataFrame
        Frame holding both the dependent variable and the regressors.

    Returns
    -------
    The fitted statsmodels results object (as returned by ``OLS(...).fit()``).
    """
    # Regressor matrix with a constant column prepended for the intercept.
    design = sm.add_constant(data[x_vars])
    response = data[y_var]
    return sm.OLS(response, design).fit()


### ESG - Overall

In [None]:
# Dependent variables: every non-ESG column of the dataset, each regressed
# separately on the same set of predictors.
regression_targets = [
    "ROA", "ROE", "Profit margins",
    "Z index", "NPL/TA", "LLP/TA",
    "Liquidity coverage ratio", "Net stable funding ratio",
    "Capital Adequacy Ratio", "Leverage Ratio",
]

# Current-year combined ESG score plus its one-, two- and three-year lags.
esg_predictors = ["ESG scores", "ESG_Lag1", "ESG_Lag2", "ESG_Lag3"]

for target in regression_targets:
    model = run_regression(target, esg_predictors, df_esg)
    print(f"\n--- {target} Regression ---")
    print(model.summary())


--- ROA Regression ---
                            OLS Regression Results                            
Dep. Variable:                    ROA   R-squared:                       0.113
Model:                            OLS   Adj. R-squared:                 -1.070
Method:                 Least Squares   F-statistic:                   0.09531
Date:                Thu, 05 Jun 2025   Prob (F-statistic):              0.977
Time:                        16:47:51   Log-Likelihood:                 43.610
No. Observations:                   8   AIC:                            -77.22
Df Residuals:                       3   BIC:                            -76.82
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0025      0

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


### E - Individual

In [None]:
# Current-year Environmental score plus its one-, two- and three-year lags.
e_predictors = ["E scores", "E_Lag1", "E_Lag2", "E_Lag3"]

# Fit and report one OLS model per dependent variable on the E-only frame.
for target in regression_targets:
    model = run_regression(target, e_predictors, df_E)
    print(f"\n--- {target} Regression (Environmental - E) ---")
    print(model.summary())


--- ROA Regression (Environmental - E) ---
                            OLS Regression Results                            
Dep. Variable:                    ROA   R-squared:                       0.953
Model:                            OLS   Adj. R-squared:                  0.890
Method:                 Least Squares   F-statistic:                     15.13
Date:                Thu, 05 Jun 2025   Prob (F-statistic):             0.0249
Time:                        16:49:25   Log-Likelihood:                 55.342
No. Observations:                   8   AIC:                            -100.7
Df Residuals:                       3   BIC:                            -100.3
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const   

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


### S - Individual

In [None]:
# Current-year Social score plus its one-, two- and three-year lags.
s_predictors = ["S scores", "S_Lag1", "S_Lag2", "S_Lag3"]

# Fit and report one OLS model per dependent variable on the S-only frame.
for target in regression_targets:
    model = run_regression(target, s_predictors, df_S)
    print(f"\n--- {target} Regression (Social - S) ---")
    print(model.summary())


--- ROA Regression (Social - S) ---
                            OLS Regression Results                            
Dep. Variable:                    ROA   R-squared:                       0.601
Model:                            OLS   Adj. R-squared:                  0.069
Method:                 Least Squares   F-statistic:                     1.130
Date:                Thu, 05 Jun 2025   Prob (F-statistic):              0.479
Time:                        16:49:33   Log-Likelihood:                 46.808
No. Observations:                   8   AIC:                            -83.62
Df Residuals:                       3   BIC:                            -83.22
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


### G - Individual

In [None]:
# Current-year Governance score plus its one-, two- and three-year lags.
g_predictors = ["G scores", "G_Lag1", "G_Lag2", "G_Lag3"]

# Fit and report one OLS model per dependent variable on the G-only frame.
for target in regression_targets:
    model = run_regression(target, g_predictors, df_G)
    print(f"\n--- {target} Regression (Governance - G) ---")
    print(model.summary())


--- ROA Regression (Governance - G) ---
                            OLS Regression Results                            
Dep. Variable:                    ROA   R-squared:                       0.972
Model:                            OLS   Adj. R-squared:                  0.935
Method:                 Least Squares   F-statistic:                     26.37
Date:                Thu, 05 Jun 2025   Prob (F-statistic):             0.0113
Time:                        16:51:02   Log-Likelihood:                 57.484
No. Observations:                   8   AIC:                            -105.0
Df Residuals:                       3   BIC:                            -104.6
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)
