In [None]:
#Import and preprocess data
!pip install linearmodels

Collecting linearmodels
  Downloading linearmodels-6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.2.0-py3-none-any.whl.metadata (4.0 kB)
Collecting formulaic>=1.0.0 (from linearmodels)
  Downloading formulaic-1.0.2-py3-none-any.whl.metadata (6.8 kB)
Collecting setuptools-scm<9.0.0,>=8.0.0 (from setuptools-scm[toml]<9.0.0,>=8.0.0->linearmodels)
  Downloading setuptools_scm-8.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting interface-meta>=1.2.0 (from formulaic>=1.0.0->linearmodels)
  Downloading interface_meta-1.3.0-py3-none-any.whl.metadata (6.7 kB)
Downloading linearmodels-6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the bank panel from the workbook, keeping only the five listed columns.
# index_col=[0, 1] turns the first two positions into a two-level index
# (presumably Banks and Year -- verify against the workbook layout).
dataset = pd.read_excel(
    io='/content/Pannel_data.xlsx',
    index_col=[0, 1],
    usecols=['Banks', 'Year', 'ROA(Y)%', 'CAR(X1)%', 'NPALR(X2)%'],
)


In [None]:
# Preview the first 20 rows of the loaded panel.
dataset.head(n=20)

Unnamed: 0,Banks,Year,ROA(Y)%,CAR(X1)%,NPALR(X2)%
0,Bank of Baroda,2017,0.2,12.24,4.72
1,Bank of Baroda,2018,-0.34,12.13,5.49
2,Bank of Baroda,2019,0.06,13.42,3.33
3,Bank of India,2017,-0.24,12.14,6.9
4,Bank of India,2018,-0.91,12.94,8.28
5,Bank of India,2019,-0.84,14.19,5.61
6,Bank of Maharashtra,2017,-0.86,11.18,11.76
7,Bank of Maharashtra,2018,-0.73,11.01,11.24
8,Bank of Maharashtra,2019,-3.01,11.859,5.52
9,Canara Bank,2017,0.2,12.86,6.33


In [None]:
# Pooled OLS baseline
from linearmodels import PooledOLS
import statsmodels.api as sm

# Dependent variable, and the two regressors with an intercept column added.
endog = dataset['ROA(Y)%']
exog = sm.add_constant(dataset[['CAR(X1)%', 'NPALR(X2)%']])

# Fit with a clustered covariance estimator, clustering on the entity level.
mod = PooledOLS(endog, exog)
pooledOLS_res = mod.fit(cluster_entity=True, cov_type='clustered')
print(pooledOLS_res)

                          PooledOLS Estimation Summary                          
Dep. Variable:                ROA(Y)%   R-squared:                        0.7284
Estimator:                  PooledOLS   R-squared (Between):              0.8933
No. Observations:                  99   R-squared (Within):              -0.3109
Date:                Mon, Sep 30 2024   R-squared (Overall):              0.7284
Time:                        10:17:01   Log-likelihood                   -109.29
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      128.72
Entities:                          33   P-value                           0.0000
Avg Obs:                       3.0000   Distribution:                    F(2,96)
Min Obs:                       3.0000                                           
Max Obs:                       3.0000   F-statistic (robust):             125.07
                            

In [None]:
# Fixed-effects (FE) and random-effects (RE) fits on the same specification
from linearmodels import PanelOLS, RandomEffects

# Same dependent variable and regressors (plus intercept) as the pooled model.
endog = dataset['ROA(Y)%']
exog = sm.add_constant(dataset[['CAR(X1)%', 'NPALR(X2)%']])

# Fixed effects: PanelOLS with entity effects switched on.
model_fe = PanelOLS(endog, exog, entity_effects=True)
fe_res = model_fe.fit()

# Random effects on the identical endog/exog pair.
model_re = RandomEffects(endog, exog)
re_res = model_re.fit()

# Show both estimation summaries, FE first and RE second.
print(fe_res, re_res, sep='\n')

                          PanelOLS Estimation Summary                           
Dep. Variable:                ROA(Y)%   R-squared:                        0.0050
Estimator:                   PanelOLS   R-squared (Between):              0.1228
No. Observations:                  99   R-squared (Within):               0.0050
Date:                Mon, Sep 30 2024   R-squared (Overall):              0.1067
Time:                        10:22:33   Log-likelihood                   -75.147
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      0.1612
Entities:                          33   P-value                           0.8515
Avg Obs:                       3.0000   Distribution:                    F(2,64)
Min Obs:                       3.0000                                           
Max Obs:                       3.0000   F-statistic (robust):             0.1612
                            

In [None]:
#hausman test (RE Vs FE)
def hausman_test(fe_result=None, re_result=None, alpha=0.05):
    """Run a Hausman specification test comparing FE and RE estimates.

    The statistic is H = d' [Cov_FE - Cov_RE]^{-1} d with
    d = params_FE - params_RE. It is compared with a chi-square critical
    value whose degrees of freedom equal the rank of the covariance
    difference. The null hypothesis is rejected (FE preferred) when H exceeds
    the critical value at significance level ``alpha``; otherwise RE is
    preferred.

    Parameters
    ----------
    fe_result, re_result : objects exposing ``.params`` and ``.cov``, optional
        Fitted fixed-effects and random-effects results. When omitted, the
        notebook-level ``fe_res`` and ``re_res`` variables are used, so the
        zero-argument call keeps working.
    alpha : float, default 0.05
        Significance level used for the reject / fail-to-reject decision.

    Returns
    -------
    None
        The statistic, the 1% / 5% / 10% critical values and the verdict are
        printed.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance difference is singular and cannot be inverted.
    """
    from scipy.stats import chi2

    # Fall back to the notebook globals only when no result is passed in.
    if fe_result is None:
        fe_result = fe_res
    if re_result is None:
        re_result = re_res

    print('hausman test result')
    coef_diff = np.asarray(fe_result.params - re_result.params, dtype=float)
    cov_diff = np.asarray(fe_result.cov - re_result.cov, dtype=float)

    # Quadratic form d' V^{-1} d, reduced to a plain Python float.
    statistic = float(coef_diff @ np.linalg.inv(cov_diff) @ coef_diff)
    print(statistic)

    dof = np.linalg.matrix_rank(cov_diff)
    # Reference critical values at the 1%, 5% and 10% levels.
    critical_values = chi2.ppf([0.99, 0.95, 0.90], dof)
    print(critical_values)

    # Reject when the statistic EXCEEDS the critical value. The previous
    # check `k < t.any()` compared the statistic with the boolean True (i.e. 1)
    # and had the inequality reversed.
    threshold = chi2.ppf(1.0 - alpha, dof)
    if statistic > threshold:
        print('Null hypothesis is rejected, FE model is Preferred' )
    else:
        print('Failed to reject the null hypothesis, RE model is preferred')

In [None]:
# Run the Hausman comparison; with no arguments it reads the fitted fe_res and re_res globals.
hausman_test()

hausman test result
22.161410922177488
[11.34486673  7.8147279   6.25138863]
Failed to reject the null hypothesis, RE model is preferred


In [None]:
#Pooled Vs RE
# NOTE(review): het_breuschpagan is statsmodels' heteroskedasticity diagnostic
# applied to the pooled-OLS residuals -- confirm this is the test intended for
# the pooled-vs-RE decision.
from statsmodels.stats.diagnostic import het_white, het_breuschpagan

# Keep fitted values and residuals for a graphical homoskedasticity check.
residuals_pooled_OLS = pooledOLS_res.resids
fittedvals_pooled_OLS = pooledOLS_res.predict().fitted_values

# Regressors with an intercept, and the panel with the residual column appended.
exog = sm.add_constant(dataset[['CAR(X1)%', 'NPALR(X2)%']])
pooled_OLS_dataset = pd.concat([dataset, residuals_pooled_OLS], axis=1)

#2-Breusch-Pagan-Test
labels = ['BP_LM-Stat', 'BP_LM p-val', 'BP_F-Stat', 'BP_F p-val']
breusch_pagan_test_results = het_breuschpagan(pooled_OLS_dataset['residual'], exog)
print(dict(zip(labels, breusch_pagan_test_results)))

{'BP_LM-Stat': 0.5545569794658272, 'BP_LM p-val': 0.7578434161093842, 'BP_F-Stat': 0.2703907280787766, 'BP_F p-val': 0.7636604684262427}
