In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
from statsmodels.stats.diagnostic import het_breuschpagan

df = pd.read_excel('DATA.xlsx')
df = df.rename(columns = {'Field of Technology':'FT','Control de la Corrupción': 'CC','Calidad Regulatoria':'RQ','Estado de Derecho':'LR','Riesgo de Expropiación':'ER'})
df['Dummy'] = df['LegalOrg']
df = pd.get_dummies(df, columns=['Dummy'], drop_first=True, prefix='')
df = df.rename(columns = {'_French':'French','_German':'German','_Scandinavian':'Scandinavian'})
df = df[df['Patents'] > 0]

df.head()

Unnamed: 0,Country,Code,Year,FT,Patents,Chemistry,Electrical engineering,Instruments,Mechanical engineering,Other fields,...,CapitalHumano,CC,RQ,LR,ER,Porcent,LegalOrg,French,German,Scandinavian
0,Austria,AUT,2014,Chemistry,205,1,0,0,0,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
1,Austria,AUT,2014,Electrical engineering,135,0,1,0,0,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
2,Austria,AUT,2014,Instruments,111,0,0,1,0,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
3,Austria,AUT,2014,Mechanical engineering,308,0,0,0,1,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
4,Austria,AUT,2014,Other fields,132,0,0,0,0,1,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0


In [2]:
dfreg2 = df[df['Total'] == 0]
dfreg2.head()

Unnamed: 0,Country,Code,Year,FT,Patents,Chemistry,Electrical engineering,Instruments,Mechanical engineering,Other fields,...,CapitalHumano,CC,RQ,LR,ER,Porcent,LegalOrg,French,German,Scandinavian
0,Austria,AUT,2014,Chemistry,205,1,0,0,0,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
1,Austria,AUT,2014,Electrical engineering,135,0,1,0,0,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
2,Austria,AUT,2014,Instruments,111,0,0,1,0,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
3,Austria,AUT,2014,Mechanical engineering,308,0,0,0,1,0,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0
4,Austria,AUT,2014,Other fields,132,0,0,0,0,1,...,5010.507324,7.933004,7.96434,8.876872,8.475253,1.662861,German,0,1,0


In [3]:
from linearmodels.iv import IVGMM

gmm_modelCC = IVGMM.from_formula('np.log(Patents) ~ 1 + np.log(ArtCientificos) + np.log(CapitalHumano)  + Porcent + CC*FT ', data=dfreg2)
gmm_resultsCC = gmm_modelCC.fit()

gmm_modelLR = IVGMM.from_formula('np.log(Patents) ~ 1 + np.log(ArtCientificos) + np.log(CapitalHumano) + Porcent + LR*FT  ', data=dfreg2)
gmm_resultsLR = gmm_modelLR.fit()

gmm_modelRQ = IVGMM.from_formula('np.log(Patents) ~ 1 + np.log(ArtCientificos) + np.log(CapitalHumano) + Porcent + RQ*FT  ', data=dfreg2)
gmm_resultsRQ = gmm_modelRQ.fit()

gmm_modelER = IVGMM.from_formula('np.log(Patents) ~ 1 + np.log(ArtCientificos) + np.log(CapitalHumano) + Porcent + ER*FT  ', data=dfreg2)
gmm_resultsER = gmm_modelER.fit()

In [4]:
from linearmodels.iv.results import compare
table = {'(FT): CC': gmm_resultsCC,'(FT): RQ': gmm_resultsLR,'(FT): LR': gmm_resultsRQ,'(FT): ER': gmm_resultsER}
tabla = compare(table,stars= True)
tabla_df = pd.read_html(tabla.summary.as_html(), header=0, index_col=0)[0]
tabla_df.to_excel('ResultadosRegresiones\Resultados3.xlsx')

In [5]:

pd.set_option('display.max_rows', None)
tabla_df

Unnamed: 0,(FT): CC,(FT): RQ,(FT): LR,(FT): ER
Dep. Variable,np.log(Patents),np.log(Patents),np.log(Patents),np.log(Patents)
Estimator,IV-GMM,IV-GMM,IV-GMM,IV-GMM
No. Observations,1212,1212,1212,1212
Cov. Est.,robust,robust,robust,robust
R-squared,0.8135,0.8063,0.7996,0.7889
Adj. R-squared,0.8116,0.8043,0.7976,0.7868
F-statistic,6356.0,6340.7,5969.4,5040.0
P-value (F-stat),0.0000,0.0000,0.0000,0.0000
=================================,=================,=================,=================,=================
Intercept,-12.892***,-13.898***,-16.295***,-16.030***


In [6]:
exog_vars1 = gmm_modelCC.exog.ndarray
exog_vars2 = gmm_modelRQ.exog.ndarray
exog_vars3 = gmm_modelLR.exog.ndarray
exog_vars4 = gmm_modelER.exog.ndarray
resid1 = gmm_resultsCC.resids.reset_index(drop=True)
resid2 = gmm_resultsRQ.resids.reset_index(drop=True)
resid3 = gmm_resultsLR.resids.reset_index(drop=True)
resid4 = gmm_resultsER.resids.reset_index(drop=True)

exo_vars = [exog_vars1,exog_vars2,exog_vars3,exog_vars4]
resid = [resid1,resid2,resid3,resid4]

for i in range(len(resid)):
    bp_test_statistic, bp_p_value, _, _ = het_breuschpagan(resid[i], exo_vars[i])
    
    # Mostrar resultados del test de heterocedasticidad
    print(round(bp_test_statistic,4),round(bp_p_value,4))

100.0274 0.0
154.6972 0.0
123.5017 0.0
169.9081 0.0
