In [1]:
import warnings

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy as sp
import linearmodels as lm
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Load d3.csv from the UT_IO GitHub repository into a DataFrame (needs network access).
df = pd.read_csv('https://raw.githubusercontent.com/Alalalalaki/UT_IO/master/data/d3.csv')

In [3]:
# Show the loaded frame; the rendered output below is abbreviated (first/last rows and columns only).
df

Unnamed: 0,key_0,idx,year,logY,1,logE,logK,logM,logE^2,logE logK,...,logE-1,logK-1,logM-1,logE^2-1,logE logK-1,logE logM-1,logK^2-1,logK logM-1,logM^2-1,Fy-1
0,0,18.00,1956.00,16.53,1.00,8.27,16.01,15.74,68.35,132.39,...,,,,,,,,,,
1,1,18.00,1957.00,16.70,1.00,8.33,16.33,15.84,69.35,135.98,...,8.27,16.01,15.74,68.35,132.39,130.16,256.42,252.09,247.84,17.16
2,2,18.00,1958.00,16.82,1.00,8.34,16.43,15.92,69.61,137.08,...,8.33,16.33,15.84,69.35,135.98,131.91,266.61,258.64,250.91,17.34
3,3,18.00,1959.00,16.99,1.00,8.36,16.26,16.02,69.95,135.99,...,8.34,16.43,15.92,69.61,137.08,132.85,269.92,261.59,253.51,17.41
4,4,18.00,1960.00,17.16,1.00,8.72,16.36,16.41,76.04,142.70,...,8.36,16.26,16.02,69.95,135.99,133.97,264.36,260.43,256.55,17.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5372,5372,44683.00,2005.00,14.96,1.00,5.04,14.39,13.90,25.37,72.49,...,,,,,,,,,,
5373,5373,44683.00,2006.00,14.97,1.00,4.98,14.28,13.74,24.77,71.09,...,5.04,14.39,13.90,25.37,72.49,70.02,207.13,200.06,193.23,14.92
5374,5374,44683.00,2007.00,14.92,1.00,4.98,14.25,13.24,24.84,70.99,...,4.98,14.28,13.74,24.77,71.09,68.36,204.03,196.20,188.67,14.80
5375,5375,44683.00,2008.00,14.92,1.00,4.96,14.28,13.49,24.63,70.88,...,4.98,14.25,13.24,24.84,70.99,66.01,202.94,188.68,175.42,14.58


**LP (Levinsohn–Petrin)**

In [4]:
def dLP(xL,xK,xM,d=df,TE=True):
    """Build the first-stage data set used by the LP estimator.

    Regresses ``logY`` on the ``xL`` column and a third-degree polynomial in
    (``xK``, ``xM``) with ``PanelOLS``, then adds the residual, the fitted
    output net of the ``xL`` contribution (``Fy``) and one-period lags.

    Parameters
    ----------
    xL, xK, xM : str
        Column names in ``d``; ``xL`` enters linearly (renamed ``logL``),
        ``xK`` and ``xM`` enter through the polynomial.
    d : DataFrame
        Must provide ``idx``, ``year``, ``logY`` and the three columns above.
        It is only read; a new frame is returned.
    TE : bool
        Forwarded to ``PanelOLS(time_effects=...)``.  Author's note: the
        results are very sensitive to this switch.

    Returns
    -------
    d : DataFrame
        ``idx``, ``year``, ``logY``, ``logL``, the polynomial terms, ``error``,
        ``Fy`` and the lagged columns (suffix ``'-1'``), plus the key column
        that ``merge`` creates when joining on an index array.
    pm : list of str
        Polynomial term names; ``pm[0]`` is the bias term ``'1'``.
    pm0 : list of str
        ``pm[1:]`` with the ``'-1'`` lag suffix.
    bl : float
        First-stage coefficient on ``logL``.
    """
    X = d[[xK,xM]]
    poly = PolynomialFeatures(degree=3).fit(X)
    XX = poly.transform(X)
    # get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
    # and fall back only on old versions that lack it.  Both yield the same names.
    names_fn = getattr(poly, 'get_feature_names_out', None) or poly.get_feature_names
    pm = list(names_fn(X.columns))
    d = pd.DataFrame(np.column_stack((d.idx, d.year, d.logY, d[xL], XX)))
    d.columns = ['idx','year','logY','logL'] + pm
    d = d.set_index(['idx','year'])
    # Regressors are every column after logY: logL followed by the polynomial terms.
    ols1s = lm.PanelOLS(d.logY, d.iloc[:,1:], time_effects=TE).fit()
    d['error'] = ols1s.resids
    # logL is the first regressor, so its coefficient sits at position 0
    # (explicit .iloc: positional [] on a labelled Series is deprecated).
    bl = ols1s.params.iloc[0]
    d['Fy'] = d.logY - d.logL*bl - d.error
    d = d.reset_index()
    # shift(1) takes the previous row within each idx; this relies on the rows
    # already being ordered by year inside each idx (TODO confirm for new data).
    lag = d.groupby('idx')[['logL'] + pm[1:] + ['Fy']].shift(1)
    d = d.merge(lag, on=d.index, suffixes=('', '-1'))
    pm0 = [name + '-1' for name in pm][1:]
    return d,pm,pm0,bl

def gmm(beta,X0,X1,Fy0,Fy1,Z,e):
    """GMM objective for the second-stage coefficients ``beta``.

    Steps:
      1. ``w0 = Fy0 - X0 @ beta`` and ``w1 = Fy1 - X1 @ beta``.
      2. Regress ``w1`` on ``[1, w0, w0**2]`` by least squares; the residual
         is the innovation ``xi``.
      3. Return ``xi' Z (Z'Z)^-1 Z' xi``.

    Parameters
    ----------
    beta : array-like, length k
    X0, X1 : array-like, shape (n, k)
        Lagged and current regressors.
    Fy0, Fy1 : array-like, length n
        Lagged and current first-stage fitted values.
    Z : array-like, shape (n, q)
        Instruments.
    e : any
        Unused; kept so existing ``args=(..., e)`` call sites keep working.
        (The author tried the alternative ``xi = xi + e`` and noted that the
        results change a lot.)

    Returns
    -------
    float
        Value of the quadratic form (non-negative up to rounding).
    """
    beta = np.asarray(beta, dtype=float)
    w0 = np.asarray(Fy0, dtype=float) - np.asarray(X0, dtype=float) @ beta
    w1 = np.asarray(Fy1, dtype=float) - np.asarray(X1, dtype=float) @ beta
    # Least-squares projection done with numpy: smf.OLS no longer exists in
    # current statsmodels, and this avoids model-object overhead on every
    # optimiser iteration.
    W = np.column_stack((np.ones_like(w0), w0, w0**2))
    coef = np.linalg.lstsq(W, w1, rcond=None)[0]
    xi = w1 - W @ coef
    Zm = np.asarray(Z, dtype=float)
    g = Zm.T @ xi                      # sample moments Z'xi
    # Solve instead of forming the explicit inverse of Z'Z.
    return float(g @ np.linalg.solve(Zm.T @ Zm, g))

def LPCD(i,d=df,xL='logE',xK='logK',xM='logM',zL='logE-1',zK='logK',zM='logM-1',x0=(0.2,0.2),disp=False,):
    """Two-step LP estimate of a log-linear production function.

    The first stage (``dLP`` with ``TE=False``) gives the ``xL`` coefficient;
    the ``xK`` and ``xM`` coefficients come from minimising ``gmm`` with
    Nelder-Mead, using the columns ``zK`` and ``zM`` as instruments.

    Parameters
    ----------
    i : any
        Unused.  Kept as the first positional parameter for backward
        compatibility, so pass the data as ``d=...``, not positionally.
    d : DataFrame
        Input panel, forwarded to ``dLP``.
    xL, xK, xM : str
        Input column names, forwarded to ``dLP``.
    zL : str
        Unused here.
    zK, zM : str
        Instrument column names in the frame returned by ``dLP``.
    x0 : sequence of two floats
        Starting values for (βK, βM).
    disp : bool
        Forwarded to the optimiser's ``disp`` option.

    Returns
    -------
    DataFrame
        One row with columns ``βL, βK, βM, rs, ω``, where ``rs`` is the sum
        of the three coefficients.
        NOTE(review): ``w1`` is a Series, so the ``ω`` entry is not a scalar;
        confirm that this is intended.

    Warns
    -----
    RuntimeWarning
        If the optimiser reports that it did not converge.
    """
    d,pm,pm0,bl = dLP(xL=xL,xK=xK,xM=xM,d=d,TE=False)
    dd = d.dropna().copy()          # rows without a lag (first row of each idx) drop out
    X1 = dd[pm[1:3]]                # pm[0] is the bias '1'; the next two are the linear xK, xM terms
    X0 = dd[pm0[0:2]]               # their one-period lags
    Fy1 = dd['Fy']
    Fy0 = dd['Fy-1']
    Z = dd[[zK,zM]]
    e = dd.error
    res = sp.optimize.minimize(
        gmm, x0=x0, args=(X0,X1,Fy0,Fy1,Z,e), method='Nelder-Mead', tol=1e-7,
        options={'maxiter': 100_000, 'xatol': 1e-7, 'fatol': 1e-7, 'disp': disp},
    )
    if not res.success:
        # Do not let a non-converged simplex pass as an estimate silently.
        warnings.warn(f'LPCD: optimiser did not converge: {res.message}', RuntimeWarning)
    beta = res.x
    bk = beta[0]
    bm = beta[1]
    rs = bl + bk + bm
    w1 = Fy1 - X1 @ beta
    n0 = dd.shape[0]
    print(f'n:{n0},  βL: {bl}, βK: {bk}, βM: {bm}, rs: {rs}, ω: {w1.mean()}')
    coff = pd.DataFrame([bl,bk,bm,rs,w1]).T
    coff.columns = ['βL','βK','βM','rs','ω']
    return coff

In [5]:
# LPCD's first parameter `i` is unused, so pass the data explicitly as `d`;
# a positional `df` would land in `i` and the function would fall back to
# whatever `df` was bound as its default when the function was defined.
res = LPCD(i=None, d=df)

n:5233,  βL: 0.2521971908043707, βK: 0.5318918859420678, βM: -0.3474066701601457, rs: 0.43668240658629287, ω: 12.000313961904412


**ACF (Ackerberg–Caves–Frazer, value-added)**

In [6]:
def dACF(xL,xK,xM,d=df,TE=True):
    """Build the first-stage data set used by the ACF estimator.

    Regresses ``logY - xM`` on a third-degree polynomial in
    (``xL``, ``xK``, ``xM``) with ``PanelOLS``, then adds the residual, the
    fitted value (``Fy``) and one-period lags.

    Parameters
    ----------
    xL, xK, xM : str
        Column names in ``d`` that enter the polynomial.
    d : DataFrame
        Must provide ``idx``, ``year``, ``logY`` and the three columns above.
        It is only read; a new frame is returned.
    TE : bool
        Forwarded to ``PanelOLS(time_effects=...)``.  Author's note: the
        results are very sensitive to this switch.

    Returns
    -------
    d : DataFrame
        ``idx``, ``year``, ``logY`` (holding the dependent variable described
        above), the polynomial terms, ``error``, ``Fy`` and the lagged columns
        (suffix ``'-1'``), plus the key column that ``merge`` creates when
        joining on an index array.
    pm : list of str
        Polynomial term names; ``pm[0]`` is the bias term ``'1'``.
    pm0 : list of str
        ``pm[1:]`` with the ``'-1'`` lag suffix.
    """
    X = d[[xL,xK,xM]]
    poly = PolynomialFeatures(degree=3).fit(X)
    XX = poly.transform(X)
    # get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
    # and fall back only on old versions that lack it.  Both yield the same names.
    names_fn = getattr(poly, 'get_feature_names_out', None) or poly.get_feature_names
    pm = list(names_fn(X.columns))
    # "Value added" as defined by the author.  Kept in a local variable so the
    # caller's frame does not silently gain a 'Y' column.
    # NOTE(review): logY - logM is the log output-to-materials ratio rather
    # than log(Y - M); confirm this is the intended measure.
    value_added = d.logY - d[xM]
    d = pd.DataFrame(np.column_stack((d.idx, d.year, value_added, XX)))
    d.columns = ['idx','year','logY'] + pm
    d = d.set_index(['idx','year'])
    # Regressors are every column after the dependent variable.
    ols1s = lm.PanelOLS(d.logY, d.iloc[:,1:], time_effects=TE).fit()
    d['error'] = ols1s.resids
    d['Fy'] = d.logY - d.error
    d = d.reset_index()
    # shift(1) takes the previous row within each idx; this relies on the rows
    # already being ordered by year inside each idx (TODO confirm for new data).
    lag = d.groupby('idx')[pm[1:] + ['Fy']].shift(1)
    d = d.merge(lag, on=d.index, suffixes=('', '-1'))
    pm0 = [name + '-1' for name in pm][1:]
    return d,pm,pm0

def ACFCD(d=df,xL='logE',xK='logK',xM='logM',zL='logE-1',zK='logK',zM='logM-1',x0=(0.2,0.2),disp=False,):
    """Two-step ACF estimate of a log-linear value-added production function.

    The first stage (``dACF`` with ``TE=False``) produces the fitted values;
    the ``xL`` and ``xK`` coefficients come from minimising ``gmm`` with
    Nelder-Mead, using the columns ``zL`` and ``zK`` as instruments.

    Parameters
    ----------
    d : DataFrame
        Input panel, forwarded to ``dACF``.
    xL, xK, xM : str
        Input column names, forwarded to ``dACF``.
    zL, zK : str
        Instrument column names in the frame returned by ``dACF``.
    zM : str
        Unused here.
    x0 : sequence of two floats
        Starting values for (βL, βK).
    disp : bool
        Forwarded to the optimiser's ``disp`` option.

    Returns
    -------
    DataFrame
        One row with columns ``βL, βK, rs, ω``, where ``rs`` is the sum of
        the two coefficients.
        NOTE(review): ``w1`` is a Series, so the ``ω`` entry is not a scalar;
        confirm that this is intended.

    Warns
    -----
    RuntimeWarning
        If the optimiser reports that it did not converge.
    """
    d,pm,pm0 = dACF(xL=xL,xK=xK,xM=xM,d=d,TE=False)
    dd = d.dropna().copy()          # rows without a lag (first row of each idx) drop out
    X1 = dd[pm[1:3]]                # pm[0] is the bias '1'; the next two are the linear xL, xK terms
    X0 = dd[pm0[0:2]]               # their one-period lags
    Fy1 = dd['Fy']
    Fy0 = dd['Fy-1']
    Z = dd[[zL,zK]]
    e = dd.error
    res = sp.optimize.minimize(
        gmm, x0=x0, args=(X0,X1,Fy0,Fy1,Z,e), method='Nelder-Mead', tol=1e-7,
        options={'maxiter': 100_000, 'xatol': 1e-7, 'fatol': 1e-7, 'disp': disp},
    )
    if not res.success:
        # Do not let a non-converged simplex pass as an estimate silently.
        warnings.warn(f'ACFCD: optimiser did not converge: {res.message}', RuntimeWarning)
    beta = res.x
    bl = beta[0]
    bk = beta[1]
    rs = bl + bk
    w1 = Fy1 - X1 @ beta
    n0 = dd.shape[0]
    print(f'n:{n0},  βL: {bl}, βK: {bk}, rs: {rs}, ω: {w1.mean()}')
    coff = pd.DataFrame([bl,bk,rs,w1]).T
    coff.columns = ['βL','βK','rs','ω']
    return coff

In [7]:
# Run the ACF estimator on the loaded panel (data passed by keyword for clarity).
res = ACFCD(d=df)

n:5233,  βL: -0.14667677969691184, βK: 0.42341490207710314, rs: 0.2767381223801913, ω: -4.5748230783948935
