# Production Technology

The dataset contains `N = 441` firms observed over `T = 12` years, 1968-1979. There variables are: 
* `lcap`: Log of capital stock, $k_{it}$ 
* `lemp`: log of employment, $\ell_{it}$ 
* `ldsa`: log of deflated sales, $y_{it}$
* `year`: the calendar year of the observation, `year` $ = 1968, ..., 1979$, 
* `firmid`: anonymized indicator variable for the firm, $i = 1, ..., N$, with $N=441$. 

In [345]:
import numpy as np
from numpy import linalg as la
import pandas as pd
from io import StringIO
from tabulate import tabulate
from matplotlib import pyplot as plt
import seaborn as sns

# Import this weeks LinearModels .py file
import LinearModelsWeek3 as lm
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [346]:
dat = pd.read_csv('firms.csv')

# Descriptives

In [347]:
#dat.describe()

In [348]:
#dat[['lcap','lemp','ldsa']].hist();

In [349]:
#sns.scatterplot(x='lemp', y='ldsa', data=dat); 

# Converting data to numpy format 

In [350]:
#Keep only 3 years 

dat = dat[(dat['year'] >= 1968) & (dat['year'] <= 1970)]
dat=dat.reset_index(drop=True)
dat

Unnamed: 0,firmid,year,lcap,lemp,ldsa
0,1,1968,0.998602,-0.242185,0.349053
1,1,1969,0.925214,-0.241278,0.312492
2,1,1970,0.879616,-0.265134,0.347566
3,2,1968,-0.069588,-0.323021,-0.945831
4,2,1969,-0.056724,-0.358177,-1.143830
...,...,...,...,...,...
1318,440,1969,-0.228757,0.031242,-0.246864
1319,440,1970,-0.038354,0.062158,-0.345710
1320,441,1968,-1.618390,-1.944210,-2.032340
1321,441,1969,-1.635030,-1.856580,-2.011210


In [351]:
dat.ldsa.values.shape

(1323,)

In [352]:
N = dat.firmid.unique().size
T = dat.year.unique().size
assert dat.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=441 and T=3


Extract data from `pandas` to `numpy` arrays. 

In [353]:
y = dat.ldsa.values.reshape((N*T,1))

ones = np.ones((N*T,1))
l = dat.lemp.values.reshape((N*T,1))
k = dat.lcap.values.reshape((N*T,1))
x = np.hstack([l, k])

In [354]:
#Laver labels
label_y = 'log Deflated sales'
label_x = [
    'log Employment', 
    'log Adjusted Capital Stock',  
]

In [355]:
############################ FE-estimation ###################################
###############################################################################

In [356]:
def remove_zero_columns(x, label_x):
    """
    The function removes columns from a matrix that are all zeros and returns the updated matrix and
    corresponding labels.
    
    Args:
      x: The parameter `x` is a numpy array representing a matrix with columns that may contain zeros.
      label_x: The parameter `label_x` is a list that contains the labels for each column in the input
    array `x`.
    
    Returns:
      x_nonzero: numpy array of x with columns that are all zeros removed.
      label_nonzero: list of labels for each column in x_nonzero.
    """
    
    # Find the columns that are not all zeros
    nonzero_cols = ~np.all(x == 0, axis=0)
    
    # Remove the columns that are all zeros
    x_nonzero = x[:, nonzero_cols]
    
    # Get the labels for the columns that are not all zeros
    label_nonzero = [label_x[i] for i in range(len(label_x)) if nonzero_cols[i]]
    return x_nonzero, label_nonzero

In [357]:
# Transform the data
# Create transformation matrix
def demeaning_matrix(T):
    Q_T = np.eye(T) - np.tile(1/T, (T, T))
    return Q_T
# Print the matrix
Q_T = demeaning_matrix(T)
print(f'Demeaning matrix for T={T} \n', Q_T)

y_dot = lm.perm(Q_T, y)
x_dot = lm.perm(Q_T, x)

# Remove the columns that are only zeroes
x_dot, label_x_dot = remove_zero_columns(x_dot, label_x)

# Estimate 
fe_result = lm.estimate(y_dot, x_dot, transform='fe', T=T)
lm.print_table((label_y, label_x_dot), fe_result, title="Fixed Effects", floatfmt='.4f')

Demeaning matrix for T=3 
 [[ 0.66666667 -0.33333333 -0.33333333]
 [-0.33333333  0.66666667 -0.33333333]
 [-0.33333333 -0.33333333  0.66666667]]
Fixed Effects
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6004  0.0346     17.3515
log Adjusted Capital Stock  0.0502  0.0382      1.3143
R² = 0.284
σ² = 0.008


In [384]:
b_hat_shape=fe_result['b_hat'].reshape(-1,1)

fe_u=y_dot-x_dot@b_hat_shape

x_dot_T = x_dot.T
sum_part=x_dot_T@fe_u@fe_u.T@x_dot

robust_var_re=np.linalg.inv(x_dot_T@x_dot)@sum_part@np.linalg.inv(x_dot_T@x_dot)

robust_var_re

array([[ 1.02740155e-31, -3.47762213e-32],
       [-3.96574081e-32,  1.37052588e-32]])

In [359]:
############################ FD-estimation ###################################
###############################################################################

In [360]:
# Transform the data
# Create transformation matrix
def fd_matrix(T):
    D_T = np.eye(T) - np.eye(T, k=-1)
    D_T = D_T[1:]
    return D_T

# Print the matrix
D_T = fd_matrix(T)
print(f'First differening matrix for T={T} \n', D_T)

y_diff = lm.perm(D_T, y)
x_diff = lm.perm(D_T, x)

# Remove the columns that are only zeroes
x_diff, label_x_diff = remove_zero_columns(x_diff, label_x)

# Estimate 
fd_result = lm.estimate(y_diff, x_diff, transform='fd', T=T-1) 
lm.print_table((label_y, label_x_diff), fd_result, title="First Difference", floatfmt='.4f')

First differening matrix for T=3 
 [[-1.  1.  0.]
 [ 0. -1.  1.]]
First Difference
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.5509  0.0365     15.0788
log Adjusted Capital Stock  0.0381  0.0432      0.8816
R² = 0.217
σ² = 0.013


In [361]:
############################ RE-estimation ###################################
###############################################################################

In [362]:
value = 1 / T
P_T = np.full((T,1), value).reshape(1,-1)

print(P_T) 
y_mean = lm.perm(P_T, y)
x_mean = lm.perm(P_T, x)

# Estimate 
be_result = lm.estimate(y_mean, x_mean, transform='be')
lm.print_table((label_y, label_x), be_result, title="Between Estimator", floatfmt='.4f')


[[0.33333333 0.33333333 0.33333333]]
Between Estimator
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6856  0.0333     20.5565
log Adjusted Capital Stock  0.2778  0.0295      9.4042
R² = 0.921
σ² = 0.122


In [363]:
# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
sigma2_u = fe_result['sigma2']
sigma2_w = be_result['sigma2']
sigma2_c = sigma2_w - (1/T*sigma2_u)
_lambda = 1-np.sqrt(sigma2_u/(sigma2_u+T*sigma2_c))

# Print lambda 
print(f'Lambda is approximately equal to {_lambda.item():.4f}.')

Lambda is approximately equal to 0.8509.


In [364]:
# Transform the data
C_T = np.identity(T)-_lambda*P_T
y_re = lm.perm(C_T, y)
x_re = lm.perm(C_T, x)

# Estimate 
re_result = lm.estimate(y_re, x_re, transform='re', T=T)
lm.print_table((label_y, label_x), re_result, title="Random Effects", floatfmt='.4f')

Random Effects
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6912  0.0235     29.4669
log Adjusted Capital Stock  0.2477  0.0214     11.6004
R² = 0.797
σ² = 0.008


In [365]:
####################  WALD TEST #########################
###### for derterming if the joined hyphotesis hold #####
#########################################################

In [366]:
R=np.array([1,1]).reshape(1,-1)
r=np.array([1]).reshape(1,-1)

In [367]:
varb_fe=fe_result['cov']
varb_fe.shape

b_hat_fe=fe_result['b_hat']
b_hat_fe.shape

varb_fd=fd_result['cov']
varb_fd.shape

b_hat_fd=fd_result['b_hat']
b_hat_fd.shape

varb_re=re_result['cov']
varb_re.shape

b_hat_re=re_result['b_hat']
b_hat_re.shape

(2, 1)

In [368]:
######### Implementing the Wald-test for the three estiamtors ##########

W_fe=(R@b_hat_fe-r).T@np.linalg.inv(R@varb_fe@(R.T))@(R@b_hat_fe-r)
W_fd=(R@b_hat_fd-r).T@np.linalg.inv(R@varb_fd@(R.T))@(R@b_hat_fd-r)
W_re=(R@b_hat_re-r).T@np.linalg.inv(R@varb_re@(R.T))@(R@b_hat_re-r)

print(W_fe, W_fd, W_re)

[[66.06939843]] [[66.06273481]] [[21.95818802]]


In [369]:
#################### TEST FOR FE.1 ######################
########### Are the delta(u_it)'s correlated? ##################
#########################################################

In [370]:
# Make function to calculate the serial correlation
def serial_corr(y, x, T):
    # Calculate the residuals
    b_hat = lm.est_ols(y, x)
    e = y - x@b_hat
    
    # Create a lag transformation matrix
    L_T = np.eye(T, k=-1)
    L_T = L_T[1:]

    # Lag residuals
    e_l = lm.perm(L_T, e)

    # Create a transformation matrix that removes the first observation of each individal
    I_T = np.eye(T, k=0)
    I_T = I_T[1:]
    
    # Remove first observation of each individual
    e = lm.perm(I_T, e)
    
    # Calculate the serial correlation
    return lm.estimate(e, e_l,T=T-1)

In [371]:
# Estimate serial correlation
corr_result = serial_corr(y_diff, x_diff, T-1)

# Print results
label_ye = 'OLS residual, e\u1d62\u209c'
label_e = ['e\u1d62\u209c\u208B\u2081']
lm.print_table(
    (label_ye, label_e), corr_result, 
    title='Serial Correlation', floatfmt='.4f'
)

Serial Correlation
Dependent variable: OLS residual, eᵢₜ

          Beta      Se    t-values
-----  -------  ------  ----------
eᵢₜ₋₁  -0.1849  0.0483     -3.8295
R² = 0.032
σ² = 0.013


In [372]:
#################### TEST FOR FE.3 ######################
############# strict exogeneity  ########################
#########################################################

In [373]:
######### First adding leaded variables of labour and capital to x ##########
print(x_dot.shape)
x_dot

(1323, 2)


array([[ 0.00734733,  0.0641244 ],
       [ 0.00825433, -0.0092634 ],
       [-0.01560167, -0.054861  ],
       ...,
       [-0.1643    , -0.04895333],
       [-0.07667   , -0.06559333],
       [ 0.24097   ,  0.11454667]])

In [374]:
# Lead labour and capital 
F_T = np.eye(T, k=1)
F_T = F_T[:-1]

labour_lead = lm.perm(F_T, x[:, 0].reshape(-1, 1))
capital_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))

# Remove the last observed year for every individial
I_T = np.eye(T, k=0)
I_T = I_T[:-1]

x_exo = lm.perm(I_T, x)
y_exo = lm.perm(I_T, y)

# Add leaded to x_exo
x_exo = np.hstack((x_exo, labour_lead, capital_lead))

x_exo.shape

(882, 4)

In [375]:
# Within transform the data
Q_T = demeaning_matrix(T - 1)
yw_exo = lm.perm(Q_T, y_exo)
xw_exo = lm.perm(Q_T, x_exo)

xw_exo

array([[-0.0004535 ,  0.0366939 ,  0.011928  ,  0.0227988 ],
       [ 0.0004535 , -0.0366939 , -0.011928  , -0.0227988 ],
       [ 0.017578  , -0.0064321 ,  0.0241675 , -0.0120218 ],
       ...,
       [-0.01765535,  0.015343  ,  0.01545775,  0.0952014 ],
       [-0.043815  ,  0.00832   , -0.15882   , -0.09007   ],
       [ 0.043815  , -0.00832   ,  0.15882   ,  0.09007   ]])

In [376]:
# Estimate model
exo_test = lm.estimate(yw_exo, xw_exo, T=T - 1, transform='fe')

# Print results
label_exo = label_x + ['Labour lead'] + ['Capital lead']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test', floatfmt='.4f')

Exogeneity test
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.4599  0.0550      8.3615
log Adjusted Capital Stock  0.0580  0.0674      0.8617
Labour lead                 0.1552  0.0504      3.0768
Capital lead                0.0656  0.0702      0.9342
R² = 0.221
σ² = 0.006


In [377]:
######### Adding lagged variables of labour and capital to x ##########
L_T = np.eye(T, k=-1)
L_T = L_T[1:]

labour_lag = lm.perm(L_T, x[:, 0].reshape(-1, 1))
capital_lag = lm.perm(L_T, x[:, 1].reshape(-1, 1))

# Remove the first observed year for every individial
I_T = np.eye(T, k=0)
I_T = I_T[1:]

x_exo2 = lm.perm(I_T, x)
y_exo2 = lm.perm(I_T, y)

# Add leaded to x_exo
x_exo2 = np.hstack((x_exo2, labour_lag, capital_lag))

x_exo2

array([[-0.241278 ,  0.9252139, -0.242185 ,  0.9986017],
       [-0.265134 ,  0.8796163, -0.241278 ,  0.9252139],
       [-0.358177 , -0.0567239, -0.323021 , -0.0695881],
       ...,
       [ 0.0621576, -0.0383542,  0.0312421, -0.228757 ],
       [-1.85658  , -1.63503  , -1.94421  , -1.61839  ],
       [-1.53894  , -1.45489  , -1.85658  , -1.63503  ]])

In [378]:
# Within transform the data
Q_T = demeaning_matrix(T - 1)
yw_exo2 = lm.perm(Q_T, y_exo2)
xw_exo2 = lm.perm(Q_T, x_exo2)

In [379]:
# Estimate model
exo_test = lm.estimate(yw_exo2, xw_exo2, T=T - 1, transform='fe')

# Print results
label_exo = label_x + ['Labour lag'] + ['Capital lag']
lm.print_table((label_y, label_exo), exo_test, title='Exogeneity test', floatfmt='.4f')

Exogeneity test
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log Employment               0.5920  0.0522     11.3387
log Adjusted Capital Stock  -0.1270  0.0727     -1.7481
Labour lag                   0.1057  0.0569      1.8563
Capital lag                  0.1248  0.0697      1.7904
R² = 0.248
σ² = 0.007


In [380]:
#################### The Hausmann test ######################
#############  Comparing FE and RE ##########################
#############################################################

In [381]:
# Calculate the test statistic
b_diff = b_hat_fe-b_hat_re
cov_diff = varb_fe-varb_re
H = b_diff.T@la.inv(cov_diff)@b_diff

# Find critical value and p-value at 5% significance level of chi^2 with M degrees of freedom
M = 4

import scipy.stats as stats
crit_val = stats.chi2.ppf(1-0.05, df=M)
p_val = 1-stats.chi2.cdf(H.item(), df=M)

# Print the results
print(f'The test statistic is {H.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

The test statistic is 50.88.
The critical value at a 5% significance level is 9.49.
The p-value is 0.00000000.
