# Production Technology

The dataset contains `N = 441` firms observed over `T = 12` years, 1968-1979. There variables are: 
* `lcap`: Log of capital stock, $k_{it}$ 
* `lemp`: log of employment, $\ell_{it}$ 
* `ldsa`: log of deflated sales, $y_{it}$
* `year`: the calendar year of the observation, `year` $ = 1968, ..., 1979$, 
* `firmid`: anonymized indicator variable for the firm, $i = 1, ..., N$, with $N=441$. 

In [43]:
import numpy as np
from numpy import linalg as la
import pandas as pd
from io import StringIO
from tabulate import tabulate
from matplotlib import pyplot as plt
import seaborn as sns

# Import this weeks LinearModels .py file
import LinearDynamic_ante as lm
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
dat = pd.read_csv('firms.csv')

# Converting data to numpy format and making variables

In [75]:
#Keep only 3 years 

dat = dat[(dat['year'] >= 1968) & (dat['year'] <= 1970)]
dat=dat.reset_index(drop=True)
dat

Unnamed: 0,firmid,year,lcap,lemp,ldsa
0,1,1968,0.998602,-0.242185,0.349053
1,1,1969,0.925214,-0.241278,0.312492
2,1,1970,0.879616,-0.265134,0.347566
3,2,1968,-0.069588,-0.323021,-0.945831
4,2,1969,-0.056724,-0.358177,-1.143830
...,...,...,...,...,...
1318,440,1969,-0.228757,0.031242,-0.246864
1319,440,1970,-0.038354,0.062158,-0.345710
1320,441,1968,-1.618390,-1.944210,-2.032340
1321,441,1969,-1.635030,-1.856580,-2.011210


In [76]:
dat.ldsa.values.shape

(1323,)

In [77]:
N = dat.firmid.unique().size
T = dat.year.unique().size
assert dat.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=441 and T=3


Extract data from `pandas` to `numpy` arrays. 

In [78]:
y = dat.ldsa.values.reshape((N*T,1))

ones = np.ones((N*T,1))
l = dat.lemp.values.reshape((N*T,1))
k = dat.lcap.values.reshape((N*T,1))
x = np.hstack([ones, l, k])

In [79]:
#Laver labels
label_y = 'log Deflated sales'
label_x = [
    'log constant',
    'log Employment', 
    'log Adjusted Capital Stock',  
]

## Creating function to remove zero columns

In [80]:
def remove_zero_columns(x, label_x):
    """
    The function removes columns from a matrix that are all zeros and returns the updated matrix and
    corresponding labels.
    
    Args:
      x: The parameter `x` is a numpy array representing a matrix with columns that may contain zeros.
      label_x: The parameter `label_x` is a list that contains the labels for each column in the input
    array `x`.
    
    Returns:
      x_nonzero: numpy array of x with columns that are all zeros removed.
      label_nonzero: list of labels for each column in x_nonzero.
    """
    
    # Find the columns that are not all zeros
    nonzero_cols = ~np.all(x == 0, axis=0)
    
    # Remove the columns that are all zeros
    x_nonzero = x[:, nonzero_cols]
    
    # Get the labels for the columns that are not all zeros
    label_nonzero = [label_x[i] for i in range(len(label_x)) if nonzero_cols[i]]
    return x_nonzero, label_nonzero

# The FE-estimator

In [81]:
def demeaning_matrix(T):
    Q_T = np.eye(T) - np.tile(1/T, (T, T))
    return Q_T

In [82]:
# Transform the data
Q_T = demeaning_matrix(T)

y_dot = lm.perm(Q_T, y)
x_dot = lm.perm(Q_T, x)[:,1:3]

label_x_dt=[
    'log Employment', 
    'log Adjusted Capital Stock',  
]
# Remove the columns that are only zeroes
#x_dot, label_x_dot = remove_zero_columns(x_dot, label_x)

# Estimate 
fe_result = lm.estimate(y_dot, x_dot, z=None, transform='fe', T=T, robust_se=True)
lm.print_table((label_y, label_x_dt), fe_result, title="Fixed Effects", floatfmt='.4f')

Fixed Effects
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6004  0.0497     12.0916
log Adjusted Capital Stock  0.0502  0.0477      1.0533
R² = 0.284
σ² = 0.008


# The FD-estimator

In [83]:
# Transform the data
# Create transformation matrix
def fd_matrix(T):
    D_T = np.eye(T) - np.eye(T, k=-1)
    D_T = D_T[1:]
    return D_T

# Print the matrix
D_T = fd_matrix(T)
print(f'First differening matrix for T={T} \n', D_T)

y_diff = lm.perm(D_T, y)
x_diff = lm.perm(D_T, x)

# Remove the columns that are only zeroes
x_diff, label_x_diff = remove_zero_columns(x_diff, label_x)

# Estimate 
fd_result = lm.estimate(y_diff, x_diff, z=None, transform='fd', T=T-1, robust_se=True) 
lm.print_table((label_y, label_x_diff), fd_result, title="First Difference", floatfmt='.4f')

First differening matrix for T=3 
 [[-1.  1.  0.]
 [ 0. -1.  1.]]
First Difference
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.5509  0.0497     11.0762
log Adjusted Capital Stock  0.0381  0.0458      0.8329
R² = 0.217
σ² = 0.013


# The RE-estimator

In [84]:
value = 1 / T
P_T = np.full((T,1), value).reshape(1,-1)
 
y_mean = lm.perm(P_T, y)
x_mean = lm.perm(P_T, x)

# Estimate 
be_result = lm.estimate(y_mean, x_mean, z=None, transform='be', T=T, robust_se=True) 

lm.print_table((label_y, label_x), be_result, title="Between Estimator", floatfmt='.4f')


Between Estimator
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log constant                0.0000  0.0171      0.0000
log Employment              0.6856  0.0370     18.5500
log Adjusted Capital Stock  0.2778  0.0320      8.6919
R² = 0.921
σ² = 0.122


In [85]:
# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
sigma2_u = fe_result['sigma2']
sigma2_w = be_result['sigma2']
sigma2_c = sigma2_w - (1/T*sigma2_u)
_lambda = 1-np.sqrt(sigma2_u/(sigma2_u+T*sigma2_c))

# Print lambda 
print(f'Lambda is approximately equal to {_lambda.item():.4f}.')

Lambda is approximately equal to 0.8511.


In [86]:
# Transform the data
C_T = np.identity(T)-_lambda*P_T
y_re = lm.perm(C_T, y)
x_re = lm.perm(C_T, x)

# Estimate 
re_result = lm.estimate(y_re, x_re, z=None, transform='re', T=T, robust_se=True)

lm.print_table((label_y, label_x), re_result, title="Random Effects", floatfmt='.4f')

Random Effects
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log constant                0.0000  0.0167      0.0000
log Employment              0.6912  0.0303     22.8178
log Adjusted Capital Stock  0.2476  0.0261      9.4722
R² = 0.797
σ² = 0.008


# WALD TEST 
## for derterming if the joined hyphotesis hold 

In [87]:
R_fe=np.array([1,1]).reshape(1,-1)
r_fe=np.array([1]).reshape(1,-1)

R_re=np.array([0,1,1]).reshape(1,-1)
r_re=np.array([1]).reshape(1,-1)

In [88]:
varb_fe=fe_result['cov']
varb_fe.shape

b_hat_fe=fe_result['b_hat']
b_hat_fe.shape

varb_re=re_result['cov']
varb_re.shape

b_hat_re=re_result['b_hat']
b_hat_re.shape

(3, 1)

In [89]:
######### Implementing the Wald-test for the two estiamtors ##########

W_fe=(R_fe@b_hat_fe-r_fe).T@np.linalg.inv(R_fe@varb_fe@(R_fe.T))@(R_fe@b_hat_fe-r_fe)
W_re=(R_re@b_hat_re-r_re).T@np.linalg.inv(R_re@varb_re@(R_re.T))@(R_re@b_hat_re-r_re)

print(W_fe, W_re)

[[38.64345266]] [[22.51375093]]


# Testing assumptions 

## TEST FOR FE.3: Are the $\Delta$ $u_{it}$'s correlated? 

In [90]:
# Make function to calculate the serial correlation
def serial_corr(y, x, T):
    # Calculate the residuals
    b_hat = lm.est_ols(y, x)
    e = y - x@b_hat
    
    # Create a lag transformation matrix
    L_T = np.eye(T, k=-1)
    L_T = L_T[1:]

    # Lag residuals
    e_l = lm.perm(L_T, e)

    # Create a transformation matrix that removes the first observation of each individal
    I_T = np.eye(T, k=0)
    I_T = I_T[1:]
    
    # Remove first observation of each individual
    e = lm.perm(I_T, e)
    
    # Calculate the serial correlation
    return lm.estimate(e, e_l,T=T-1)

In [91]:
# Estimate serial correlation
corr_result = serial_corr(y_diff, x_diff, T-1)

# Print results
label_ye = 'OLS residual, e\u1d62\u209c'
label_e = ['e\u1d62\u209c\u208B\u2081']
lm.print_table(
    (label_ye, label_e), corr_result, 
    title='Serial Correlation', floatfmt='.4f'
)

Serial Correlation
Dependent variable: OLS residual, eᵢₜ

          Beta      Se    t-values
-----  -------  ------  ----------
eᵢₜ₋₁  -0.1849  0.0483     -3.8295
R² = 0.032
σ² = 0.013


## TEST FOR FE.1/RE.1

### Constructing x_exo: regressors including leaded and lagged values

#### Leads of x's

In [92]:
# Lead labour and capital 
F_T = np.eye(T, k=1)
F_T = F_T[:-1]

labour_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))
capital_lead = lm.perm(F_T, x[:, 2].reshape(-1, 1))

# Remove the last observed year for every individial
I_T = np.eye(T, k=0)
I_T = I_T[:-1]

x_exo_lead = lm.perm(I_T, x[:,1:3])
x_exo_lead_cons = lm.perm(I_T, x)
y_exo_lead = lm.perm(I_T, y)

# Add leaded to x_exo
x_exo_lead = np.hstack((x_exo_lead, labour_lead, capital_lead))
x_exo_lead_cons = np.hstack((x_exo_lead_cons, labour_lead, capital_lead))

x_exo_lead.shape
x_exo_lead_cons.shape

(882, 5)

In [93]:
############### Fixed Effects ###############

In [94]:
# Within transform the data
Q_T = demeaning_matrix(T - 1)
yw_exo_lead = lm.perm(Q_T, y_exo_lead)
xw_exo_lead = lm.perm(Q_T, x_exo_lead)

In [95]:
# Estimate model
exo_lead_fe = lm.estimate(yw_exo_lead, xw_exo_lead, z=None, transform='fe', T=T-1, robust_se=True)

# Print results
label_exo = label_x_dt + ['Labour lead'] + ['Capital lead']
lm.print_table((label_y, label_exo), exo_lead_fe, title='Exogeneity test', floatfmt='.4f')

Exogeneity test
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.4599  0.0660      6.9707
log Adjusted Capital Stock  0.0580  0.0746      0.7783
Labour lead                 0.1552  0.0473      3.2785
Capital lead                0.0656  0.0838      0.7828
R² = 0.221
σ² = 0.006


In [96]:
############### Random Effects ###############

In [97]:
value = 1 / T
P_T = np.full((T,1), value).reshape(1,-1)
 
y_exo_lead_mean = lm.perm(P_T, y_exo_lead)
x_exo_lead_cons_mean = lm.perm(P_T, x_exo_lead_cons)

# Estimate 
exo_lead_be = lm.estimate(y_exo_lead_mean, x_exo_lead_cons_mean, z=None, transform='be', T=T-1, robust_se=True) 

#print(be_result)

label_exo_cons = label_x + ['Labour lead'] + ['Capital lead']

print(label_exo_cons)

lm.print_table((label_y, label_exo_cons), exo_lead_be, title='Exogeneity test', floatfmt='.4f')

# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
lead_sigma2_u = exo_lead_fe['sigma2']
lead_sigma2_w = exo_lead_be['sigma2']
lead_sigma2_c = lead_sigma2_w - (1/T*lead_sigma2_u)
lead_lambda = 1-np.sqrt(lead_sigma2_u/(lead_sigma2_u+T*lead_sigma2_c))

# Print lambda 
print(f'Lambda is approximately equal to {lead_lambda.item():.4f}.')

# Transform the data
C_T = np.identity(T)-lead_lambda*P_T
y_exo_lead_re = lm.perm(C_T, y_exo_lead)
x_exo_lead_re = lm.perm(C_T, x_exo_lead_cons)

# Estimate 
exo_lead_re = lm.estimate(y_exo_lead_re, x_exo_lead_re, z=None, transform='re', T=T-1, robust_se=True)

lm.print_table((label_y, label_exo_cons), exo_lead_re, title="Random Effects", floatfmt='.4f')

['log constant', 'log Employment', 'log Adjusted Capital Stock', 'Labour lead', 'Capital lead']
Exogeneity test
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log constant                 0.0000  0.0163      0.0000
log Employment               0.4825  0.2419      1.9948
log Adjusted Capital Stock  -0.9159  0.2448     -3.7418
Labour lead                  0.1597  0.2270      0.7033
Capital lead                 1.2536  0.2485      5.0457
R² = 0.930
σ² = 0.066
Lambda is approximately equal to 0.8204.
Random Effects
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log constant                 0.0000  0.0568      0.0000
log Employment               0.2166  0.0862      2.5129
log Adjusted Capital Stock  -0.2247  0.1219     -1.8434
Labour lead                  0.4689  0.0861      5.4450
Ca

#### Lag of x's

In [98]:
######### Adding lagged variables of labour and capital to x ##########
L_T = np.eye(T, k=-1)
L_T = L_T[1:]

labour_lag = lm.perm(L_T, x[:, 1].reshape(-1, 1))
capital_lag = lm.perm(L_T, x[:, 2].reshape(-1, 1))

# Remove the first observed year for every individial
I_T = np.eye(T, k=0)
I_T = I_T[1:]

x_exo_lag = lm.perm(I_T, x[:,1:3])
x_exo_lag_cons = lm.perm(I_T, x)

y_exo_lag = lm.perm(I_T, y)

# Add leaded to x_exo
x_exo_lag = np.hstack((x_exo_lag, labour_lag, capital_lag))
x_exo_lag_cons = np.hstack((x_exo_lag_cons, labour_lag, capital_lag))

In [70]:
# Within transform the data
Q_T = demeaning_matrix(T - 1)
yw_exo_lag = lm.perm(Q_T, y_exo_lag)
xw_exo_lag = lm.perm(Q_T, x_exo_lag)

In [71]:
# Estimate model
exo_lag_fe = lm.estimate(yw_exo_lag, xw_exo_lag, z=None, transform='fe', T=T-1, robust_se=True)

# Print results
label_exo = label_x_dt + ['Labour lag'] + ['Capital lag']
lm.print_table((label_y, label_exo), exo_lag_fe, title='Exogeneity test', floatfmt='.4f')

Exogeneity test
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6433  0.0357     18.0009
log Adjusted Capital Stock  0.1431  0.0449      3.1839
Labour lag                  0.0570  0.0374      1.5250
Capital lag                 0.0065  0.0331      0.1969
R² = 0.448
σ² = 0.017


In [72]:
############### Random Effects ###############

In [73]:
value = 1 / T
P_T = np.full((T,1), value).reshape(1,-1)
 
y_exo_lag_mean = lm.perm(P_T, y_exo_lag)
x_exo_lag_cons_mean = lm.perm(P_T, x_exo_lag_cons)

# Estimate 
exo_lag_be = lm.estimate(y_exo_lag_mean, x_exo_lag_cons_mean, z=None, transform='be', T=T-1, robust_se=True) 

#print(be_result)

label_exo_cons = label_x + ['Labour lead'] + ['Capital lead']

lm.print_table((label_y, label_exo_cons), exo_lag_be, title='Exogeneity test', floatfmt='.4f')

# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
lag_sigma2_u = exo_lag_fe['sigma2']
lag_sigma2_w = exo_lag_be['sigma2']
lag_sigma2_c = lag_sigma2_w - (1/T*lag_sigma2_u)
lag_lambda = 1-np.sqrt(lag_sigma2_u/(lag_sigma2_u+T*lag_sigma2_c))

# Print lambda 
print(f'Lambda is approximately equal to {lag_lambda.item():.4f}.')

# Transform the data
C_T = np.identity(T)-lag_lambda*P_T
y_exo_lag_re = lm.perm(C_T, y_exo_lag)
x_exo_lag_re = lm.perm(C_T, x_exo_lag_cons)

# Estimate 
exo_lag_re = lm.estimate(y_exo_lag_re, x_exo_lag_re, z=None, transform='re', T=T-1, robust_se=True)

lm.print_table((label_y, label_exo_cons), exo_lag_re, title="Random Effects", floatfmt='.4f')

Exogeneity test
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log constant                -0.0001  0.0131     -0.0108
log Employment               1.5884  0.5762      2.7567
log Adjusted Capital Stock   1.6675  0.4753      3.5085
Labour lead                 -0.9468  0.5720     -1.6551
Capital lead                -1.3076  0.4661     -2.8053
R² = 0.936
σ² = 0.068
Lambda is approximately equal to 0.8569.
Random Effects
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log constant                -0.0003  0.0630     -0.0041
log Employment               0.8115  0.0475     17.0985
log Adjusted Capital Stock   0.4088  0.0557      7.3426
Labour lead                 -0.1437  0.0505     -2.8455
Capital lead                -0.0987  0.0503     -1.9624
R² = 0.878
σ² = 0.061


# The Hausmann test 
## Comparing FE and RE 

In [74]:
# Calculate the test statistic
b_diff = b_hat_fe-b_hat_re[1:3]
cov_diff = varb_fe-varb_re[1:3,1:3]
H = b_diff.T@la.inv(cov_diff)@b_diff

# Find critical value and p-value at 5% significance level of chi^2 with M degrees of freedom
M = 4

import scipy.stats as stats
crit_val = stats.chi2.ppf(1-0.05, df=M)
p_val = 1-stats.chi2.cdf(H.item(), df=M)

# Print the results
print(f'The test statistic is {H.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f'The p-value is {p_val:.8f}.')

The test statistic is 10.19.
The critical value at a 5% significance level is 9.49.
The p-value is 0.03739827.
