# Production Technology

The dataset contains `N = 441` firms observed over `T = 12` years, 1968-1979. There variables are: 
* `lcap`: Log of capital stock, $k_{it}$ 
* `lemp`: log of employment, $\ell_{it}$ 
* `ldsa`: log of deflated sales, $y_{it}$
* `year`: the calendar year of the observation, `year` $ = 1968, ..., 1979$, 
* `firmid`: anonymized indicator variable for the firm, $i = 1, ..., N$, with $N=441$. 

In [1]:
import numpy as np
from numpy import linalg as la
import pandas as pd
from io import StringIO
from tabulate import tabulate
from matplotlib import pyplot as plt
import seaborn as sns
import scipy.stats as stats

# Import this weeks LinearModels .py file
import LinearDynamic_ante as lm
%load_ext autoreload
%autoreload 2

In [2]:
dat = pd.read_csv('firms.csv')

# Converting data to numpy format and making variables

In [3]:
#Keep only 3 years 

dat = dat[(dat['year'] >= 1968) & (dat['year'] <= 1970)]
dat=dat.reset_index(drop=True)
dat

Unnamed: 0,firmid,year,lcap,lemp,ldsa
0,1,1968,0.998602,-0.242185,0.349053
1,1,1969,0.925214,-0.241278,0.312492
2,1,1970,0.879616,-0.265134,0.347566
3,2,1968,-0.069588,-0.323021,-0.945831
4,2,1969,-0.056724,-0.358177,-1.143830
...,...,...,...,...,...
1318,440,1969,-0.228757,0.031242,-0.246864
1319,440,1970,-0.038354,0.062158,-0.345710
1320,441,1968,-1.618390,-1.944210,-2.032340
1321,441,1969,-1.635030,-1.856580,-2.011210


In [4]:
dat.ldsa.values.shape

(1323,)

In [5]:
N = dat.firmid.unique().size
T = dat.year.unique().size
assert dat.shape[0] == N*T, f'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=441 and T=3


Extract data from `pandas` to `numpy` arrays. 

In [6]:
y = dat.ldsa.values.reshape((N*T,1))

ones = np.ones((N*T,1))
l = dat.lemp.values.reshape((N*T,1))
k = dat.lcap.values.reshape((N*T,1))
x = np.hstack([l, k])

In [7]:
#Laver labels
label_y = 'log Deflated sales'
label_x = [
    'log Employment', 
    'log Adjusted Capital Stock',  
]

## Creating function to remove zero columns

In [8]:
def remove_zero_columns(x, label_x):
    """
    The function removes columns from a matrix that are all zeros and returns the updated matrix and
    corresponding labels.
    
    Args:
      x: The parameter `x` is a numpy array representing a matrix with columns that may contain zeros.
      label_x: The parameter `label_x` is a list that contains the labels for each column in the input
    array `x`.
    
    Returns:
      x_nonzero: numpy array of x with columns that are all zeros removed.
      label_nonzero: list of labels for each column in x_nonzero.
    """
    
    # Find the columns that are not all zeros
    nonzero_cols = ~np.all(x == 0, axis=0)
    
    # Remove the columns that are all zeros
    x_nonzero = x[:, nonzero_cols]
    
    # Get the labels for the columns that are not all zeros
    label_nonzero = [label_x[i] for i in range(len(label_x)) if nonzero_cols[i]]
    return x_nonzero, label_nonzero

In [12]:
################## Calculating p-values ####################

def p_vals_ttest(x , result, N, T, estimator=''):

    K = x.shape[1]

    # Define the number of entities, time periods, and other variables
    num_entities =  N
    num_time_periods = T
    num_regressors = K

    # Set the significance level (alpha) for your t-test
    alpha = 0.05  # Adjust as needed

    # Calculate degrees of freedom
    if estimator=='FE':
    
        df = N*T - N - K
        
    else:
        df = N*T - K
        
    t_vals =result['t_values']

    p_val = 1-stats.t.cdf(abs(t_vals), df)
    return p_val

# The FE-estimator

In [13]:
def demeaning_matrix(T):
    Q_T = np.eye(T) - np.tile(1/T, (T, T))
    return Q_T

In [14]:
# Transform the data
Q_T = demeaning_matrix(T)

y_dot = lm.perm(Q_T, y)
x_dot = lm.perm(Q_T, x)

label_x_dt=[
    'log Employment', 
    'log Adjusted Capital Stock',  
]
# Remove the columns that are only zeroes
#x_dot, label_x_dot = remove_zero_columns(x_dot, label_x)

# Estimate 
fe_result = lm.estimate(y_dot, x_dot, z=None, transform='fe', T=T, robust_se=True)
lm.print_table((label_y, label_x_dt), fe_result, title="Fixed Effects", floatfmt='.4f')

Fixed Effects
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6004  0.0497     12.0916
log Adjusted Capital Stock  0.0502  0.0477      1.0533
R² = 0.284
σ² = 0.008


In [15]:
p_val_fe = p_vals_ttest(x_dot, fe_result, N, T, estimator='FE')

print(p_val_fe)

[[0.        ]
 [0.14623868]]


# The RE-estimator

In [16]:
value = 1 / T
P_T = np.full((T,1), value).reshape(1,-1)
 
y_mean = lm.perm(P_T, y)
x_mean = lm.perm(P_T, x)

# Estimate 
be_result = lm.estimate(y_mean, x_mean, z=None, transform='be', T=T, robust_se=True) 

lm.print_table((label_y, label_x), be_result, title="Between Estimator", floatfmt='.4f')


Between Estimator
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6856  0.0370     18.5500
log Adjusted Capital Stock  0.2778  0.0320      8.6919
R² = 0.921
σ² = 0.122


In [17]:
# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
sigma2_u = fe_result['sigma2']
sigma2_w = be_result['sigma2']
sigma2_c = sigma2_w - (1/T*sigma2_u)
_lambda = 1-np.sqrt(sigma2_u/(sigma2_u+T*sigma2_c))

# Print lambda 
print(f'Lambda is approximately equal to {_lambda.item():.4f}.')

Lambda is approximately equal to 0.8509.


In [18]:
# Transform the data
C_T = np.identity(T)-_lambda*P_T
y_re = lm.perm(C_T, y)
x_re = lm.perm(C_T, x)

# Estimate 
re_result = lm.estimate(y_re, x_re, z=None, transform='re', T=T, robust_se=True)

lm.print_table((label_y, label_x), re_result, title="Random Effects", floatfmt='.4f')

Random Effects
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6912  0.0303     22.8253
log Adjusted Capital Stock  0.2477  0.0261      9.4766
R² = 0.797
σ² = 0.008


In [19]:
p_val_re = p_vals_ttest(x_re, re_result, N=N, T=T, estimator='RE')
print(p_val_re)

[[0.]
 [0.]]


# WALD TEST 
## for derterming if the joined hyphotesis hold 

In [20]:
R_fe=np.array([1,1]).reshape(1,-1)
r_fe=np.array([1]).reshape(1,-1)

R_re=np.array([1,1]).reshape(1,-1)
r_re=np.array([1]).reshape(1,-1)

In [21]:
varb_fe=fe_result['cov']
varb_fe.shape

b_hat_fe=fe_result['b_hat']
b_hat_fe.shape

varb_re=re_result['cov']
varb_re.shape

b_hat_re=re_result['b_hat']
b_hat_re.shape

(2, 1)

In [22]:
######### Implementing the Wald-test for the two estiamtors ##########

W_fe=(R_fe@b_hat_fe-r_fe).T@np.linalg.inv(R_fe@varb_fe@(R_fe.T))@(R_fe@b_hat_fe-r_fe)
W_re=(R_re@b_hat_re-r_re).T@np.linalg.inv(R_re@varb_re@(R_re.T))@(R_re@b_hat_re-r_re)

print(W_fe, W_re)

[[38.64345266]] [[22.48773531]]


In [24]:
################### calculating p-values #######################

M = 1

crit_val = stats.chi2.ppf(1-0.05, df=M)
p_val_fe = 1-stats.chi2.cdf(W_fe.item(), df=M)
p_val_re = 1-stats.chi2.cdf(W_re.item(), df=M)

# Print the results
print(f'The FE test statistic is {W_fe.item():.3f}.')
print(f'The RE test statistic is {W_re.item():.3f}.')
print(f'The critical value at a 5% significance level is {crit_val:.3f}.')
print(f'The FE p-value is {p_val_fe:.8f}.')
print(f'The RE p-value is {p_val_re:.8f}.')

The FE test statistic is 38.643.
The RE test statistic is 22.488.
The critical value at a 5% significance level is 3.841.
The FE p-value is 0.00000000.
The RE p-value is 0.00000211.


# Testing assumptions 

## TEST FOR FE.1/RE.1

### Constructing x_exo: regressors including leaded and lagged values

#### Leads of x's

In [25]:
# Lead labour and capital 
F_T = np.eye(T, k=1)
F_T = F_T[:-1]

labour_lead = lm.perm(F_T, x[:, 0].reshape(-1, 1))
capital_lead = lm.perm(F_T, x[:, 1].reshape(-1, 1))

# Remove the last observed year for every individial
I_T = np.eye(T, k=0)
I_T = I_T[:-1]

x_exo_lead = lm.perm(I_T, x)
#x_exo_lead_cons = lm.perm(I_T, x)
y_exo_lead = lm.perm(I_T, y)

# Add leaded to x_exo
x_exo_lead = np.hstack((x_exo_lead, labour_lead, capital_lead))
#x_exo_lead_cons = np.hstack((x_exo_lead_cons, labour_lead, capital_lead))

x_exo_lead.shape
#x_exo_lead_cons.shape

(882, 4)

In [26]:
############### Fixed Effects ###############

In [27]:
# Within transform the data
Q_T = demeaning_matrix(T - 1)
yw_exo_lead = lm.perm(Q_T, y_exo_lead)
xw_exo_lead = lm.perm(Q_T, x_exo_lead)

In [28]:
# Estimate model
exo_lead_fe = lm.estimate(yw_exo_lead, xw_exo_lead, z=None, transform='fe', T=T-1, robust_se=True)

# Print results
label_exo = label_x_dt + ['Labour lead'] + ['Capital lead']
lm.print_table((label_y, label_exo), exo_lead_fe, title='Exogeneity test', floatfmt='.3f')

Exogeneity test
Dependent variable: log Deflated sales

                              Beta     Se    t-values
--------------------------  ------  -----  ----------
log Employment               0.460  0.066       6.971
log Adjusted Capital Stock   0.058  0.075       0.778
Labour lead                  0.155  0.047       3.279
Capital lead                 0.066  0.084       0.783
R² = 0.221
σ² = 0.006


In [29]:
############### p-values #################

p_val_lead_fe = p_vals_ttest(xw_exo_lead, exo_lead_fe, N=N, T=T-1, estimator='FE')

print(p_val_lead_fe)

[[5.84665649e-12]
 [2.18414626e-01]
 [5.63476703e-04]
 [2.17082030e-01]]


In [42]:
############### Random Effects ###############

In [30]:
value = 1 / T
P_T = np.full((T,1), value).reshape(1,-1)
 
y_exo_lead_mean = lm.perm(P_T, y_exo_lead)
x_exo_lead_mean = lm.perm(P_T, x_exo_lead)

# Estimate 
exo_lead_be = lm.estimate(y_exo_lead_mean, x_exo_lead_mean, z=None, transform='be', T=T-1, robust_se=True) 

#print(be_result)

label_exo = label_x + ['Labour lead'] + ['Capital lead']

print(label_exo)

lm.print_table((label_y, label_exo), exo_lead_be, title='Exogeneity test', floatfmt='.4f')

# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
lead_sigma2_u = exo_lead_fe['sigma2']
lead_sigma2_w = exo_lead_be['sigma2']
lead_sigma2_c = lead_sigma2_w - (1/T*lead_sigma2_u)
lead_lambda = 1-np.sqrt(lead_sigma2_u/(lead_sigma2_u+T*lead_sigma2_c))

# Print lambda 
print(f'Lambda is approximately equal to {lead_lambda.item():.4f}.')

# Transform the data
C_T = np.identity(T)-lead_lambda*P_T
y_exo_lead_re = lm.perm(C_T, y_exo_lead)
x_exo_lead_re = lm.perm(C_T, x_exo_lead)

# Estimate 
exo_lead_re = lm.estimate(y_exo_lead_re, x_exo_lead_re, z=None, transform='re', T=T-1, robust_se=True)

lm.print_table((label_y, label_exo), exo_lead_re, title="Random Effects", floatfmt='.3f')

['log Employment', 'log Adjusted Capital Stock', 'Labour lead', 'Capital lead']
Exogeneity test
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log Employment               0.4825  0.2419      1.9948
log Adjusted Capital Stock  -0.9159  0.2448     -3.7418
Labour lead                  0.1597  0.2270      0.7033
Capital lead                 1.2536  0.2485      5.0457
R² = 0.930
σ² = 0.066
Lambda is approximately equal to 0.8201.
Random Effects
Dependent variable: log Deflated sales

                              Beta     Se    t-values
--------------------------  ------  -----  ----------
log Employment               0.217  0.086       2.513
log Adjusted Capital Stock  -0.225  0.122      -1.844
Labour lead                  0.469  0.086       5.444
Capital lead                 0.515  0.124       4.158
R² = 0.915
σ² = 0.056


In [31]:
############## p-vals ##########

p_val_lead_re = p_vals_ttest(x_exo_lead_re, exo_lead_re, N=N, T=T-1, estimator='RE')

print(p_val_lead_re)

[[6.07752668e-03]
 [3.27729930e-02]
 [3.37493540e-08]
 [1.76253701e-05]]


#### Lag of x's

In [32]:
######### Adding lagged variables of labour and capital to x ##########
L_T = np.eye(T, k=-1)
L_T = L_T[1:]

labour_lag = lm.perm(L_T, x[:, 0].reshape(-1, 1))
capital_lag = lm.perm(L_T, x[:, 1].reshape(-1, 1))

# Remove the first observed year for every individial
I_T = np.eye(T, k=0)
I_T = I_T[1:]

x_exo_lag = lm.perm(I_T, x)
#x_exo_lag_cons = lm.perm(I_T, x)

y_exo_lag = lm.perm(I_T, y)

# Add leaded to x_exo
x_exo_lag = np.hstack((x_exo_lag, labour_lag, capital_lag))
#x_exo_lag_cons = np.hstack((x_exo_lag_cons, labour_lag, capital_lag))

In [33]:
# Within transform the data
Q_T = demeaning_matrix(T - 1)
yw_exo_lag = lm.perm(Q_T, y_exo_lag)
xw_exo_lag = lm.perm(Q_T, x_exo_lag)

In [34]:
# Estimate model
exo_lag_fe = lm.estimate(yw_exo_lag, xw_exo_lag, z=None, transform='fe', T=T-1, robust_se=True)

# Print results
label_exo = label_x_dt + ['Labour lag'] + ['Capital lag']
lm.print_table((label_y, label_exo), exo_lag_fe, title='Exogeneity test', floatfmt='.3f')

Exogeneity test
Dependent variable: log Deflated sales

                              Beta     Se    t-values
--------------------------  ------  -----  ----------
log Employment               0.592  0.075       7.843
log Adjusted Capital Stock  -0.127  0.076      -1.667
Labour lag                   0.106  0.057       1.842
Capital lag                  0.125  0.089       1.403
R² = 0.248
σ² = 0.007


In [36]:
############## p-vals ##########

p_val_lag_fe = p_vals_ttest(xw_exo_lag, exo_lag_fe, N=N, T=T-1, estimator='FE')

print(p_val_lag_fe)

[[1.70974346e-14]
 [4.81543390e-02]
 [3.30502315e-02]
 [8.05910125e-02]]


In [37]:
############### Random Effects ###############

In [38]:
value = 1 / T
P_T = np.full((T,1), value).reshape(1,-1)
 
y_exo_lag_mean = lm.perm(P_T, y_exo_lag)
x_exo_lag_mean = lm.perm(P_T, x_exo_lag)

# Estimate 
exo_lag_be = lm.estimate(y_exo_lag_mean, x_exo_lag_mean, z=None, transform='be', T=T-1, robust_se=True) 

#print(be_result)

label_exo = label_x + ['Labour lag'] + ['Capital lag']

lm.print_table((label_y, label_exo), exo_lag_be, title='Exogeneity test', floatfmt='.4f')

# Calculate lambda (note lambda is a reserved keyword in Python, so we use _lambda instead)
lag_sigma2_u = exo_lag_fe['sigma2']
lag_sigma2_w = exo_lag_be['sigma2']
lag_sigma2_c = lag_sigma2_w - (1/T*lag_sigma2_u)
lag_lambda = 1-np.sqrt(lag_sigma2_u/(lag_sigma2_u+T*lag_sigma2_c))

# Print lambda 
print(f'Lambda is approximately equal to {lag_lambda.item():.4f}.')

# Transform the data
C_T = np.identity(T)-lag_lambda*P_T
y_exo_lag_re = lm.perm(C_T, y_exo_lag)
x_exo_lag_re = lm.perm(C_T, x_exo_lag)

# Estimate 
exo_lag_re = lm.estimate(y_exo_lag_re, x_exo_lag_re, z=None, transform='re', T=T-1, robust_se=True)

lm.print_table((label_y, label_exo), exo_lag_re, title="Random Effects", floatfmt='.3f')

Exogeneity test
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log Employment               0.8179  0.2389      3.4240
log Adjusted Capital Stock   1.2360  0.2503      4.9371
Labour lag                  -0.1594  0.2540     -0.6276
Capital lag                 -0.9187  0.2470     -3.7193
R² = 0.924
σ² = 0.070
Lambda is approximately equal to 0.8202.
Random Effects
Dependent variable: log Deflated sales

                              Beta     Se    t-values
--------------------------  ------  -----  ----------
log Employment               0.954  0.088      10.889
log Adjusted Capital Stock   0.540  0.126       4.305
Labour lag                  -0.289  0.088      -3.270
Capital lag                 -0.234  0.123      -1.904
R² = 0.911
σ² = 0.057


In [39]:
############## p-vals ##########

p_val_lag_re = p_vals_ttest(x_exo_lag_re, exo_lag_re, N=N, T=T-1, estimator='RE')

print(p_val_lag_re)

[[0.00000000e+00]
 [9.28011752e-06]
 [5.58252693e-04]
 [2.86393002e-02]]


# The Hausmann test 
## Comparing FE and RE 

In [40]:
# Calculate the test statistic
b_diff = b_hat_fe-b_hat_re #[1:3]
cov_diff = varb_fe-varb_re #[1:3,1:3]
H = b_diff.T@la.inv(cov_diff)@b_diff

# Find critical value and p-value at 5% significance level of chi^2 with M degrees of freedom
M = 2

crit_val = stats.chi2.ppf(1-0.05, df=M)
p_val = 1-stats.chi2.cdf(H.item(), df=M)

# Print the results
print(f'The test statistic is {H.item():.3f}.')
print(f'The critical value at a 5% significance level is {crit_val:.3f}.')
print(f'The p-value is {p_val:.8f}.')

The test statistic is 30.992.
The critical value at a 5% significance level is 5.991.
The p-value is 0.00000019.
