# Production Technology

The dataset contains `N = 441` firms observed over `T = 12` years, 1968-1979. The variables are:
* `lcap`: Log of capital stock, $k_{it}$ 
* `lemp`: log of employment, $\ell_{it}$ 
* `ldsa`: log of deflated sales, $y_{it}$
* `year`: the calendar year of the observation, `year` $ = 1968, ..., 1979$, 
* `firmid`: anonymized indicator variable for the firm, $i = 1, ..., N$, with $N=441$. 

In [506]:
import numpy as np
from numpy import linalg as la
import pandas as pd
from io import StringIO
from tabulate import tabulate
from matplotlib import pyplot as plt
import seaborn as sns

# Import this weeks LinearModels .py file
import LinearModelsWeek3 as lm
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [507]:
# Load the firm panel from firms.csv into a DataFrame
dat = pd.read_csv('firms.csv')

In [508]:
#dat.sample(5)

In [509]:
#dat.year.unique()

# Descriptives

In [510]:
#dat.describe()
#dat

In [511]:
#dat[['lcap','lemp','ldsa']].hist();

In [512]:
#sns.scatterplot(x='lemp', y='ldsa', data=dat); 

# Converting data to numpy format 

In [513]:
# Restrict the sample to the three years 1968-1970 (both endpoints included)
# and renumber the rows from zero
dat = dat[dat['year'].between(1968, 1970)].reset_index(drop=True)
dat

Unnamed: 0,firmid,year,lcap,lemp,ldsa
0,1,1968,0.998602,-0.242185,0.349053
1,1,1969,0.925214,-0.241278,0.312492
2,1,1970,0.879616,-0.265134,0.347566
3,2,1968,-0.069588,-0.323021,-0.945831
4,2,1969,-0.056724,-0.358177,-1.143830
...,...,...,...,...,...
1318,440,1969,-0.228757,0.031242,-0.246864
1319,440,1970,-0.038354,0.062158,-0.345710
1320,441,1968,-1.618390,-1.944210,-2.032340
1321,441,1969,-1.635030,-1.856580,-2.011210


In [514]:
# Shape of the sales column: one entry per row of dat
dat.ldsa.values.shape

(1323,)

In [515]:
# Panel dimensions: number of distinct firms (N) and distinct years (T)
N = len(dat['firmid'].unique())
T = len(dat['year'].unique())

# The row count must equal N*T for the panel to be balanced
assert len(dat) == N * T, 'Error: data is not a balanced panel'
print(f'Data has N={N} and T={T}')

Data has N=441 and T=3


Extract data from `pandas` to `numpy` arrays. 

In [516]:
# Column vectors of shape (N*T, 1) for the dependent variable and each input
y = dat['ldsa'].to_numpy().reshape(N*T, 1)
l = dat['lemp'].to_numpy().reshape(N*T, 1)
k = dat['lcap'].to_numpy().reshape(N*T, 1)

# Column of ones with the same number of rows
ones = np.ones((N*T, 1))

# Regressor matrix: employment in the first column, capital in the second
x = np.concatenate((l, k), axis=1)

In [517]:
# Within (demeaning) transformation matrix
def demeaning_matrix(T):
    """Return the T x T matrix ``I_T - (1/T) * 1 1'``.

    Pre-multiplying a length-T vector by this matrix subtracts the vector's
    mean from every one of its entries.

    Args:
        T: number of time periods (size of the square matrix).

    Returns:
        A (T, T) numpy array.
    """
    averaging = np.full((T, T), 1 / T)
    return np.eye(T) - averaging

# Build the demeaning matrix for the T periods in the sample and display it
Q_T = demeaning_matrix(T)
print(f'Demeaning matrix for T={T} \n', Q_T)

Demeaning matrix for T=3 
 [[ 0.66666667 -0.33333333 -0.33333333]
 [-0.33333333  0.66666667 -0.33333333]
 [-0.33333333 -0.33333333  0.66666667]]


In [518]:
# Apply the demeaning matrix to y and x through lm.perm
# (presumably firm by firm, in blocks of T rows — confirm in LinearModelsWeek3)
y_demean = lm.perm(Q_T, y)
x_demean = lm.perm(Q_T, x)

# Show the shape of the demeaned regressors, then the array itself
print(x_demean.shape)

x_demean

(1323, 2)


array([[ 0.00734733,  0.0641244 ],
       [ 0.00825433, -0.0092634 ],
       [-0.01560167, -0.054861  ],
       ...,
       [-0.1643    , -0.04895333],
       [-0.07667   , -0.06559333],
       [ 0.24097   ,  0.11454667]])

In [519]:
# Labels for the regression tables: the dependent variable, then the
# regressors in the same column order as x (employment, capital)
label_y = 'log Deflated sales'
label_x = ['log Employment', 'log Adjusted Capital Stock']

In [520]:
# Report the rank of a regressor matrix and the eigenvalues of its Gram matrix.
def check_rank(x):
    """Print the rank of ``x`` and the eigenvalues of ``x.T @ x``.

    Args:
        x: 2-D numpy array of regressors (rows are observations).

    Returns:
        None. Two lines are printed: the matrix rank of ``x`` and the
        eigenvalues of ``x.T @ x`` rounded to zero decimals, largest first.

    Note:
        Calls ``np.set_printoptions(suppress=True)``, which changes numpy's
        print settings for the rest of the session.
    """
    print(f'Rank of demeaned x: {la.matrix_rank(x)}')
    # x.T @ x is symmetric, so use the symmetric eigenvalue routine: it always
    # returns real eigenvalues and does not compute the unused eigenvectors.
    # eigvalsh returns ascending order; reverse it to list the largest first.
    lambdas = la.eigvalsh(x.T @ x)[::-1]
    np.set_printoptions(suppress=True)  # This is just to print nicely.
    print(f'Eigenvalues of within-transformed x: {lambdas.round(decimals=0)}')

# Print the rank and Gram-matrix eigenvalues of the demeaned regressors
check_rank(x_demean)

Rank of demeaned x: 2
Eigenvalues of within-transformed x: [9. 5.]


In [521]:
# Fixed-effects regression on the already-demeaned data; transform='fe' and T
# are forwarded to lm.estimate
fe_result = lm.estimate(y_demean, x_demean, transform='fe', T=T)

# Keep the coefficient vector for the Wald test further down
b_hat = fe_result['b_hat']

# Print results
lm.print_table(
    (label_y, label_x), fe_result,
    title='FE regression', floatfmt='.4f',
)

FE regression
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.6004  0.0346     17.3515
log Adjusted Capital Stock  0.0502  0.0382      1.3143
R² = 0.284
σ² = 0.008


In [522]:
####################  WALD TEST #########################
###### for determining if the joint hypothesis holds ####
#########################################################

In [523]:
# Linear restriction R @ b = r with R = [1 1] and r = 1, i.e. the employment
# and capital coefficients sum to one
R = np.array([[1, 1]])
r = np.array([[1]])

In [524]:
# Entry stored under 'cov' in the FE result (presumably the coefficient
# covariance estimate — confirm in LinearModelsWeek3); check its shape
varb=fe_result['cov']
varb.shape

(2, 2)

In [525]:
# Sanity check: print each ingredient of the Wald statistic followed by its shape
for _ingredient in (R, r, b_hat, varb):
    print(_ingredient)
    print(_ingredient.shape)

[[1 1]]
(1, 2)
[[1]]
(1, 1)
[[0.60038261]
 [0.05020358]]
(2, 1)
[[ 0.00119724 -0.00040423]
 [-0.00040423  0.00145913]]
(2, 2)


In [539]:
# Wald statistic for H0: R @ b = r,
#   W = (R b - r)' [R Var(b) R']^{-1} (R b - r).
# The linear system is solved directly instead of forming the inverse.
restriction_gap = R @ b_hat - r
W = restriction_gap.T @ la.solve(R @ varb @ R.T, restriction_gap)

W

array([[66.06939843]])

In [552]:
#################### TEST FOR FE.1 ######################
########### Are the u_it's correlated? ##################
#########################################################

In [553]:
# Serial-correlation check: regress residuals on their own first lag
def serial_corr(y, x, T):
    """Regress OLS residuals on their first lag.

    Args:
        y: dependent variable, stacked over units and periods.
        x: regressors, with the same row layout as ``y``.
        T: number of time periods per unit.

    Returns:
        Whatever ``lm.estimate`` returns for the regression of the current
        residual on the lagged residual, run with ``T - 1`` periods.

    Note:
        ``lm.perm`` is assumed to apply the given matrix to each unit's block
        of T rows — confirm in LinearModelsWeek3.
    """
    # Residuals from an OLS fit of y on x
    residuals = y - x @ lm.est_ols(y, x)

    # (T-1) x T lag operator: the row for period t picks period t-1
    lag_operator = np.eye(T, k=-1)[1:]
    residuals_lagged = lm.perm(lag_operator, residuals)

    # (T-1) x T selector that drops the first observation of each individual,
    # so current and lagged residuals have the same number of rows
    drop_first = np.eye(T)[1:]
    residuals_current = lm.perm(drop_first, residuals)

    # Slope of current on lagged residuals
    return lm.estimate(residuals_current, residuals_lagged, T=T-1)

In [554]:
# Regress the residuals of the demeaned model on their first lag (see serial_corr)
corr_result = serial_corr(y_demean, x_demean, T)

# Print results; the unicode subscripts render the labels as e_it and e_it-1
label_ye = 'OLS residual, e\u1d62\u209c'
label_e = ['e\u1d62\u209c\u208B\u2081']
lm.print_table(
    (label_ye, label_e), corr_result, 
    title='Serial Correlation', floatfmt='.4f'
)

Serial Correlation
Dependent variable: OLS residual, eᵢₜ

          Beta      Se    t-values
-----  -------  ------  ----------
eᵢₜ₋₁  -0.3636  0.0318    -11.4284
R² = 0.129
σ² = 0.004


In [555]:
#################### TEST FOR FE.3 ######################
############# strict exogeneity  ########################
#########################################################

In [556]:
# Inspect the demeaned regressors before adding lead terms of labour and capital
print(x_demean.shape)
x_demean

(1323, 2)


array([[ 0.00734733,  0.0641244 ],
       [ 0.00825433, -0.0092634 ],
       [-0.01560167, -0.054861  ],
       ...,
       [-0.1643    , -0.04895333],
       [-0.07667   , -0.06559333],
       [ 0.24097   ,  0.11454667]])

In [557]:
# Lead operator, shape (T-1, T): the row for period t picks period t+1
F_T = np.eye(T, k=1)[:-1]

# One-period leads of labour (column 0 of x) and capital (column 1 of x)
labour_lead = lm.perm(F_T, x[:, :1])
capital_lead = lm.perm(F_T, x[:, 1:2])

# Selector that removes the last observed year of every individual, so the
# contemporaneous rows match the rows of the leads
I_T = np.eye(T)[:-1]

# Contemporaneous regressors followed by their leads
x_exo = np.hstack((lm.perm(I_T, x), labour_lead, capital_lead))
y_exo = lm.perm(I_T, y)

x_exo.shape

(882, 4)

In [558]:
# Within transform the shortened sample (T - 1 periods per firm).
# NOTE: this rebinds Q_T, which previously held the T x T demeaning matrix.
Q_T = demeaning_matrix(T - 1)
yw_exo = lm.perm(Q_T, y_exo)
xw_exo = lm.perm(Q_T, x_exo)

xw_exo

array([[-0.0004535 ,  0.0366939 ,  0.011928  ,  0.0227988 ],
       [ 0.0004535 , -0.0366939 , -0.011928  , -0.0227988 ],
       [ 0.017578  , -0.0064321 ,  0.0241675 , -0.0120218 ],
       ...,
       [-0.01765535,  0.015343  ,  0.01545775,  0.0952014 ],
       [-0.043815  ,  0.00832   , -0.15882   , -0.09007   ],
       [ 0.043815  , -0.00832   ,  0.15882   ,  0.09007   ]])

In [559]:
# FE regression of sales on the current inputs plus their one-period leads
exo_test = lm.estimate(yw_exo, xw_exo, transform='fe', T=T - 1)

# Print results, with the two lead labels appended to the regressor labels
label_exo = label_x + ['Labour lead', 'Capital lead']
lm.print_table(
    (label_y, label_exo), exo_test,
    title='Exogeneity test', floatfmt='.4f',
)

Exogeneity test
Dependent variable: log Deflated sales

                              Beta      Se    t-values
--------------------------  ------  ------  ----------
log Employment              0.4599  0.0550      8.3615
log Adjusted Capital Stock  0.0580  0.0674      0.8617
Labour lead                 0.1552  0.0504      3.0768
Capital lead                0.0656  0.0702      0.9342
R² = 0.221
σ² = 0.006


In [560]:
######### Adding lagged variables of labour and capital to x ##########
# Lag operator, shape (T-1, T): the row for period t picks period t-1
L_T = np.eye(T, k=-1)[1:]

# One-period lags of labour (column 0 of x) and capital (column 1 of x)
labour_lag = lm.perm(L_T, x[:, :1])
capital_lag = lm.perm(L_T, x[:, 1:2])

# Selector that removes the first observed year of every individual, so the
# contemporaneous rows match the rows of the lags
I_T = np.eye(T)[1:]

# Contemporaneous regressors followed by their lags
x_exo2 = np.hstack((lm.perm(I_T, x), labour_lag, capital_lag))
y_exo2 = lm.perm(I_T, y)

x_exo2

array([[-0.241278 ,  0.9252139, -0.242185 ,  0.9986017],
       [-0.265134 ,  0.8796163, -0.241278 ,  0.9252139],
       [-0.358177 , -0.0567239, -0.323021 , -0.0695881],
       ...,
       [ 0.0621576, -0.0383542,  0.0312421, -0.228757 ],
       [-1.85658  , -1.63503  , -1.94421  , -1.61839  ],
       [-1.53894  , -1.45489  , -1.85658  , -1.63503  ]])

In [561]:
# Within transform the lagged-regressor sample (T - 1 periods per firm)
Q_T = demeaning_matrix(T - 1)
yw_exo2 = lm.perm(Q_T, y_exo2)
xw_exo2 = lm.perm(Q_T, x_exo2)

In [562]:
# FE regression of sales on the current inputs plus their one-period lags.
# exo_test and label_exo are rebound here, replacing the lead-regression values.
exo_test = lm.estimate(yw_exo2, xw_exo2, transform='fe', T=T - 1)

# Print results, with the two lag labels appended to the regressor labels
label_exo = label_x + ['Labour lag', 'Capital lag']
lm.print_table(
    (label_y, label_exo), exo_test,
    title='Exogeneity test', floatfmt='.4f',
)

Exogeneity test
Dependent variable: log Deflated sales

                               Beta      Se    t-values
--------------------------  -------  ------  ----------
log Employment               0.5920  0.0522     11.3387
log Adjusted Capital Stock  -0.1270  0.0727     -1.7481
Labour lag                   0.1057  0.0569      1.8563
Capital lag                  0.1248  0.0697      1.7904
R² = 0.248
σ² = 0.007
