# Load in the data


In [22]:
#Source functions
#import autograd.numpy as np
#from autograd import grad, hessian
#import pandas as pd
#import scipy.optimize as opt
#import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
#import math
#from itertools import combinations_with_replacement, chain #used for constructing polynomials

#Functions for ACF and GNR
from source_functions import *

In [23]:
# Load the estimation sample through the project helper (imported from source_functions).
df = load_data("GNR")

# Fail fast with a readable message if the loader did not supply a column that the
# cells below index directly (firm_id, y, s, k, l, m, kprev, lprev).
required_columns = ['firm_id', 'y', 's', 'k', 'l', 'm', 'kprev', 'lprev']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError('load_data("GNR") did not return the required columns: '
                     + ", ".join(missing_columns))

# ---------------------------------------------------------------------------
# Legacy inline data preparation, kept for reference only (not executed).
# NOTE(review): presumably load_data("GNR") now performs these steps -- confirm
# against source_functions before relying on this description.
# ---------------------------------------------------------------------------
#filename = "../PS3_data_changedtoxlsx.xlsx"
#df0 = pd.read_excel(filename)
#Remove missing materials columns
#df = df0[['year', 'firm_id', 'X03', 'X04', 'X05', 'X16', 'X40', 'X43', 'X44', 'X45', 'X49']]
#new_names = ["year", "firm_id", "obs", "ly", "s01", "s02", "lc", "ll", "lm"]
#new_names = ["t", "firm_id", "y_gross", "s01", "s02", "s13", "k", "l", "m", 'py', 'pm']
#df.columns = new_names
#Drop missing materials data
#df=df[df['m']!=0]
#Keep industry 1 only
#df=df[df['s13']==1]

#Creating value-added y
#df['y'] = df['y_gross']
#Creating the intermediate share of output cost variable: remember everything is already in logs
#df['s'] = np.log(np.exp(df['pm'])*np.exp(df['m']))/(np.exp(df['py'])*np.exp(df['y']))
#df['s'] = df['pm']+df['m'] - df['py'] - df['y']
#Creating lagged variables
#df = df.sort_values(by=['firm_id', 't'])
#df['kprev'] = df.groupby('firm_id')['k'].shift(1)
#df['lprev'] = df.groupby('firm_id')['l'].shift(1)
#df['mprev'] = df.groupby('firm_id')['m'].shift(1)

In [24]:
# Order of the polynomial fitted to the share equation.
degree = 2

# Left-hand-side variable s, kept two-dimensional (one column).
# NOTE(review): confirm nlls_share_obj expects an (n, 1) array rather than a flat (n,) vector.
s = df.loc[:, ['s']].to_numpy()

# Regressors, with columns in the order k, l, m.
xvars = df.loc[:, ['k', 'l', 'm']].to_numpy()

# Polynomial design matrix built from the regressors.
X_poly_D = poly_design_matrix(xvars, degree)

# Gradient and Hessian of the NLLS objective, generated with grad() and hessian().
autohessian_nlls = hessian(nlls_share_obj)
autogradient_nlls = grad(nlls_share_obj)

# Starting values: 0.5 for every column of the design matrix.
gammaprime0 = np.ones(X_poly_D.shape[1]) / 2

In [25]:
# Fit the share-equation coefficients gammaprime by constrained nonlinear least squares.

# Inequality constraint: every fitted value X_poly_D @ b must be nonnegative,
# because the log of the fitted values is taken afterwards (see shat below).
nonnegative_b = {'type': 'ineq', 'fun': lambda b: (X_poly_D@b)}

gammaprime_results = opt.minimize(nlls_share_obj, gammaprime0, args=(X_poly_D, s),
                                  constraints = [nonnegative_b],
                       tol=1e-12, jac=autogradient_nlls, hess = autohessian_nlls, method='trust-constr'
)

print("The error is:",  gammaprime_results.fun)
print("The gradient is:",  gammaprime_results.grad)
# Report the polynomial degree that was actually used; the label was previously
# hard-coded as "degree-1" even though the fit is run with `degree`.
print("The coefficients in the degree-%d fit are:" % degree,  gammaprime_results.x)

# Log of the fitted values at the estimated coefficients.
shat = np.log(X_poly_D@gammaprime_results.x)

The error is: 45.75189355502215
The gradient is: [2.24826140e-07 2.43663861e-06 1.03468366e-06 2.56415524e-06
 2.72901700e-05 1.16786740e-05 2.87435122e-05 5.12676749e-06
 1.23432865e-05 3.06782765e-05]
The coefficients in the degree-1 fit are: [-0.06492208  0.07132593 -0.04254507 -0.05276808 -0.00885781  0.02737997
 -0.00261827  0.00630371 -0.03944874  0.01855989]


## Actual by predicted plot for my polynomial approximation of $\Phi$, run on the entire sample

### Now we have $\hat D_{jt}$.

In [28]:
# Coefficient vector returned by the share-equation fit.
gammaprime = gammaprime_results.x

# Dhat: the fitted polynomial evaluated at every observation
# (the original author refers to these values as the elasticities).
df['Dhat'] = X_poly_D @ gammaprime

# Residuals epsilonhat: log of the fitted value minus the observed s.
df['epsilonhat'] = np.log(df['Dhat']) - df['s']

# Sample variance and mean of the residuals.
# Author's note: the mean comes out around 1e-12, which is a good sign.
var_eps = np.var(df['epsilonhat'])
mean_eps = np.mean(df['epsilonhat'])

# CurlyEhat: sample average of exp(epsilonhat).
# Author's note: it turns out to be about 1.02.
CurlyEhat = np.mean(np.exp(df['epsilonhat']))

# Benchmark for CurlyEhat: if epsilon ~ N(0, sigma^2), then E[exp(epsilon)] = exp(sigma^2 / 2).
# Author's note: this benchmark is very close to CurlyEhat, which suggests the
# residuals are approximately normal with a very low variance.
lognormal_guess_curlyEhat = np.exp(var_eps / 2)

# Rescale the coefficients by CurlyEhat and evaluate the rescaled polynomial.
gamma = gammaprime / CurlyEhat
df['df_dm'] = X_poly_D @ gamma

In [29]:
# Design matrix for the integral of the polynomial.
# w_r_t=2 is the index of m in xvars (columns k, l, m).
# NOTE(review): presumably w_r_t selects the integration variable -- confirm in source_functions.
X_poly_D_integral = poly_integral_design_matrix(xvars, degree, w_r_t=2)

# CurlyD: the integral design matrix evaluated at the rescaled coefficients gamma.
df['CurlyD'] = X_poly_D_integral @ gamma

# CurlyY: y net of the residual and of CurlyD (subtracted in that order).
df['CurlyY'] = df['y'].sub(df['epsilonhat']).sub(df['CurlyD'])

In [30]:
# Lag CurlyY by one row within each firm.
# NOTE(review): this is the previous period only if rows are ordered by time
# within firm_id -- confirm that load_data sorts the frame.
df['CurlyYprev'] = df.groupby('firm_id')['CurlyY'].shift(periods=1)

# Keep only rows with no missing value in any column (this removes, among others,
# each firm's first row, whose lag is NaN). Copy so df itself is left intact.
df_nonans = df.dropna().copy()

# Current and lagged CurlyY as arrays.
CurlyYprev = df_nonans['CurlyYprev'].to_numpy()
CurlyY = df_nonans['CurlyY'].to_numpy()

# Polynomial order for omega and its inputs: (k, l) today and (kprev, lprev) lagged.
degree_omega = 2
xvars_prev_omega = df_nonans.loc[:, ["kprev", "lprev"]].to_numpy()
xvars_omega = df_nonans.loc[:, ["k", "l"]].to_numpy()

# Design matrices with the first column removed, so the fit has NO INTERCEPT.
# Author's note: an intercept would not be identified anyway, because the
# procedure ends up taking first differences of omega.
Xprev_poly_omega = poly_design_matrix(xvars_prev_omega, degree_omega)[:, 1:]
X_poly_omega = poly_design_matrix(xvars_omega, degree_omega)[:, 1:]

In [31]:
# Second stage: estimate alpha, the coefficients of the polynomial without intercept.

# Initial guess for alpha (10 for every coefficient) and an identity weighting matrix.
alpha0 = np.ones(X_poly_omega.shape[1])*10
W0 = np.eye(len(alpha0))

args_GNR = (X_poly_omega, Xprev_poly_omega, CurlyY, CurlyYprev, W0)

# Build the gradient of the GMM objective in this cell, the same way
# autogradient_nlls is built for the share equation. No visible cell defines
# autogradient_GNR, so without this line the cell depends on hidden kernel state.
autogradient_GNR = grad(gmm_obj_fcn_GNR)

# NOTE(review): tol=1e-24 is far below double-precision resolution (about 2.2e-16),
# so tolerance-based stopping is effectively disabled; consider a looser tolerance
# and checking gmm_results_GNR.success / .message.
gmm_results_GNR = opt.minimize(gmm_obj_fcn_GNR, alpha0, args=args_GNR,
                       tol=1e-24, jac=autogradient_GNR, method='L-BFGS-B'
)

alpha = gmm_results_GNR.x
# Elements 1 and 2 of the helper's return value, evaluated at the estimated alpha.
delta, eta = gmm_stage2_error_GNR(alpha, X_poly_omega, Xprev_poly_omega, CurlyY, CurlyYprev)[1:3]

print("The error is:",  gmm_results_GNR.fun)
print("The gradient is:",  gmm_results_GNR.jac)
print("The coefficients for the integration constant [alpha] are:",  gmm_results_GNR.x)
print("The coefficients for productivity omega [delta] are:",  delta)


The error is: 4.302561928998854e-25
The gradient is: [1.90550302e-11 1.09182153e-11 4.55877902e-10 2.30643904e-10
 1.12858618e-10]
The coefficients for the integration constant [alpha] are: [ 0.22962852 -2.6976986  -0.08259352  0.26112497  0.05193076]
The coefficients for productivity omega [delta] are: [1.97089299 0.17895714 0.08014284]
