# OSM 2018 Economics 
## Problem Set 5
### 07.23.2018 
### Yung-Hsu Tsui*

### Exercise 1 Health Claims
#### (a)

In [2]:
# Importing the package
import numpy as np
import pandas as pd
import scipy.stats  as sts
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
%matplotlib notebook
# Read the health-claims observations from clms.txt into a NumPy array; `pts`
# is the sample used by every later cell of Exercise 1.
pts = np.loadtxt('clms.txt') # health claims data
# Wrap the array in a one-column DataFrame so describe() yields summary statistics.
df1= DataFrame(pts, columns=['Health Claims'])
df1.describe()

OSError: clms.txt not found.

In [None]:
# Plotting First Histogram: every claim, 1000 bins, bar heights scaled to a density.
# `density=True` replaces the `normed=True` keyword, which current matplotlib
# releases no longer accept.
count, bins, ignored = plt.hist(pts, 1000, edgecolor='black', density=True)
plt.title('Fictitious Health Claims ', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
# Only the first $10,000 of the range is shown.
plt.xlim([0, 10000])

In [None]:
# Plotting Second Histogram: claims in [0, 800] only, 100 bins
n=100
# Weight every observation by 1 / (N * bin width) so that bar heights are the
# empirical density of the full sample; the bars over [0, 800] then have total
# area equal to the share of claims <= 800. (The earlier weights carried an
# extra share-below-800 factor, which shrank every bar by that share.)
bin_width = (800-0) / n
weights = np.empty_like(pts)
weights.fill(1 / (pts.size * bin_width))
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Fictitious Health Claims Less than 800', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])

I prefer the latter one because it focuses on the information I care about most: the distribution for the majority of people, whose expenditures are mostly below $800.

#### (b)

In [None]:
# Importing the package
# Gamma Distribution
from scipy.special import gamma
import scipy.special as sps
import scipy.optimize as opt
import matplotlib.pyplot as plt

#Functions
def gamma_pdf(xvals,alpha,beta):
    """Evaluate the gamma density with shape `alpha` and scale `beta`.

    Returns x**(alpha-1) * exp(-x/beta) / (Gamma(alpha) * beta**alpha),
    computed element-wise for `xvals` (a scalar or a NumPy array).
    """
    kernel = xvals**(alpha-1) * np.exp(-xvals/beta)
    normaliser = sps.gamma(alpha) * beta**alpha
    return kernel / normaliser

def log_lik_gamma(xvals, a, b):
    """Gamma log-likelihood of the sample `xvals` for shape `a` and scale `b`.

    The log density is evaluated directly in log space,
        (a-1)*ln(x) - x/b - ln Gamma(a) - a*ln(b),
    instead of taking the log of the density. Taking log(pdf) returns -inf
    as soon as the density underflows to 0 for a large observation, which
    derails the optimiser; the log-space form stays finite.

    Parameters
    ----------
    xvals : array-like of observations
    a, b  : gamma shape and scale

    Returns
    -------
    The summed log density over all observations (NumPy float).
    """
    xvals = np.asarray(xvals, dtype=float)
    # xlogy returns 0 when a == 1, even at x == 0, matching x**0 == 1.
    ln_pdf_vals = (sps.xlogy(a - 1, xvals) - xvals / b
                   - sps.gammaln(a) - a * np.log(b))
    return ln_pdf_vals.sum()

def crit_gamma(params, args):
    """Objective for the gamma MLE: the negative log-likelihood.

    `params` unpacks to (alpha, beta); `args` is the sample handed through
    by the optimiser. The sign is flipped because the optimiser minimises.
    """
    shape, scale = params
    return -log_lik_gamma(args, shape, scale)

#Settings
mu = np.mean(pts)
var = np.var(pts)
beta = var/mu
alpha = mu/beta
m = 1

#Initial Guesses
alpha_init = alpha
beta_init = beta -20000
params1_init = np.array([alpha_init, beta_init])
mle_args = pts

#Plot
%matplotlib notebook
n=100
weights = np.empty_like(pts)
weights.fill((len(pts[pts<=800])/len(pts)) * n / (800-0) / pts.size)
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Health Claims and Gamma Estimation', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])
dist_pts = np.linspace(0, 800, 1000)
plt.plot(dist_pts, gamma_pdf(dist_pts, alpha=alpha, beta=beta),linewidth=2, color='r',label='1: Gamma Estimation')
plt.legend(loc='upper right')

#Log-likelihood
print('Log-likelihood 1: ', log_lik_gamma(pts, alpha, beta))

#MLE-result
results1 = opt.minimize(crit_gamma, params1_init, args=(mle_args), method='L-BFGS-B', bounds=((1e-10, None),(1e-10, None)))
alpha1_MLE, beta1_MLE = results1.x
print('alpha_MLE=', alpha1_MLE, ' beta_MLE=', beta1_MLE)
results1

#### (c)

In [None]:
#Importing Package
#Generalized Gamma Distribution
from scipy.special import gamma
import scipy.special as sps
import scipy.optimize as opt
import matplotlib.pyplot as plt

#Functions
def gengamma_pdf(xvals,a,b,m):
    """Evaluate the generalized gamma density with parameters (a, b, m).

    Returns m * x**(a-1) * exp(-(x/b)**m) / (b**a * Gamma(a/m)), computed
    element-wise for `xvals` (a scalar or a NumPy array).
    """
    decay = np.exp(-(xvals/b)**m)
    numerator = m * xvals**(a-1) * decay
    denominator = b**a * sps.gamma(a/m)
    return numerator / denominator

def log_lik_gengamma(xvals, alpha, beta, m):
    """Generalized gamma log-likelihood of `xvals` for (alpha, beta, m).

    The log density is evaluated directly in log space,
        ln(m) + (alpha-1)*ln(x) - (x/beta)**m - alpha*ln(beta) - ln Gamma(alpha/m),
    instead of taking the log of the density. Taking log(pdf) returns -inf
    once the density underflows to 0, which derails the optimiser.

    Parameters
    ----------
    xvals          : array-like of observations
    alpha, beta, m : generalized gamma parameters

    Returns
    -------
    The summed log density over all observations (NumPy float).
    """
    xvals = np.asarray(xvals, dtype=float)
    # xlogy returns 0 when alpha == 1, even at x == 0, matching x**0 == 1.
    ln_pdf_vals = (np.log(m) + sps.xlogy(alpha - 1, xvals) - (xvals / beta) ** m
                   - alpha * np.log(beta) - sps.gammaln(alpha / m))
    return ln_pdf_vals.sum()

def crit_gengamma(params, args):
    """Objective for the generalized gamma MLE: the negative log-likelihood.

    `params` unpacks to (alpha, beta, m); `args` is the sample handed
    through by the optimiser. The sign is flipped because the optimiser
    minimises.
    """
    shape, scale, power = params
    return -log_lik_gengamma(args, shape, scale, power)

#Initial Guesses from Part(b)
alpha_init = alpha1_MLE
beta_init = beta1_MLE
params2_init = np.array([alpha_init, beta_init,m])
mle_args = pts
m=1

#Log-likelihood
print('Log-likelihood 2: ', log_lik_gengamma(pts, alpha_init, beta_init,m))

#MLE-result
results2 = opt.minimize(crit_gengamma, params2_init, args=(mle_args), method='L-BFGS-B',bounds=((1e-10, None),(1e-10, None),(1e-10, None)))
alpha2_MLE, beta2_MLE, m2_MLE = results2.x
print('alpha_MLE=', alpha2_MLE, ' beta_MLE=', beta2_MLE, 'm_MLE=', m2_MLE)

#Plot
%matplotlib notebook
n=100
weights = np.empty_like(pts)
weights.fill((len(pts[pts<=800])/len(pts)) * n / (800-0) / pts.size)
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Health Claims and General Gamma Estimation', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])
dist_pts = np.linspace(0, 800, 1000)
plt.plot(dist_pts, gengamma_pdf(dist_pts, a=alpha2_MLE, b=beta2_MLE, m=m_MLE),linewidth=2, color='g',label='1: Gengamma Estimation')
plt.legend(loc='upper right')
results2

In [None]:
#### 1(d)

In [None]:
#1.d
##Generalized Beta 2 Distribution
from scipy.special import beta
import scipy.special as sps
import scipy.optimize as opt
import matplotlib.pyplot as plt

#Functions
def gb2_pdf(xvals,a,b,p,q):
    """Evaluate the generalized beta 2 (GB2) density with parameters (a, b, p, q).

    Returns a * x**(a*p-1) / (b**(a*p) * (1 + (x/b)**a)**(p+q) * B(p, q)),
    computed element-wise for `xvals` (a scalar or a NumPy array).
    """
    numerator = a*xvals**(a*p-1)
    denominator = b**(a*p) * (1+(xvals/b)**a)**(p+q) * sps.beta(p,q)
    return numerator / denominator

def log_lik_gb2(xvals, a, b, p, q):
    """GB2 log-likelihood of the sample `xvals` for parameters (a, b, p, q).

    The log density is evaluated directly in log space,
        ln(a) + (a*p-1)*ln(x) - a*p*ln(b)
        - (p+q)*ln(1 + (x/b)**a) - ln B(p, q),
    instead of taking the log of the density. With a large q or large
    observations the density's denominator overflows, the density becomes 0
    and log(pdf) returns -inf; the log-space form stays finite.

    Parameters
    ----------
    xvals      : array-like of observations
    a, b, p, q : GB2 parameters

    Returns
    -------
    The summed log density over all observations (NumPy float).
    """
    xvals = np.asarray(xvals, dtype=float)
    # ln(1 + (x/b)**a) computed as logaddexp(0, a*(ln x - ln b)) to avoid overflow.
    ln_one_plus = np.logaddexp(0.0, a * (np.log(xvals) - np.log(b)))
    log_pdf_vals = (np.log(a) + sps.xlogy(a * p - 1, xvals) - a * p * np.log(b)
                    - (p + q) * ln_one_plus - sps.betaln(p, q))
    return log_pdf_vals.sum()

def crit_gb2(params, args):
    """Objective for the GB2 MLE: the negative log-likelihood.

    `params` unpacks to (a, b, p, q); `args` is the sample handed through
    by the optimiser. The sign is flipped because the optimiser minimises.
    """
    a_val, b_val, p_val, q_val = params
    return -log_lik_gb2(args, a_val, b_val, p_val, q_val)

#Initial Guesses from Part(c)
q=10000
a_init = m2_MLE
b_init = beta2_MLE*q**(1/m2_MLE) 
p_init = alpha2_MLE/m2_MLE 
params3_init = np.array([a_init, b_init,p_init,q])
mle_args = pts

#Log-likelihood
print('Log-likelihood 3: ', log_lik_gb2(pts, a= a_init, b=b_init,p=p_init,q=q))

#MLE-result
results3 = opt.minimize(crit_gb2, params3_init, args=(mle_args), method='L-BFGS-B',bounds=((1e-10, None),(1e-10, None),(1e-10, None),(1e-10, None)))
a_MLE, b_MLE, p_MLE, q_MLE = results3.x
print('a_MLE=', a_MLE, ' b_MLE=', b_MLE, 'p_MLE=', p_MLE, 'q_MLE=',q_MLE)

#Plot
%matplotlib notebook
n=100
weights = np.empty_like(pts)
weights.fill((len(pts[pts<=800])/len(pts)) * n / (800-0) / pts.size)
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Health Claims and GB2 Estimation', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])
dist_pts = np.linspace(0, 800, 1000)
plt.plot(dist_pts, gb2_pdf(dist_pts, a=a_MLE, b=b_MLE , p=p_MLE,q=q_MLE),linewidth=2, color='y',label='1: GB2 Estimation')
plt.legend(loc='upper right')

results3

In [None]:
#1.f
# Probability Calculation
# Integrate each fitted density over [0, 1000] and report one minus that
# integral as the probability of a claim above $1,000.
from scipy.integrate import quad
def integrand1(x):
    # GB2 density at x, evaluated at the estimates a_MLE, b_MLE, p_MLE, q_MLE
    return gb2_pdf(x, a=a_MLE, b=b_MLE, p=p_MLE, q=q_MLE)
def integrand2(x):
    # Gamma density at x, evaluated at the estimates alpha1_MLE, beta1_MLE
    return gamma_pdf(x, alpha=alpha1_MLE, beta=beta1_MLE)
prob1 = quad(integrand1,0, 1000)
prob2 = quad(integrand2,0, 1000)
# quad returns a tuple whose element 0 is the value of the integral.
print('Probability above $1000 GB2 = ', 1-prob1[0])
print('Probability above $1000 Gamma = ', 1-prob2[0])
# Author's reading of the output: a health claim over $1,000 is not less likely under the GB2 distribution;
# the probability of a claim over $1,000 increases under the gamma distribution.
# NOTE(review): these two statements read as contradictory -- re-check them against the printed values.

In [2]:
#1.a_1
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# Read the health-claims observations from clms.txt into a NumPy array; `pts`
# is the sample used by the following Exercise 1 cells.
pts = np.loadtxt('clms.txt') # health claims data
# Wrap the array in a one-column DataFrame so describe() yields summary statistics.
df1= DataFrame(pts, columns=['Health Claims'])
df1.describe()

OSError: clms.txt not found.

In [3]:
#1.a_2
import numpy as np
import scipy.stats as sts
import matplotlib.pyplot as plt
%matplotlib notebook
# First histogram: every claim, 1000 bins, bar heights scaled to a density.
# `density=True` replaces the `normed=True` keyword, which current matplotlib
# releases no longer accept.
count, bins, ignored = plt.hist(pts, 1000, edgecolor='black', density=True)
plt.title('Fictitious Health Claims ', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
# Only the first $10,000 of the range is shown.
plt.xlim([0, 10000])

NameError: name 'B' is not defined

In [None]:
#1.a_3
import matplotlib.pyplot as plt
%matplotlib notebook
n=100
# Weight every observation by 1 / (N * bin width) so that bar heights are the
# empirical density of the full sample; the bars over [0, 800] then have total
# area equal to the share of claims <= 800. (The earlier weights carried an
# extra share-below-800 factor, which shrank every bar by that share.)
bin_width = (800-0) / n
weights = np.empty_like(pts)
weights.fill(1 / (pts.size * bin_width))
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Fictitious Health Claims Less than 800', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])
# I might prefer the latter histogram because it focuses on the information I care about most:
# the distribution for the majority of people, whose expenditures are mostly below $800.
# I might not be interested in studying the extreme cases.

In [None]:
#1.b
##Gamma Distribution
from scipy.special import gamma
import scipy.special as sps
import scipy.optimize as opt
import matplotlib.pyplot as plt

#Functions
def gamma_pdf(xvals,alpha,beta):
    """Evaluate the gamma density with shape `alpha` and scale `beta`.

    Returns x**(alpha-1) * exp(-x/beta) / (Gamma(alpha) * beta**alpha),
    computed element-wise for `xvals` (a scalar or a NumPy array).
    """
    kernel = xvals**(alpha-1) * np.exp(-xvals/beta)
    normaliser = sps.gamma(alpha) * beta**alpha
    return kernel / normaliser

def log_lik_gamma(xvals, a, b):
    """Gamma log-likelihood of the sample `xvals` for shape `a` and scale `b`.

    The log density is evaluated directly in log space,
        (a-1)*ln(x) - x/b - ln Gamma(a) - a*ln(b),
    instead of taking the log of the density. Taking log(pdf) returns -inf
    as soon as the density underflows to 0 for a large observation, which
    derails the optimiser; the log-space form stays finite.

    Parameters
    ----------
    xvals : array-like of observations
    a, b  : gamma shape and scale

    Returns
    -------
    The summed log density over all observations (NumPy float).
    """
    xvals = np.asarray(xvals, dtype=float)
    # xlogy returns 0 when a == 1, even at x == 0, matching x**0 == 1.
    ln_pdf_vals = (sps.xlogy(a - 1, xvals) - xvals / b
                   - sps.gammaln(a) - a * np.log(b))
    return ln_pdf_vals.sum()

def crit_gamma(params, args):
    """Objective for the gamma MLE: the negative log-likelihood.

    `params` unpacks to (alpha, beta); `args` is the sample handed through
    by the optimiser. The sign is flipped because the optimiser minimises.
    """
    shape, scale = params
    return -log_lik_gamma(args, shape, scale)

#Settings
mu = np.mean(pts)
var = np.var(pts)
beta = var/mu
alpha = mu/beta
m = 1

#Initial Guesses
alpha_init = alpha
beta_init = beta -20000
params1_init = np.array([alpha_init, beta_init])
mle_args = pts

#Plot
%matplotlib notebook
n=100
weights = np.empty_like(pts)
weights.fill((len(pts[pts<=800])/len(pts)) * n / (800-0) / pts.size)
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Health Claims and Gamma Estimation', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])
dist_pts = np.linspace(0, 800, 1000)
plt.plot(dist_pts, gamma_pdf(dist_pts, alpha=alpha, beta=beta),linewidth=2, color='r',label='1: Gamma Estimation')
plt.legend(loc='upper right')

#Log-likelihood
print('Log-likelihood 1: ', log_lik_gamma(pts, alpha, beta))

#MLE-result
results1 = opt.minimize(crit_gamma, params1_init, args=(mle_args), method='L-BFGS-B', bounds=((1e-10, None),(1e-10, None)))
alpha1_MLE, beta1_MLE = results1.x
print('alpha_MLE=', alpha1_MLE, ' beta_MLE=', beta1_MLE)
results1


In [None]:
#1.c
##Generalized Gamma Distribution
from scipy.special import gamma
import scipy.special as sps
import scipy.optimize as opt
import matplotlib.pyplot as plt

#Functions
def gengamma_pdf(xvals,a,b,m):
    """Evaluate the generalized gamma density with parameters (a, b, m).

    Returns m * x**(a-1) * exp(-(x/b)**m) / (b**a * Gamma(a/m)), computed
    element-wise for `xvals` (a scalar or a NumPy array).
    """
    decay = np.exp(-(xvals/b)**m)
    numerator = m * xvals**(a-1) * decay
    denominator = b**a * sps.gamma(a/m)
    return numerator / denominator

def log_lik_gengamma(xvals, alpha, beta, m):
    """Generalized gamma log-likelihood of `xvals` for (alpha, beta, m).

    The log density is evaluated directly in log space,
        ln(m) + (alpha-1)*ln(x) - (x/beta)**m - alpha*ln(beta) - ln Gamma(alpha/m),
    instead of taking the log of the density. Taking log(pdf) returns -inf
    once the density underflows to 0, which derails the optimiser.

    Parameters
    ----------
    xvals          : array-like of observations
    alpha, beta, m : generalized gamma parameters

    Returns
    -------
    The summed log density over all observations (NumPy float).
    """
    xvals = np.asarray(xvals, dtype=float)
    # xlogy returns 0 when alpha == 1, even at x == 0, matching x**0 == 1.
    ln_pdf_vals = (np.log(m) + sps.xlogy(alpha - 1, xvals) - (xvals / beta) ** m
                   - alpha * np.log(beta) - sps.gammaln(alpha / m))
    return ln_pdf_vals.sum()

def crit_gengamma(params, args):
    """Objective for the generalized gamma MLE: the negative log-likelihood.

    `params` unpacks to (alpha, beta, m); `args` is the sample handed
    through by the optimiser. The sign is flipped because the optimiser
    minimises.
    """
    shape, scale, power = params
    return -log_lik_gengamma(args, shape, scale, power)

#Initial Guesses from Part(b)
alpha_init = alpha1_MLE
beta_init = beta1_MLE
params2_init = np.array([alpha_init, beta_init,m])
mle_args = pts
m=1

#Log-likelihood
print('Log-likelihood 2: ', log_lik_gengamma(pts, alpha_init, beta_init,m))

#MLE-result
results2 = opt.minimize(crit_gengamma, params2_init, args=(mle_args), method='L-BFGS-B',bounds=((1e-10, None),(1e-10, None),(1e-10, None)))
alpha2_MLE, beta2_MLE, m2_MLE = results2.x
print('alpha_MLE=', alpha2_MLE, ' beta_MLE=', beta2_MLE, 'm_MLE=', m2_MLE)

#Plot
%matplotlib notebook
n=100
weights = np.empty_like(pts)
weights.fill((len(pts[pts<=800])/len(pts)) * n / (800-0) / pts.size)
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Health Claims and General Gamma Estimation', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])
dist_pts = np.linspace(0, 800, 1000)
plt.plot(dist_pts, gengamma_pdf(dist_pts, a=alpha2_MLE, b=beta2_MLE, m=m_MLE),linewidth=2, color='g',label='1: Gengamma Estimation')
plt.legend(loc='upper right')

results2

In [None]:
#1.d
##Generalized Beta 2 Distribution
from scipy.special import beta
import scipy.special as sps
import scipy.optimize as opt
import matplotlib.pyplot as plt

#Functions
def gb2_pdf(xvals,a,b,p,q):
    """Evaluate the generalized beta 2 (GB2) density with parameters (a, b, p, q).

    Returns a * x**(a*p-1) / (b**(a*p) * (1 + (x/b)**a)**(p+q) * B(p, q)),
    computed element-wise for `xvals` (a scalar or a NumPy array).
    """
    numerator = a*xvals**(a*p-1)
    denominator = b**(a*p) * (1+(xvals/b)**a)**(p+q) * sps.beta(p,q)
    return numerator / denominator

def log_lik_gb2(xvals, a, b, p, q):
    """GB2 log-likelihood of the sample `xvals` for parameters (a, b, p, q).

    The log density is evaluated directly in log space,
        ln(a) + (a*p-1)*ln(x) - a*p*ln(b)
        - (p+q)*ln(1 + (x/b)**a) - ln B(p, q),
    instead of taking the log of the density. With a large q or large
    observations the density's denominator overflows, the density becomes 0
    and log(pdf) returns -inf; the log-space form stays finite.

    Parameters
    ----------
    xvals      : array-like of observations
    a, b, p, q : GB2 parameters

    Returns
    -------
    The summed log density over all observations (NumPy float).
    """
    xvals = np.asarray(xvals, dtype=float)
    # ln(1 + (x/b)**a) computed as logaddexp(0, a*(ln x - ln b)) to avoid overflow.
    ln_one_plus = np.logaddexp(0.0, a * (np.log(xvals) - np.log(b)))
    log_pdf_vals = (np.log(a) + sps.xlogy(a * p - 1, xvals) - a * p * np.log(b)
                    - (p + q) * ln_one_plus - sps.betaln(p, q))
    return log_pdf_vals.sum()

def crit_gb2(params, args):
    """Objective for the GB2 MLE: the negative log-likelihood.

    `params` unpacks to (a, b, p, q); `args` is the sample handed through
    by the optimiser. The sign is flipped because the optimiser minimises.
    """
    a_val, b_val, p_val, q_val = params
    return -log_lik_gb2(args, a_val, b_val, p_val, q_val)

#Initial Guesses from Part(c)
q=10000
a_init = m2_MLE
b_init = beta2_MLE*q**(1/m2_MLE) 
p_init = alpha2_MLE/m2_MLE 
params3_init = np.array([a_init, b_init,p_init,q])
mle_args = pts

#Log-likelihood
print('Log-likelihood 3: ', log_lik_gb2(pts, a= a_init, b=b_init,p=p_init,q=q))

#MLE-result
results3 = opt.minimize(crit_gb2, params3_init, args=(mle_args), method='L-BFGS-B',bounds=((1e-10, None),(1e-10, None),(1e-10, None),(1e-10, None)))
a_MLE, b_MLE, p_MLE, q_MLE = results3.x
print('a_MLE=', a_MLE, ' b_MLE=', b_MLE, 'p_MLE=', p_MLE, 'q_MLE=',q_MLE)

#Plot
%matplotlib notebook
n=100
weights = np.empty_like(pts)
weights.fill((len(pts[pts<=800])/len(pts)) * n / (800-0) / pts.size)
count, bins, ignored = plt.hist(pts, n, edgecolor='blue',weights=weights, range=(0,800))
plt.title('Health Claims and GB2 Estimation', fontsize=20)
plt.xlabel('Expenditures')
plt.ylabel('Percent of Observation')
plt.xlim([-10, 810])
plt.ylim([0,0.006])
dist_pts = np.linspace(0, 800, 1000)
plt.plot(dist_pts, gb2_pdf(dist_pts, a=a_MLE, b=b_MLE , p=p_MLE,q=q_MLE),linewidth=2, color='y',label='1: GB2 Estimation')
plt.legend(loc='upper right')

results3

In [None]:
#1.e
## Likelihood Ratio Tests
# Each statistic is 2 * (GB2 log-likelihood - restricted-model log-likelihood),
# with every model evaluated at its own MLE:
#   - the gamma model at (alpha1_MLE, beta1_MLE) from part (b), not at the
#     generalized gamma estimates;
#   - the generalized gamma model at (alpha2_MLE, beta2_MLE, m2_MLE) from
#     part (c), not with m fixed at 1.
lrgamma = 2 * (log_lik_gb2(pts, a=a_MLE, b=b_MLE , p=p_MLE,q=q_MLE) - log_lik_gamma(pts, a=alpha1_MLE, b=beta1_MLE))
lrgengamma = 2 * (log_lik_gb2(pts, a=a_MLE, b=b_MLE , p=p_MLE,q=q_MLE) - log_lik_gengamma(pts, alpha=alpha2_MLE, beta=beta2_MLE,m=m2_MLE))
print('Likelihood Ratio for Gamma:', lrgamma)
print('Likelihood Ratio for Generalized Gamma:', lrgengamma)

#1.f
# Probability Calculation
# Integrate each fitted density over [0, 1000] and report one minus that
# integral as the probability of a claim above $1,000.
from scipy.integrate import quad
def integrand1(x):
    # GB2 density at x, evaluated at the estimates a_MLE, b_MLE, p_MLE, q_MLE
    return gb2_pdf(x, a=a_MLE, b=b_MLE, p=p_MLE, q=q_MLE)
def integrand2(x):
    # Gamma density at x, evaluated at the estimates alpha1_MLE, beta1_MLE
    return gamma_pdf(x, alpha=alpha1_MLE, beta=beta1_MLE)
prob1 = quad(integrand1,0, 1000)
prob2 = quad(integrand2,0, 1000)
# quad returns a tuple whose element 0 is the value of the integral.
print('Probability above $1000 GB2 = ', 1-prob1[0])
print('Probability above $1000 Gamma = ', 1-prob2[0])
# Author's reading of the output: a health claim over $1,000 is not less likely under the GB2 distribution;
# the probability of a claim over $1,000 increases under the gamma distribution.
# NOTE(review): these two statements read as contradictory -- re-check them against the printed values.

In [None]:
#1.a_1
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# Load the income data. NOTE: this reassigns `pts`, replacing the health-claims
# array used by the earlier cells; re-running those cells afterwards requires
# reloading clms.txt first.
pts = np.loadtxt('usincmoms.txt')
# Column 0 is labelled 'Percentage' and column 1 'Income'.
df= DataFrame(pts, columns=['Percentage','Income'])
df.describe()

In [None]:
#1.a_2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
# Bar heights: the first 40 values of column 0 as-is, the 41st and 42nd scaled
# down, plus a trailing zero-height bar.
height_1=np.append(pts[:40,0],[pts[40,0]/10,pts[41,0]/20,0])
#Value of 41st bar is divided by 10.
#Value of 42nd bar is divided by 20
N=len(pts)
plt.figure()
# Tick labels: column 1 in thousands shifted down by 2.5, then 200, 250, 350.
tick_1=np.append(np.array(pts[:40,1]/1000-2.5),[200,250,350])
# Bar widths: 2.9 for the first 40 bars, 10x and 20x that for the next two,
# and zero for the trailing bar.
width_1=np.append(np.linspace(2.9,2.9,40),[2.9*10,2.9*20,0])
# Left-edge positions (bars use align='edge'): 41 evenly spaced points on
# [0, 123], then 152 and 210.
ypos_1=np.append(np.linspace(0,123,41),[152,210])
plt.bar(ypos_1,height=height_1,width=width_1,edgecolor='blue',align='edge') 
plt.title('US Income Distribution', fontsize=20)
plt.xlabel('House Hold Income in thousand dollars')
plt.ylabel('Percentage (%)')
plt.xticks(ypos_1,tick_1,fontsize=5.5, rotation=45)
plt.show()

In [None]:
#1.b_1
import scipy.optimize as opt
import scipy.integrate as intgr
import scipy.stats as sts
# tick2 / tick3 are the points at which the model CDF is evaluated for each of
# the 42 rows: column 1 in thousands +2.5 / -2.5 for the first 40 rows, then
# (250, 350) and (200, 250) for the last two. model_moment and model_moment2
# read both arrays as globals.
tick2 = np.append(np.array(pts[:40,1]/1000+2.5),[250,350])
tick3 = np.append(np.array(pts[:40,1]/1000-2.5),[200,250])

def model_moment(mu,sig):
    """Lognormal model moments: CDF at `tick2` minus CDF at `tick3`.

    Uses sts.lognorm with shape `sig` and scale exp(mu); `tick2` and `tick3`
    are module-level arrays.
    """
    scale = np.exp(mu)
    upper_cdf, lower_cdf = (sts.lognorm.cdf(edges, sig, scale = scale) for edges in (tick2, tick3))
    return upper_cdf - lower_cdf

def err_vec(xvals, mu, sigma):
    """Moment error vector: `xvals` minus the lognormal model moments at (mu, sigma)."""
    return xvals - model_moment(mu,sigma)

def criterion(params, *args):
    """GMM objective for the lognormal fit: the quadratic form e' W e.

    `params` unpacks to (mu, sig) and `args` to (xvals, W), where e is the
    moment error vector from err_vec and W the weighting matrix.
    """
    mu, sig = params
    xvals, W = args
    moment_err = err_vec(xvals, mu, sig)
    weighted = moment_err.T @ W
    return weighted @ moment_err

# Starting values from the method of moments for a lognormal, in THOUSANDS of
# dollars (the units of tick2 / tick3): with share-weighted mean M and variance
# V of the values in column 1, sigma^2 = ln(1 + V/M^2) and mu = ln(M) - sigma^2/2.
# The previous guesses were logs of dollar amounts, which put the starting
# distribution far outside the range covered by tick2 / tick3.
inc_k = pts[:,1] / 1000
mean_k = np.average(inc_k, weights=pts[:,0])
var_k = np.average((inc_k - mean_k)**2, weights=pts[:,0])
sig_init = np.sqrt(np.log(1 + var_k / mean_k**2))
mu_init = np.log(mean_k) - sig_init**2 / 2
params_init = np.array([mu_init, sig_init])
W = np.diag(pts[:,0])
# The data moments are the observed shares in column 0 ('Percentage'); the
# model moments returned by model_moment are probabilities, so they must be
# compared with the shares rather than with the income values in column 1.
gmm_args = (pts[:,0], W)

result = opt.minimize(criterion, params_init, args = (gmm_args), method = 'L-BFGS-B', bounds = ((None,None),(1e-10,None)))
mu_GMM1, sig_GMM1 = result.x


mean_data=pts[:,1].mean()
sig_data = pts[:,1].std()
pdf = model_moment(mu_GMM1, sig_GMM1)
err1 = err_vec(pts[:,0], mu_GMM1, sig_GMM1)

print(result)
print('Mean of points =', mean_data, ', STD of points =', sig_data)
print('Error vector=', err1)
print('mu_GMM1=', mu_GMM1, ' sig_GMM1=', sig_GMM1)

In [None]:
#1.b_2
# Histogram of the income shares (converted to a density per thousand dollars)
# with the GMM-fitted lognormal density on top.
plt.figure()
aaa=pts[0:42,1]/1000
# Divide each share by its bar width (5, 50, 100) to obtain a density.
bbb=np.append(pts[0:40,0]/5,[pts[40,0]/50,pts[41,0]/100])
ccc=np.append(np.linspace(5,5,40),[5*10,5*20])
data = np.append(pts[0:40,1]/1000, [200,250,300,350])
plt.bar(aaa, bbb, width =ccc , align = 'center', edgecolor='blue')
plt.plot(data, sts.lognorm.pdf(data, sig_GMM1, scale = np.exp(mu_GMM1)), color = 'r', linewidth = 2, label = 'LN')
# Title and axis labels so the figure can be read on its own.
plt.title('US Income Distribution and Lognormal GMM Fit')
plt.xlabel('Household income (thousand dollars)')
plt.ylabel('Density')
plt.legend(loc = 'upper right')
plt.show()


In [None]:
#1.c_1
def model_moment2(alpha,beta):
    """Gamma model moments: CDF at `tick2` minus CDF at `tick3`.

    Uses sts.gamma with shape `alpha` and scale `beta`; `tick2` and `tick3`
    are module-level arrays.
    """
    cdf_at = lambda edges: sts.gamma.cdf(edges, alpha, scale = beta)
    upper_cdf = cdf_at(tick2)
    lower_cdf = cdf_at(tick3)
    return upper_cdf - lower_cdf

def err_vec2(xvals, alpha, beta):
    """Moment error vector: `xvals` minus the gamma model moments at (alpha, beta)."""
    return xvals - model_moment2(alpha,beta)

def criterion2(params, *args):
    """GMM objective for the gamma fit: the quadratic form e' W e.

    `params` unpacks to (alpha, beta) and `args` to (xvals, W), where e is
    the moment error vector from err_vec2 and W the weighting matrix.
    """
    alpha, beta = params
    xvals, W = args
    moment_err = err_vec2(xvals, alpha, beta)
    weighted = moment_err.T @ W
    return weighted @ moment_err

# Starting values for the gamma shape and scale (scale in thousands of dollars,
# the units of tick2 / tick3).
alpha2_init = 3
beta2_init = 20
params2_init = np.array([alpha2_init, beta2_init])
W = np.diag(pts[:,0])
# The data moments are the observed shares in column 0 ('Percentage'); the
# model moments returned by model_moment2 are probabilities, so they must be
# compared with the shares rather than with the income values in column 1.
gmm_args = (pts[:,0], W)

result2 = opt.minimize(criterion2, params2_init, args = (gmm_args), method = 'L-BFGS-B', bounds = ((1e-10,None),(1e-10,None)))
alpha_gmm2, beta_gmm2= result2.x
print(result2)

In [None]:
#1.c_2
# Histogram of the income shares (converted to a density per thousand dollars)
# with the GMM-fitted gamma density on top.
plt.figure()
aaa=pts[0:42,1]/1000
# Divide each share by its bar width (5, 50, 100) to obtain a density.
bbb=np.append(pts[0:40,0]/5,[pts[40,0]/50,pts[41,0]/100])
ccc=np.append(np.linspace(5,5,40),[5*10,5*20])
data = np.append(pts[0:40,1]/1000, [200,250,300,350])
plt.bar(aaa, bbb, width =ccc , align = 'center', edgecolor='blue')
plt.plot(data, sts.gamma.pdf(data, alpha_gmm2, scale =beta_gmm2), color = 'g', linewidth = 2, label = 'GA')
# Title and axis labels so the figure can be read on its own.
plt.title('US Income Distribution and Gamma GMM Fit')
plt.xlabel('Household income (thousand dollars)')
plt.ylabel('Density')
plt.legend(loc = 'upper right')
plt.show()

In [None]:
#1.d_1
# Histogram of the income shares (converted to a density per thousand dollars)
# with both GMM-fitted densities on top for comparison.
plt.figure()
aaa=pts[0:42,1]/1000
# Divide each share by its bar width (5, 50, 100) to obtain a density.
bbb=np.append(pts[0:40,0]/5,[pts[40,0]/50,pts[41,0]/100])
ccc=np.append(np.linspace(5,5,40),[5*10,5*20])
data = np.append(pts[0:40,1]/1000, [200,250,300,350])
plt.bar(aaa, bbb, width =ccc , align = 'center', edgecolor='blue')
plt.plot(data, sts.lognorm.pdf(data, sig_GMM1, scale = np.exp(mu_GMM1)), color = 'r', linewidth = 2, label = 'LN')
plt.plot(data, sts.gamma.pdf(data, alpha_gmm2, scale =beta_gmm2), color = 'g', linewidth = 2, label = 'GA')
# Title and axis labels so the figure can be read on its own.
plt.title('US Income Distribution: Lognormal vs Gamma GMM Fit')
plt.xlabel('Household income (thousand dollars)')
plt.ylabel('Density')
plt.legend(loc = 'upper right')
plt.show()

In [None]:
#1.d_2
# Minimized criterion values of the lognormal fit (result) and the gamma fit (result2).
obj_val1 = result.fun
obj_val2 = result2.fun
print('Minimized Lognormal Objective =', obj_val1)
print('Minimized Gamma Objective =', obj_val2)
# The most precise comparison, I think, is between the minimized objective
# values: the smaller value indicates the closer fit to the data moments.
# From the graph and the results above, the gamma distribution fits the data
# better than the lognormal distribution.

In [None]:
#1.e_1
# Two-step GMM for the gamma model. This cell relies on criterion2 / err_vec2
# being the gamma versions with signature (xvals, alpha, beta); the Brock and
# Mirman cell later redefines both names, so run this cell before that one.
# The data moments are the observed shares in column 0 ('Percentage'), which
# are what the model probabilities must be compared with.
#First Step W
params3_init=[alpha_gmm2,beta_gmm2]
WI = np.identity(pts[:,0].shape[0])
gmm2s_args = (pts[:,0], WI)

result3 = opt.minimize(criterion2, params3_init, args = gmm2s_args, method = 'L-BFGS-B', bounds = ((1e-10,None),(1e-10,None)))
alpha_1, beta_1 = result3.x
# Column vector of first-step errors; -1 lets NumPy infer the number of
# moments instead of hard-coding 42.
err_vec3 = np.reshape(err_vec2(pts[:,0], alpha_1, beta_1),(-1,1))
omega = (err_vec3 @ err_vec3.T) / pts[:,0].shape[0]

#Second Step W
# omega is an outer product of a single vector, hence singular: use the pseudo-inverse.
W2S = np.linalg.pinv(omega)
gmm2s2_args = (pts[:,0], W2S)
params4_init = np.array([alpha_1, beta_1])
result4 = opt.minimize(criterion2, params4_init, args = gmm2s2_args, method = 'L-BFGS-B', bounds = ((1e-10,None),(1e-10,None)), tol = 1e-10)
alpha_2, beta_2 = result4.x
print('Estimation Result')
print('')
print(result4)

#1.e_2
obj_val3 = result2.fun
obj_val4 = result4.fun
print('Minimized Gamma Objective =', obj_val3)
print('Minimized Gamma with 2-Step Objective =', obj_val4)
# Compare result2.x (one-step) with result4.x (two-step) to see how much alpha and beta change.
# The two fits are compared through the values of their respective objective functions.



In [None]:
#2. Brock and Mirman(1972) by GMM
# Load the comma-separated macro series. The functions below read column 0 as
# c, column 1 as k and column 2 as w; z_1 additionally reads column 3.
pts2 = np.loadtxt('MacroSeries.txt', delimiter = ',')

def z_1(alpha, xvals):
    """Back out the series z implied by column 3 and column 1 of `xvals`.

    Computes log(x3 / (alpha * k**(alpha-1))) row by row, where k is column 1
    and x3 is column 3 (presumably the interest rate r_t -- confirm against
    the data file).
    """
    z = np.log(xvals[:,3]/(alpha*xvals[:,1]**(alpha-1)))
    return z

#Moment Conditions
def data_moment2(alpha, beta, rho, mu, xvals):
    """Sample averages of the four GMM moment conditions.

    With e_{t+1} = z_{t+1} - rho*z_t - (1-rho)*mu and
    u_{t+1} = beta*alpha*exp(z_{t+1})*k_{t+1}**(alpha-1)*c_t/c_{t+1} - 1:
        m1 = mean(e_{t+1})
        m2 = mean(e_{t+1} * z_t)
        m3 = mean(u_{t+1})
        m4 = mean(u_{t+1} * w_t)

    Two corrections relative to the earlier version:
      * m2 uses the lagged z_t as instrument (it previously multiplied by
        z_{t+1}, which is correlated with the residual by construction);
      * the consumption ratio is c_t / c_{t+1} (it was inverted).
    The lead/lag slices use [1:] and [:-1], which equal the former 1:100 and
    0:99 for a 100-row sample and also work for other sample lengths.

    Parameters
    ----------
    alpha, beta, rho, mu : model parameters
    xvals : 2-D array; column 0 is read as c, column 1 as k, column 2 as w,
            and column 3 is used by z_1

    Returns
    -------
    Tuple (m1, m2, m3, m4).
    """
    z = z_1(alpha, xvals)
    z_t = z[1:]               # z_{t+1}
    z_t_1 = z[:-1]            # z_t
    k_t = xvals[:,1][1:]      # k_{t+1}
    c_t = xvals[:,0][1:]      # c_{t+1}
    c_t_1 = xvals[:,0][:-1]   # c_t
    w_t_1 = xvals[:,2][:-1]   # w_t
    ar1_resid = z_t - rho*z_t_1 - (1-rho)*mu
    euler_resid = beta*alpha*np.exp(z_t)*k_t**(alpha-1)*c_t_1/c_t - 1
    m1 = ar1_resid.mean()
    m2 = (ar1_resid*z_t_1).mean()
    m3 = euler_resid.mean()
    m4 = (euler_resid*w_t_1).mean()
    return m1, m2, m3, m4

def err_vec2(alpha, beta, rho, mu, xvals):
    """Stack the four moments from data_moment2 into one NumPy array.

    Note: this reuses the name err_vec2 with a different signature from the
    gamma-GMM function defined earlier in the notebook and replaces it.
    """
    first, second, third, fourth = data_moment2(alpha, beta, rho, mu, xvals)
    return np.array([first, second, third, fourth]).T

def criterion2(params, *args):
    """GMM objective for the Brock and Mirman model: the quadratic form e' W e.

    `params` unpacks to (alpha, beta, rho, mu) and `args` to (xvals, W).
    Note: this reuses the name criterion2 from the gamma-GMM section and
    replaces that earlier function.
    """
    alpha, beta, rho, mu = params
    xvals,W = args
    moment_err = err_vec2(alpha, beta, rho, mu, xvals)
    weighted = moment_err.T @ W
    return weighted @ moment_err

#Initial Values for (alpha, beta, rho, mu)
alpha_init2 = 0.30
beta_init2 = 0.85
rho_init2 = 0.85
mu_init2 = 9.5

#Identity Matrix: equal weight on each of the four moments
W = np.eye(4)
params_2 = np.array([alpha_init2, beta_init2, rho_init2, mu_init2])
gmm2_arg= (pts2,W)
# Bounds: alpha and beta in (0, 1), rho in (-1, 1), mu > 0.
result_2 = opt.minimize(criterion2, params_2, args = gmm2_arg, method = 'L-BFGS-B', bounds = ((1e-10, 1-1e-10),(1e-10, 1-1e-10),(-1+1e-10, 1-1e-10),(1e-10, None)))
# NOTE: alpha_gmm2 and beta_gmm2 overwrite the gamma-GMM estimates stored
# under the same names earlier in the notebook.
alpha_gmm2, beta_gmm2, rho_gmm2, mu_gmm2 = result_2.x

#Result
print(result_2)
#Estimated Values
print('alpha_GMM =', alpha_gmm2,'beta_GMM =',beta_gmm2, 'rho_GMM =',rho_gmm2, 'mu_GMM =',mu_gmm2)
#Minimized Criterion Function
print('Minimized Criterion Function =', result_2.fun)