In [1]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as scipy
from numpy.linalg import inv
from scipy.stats import distributions as iid
from scipy.stats import skewnorm

# Question1- Part 3
## a

$ E(y) = \mu   \\
E(y-\mu)^{2} =\sigma^{2} \\
E(y-\mu)^{3}=0 $

Task: Write a DGP that takes as arguments a sample size N and a vector of "true" parameters, and returns a dataset (y,X).
For this model with central moments of a distribution, we are given that:  
First moment is $\mu$, second centered moment is $\sigma^{2}$ and third centered moment is 0. 

In [2]:
def dgp_a(true_beta, N, seed=123):
    """Draw N observations of y for the pure moment model of part (a).

    The model only restricts the moments of y:
        E(y) = mu,  E(y - mu)^2 = sigma^2,  E(y - mu)^3 = 0.
    y is drawn from a skew-normal with shape a=0 (i.e. a normal), which is
    symmetric and therefore has a zero third central moment.

    Parameters
    ----------
    true_beta : sequence of two floats, or None
        The "true" parameters (mu, sigma^2).  None falls back to the
        values originally hard-coded in this notebook, (0, 2).
    N : int
        Sample size.
    seed : int or None, default 123
        Seed for NumPy's global RNG.  Pass None to leave the RNG state
        untouched, which is what a Monte Carlo loop needs; otherwise every
        call returns the same sample.

    Returns
    -------
    y : ndarray of shape (N, 1)

    Raises
    ------
    ValueError
        If true_beta does not hold exactly two values or sigma^2 <= 0.
    """
    if true_beta is None:
        mu, var = 0.0, 2.0
    else:
        params = np.asarray(true_beta, dtype=float).ravel()
        if params.size != 2:
            raise ValueError("true_beta must be (mu, sigma^2)")
        mu, var = params
    if var <= 0:
        raise ValueError("sigma^2 must be strictly positive")

    skew = 0  # shape parameter 0 => symmetric => third central moment is 0

    if seed is not None:
        np.random.seed(seed)

    # scale is a standard deviation, hence the square root of the variance.
    y = skewnorm.rvs(a=skew, loc=mu, scale=var**0.5, size=(N, 1))
    return y


## b
$y= \alpha + X\beta + u \\
E(X^{T}u) = Eu = 0 $ 

Please refer to link in LaTeX document under Question 1- Part 3b

## c
We add the 2nd moment condition to part b.  
y = $\alpha$ + X $\beta$ + u  
E($X^{T}u$) = E$u$=0   
E($u^{2}$) = $\sigma^{2}$

In [3]:
def dgp_c(true_beta, N, sigma=5, seed=123):
    """Generate (y, X) from the homoskedastic linear model of part (c).

    Model:
        y = X @ beta + u,   E(X'u) = E(u) = 0,   E(u^2) = sigma^2
    where the first column of X is a constant, so beta[0] plays the role
    of the intercept alpha.

    Parameters
    ----------
    true_beta : sequence of k floats
        Coefficients (intercept first).  Must contain at least one value.
    N : int
        Sample size.
    sigma : float, default 5
        Standard deviation of the disturbance u.
    seed : int or None, default 123
        Seed for NumPy's global RNG; None leaves the RNG state untouched so
        repeated calls give independent samples.

    Returns
    -------
    y : ndarray of shape (N, 1)
    X : ndarray of shape (N, k), first column all ones.

    Raises
    ------
    ValueError
        If true_beta is empty.
    """
    beta = np.asarray(true_beta, dtype=float).reshape(-1, 1)
    k = beta.shape[0]
    if k < 1:
        raise ValueError("true_beta must contain at least an intercept")

    if seed is not None:
        np.random.seed(seed)

    # u is drawn independently of X, so E(u) = 0 and E(X'u) = 0 hold, and
    # scaling a standard normal by sigma gives E(u^2) = sigma^2.
    u = iid.norm.rvs(size=(N, 1)) * sigma

    # k - 1 random regressors plus the constant => X is conformable with beta.
    X = iid.norm.rvs(size=(N, k - 1))
    X = np.c_[np.ones(shape=(N, 1)), X]  # add intercept

    # Matrix product (not element-wise) so that y is N x 1.
    y = X @ beta + u

    return y, X


## d
Now we add heteroskedasticity to part c.  
y= $\alpha$ + X $\beta$ + u  
E($X^{T}u$) = Eu = 0  
E($u^{2}$) = $e^{X\sigma}$

In [4]:
def dgp_d(true_beta, N, sigma=5, seed=123):
    """Generate (y, X) from the heteroskedastic linear model of part (d).

    Model:
        y = X @ beta + u,   E(X'u) = E(u) = 0,   E(u^2 | X) = exp(X @ sigma)
    where the first column of X is a constant.

    Parameters
    ----------
    true_beta : sequence of k floats
        Coefficients (intercept first).  Must contain at least one value.
    N : int
        Sample size.
    sigma : float or sequence of k floats, default 5
        Coefficients of the log-variance index X @ sigma.  A scalar is
        applied to every column of X.  Note that the default of 5 produces
        very strong heteroskedasticity.
    seed : int or None, default 123
        Seed for NumPy's global RNG; None leaves the RNG state untouched so
        repeated calls give independent samples.

    Returns
    -------
    y : ndarray of shape (N, 1)
    X : ndarray of shape (N, k), first column all ones.

    Raises
    ------
    ValueError
        If true_beta is empty or sigma is not a scalar or length-k vector.
    """
    beta = np.asarray(true_beta, dtype=float).reshape(-1, 1)
    k = beta.shape[0]
    if k < 1:
        raise ValueError("true_beta must contain at least an intercept")

    # One variance coefficient per column of X (scalar is broadcast).
    sigma_vec = np.broadcast_to(
        np.asarray(sigma, dtype=float).reshape(-1, 1), (k, 1)
    )

    if seed is not None:
        np.random.seed(seed)

    # k - 1 random regressors plus the constant => X is conformable with beta.
    X = iid.norm.rvs(size=(N, k - 1))
    X = np.c_[np.ones(shape=(N, 1)), X]  # add intercept

    # u = e * sd(X) with e ~ N(0, 1) independent of X, so E(u | X) = 0.
    # The *variance* is exp(X @ sigma), so the standard deviation that
    # multiplies e is exp(X @ sigma / 2).
    u_sd = np.exp(X @ sigma_vec / 2)
    u = iid.norm.rvs(size=(N, 1)) * u_sd  # N x 1

    # Matrix product (not element-wise) so that y is N x 1.
    y = X @ beta + u

    return y, X


## e
Linear IV model  
y = $\alpha$ + X$\beta$ + u   
$\mbox{E}(Z^{T}u$) = $\mbox{E}u$ = 0  
$ \mbox{E}(Z^{T} X)$ = Q  

We need to describe processes that generate $(X,Z,u)$.

The following code block defines the important parameters governing
the DGP; this is the "TRUTH" we're designing tools to reveal.  



In [5]:
# Linear IV design studied in class: the "truth" the estimators should recover.

beta = 1     # coefficient of interest
gamma = 1    # loading of the disturbance u on X (source of endogeneity)
sigma_u = 1  # scale of u; homoskedastic by assumption

# Z has ell columns and X has one column, with Var([X,Z]|u) = VXZ.
ell = 4

# VXZ is laid out as [VX        Cov(X,Z);
#                     Cov(Z,X)  VZ      ]
# and is pinned down by an arbitrary but deterministic matrix A.
A = np.sqrt(1 / np.arange(1, (ell + 1)**2 + 1)).reshape(ell + 1, ell + 1)

# A'A is a Gram matrix, which is how VXZ is built to be a valid covariance.
VXZ = np.matmul(A.T, A)

# -E Z'X (in general E dg_j/db'): the Z rows of the X column of VXZ, negated.
Q = -VXZ[1:, :1]

truth = (beta, gamma, sigma_u, VXZ)

# E (Z'u)(u'Z) = sigma_u^2 * Var(Z) under homoskedasticity.
Omega = sigma_u**2 * VXZ[1:, 1:]
AVar_b = inv(Q.T @ inv(Omega) @ Q)


Omega

from scipy.stats import distributions as iid

def dgp(N,beta,gamma,sigma_u,VXZ):
    """Generate a tuple of (y,X,Z) from the linear IV model.

    Satisfies model:
        y = X*beta + u
        E Z'u = 0
        Var(u) = sigma_u^2
        Cov(X,u) = gamma*sigma_u^2
        Var([X,Z]|u) = VXZ
        u,X,Z mean zero, Gaussian

    Each element of the tuple is an array of N observations:
    y is (N,1), X is (N,1) and Z is (N, VXZ.shape[0]-1).

    Inputs include
    - N :: number of observations
    - beta :: the coefficient of interest
    - gamma :: linear effect of disturbance on X
    - sigma_u :: standard deviation of disturbance (u is a standard
      normal draw multiplied by sigma_u)
    - VXZ :: Var([X,Z]|u), a symmetric positive semi-definite matrix
      whose first row/column corresponds to X
    """
    VXZ = np.asarray(VXZ, dtype=float)

    u = iid.norm.rvs(size=(N,1))*sigma_u

    # Factor VXZ = SXZ @ SXZ.T via its eigendecomposition.  eigh is the
    # solver for symmetric matrices and always returns real eigenpairs;
    # round-off can still leave eigenvalues of a (near-)singular VXZ
    # slightly below zero, so clip them before taking square roots to
    # avoid NaNs in the draws.
    lbda,v = np.linalg.eigh(VXZ)
    SXZ = v@np.diag(np.sqrt(np.clip(lbda, 0, None)))

    # Generate normal random variates [X*,Z] with covariance VXZ
    XZ = iid.norm.rvs(size=(N,VXZ.shape[0]))@SXZ.T

    # But X is endogenous: it loads on the disturbance through gamma
    X = XZ[:,[0]] + gamma*u
    Z = XZ[:,1:]

    # Calculate y
    y = X*beta + u

    return y,X,Z


# A later cell defines another function that is also called `dgp`; this
# alias keeps the linear IV generator reachable after that redefinition.
dgp_e = dgp


## 1f

$y=f(X\beta) + u $; with $f$ a known scalar function and with $E(Z^{T}u)=Eu=0$ and $E(Z^{T}Xf'(X\beta))=Q(\beta)$

Please refer to 1g: it is the same model as 1f, only more general, since in 1g $f$ is a general known function of $(X,\beta)$ whereas in 1f it is a scalar function of the index $X\beta$.

## 1g 

$y=f(X,\beta) + u $; with $f$ a known  function and with $E(Z^{T}u)=Eu=0$ and $E(Z^{T}X(\frac{\partial f}{\partial \beta^{T}}))=Q(\beta)$

For this example, we consider a model we looked at in class, in which f is a known function. 

$$y_i = exp\{X_i\beta\} + e_i$$

$$ E(z_i'e_i) = 0$$
$$g_j(\beta) = E(z_i'e_i) = E(z_i'(y_i - exp\{X_i\beta\})) = 0$$

In [6]:
def dgp(N, true_beta, cov_ze=0):
    # define covariance mat of XEZ:
    cov_XEZZ = np.array(
        [[4, 1, 1, 2],
        [1, 2, cov_ze, cov_ze*0.5],
        [1, cov_ze, 3, 0.2],
        [2, cov_ze*.5, 0.2, 8]])*0.1
    
    XEZZ = iid.multivariate_normal(mean=[.2, 0, 0, 0],
                                   cov=cov_XEZZ).rvs(size = N)
    
    X = XEZZ[:, [0]]
    
    e = XEZZ[:, [1]]
    
    Z = XEZZ[:, 2:]
    
    y = np.exp(true_beta[0] + X*(true_beta[1])) + e
    X = np.c_[np.ones(shape = (N,1)), X] # add intercept
    Z = np.c_[np.ones(shape = (N,1)), Z]
    
    return (y, X, Z)

## 1h 
$ y^{\gamma} = \alpha + u$, with $y$ >0 and $\gamma$ a scalar, and $E(Z^{T}u)=Eu=0$ and $EZ^{T}$ $\begin{pmatrix}
                          \gamma y ^{\gamma-1} \\ 
                          -1
                        \end{pmatrix}$  = $Q(\gamma)$
                        

In [7]:
def dgp_h(true_beta, N):
    """Generate (y, Z) from the power-transformation model of part (h).

    Model:
        y^gamma = alpha + u,   y > 0,   E(Z'u) = E(u) = 0.

    The model requires y > 0, i.e. alpha + u > 0.  u is therefore drawn
    from a standard normal truncated to (-alpha, alpha): the truncation is
    symmetric about zero, so E(u) = 0 is preserved while alpha + u stays
    positive and the fractional power below is always defined.

    Parameters
    ----------
    true_beta : sequence of two floats
        The "true" parameters (alpha, gamma); alpha > 0 and gamma != 0.
    N : int
        Sample size.

    Returns
    -------
    y : ndarray of shape (N, 1), strictly positive.
    Z : ndarray of shape (N, 2), standard normal instruments drawn
        independently of u (one column per parameter).

    Raises
    ------
    ValueError
        If true_beta is not (alpha, gamma) with alpha > 0 and gamma != 0.
    """
    params = np.asarray(true_beta, dtype=float).ravel()
    if params.size != 2:
        raise ValueError("true_beta must be (alpha, gamma)")
    alpha, gamma = params
    if alpha <= 0:
        raise ValueError("alpha must be positive so that y > 0")
    if gamma == 0:
        raise ValueError("gamma must be non-zero")
    k = params.size

    # E(u) = 0 by symmetry of the truncation bounds.
    u = iid.truncnorm.rvs(-alpha, alpha, size=(N, 1))
    # Z is independent of u, hence E(Z'u) = 0.
    Z = iid.norm.rvs(size=(N, k))

    # Invert y^gamma = alpha + u.  `**` is exponentiation (`^` is XOR).
    y = (alpha + u) ** (1 / gamma)

    return y, Z
