Back to **[Fan](https://fanwangecon.github.io/)**'s R4Econ Homepage **[Table of Contents](https://fanwangecon.github.io/R4Econ/)**

# IV (OR OLS) Regression

IV regression using the AER package, falling back to OLS when no instruments are given. Optionally, all results are stored in a single data frame row so that results from several estimations can be combined.

Produce Row Statistics

## Program

In [41]:
# IV (or OLS) regression function
# Uses ivreg() (AER package) when instruments are supplied and lm() otherwise.
# All results are stored in a single row as a data frame, so that rows from
# several estimations can be combined.
# This function could work with dplyr do
#
# Arguments:
#   var.y     : name of the single outcome variable
#   vars.x    : character vector of endogenous variables (main regressors for OLS)
#   vars.c    : character vector of controls; may be empty
#   vars.z    : character vector of instruments; NULL, character(0) or c('')
#               means "no instruments" and selects the OLS branch
#   df        : data frame holding all the variables named above
#   transpose : TRUE (default) returns one wide row with one column per
#               statistic; FALSE returns the long esti.val/value form
#
# Returns a data.frame. Every value is stored as character, because the
# variable-name strings and the numeric statistics share a single value column.
# NOTE: data.frame() is called with its default name checking, so the column
# names of the wide row may differ from the esti.val labels.
#
# Expects tidyverse (tibble, dplyr, tidyr) and AER to be attached by the caller.
regf.iv <- function(var.y, vars.x, vars.c, vars.z, df, transpose=TRUE) {

    # Drop NULL/NA/empty-string entries. Without this, vars.z = c('') has
    # length 1 and would be sent down the IV branch with an empty instrument
    # name; an empty vars.c would leave a dangling '+' in the formula.
    vars.z <- vars.z[!is.na(vars.z) & nzchar(vars.z)]
    vars.c <- vars.c[!is.na(vars.c) & nzchar(vars.c)]

    # A. Set-Up Equation
    str.vars.x <- paste(vars.x, collapse='+')
    str.vars.c <- paste(vars.c, collapse='+')
    # Right-hand side shared by the IV second stage and OLS: regressors + controls
    str.rhs <- paste(c(vars.x, vars.c), collapse='+')

    if (length(vars.z) >= 1) {

        str.vars.z <- paste(vars.z, collapse='+')
        # ivreg formula: y ~ regressors + controls | instruments + controls
        equa.iv <- paste(var.y,
                         paste(str.rhs,
                               paste(c(vars.z, vars.c), collapse='+'),
                               sep='|'),
                         sep='~')

        # B. IV Regression, summarised with the sandwich covariance estimator,
        # z-statistics (df = Inf) and the instrument diagnostics
        ivreg.summ <- summary(ivreg(as.formula(equa.iv), data=df),
                              vcov = sandwich, df = Inf, diagnostics = TRUE)

        # C/D. Combine coefficient table, diagnostics and variable names into
        # one tibble keyed by 'rownames' (the only column the pieces share)
        df.results <- as_tibble(ivreg.summ$coefficients, rownames='rownames') %>%
            full_join(as_tibble(ivreg.summ$diagnostics, rownames='rownames'),
                      by='rownames') %>%
            full_join(tibble(rownames=c('vars'),
                             var.y=var.y,
                             vars.x=str.vars.x,
                             vars.z=str.vars.z,
                             vars.c=str.vars.c),
                      by='rownames')
    } else {

        # OLS regression
        equa.ols <- paste(var.y, str.rhs, sep='~')

        lmreg.summ <- summary(lm(as.formula(equa.ols), data=df))

        # Scalar fit statistics reshaped to long form: statistic name in
        # 'rownames', its value in 'v'
        lm.diagnostics <- as_tibble(list(df1=lmreg.summ$df[[1]],
                                         df2=lmreg.summ$df[[2]],
                                         df3=lmreg.summ$df[[3]],
                                         sigma=lmreg.summ$sigma,
                                         r.squared=lmreg.summ$r.squared,
                                         adj.r.squared=lmreg.summ$adj.r.squared)) %>%
            gather(variable, value) %>%
            rename(rownames = variable) %>%
            rename(v = value)

        df.results <- as_tibble(lmreg.summ$coefficients, rownames='rownames') %>%
            full_join(lm.diagnostics, by='rownames') %>%
            full_join(tibble(rownames=c('vars'),
                             var.y=var.y,
                             vars.x=str.vars.x,
                             vars.c=str.vars.c),
                      by='rownames')
    }

    # E. Flatten Matrix: one (esti.val, value) pair per non-missing cell, where
    # esti.val is '<row label>_<column label>' with spaces removed
    df.row.results <- df.results %>%
        gather(variable, value, -rownames) %>%
        drop_na() %>%
        unite(esti.val, rownames, variable) %>%
        mutate(esti.val = gsub(' ', '', esti.val))

    # Optionally turn the long form into a single wide row
    if (transpose) {
      df.row.results <- df.row.results %>% spread(esti.val, value)
    }

    # F. Return
    data.frame(df.row.results)
}

## Load Data

In [42]:
# Library: tidyverse supplies read_csv and the tibble/dplyr/tidyr verbs used
# below; AER supplies ivreg.
library(tidyverse)
library(AER)

# Load Sample Data
# Read through a full path rather than setwd(), so the working directory of
# the session is left untouched.
dir.data <- 'C:/Users/fan/R4Econ/_data'
df <- read_csv(file.path(dir.data, 'height_weight.csv'))

# Setting: allow up to 50 rows and 50 columns when tables are displayed
options(repr.matrix.max.rows=50, repr.matrix.max.cols=50)

Parsed with column specification:
cols(
  S.country = col_character(),
  vil.id = col_double(),
  indi.id = col_double(),
  sex = col_character(),
  svymthRound = col_double(),
  momEdu = col_double(),
  wealthIdx = col_double(),
  hgt = col_double(),
  wgt = col_double(),
  hgt0 = col_double(),
  wgt0 = col_double(),
  prot = col_double(),
  cal = col_double(),
  p.A.prot = col_double(),
  p.A.nProt = col_double()
)


## Example No Instrument, OLS

In [43]:
# No instruments (empty vars.z); intended as the OLS example
var.y <- 'hgt'
vars.x <- 'prot'
vars.z <- ''
vars.c <- c('sex', 'hgt0', 'wgt0')
# Regression, results returned in long (esti.val, value) form
regf.iv(var.y = var.y, vars.x = vars.x, vars.c = vars.c, vars.z = vars.z,
        df = df, transpose = FALSE)

esti.val,value
(Intercept)_Estimate,52.1186286658651
prot_Estimate,0.374472386357917
sexMale_Estimate,0.611043720578292
hgt0_Estimate,0.148513781160842
wgt0_Estimate,0.00150560230505631
(Intercept)_Std.Error,1.57770483608693
prot_Std.Error,0.00418121191133815
sexMale_Std.Error,0.118396259120659
hgt0_Std.Error,0.0393807494783186
wgt0_Std.Error,0.000187123663624397


## Example 1 Instrument

In [44]:
# One instrument: prot instrumented by momEdu
var.y <- 'hgt'
vars.x <- 'prot'
vars.z <- 'momEdu'
vars.c <- c('sex', 'hgt0', 'wgt0')
# Regression, results returned in long (esti.val, value) form
regf.iv(var.y = var.y, vars.x = vars.x, vars.c = vars.c, vars.z = vars.z,
        df = df, transpose = FALSE)

esti.val,value
(Intercept)_Estimate,52.1186286658651
prot_Estimate,0.374472386357917
sexMale_Estimate,0.611043720578292
hgt0_Estimate,0.148513781160842
wgt0_Estimate,0.00150560230505631
(Intercept)_Std.Error,1.57770483608693
prot_Std.Error,0.00418121191133815
sexMale_Std.Error,0.118396259120659
hgt0_Std.Error,0.0393807494783186
wgt0_Std.Error,0.000187123663624397


## Example Multiple Instruments

In [45]:
# Multiple instruments for a single endogenous variable
var.y <- 'hgt'
vars.x <- 'prot'
vars.z <- c('momEdu', 'wealthIdx', 'p.A.prot', 'p.A.nProt')
vars.c <- c('sex', 'hgt0', 'wgt0')
# Regression, results returned in long (esti.val, value) form
regf.iv(var.y = var.y, vars.x = vars.x, vars.c = vars.c, vars.z = vars.z,
        df = df, transpose = FALSE)

esti.val,value
(Intercept)_Estimate,42.2437613555242
prot_Estimate,0.26699945194704
sexMale_Estimate,0.695548488812932
hgt0_Estimate,0.424954881263031
wgt0_Estimate,0.000486951420329484
(Intercept)_Std.Error,1.85356686789642
prot_Std.Error,0.0154939347964083
sexMale_Std.Error,0.133157977814374
hgt0_Std.Error,0.0463195803786233
wgt0_Std.Error,0.000224867994873235


## Example Multiple Endogenous Variables

In [46]:
# Multiple endogenous variables, multiple instruments
var.y <- 'hgt'
vars.x <- c('prot', 'cal')
vars.z <- c('momEdu', 'wealthIdx', 'p.A.prot', 'p.A.nProt')
vars.c <- c('sex', 'hgt0', 'wgt0')
# Regression, results returned in long (esti.val, value) form
regf.iv(var.y = var.y, vars.x = vars.x, vars.c = vars.c, vars.z = vars.z,
        df = df, transpose = FALSE)

esti.val,value
(Intercept)_Estimate,44.0243196254297
prot_Estimate,-1.4025623247106
cal_Estimate,0.065104895750151
sexMale_Estimate,0.120832787571818
hgt0_Estimate,0.286525437984517
wgt0_Estimate,0.000850481389651033
(Intercept)_Std.Error,2.75354847244082
prot_Std.Error,0.198640060273635
cal_Std.Error,0.00758881298880996
sexMale_Std.Error,0.209984580636303


## Examples Line by Line

The examples are just to test the code with different types of variables.

In [47]:
# Variables for the line-by-line walk-through:
# outcome, endogenous regressors, controls, instruments
var.y <- 'hgt'
vars.x <- c('prot', 'cal')
vars.c <- c('sex', 'hgt0', 'wgt0')
vars.z <- c('momEdu', 'wealthIdx', 'p.A.prot', 'p.A.nProt')

In [48]:
# A. Create equation: collapse each variable group into a '+'-separated string
str.vars.x <- paste(vars.x, collapse='+')
str.vars.c <- paste(vars.c, collapse='+')
str.vars.z <- paste(vars.z, collapse='+')
print(str.vars.x)
print(str.vars.c)
print(str.vars.z)
# ivreg formula: y ~ regressors + controls | instruments + controls
equa.iv <- sprintf('%s~%s+%s|%s+%s',
                   var.y,
                   str.vars.x, str.vars.c,
                   str.vars.z, str.vars.c)
print(equa.iv)

# B. Regression: fit by IV and show the coefficient vector
res.ivreg <- ivreg(formula = as.formula(equa.iv), data = df)
coef(res.ivreg)

[1] "prot+cal"
[1] "sex+hgt0+wgt0"
[1] "momEdu+wealthIdx+p.A.prot+p.A.nProt"
[1] "hgt~prot+cal+sex+hgt0+wgt0|momEdu+wealthIdx+p.A.prot+p.A.nProt+sex+hgt0+wgt0"


In [49]:
# C. Regression summary using the sandwich covariance estimator,
# z-statistics (df = Inf) and instrument diagnostics
ivreg.summ <- summary(object = res.ivreg,
                      vcov = sandwich,
                      df = Inf,
                      diagnostics = TRUE)

# Coefficient table, then the diagnostics table
ivreg.summ$coefficients
ivreg.summ$diagnostics

Unnamed: 0,Estimate,Std. Error,z value,Pr(>|z|)
(Intercept),44.0243196254,2.7535484724,15.9882131,1.543966e-57
prot,-1.4025623247,0.1986400603,-7.0608231,1.655192e-12
cal,0.0651048958,0.007588813,8.5790618,9.565006e-18
sexMale,0.1208327876,0.2099845806,0.5754365,0.5649961
hgt0,0.286525438,0.0707828183,4.0479518,5.166778e-05
wgt0,0.0008504814,0.0003371121,2.5228444,0.01164099


Unnamed: 0,df1,df2,statistic,p-value
Weak instruments (prot),4,14914.0,274.14708,8.61732e-228
Weak instruments (cal),4,14914.0,315.03685,1.1891859999999999e-260
Wu-Hausman,2,14914.0,94.70201,1.3502409999999999e-41
Sargan,2,,122.08198,3.091968e-27


In [50]:
# D. Combine regression results into one table: the coefficient table, the
# diagnostics table and a 'vars' row holding the variable names are joined
# in sequence on their shared column(s)
df.results <- suppressMessages(
    Reduce(full_join,
           list(as_tibble(ivreg.summ$coef, rownames='rownames'),
                as_tibble(ivreg.summ$diagnostics, rownames='rownames'),
                tibble(rownames=c('vars'),
                       var.y=var.y,
                       vars.x=str.vars.x,
                       vars.z=str.vars.z,
                       vars.c=str.vars.c))))
# E. Flatten the table: one (esti.val, value) pair per non-missing cell, with
# esti.val built as '<row label>_<column label>' and spaces stripped
df.row.results <- df.results %>%
    gather(key = variable, value = value, -rownames) %>%
    drop_na() %>%
    unite(col = esti.val, rownames, variable) %>%
    mutate(esti.val = gsub(' ', '', esti.val))

In [51]:
# F. Results as a single column: one esti.val/value pair per row
df.row.results

esti.val,value
(Intercept)_Estimate,44.0243196254297
prot_Estimate,-1.4025623247106
cal_Estimate,0.065104895750151
sexMale_Estimate,0.120832787571818
hgt0_Estimate,0.286525437984517
wgt0_Estimate,0.000850481389651033
(Intercept)_Std.Error,2.75354847244082
prot_Std.Error,0.198640060273635
cal_Std.Error,0.00758881298880996
sexMale_Std.Error,0.209984580636303


In [52]:
# G. Results as Single Row
# NOTE(review): this cell displays the same long-format table as step F; the
# single-row form is only produced by the spread() call in the following cell.
df.row.results

esti.val,value
(Intercept)_Estimate,44.0243196254297
prot_Estimate,-1.4025623247106
cal_Estimate,0.065104895750151
sexMale_Estimate,0.120832787571818
hgt0_Estimate,0.286525437984517
wgt0_Estimate,0.000850481389651033
(Intercept)_Std.Error,2.75354847244082
prot_Std.Error,0.198640060273635
cal_Std.Error,0.00758881298880996
sexMale_Std.Error,0.209984580636303


In [53]:
# Spread the long table into a single wide row: one column per esti.val label
df.row.results %>% spread(esti.val, value)

(Intercept)_Estimate,(Intercept)_Pr(>|z|),(Intercept)_Std.Error,(Intercept)_zvalue,cal_Estimate,cal_Pr(>|z|),cal_Std.Error,cal_zvalue,hgt0_Estimate,hgt0_Pr(>|z|),hgt0_Std.Error,hgt0_zvalue,prot_Estimate,prot_Pr(>|z|),prot_Std.Error,prot_zvalue,Sargan_df1,Sargan_p-value,Sargan_statistic,sexMale_Estimate,sexMale_Pr(>|z|),sexMale_Std.Error,sexMale_zvalue,vars_var.y,vars_vars.c,vars_vars.x,vars_vars.z,Weakinstruments(cal)_df1,Weakinstruments(cal)_df2,Weakinstruments(cal)_p-value,Weakinstruments(cal)_statistic,Weakinstruments(prot)_df1,Weakinstruments(prot)_df2,Weakinstruments(prot)_p-value,Weakinstruments(prot)_statistic,wgt0_Estimate,wgt0_Pr(>|z|),wgt0_Std.Error,wgt0_zvalue,Wu-Hausman_df1,Wu-Hausman_df2,Wu-Hausman_p-value,Wu-Hausman_statistic
44.0243196254297,1.5439659812685398e-57,2.75354847244082,15.9882130516502,0.065104895750151,9.565006482031869e-18,0.0075888129888099,8.57906181719737,0.286525437984517,5.16677787108928e-05,0.0707828182888255,4.04795181812859,-1.4025623247106,1.65519210848649e-12,0.198640060273635,-7.06082309267581,2,3.09196773720398e-27,122.081979628898,0.120832787571818,0.564996139463599,0.209984580636303,0.575436478267434,hgt,sex+hgt0+wgt0,prot+cal,momEdu+wealthIdx+p.A.prot+p.A.nProt,4,14914,1.1891864122086601e-260,315.036848606231,4,14914,8.617319562333659e-228,274.147084958343,0.000850481389651,0.0116409892837831,0.0003371121044442,2.52284441418383,2,14914,1.35024050408262e-41,94.7020085425169
