# Mastering 'Metrics

Chapter 1: Table 1.1

This table compares people with and without health insurance in the 2009 National Health Interview Survey (NHIS).


Data and replication code: http://www.masteringmetrics.com/resources/

In [1]:
*** Metrics
*** Table 1.1
*** goal: make table of health outcomes and characteristics by insurance status ***

* by Georg Graetz, August 6, 2013
* modified lightly by Gabriel Kreindler, June 13, 2014
* modified lightly by Jon Petkun, January 2, 2015
* modified lightly by Ryan Hill, Jan 31, 2020

* Start from a clean session.
set more off        // do not stop output at -more- prompts
pause on            // enable -pause- statements
clear all           // empty the workspace
capture log close   // close a log left open by an earlier run, if any

In [2]:
* Move into the folder that holds NHIS2009_clean.dta; edit the path below
* if the data are stored somewhere else.
local datadir "./NHIS/Data/"
cd "`datadir'"

/Users/datalab/github/datascience/causal-inference/NHIS/Data


In [3]:
* Open a plain-text log, replacing any earlier copy; -capture- lets the
* notebook continue even if the log cannot be opened.
capture log using "NHIS2009_hicompare.log", text replace

* Load the cleaned NHIS file, discarding whatever is currently in memory.
use "NHIS2009_clean", clear

In [4]:
* select non-missings
* Keeps rows with marradult==1 and a non-zero perweight, then keeps only
* households (serial) where the household mean of hi_hsb1 and the person's
* own hi are both non-missing, and where the fml values sum to exactly one.
	keep if marradult==1 & perweight!=0
		* bysort (not plain by) sorts on serial first, so this cell does not
		* abort with "not sorted" if the file arrives in a different order
		bysort serial: egen hi_hsb = mean(hi_hsb1)
			keep if hi_hsb!=. & hi!=.
		bysort serial: egen female = total(fml)
			keep if female==1
			* helper variable is no longer needed once the filter is applied
			drop female


(50,662 observations deleted)

(207 missing values generated)

(207 observations deleted)


(31 observations deleted)



In [5]:
* Josh's sample selection criteria
* Flags rows with age in [26, 59], marradult==1 and adltempl>=1, and keeps
* only those rows.
* NOTE(review): Stata treats missing as larger than any number, so a missing
* adltempl also satisfies adltempl>=1 -- confirm the cleaned file has none.
	gen angrist = ( age>=26 & age<=59 & marradult==1 & adltempl>=1 )
		keep if angrist==1
	// drop single-person HHs
	// bysort sorts on serial first, so the by-group count never fails with
	// "not sorted"; n is the number of remaining rows sharing a serial
	bysort serial: gen n = _N
		keep if n>1



(9,613 observations deleted)


(1,331 observations deleted)


In [13]:
* Allocate the results table, filled with missing values: seven statistics
* with two rows each (the estimate and the row labelled "se") plus a final
* sample-size row, and three columns for husbands followed by three for wives.
	matrix define results = J(15, 6, .)
	matrix rownames results = "Health index" "se" "Nonwhite" "se" "Age" "se" ///
		"Education" "se" "Family Size" "se" "Employed" "se" ///
		"Family income" "se" "Sample size"
	matrix colnames results = "Husbands: Some HI" "Husbands: No HI" "Husbands: Difference" ///
		"Wives: Some HI" "Wives: No HI" "Wives: Difference"

	* Print the still-empty table to check the labels
	matrix list results, format(%8.4f)

	* Write cursors used by the later cells: the row pair currently being
	* filled (estimate row, then the "se" row under it) and the column
	local row1 = 1
	local row2 = `row1' + 1
	local col  = 1






results[15,6]
                Husbands:   Husbands:   Husbands:      Wives:      Wives:
                 Some HI       No HI  Difference     Some HI       No HI
Health index           .           .           .           .           .
          se           .           .           .           .           .
    Nonwhite           .           .           .           .           .
          se           .           .           .           .           .
         Age           .           .           .           .           .
          se           .           .           .           .           .
   Education           .           .           .           .           .
          se           .           .           .           .           .
 Family Size           .           .           .           .           .
          se           .           .           .           .           .
    Employed           .           .           .           .           .
          se           .       

In [14]:
 * Health index by insurance status, first for fml==0 (the Husbands columns)
 * and then for fml==1 (the Wives columns). The column cursor is not reset
 * between the two passes, so it runs left to right across all six columns.
	forvalues spouse = 0/1 {

		* weighted mean and standard deviation: hi==1 first, then hi==0
		foreach insured in 1 0 {
			quietly summarize hlth if hi==`insured' & fml==`spouse' [ aw=perweight ]
			matrix results[`row1',`col'] = r(mean)
			matrix results[`row2',`col'] = r(sd)
			local col = `col' + 1
		}

		* coefficient on hi and its robust standard error
		regress hlth hi if fml==`spouse' [ aw=perweight ], robust
		matrix results[`row1',`col'] = _b[hi]
		matrix results[`row2',`col'] = _se[hi]
		local col = `col' + 1
	}

	* move the row cursors down to the next pair of rows
	local row1 = `row1' + 2
	local row2 = `row2' + 2


(sum of wgt is 34,118,563)

Linear regression                               Number of obs     =      9,395
                                                F(1, 9393)        =      84.68
                                                Prob > F          =     0.0000
                                                R-squared         =     0.0129
                                                Root MSE          =      .9406

------------------------------------------------------------------------------
             |               Robust
        hlth |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
          hi |   .3132452   .0340396     9.20   0.000     .2465202    .3799702
       _cons |   3.695654   .0316859   116.63   0.000     3.633543    3.757765
------------------------------------------------------------------------------
(sum of wgt is 32,513,646)

Linear regression                    

In [24]:
* Other characteristics by insurance and sex
* For each variable, fills one row pair of the results matrix: weighted means
* for hi==1 and hi==0, then the coefficient on hi with its robust standard
* error, for fml==0 (columns 1-3) and fml==1 (columns 4-6). The row cursors
* row1/row2 must already point at the first free row pair.
	foreach var in nwhite age yedu famsize empl inc {
		
		* restart at the first column for every variable
		local col = 1
		forval fem = 0/1 {
		
		* weighted means (no SDs are stored for these rows)
			qui sum `var' if hi==1 & fml==`fem' [ aw=perweight ]
				mat results[`row1',`col'] = r(mean)
				local ++col
			qui sum `var' if hi==0 & fml==`fem' [ aw=perweight ]
				mat results[`row1',`col'] = r(mean)
				local ++col
				
		* mean comparisons 
		* aw is spelled out to match the health-index regressions; a bare w
		* is treated as aweights by regress, so the estimates are unchanged
			reg `var' hi if fml==`fem' [ aw=perweight ], robust
				mat results[`row1',`col'] = _b[hi]
				mat results[`row2',`col'] = _se[hi]
				local ++col
						
		}		
		local row1 = `row1' + 2
		local row2 = `row2' + 2
	}

	* Sample sizes
	* Tabulated WITHOUT weights: with aweights, tabulate reports frequencies
	* rescaled by the weights rather than the number of observations.
	* matcell rows follow the sorted values of hi, so row 1 is hi==0 (No HI)
	* and row 2 is hi==1 (Some HI).
	tab hi if fml == 0, matcell(x)
	mat list x
	
	mat results[`row1',2] = x[1,1]
	mat results[`row1',1] = x[2,1]
	
	tab hi if fml == 1, matcell(y)
	mat list y
	
	mat results[`row1',5] = y[1,1]
	mat results[`row1',4] = y[2,1]


(analytic weights assumed)
(sum of wgt is 34,118,563)

Linear regression                               Number of obs     =      9,395
                                                F(1, 9393)        =       1.01
                                                Prob > F          =     0.3144
                                                R-squared         =     0.0001
                                                Root MSE          =     .36602

------------------------------------------------------------------------------
             |               Robust
      nwhite |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
          hi |  -.0115948   .0115249    -1.01   0.314     -.034186    .0109965
       _cons |   .1693667   .0106274    15.94   0.000     .1485347    .1901986
------------------------------------------------------------------------------
(analytic weights assumed)
(sum of wgt

             |               Robust
        empl |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
          hi |   .0748071   .0118272     6.33   0.000     .0516233     .097991
       _cons |    .849466   .0112809    75.30   0.000     .8273529    .8715791
------------------------------------------------------------------------------
(analytic weights assumed)
(sum of wgt is 32,513,646)

Linear regression                               Number of obs     =      9,395
                                                F(1, 9393)        =     151.32
                                                Prob > F          =     0.0000
                                                R-squared         =     0.0238
                                                Root MSE          =      .4332

------------------------------------------------------------------------------
             |               Robust
   

In [33]:
* Display the filled-in table with two decimals
mat list results, format(%8.2f)


results[15,6]
                Husbands:   Husbands:   Husbands:      Wives:      Wives:
                 Some HI       No HI  Difference     Some HI       No HI
Health index        4.01        3.70        0.31        4.02        3.62
          se        0.93        1.01        0.03        0.92        1.01
    Nonwhite        0.16        0.17       -0.01        0.15        0.17
          se           .           .        0.01           .           .
         Age       43.98       41.26        2.71       42.24       39.62
          se           .           .        0.29           .           .
   Education       14.31       11.56        2.74       14.44       11.80
          se           .           .        0.10           .           .
 Family Size        3.50        3.98       -0.47        3.49        3.93
          se           .           .        0.05           .           .
    Employed        0.92        0.85        0.07        0.77        0.56
          se           .           

In [31]:
* Export the table, including its row and column labels, to MM_Table1_1.xlsx
* (any existing file of that name is replaced)
putexcel set "MM_Table1_1", replace
putexcel A1 = matrix(results), names nformat(number_d2)


Note: file will be replaced when the first putexcel command is issued

file MM_Table1_1.xlsx saved


In [32]:
* Close the log if one is open; -capture- suppresses the error otherwise
capture log close


### Compare with R
https://jrnold.github.io/masteringmetrics/national-health-interview-survey.html