## Predatory Inclusion in Non-Profit and For-Profit Online Education
### By Christian Michael Smith, Amber D. Villalobos (equal authors), Laura T. Hamilton, and Charlie Eaton

### Code by Christian Smith

### Download Data for Tables 3 and 5

In [1]:
*Set working directory (NOTE TO USER: uncomment the "global path" and "cd" lines below and replace [SET USER PATH] with your own folder)
*global path "/Users/christiansmith/Downloads"
*global path   "[SET USER PATH]"
*cd $path

*Reset the Stata session so the script always starts from a clean state
set more off
capture log close
clear all
set matsize 11000

*Open the replication log (an existing log with the same name is overwritten)
log using "OnlinePredation_InstitutionLevel_Replication.log", replace


**************************************************
***************(1) DATA PREPARATION***************
**************************************************



**********(A) PREPARE EACH INDIVIDUAL DATASET**********

	
	
***(I) PREPARE DATA ON ONLINE PREVALENCE***
			
*2012-2018: download each year's EF-A distance-education file and save it as ef<year>a_dist
*(for these years the CSV inside the zip carries an "_rv" tag in its name)
quietly {
	forvalues yr = 2012/2018 {
		local pkg EF`yr'A_DIST_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using ef`yr'a_dist_rv_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		* Retain only rows with EFDELEV code 2 (presumably the undergraduate total - confirm in the IPEDS dictionary)
		keep if efdelev == 2
		generate year = `yr'
		save ef`yr'a_dist, replace
	}
}

*2019-2020: same preparation as the 2012-2018 distance-education files,
*but the CSV inside the zip has no "_rv" tag in its name
quietly {
	forvalues yr = 2019/2020 {
		local pkg EF`yr'A_DIST_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using ef`yr'a_dist_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		* Retain only rows with EFDELEV code 2 (presumably the undergraduate total - confirm in the IPEDS dictionary)
		keep if efdelev == 2
		generate year = `yr'
		save ef`yr'a_dist, replace
	}
}

		
		
***(II) PREPARE DATA ON INSTITUTIONAL CHARACTERISTICS***
	
*2012-2020: download each year's HD (institutional characteristics) file and save it as hd<year>a
quietly {
	forvalues yr = 2012/2020 {
		local pkg HD`yr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using hd`yr'_data_stata.csv, clear
		* Store these identifier-like fields as strings in every year
		* (presumably so the yearly files share variable types when combined later - confirm)
		tostring ein gentele opeid zip closedat, replace force
		save hd`yr'a, replace
	}
}


		
***(III) PREPARE DATA ON ENROLLMENT COUNTS***

*2012-2018: build three enrollment-count files per year from the EF-A file.
*Each spec below is "EFALEVEL code, variable prefix, output file stem":
*	2  -> u / under<year> : total count of undergraduates (by race)
*	24 -> t / trad<year>  : full-time, first-time, degree/cert.-seeking ("traditional") undergraduates (by race)
*	4  -> f / first<year> : first-time, degree/cert.-seeking undergraduates (by race)
quietly {
	forvalues yr = 2012/2018 {
		local pkg EF`yr'A_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		foreach spec in "2 u under" "24 t trad" "4 f first" {
			local lvl  : word 1 of `spec'
			local pre  : word 2 of `spec'
			local stem : word 3 of `spec'
			* Re-read the full CSV for each level, then keep only that level's rows
			insheet using ef`yr'a_rv_data_stata.csv, clear
			keep if efalevel == `lvl'
			* Prefix the count variables so the three files can coexist after merging
			foreach v in efbkaat efaiant efasiat efhispt efwhitt eftotlm eftotlw eftotlt {
				rename `v' `pre'`v'
			}
			generate year = `yr'
			save `stem'`yr', replace
		}
	}
}

*2019-2020: same three enrollment-count files per year as for 2012-2018,
*but the CSV inside the zip has no "_rv" tag in its name.
*Each spec below is "EFALEVEL code, variable prefix, output file stem":
*	2  -> u / under<year> : total count of undergraduates (by race)
*	24 -> t / trad<year>  : full-time, first-time, degree/cert.-seeking ("traditional") undergraduates (by race)
*	4  -> f / first<year> : first-time, degree/cert.-seeking undergraduates (by race)
quietly {
	forvalues yr = 2019/2020 {
		local pkg EF`yr'A_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		foreach spec in "2 u under" "24 t trad" "4 f first" {
			local lvl  : word 1 of `spec'
			local pre  : word 2 of `spec'
			local stem : word 3 of `spec'
			* Re-read the full CSV for each level, then keep only that level's rows
			insheet using ef`yr'a_data_stata.csv, clear
			keep if efalevel == `lvl'
			* Prefix the count variables so the three files can coexist after merging
			foreach v in efbkaat efaiant efasiat efhispt efwhitt eftotlm eftotlw eftotlt {
				rename `v' `pre'`v'
			}
			generate year = `yr'
			save `stem'`yr', replace
		}
	}
}
	
		
			
***(IV) PREPARE DATA ON TOTAL FTES INCLUDING GRAD STUDENTS***

*2012-2019: download each year's EFIA file, compute total FTE (undergraduate + graduate),
*and save it as efia<year>. The 2012-2018 CSVs carry an "_rv" tag in their names; the 2019 CSV does not.
quietly {
	forvalues yr = 2012/2019 {
		* File-name tag: "_rv" through 2018, empty for 2019
		local tag = cond(`yr' <= 2018, "_rv", "")
		local pkg EFIA`yr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using efia`yr'`tag'_data_stata.csv, clear
		* When only one of the two FTE components is missing, treat the missing one as zero
		replace eftegd = 0 if efteug != . & eftegd == .
		replace efteug = 0 if eftegd != . & efteug == .
		generate totalfte = eftegd + efteug
		* A total of exactly zero is recorded as missing rather than as a true zero
		replace totalfte = . if totalfte == 0
		generate year = `yr'
		save efia`yr', replace
	}
}

	
	
***(V) PREPARE DATA ON RETENTION RATES***

*2012-2017 entering cohorts: the EF-D file for survey year Y is stored under year Y-1,
*so files EF2013D-EF2018D become ret2012-ret2017
*(for these years the CSV inside the zip carries an "_rv" tag in its name)
quietly {
	forvalues yr = 2013/2018 {
		local entry = `yr' - 1
		local pkg EF`yr'D_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using ef`yr'd_rv_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		generate year = `yr'-1
		drop stufacr
		save ret`entry', replace
	}
}

*2018-2019 entering cohorts: the EF-D file for survey year Y is stored under year Y-1,
*so files EF2019D-EF2020D become ret2018-ret2019
*(for these years the CSV inside the zip has no "_rv" tag in its name)
quietly {
	forvalues yr = 2019/2020 {
		local entry = `yr' - 1
		local pkg EF`yr'D_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using ef`yr'd_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		generate year = `yr'-1
		drop stufacr
		save ret`entry', replace
	}
}

	
	
***(VI) PREPARE DATA ON GRADUATION RATES***

*2012 fall entering cohort: the GR file for survey year Y is stored under year Y-6,
*so GR2018 becomes gr2012 with one row per institution
*(for this year the CSV inside the zip carries an "_rv" tag in its name)
qui {
	forvalues yr = 2018/2018 {
		local entry = `yr' - 6
		local pkg GR`yr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using gr`yr'_rv_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		gen year = `yr'-6
		* Pull the numerator and denominator from their own rows of the long file
		* (GRTYPE 3 and 2 presumably hold six-year completers and the adjusted cohort - confirm in the IPEDS dictionary)
		gen gradnum6 = grtotlt if grtype==3
		gen cohortnum = grtotlt if grtype==2
		* Spread each value to every row of the institution. Missing values sort last,
		* so element [1] within unitid is the non-missing value whenever one exists.
		* The explicit (var) sort key avoids relying on a previous sort being left intact.
		bysort unitid (gradnum6): replace gradnum6 = gradnum6[1]
		bysort unitid (cohortnum): replace cohortnum = cohortnum[1]
		* Collapse to one row per institution; the kept variables are now constant within unitid
		duplicates drop unitid, force
		gen gradrate6 = 100*gradnum6/cohortnum
		keep unitid year gradrate6 gradnum6 cohortnum
		save gr`entry', replace
	}
}

    *2013-2014 fall entering cohorts: the GR file for survey year Y is stored under year Y-6,
    *so GR2019-GR2020 become gr2013-gr2014 with one row per institution
    *(for these years the CSV inside the zip has no "_rv" tag in its name)
qui {
	forvalues yr = 2019/2020 {
		local entry = `yr' - 6
		local pkg GR`yr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using gr`yr'_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		gen year = `yr'-6
		* Pull the numerator and denominator from their own rows of the long file
		* (GRTYPE 3 and 2 presumably hold six-year completers and the adjusted cohort - confirm in the IPEDS dictionary)
		gen gradnum6 = grtotlt if grtype==3
		gen cohortnum = grtotlt if grtype==2
		* Spread each value to every row of the institution. Missing values sort last,
		* so element [1] within unitid is the non-missing value whenever one exists.
		* The explicit (var) sort key avoids relying on a previous sort being left intact.
		bysort unitid (gradnum6): replace gradnum6 = gradnum6[1]
		bysort unitid (cohortnum): replace cohortnum = cohortnum[1]
		* Collapse to one row per institution; the kept variables are now constant within unitid
		duplicates drop unitid, force
		gen gradrate6 = 100*gradnum6/cohortnum
		keep unitid year gradrate6 gradnum6 cohortnum
		save gr`entry', replace
	}
}

	

***(VII) PREPARE DATA ON FINANCIAL AID***

*2012-2017: download each award year's SFA (student financial aid) file and save it as sfa<fall year>.
*IPEDS names these files with both two-digit years of the award year (e.g. SFA1213),
*and the file is stored under the fall (first) year.
*(for these years the CSV inside the zip carries an "_rv" tag in its name)
qui {
	forvalues yr = 12/17 {
		* Second two-digit year of the award year, used only to build the file name
		local nextyr = `yr' + 1
		* Four-digit fall year under which the file is saved
		local fallyear = 2000 + `yr'
		local pkg SFA`yr'`nextyr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using sfa`yr'`nextyr'_rv_data_stata.csv, clear
		gen year = `fallyear'
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		save sfa`fallyear', replace
	}
}

*2018-2019: download each award year's SFA (student financial aid) file and save it as sfa<fall year>.
*IPEDS names these files with both two-digit years of the award year (e.g. SFA1819),
*and the file is stored under the fall (first) year.
*(for these years the CSV inside the zip has no "_rv" tag in its name)
qui {
	forvalues yr = 18/19 {
		* Second two-digit year of the award year, used only to build the file name
		local nextyr = `yr' + 1
		* Four-digit fall year under which the file is saved
		local fallyear = 2000 + `yr'
		local pkg SFA`yr'`nextyr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using sfa`yr'`nextyr'_data_stata.csv, clear
		gen year = `fallyear'
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		save sfa`fallyear', replace
	}
}

	
	
***(VIII) PREPARE DATA ON ACCEPTANCE RATES***

*2012-2013: for these years the data are read from the IC file, then saved as adm<year>
*(the CSV inside the zip carries an "_rv" tag in its name)
quietly {
	forvalues yr = 2012/2013 {
		local pkg IC`yr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using ic`yr'_rv_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		generate year = `yr'
		save adm`yr', replace
	}
}

*2014-2018: for these years the data are read from the ADM file, then saved as adm<year>
*(the CSV inside the zip carries an "_rv" tag in its name)
quietly {
	forvalues yr = 2014/2018 {
		local pkg ADM`yr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using adm`yr'_rv_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		generate year = `yr'
		save adm`yr', replace
	}
}

*2019-2020: for these years the data are read from the ADM file, then saved as adm<year>
*(the CSV inside the zip has no "_rv" tag in its name)
quietly {
	forvalues yr = 2019/2020 {
		local pkg ADM`yr'_Data_Stata
		copy https://nces.ed.gov/ipeds/datacenter/data/`pkg'.zip `pkg'.zip, replace
		unzipfile `pkg', replace
		insheet using adm`yr'_data_stata.csv, clear
		* Discard every variable whose name starts with x (presumably IPEDS flag columns - confirm)
		drop x*
		generate year = `yr'
		save adm`yr', replace
	}
}

		