# In [6]:
import os

def create_stata_do_files(base_dir=None, years=None):
    """
    Write one Stata do file per year from a shared template.

    Every occurrence of the token ``YEAR_PLACEHOLDER`` in the template (the
    Excel sheet name and the ``keep if year == ...`` filter) is replaced with
    the year, and the result is written to ``individual_<year>.do`` inside
    ``base_dir`` as UTF-8 text. Existing files with the same name are
    overwritten.

    Parameters
    ----------
    base_dir : str or path-like, optional
        Directory that receives the generated do files. It is created
        (including parents) when missing. Defaults to the project's
        ``all_years_race`` analysis folder.
    years : iterable of int, optional
        Years to generate files for. Defaults to 2001-2023 inclusive
        (23 files).

    Returns
    -------
    int
        Number of do files written.

    Raises
    ------
    OSError
        If the directory cannot be created or a file cannot be written.
    """

    # Fall back to the original hard-coded project location.
    if base_dir is None:
        base_dir = r"C:\Users\user\OneDrive - Alma Mater Studiorum Università di Bologna\Desktop\LMEC-2nd_year\3rd_period\research_methods\thesis\my_analysis\Code\Analysis\individual outcomes\all_years_race"

    # Fall back to the original range: 2001 to 2023 inclusive.
    if years is None:
        years = range(2001, 2024)
    # Materialise once so any iterable (including a generator) can be both
    # iterated and counted.
    years = list(years)

    # Create directory if it doesn't exist
    os.makedirs(base_dir, exist_ok=True)

    # Stata template. This is a regular (non-raw) Python string, so every
    # doubled backslash below is written to the do file as a single backslash.
    # NOTE(review): the template runs `set more off` twice and loads the
    # dataset once before the group loop and again inside it; the first load
    # looks redundant, but the template is intentionally left unchanged here.
    template_code = '''set more off 
cap log close
clear
set more off
set matsize 800  

global root "C:\\Users\\user\\OneDrive - Alma Mater Studiorum Università di Bologna\\Desktop\\LMEC-2nd_year\\3rd_period\\research_methods\\thesis\\my_analysis"
global code	"${root}/Code"
do "${code}\\my_globals.do"

capture program drop makedum1
program define makedum1
	local i = 0
	while `i' <= 14 {
		gen agearr`i' = age_at_arrival == `i'
	local i = `i' + 1
	}
end

* Define the output Excel file path
putexcel set "C:\\Users\\user\\OneDrive - Alma Mater Studiorum Università di Bologna\\Desktop\\LMEC-2nd_year\\3rd_period\\research_methods\\thesis\\my_analysis\\Output\\Tables\\het_by_race\\individual_race_raw.xlsx", sheet("YEAR_PLACEHOLDER") modify

* Define column positions for each section
local col_NHW = "B"
local col_NHB = "D"
local col_HIS = "F"
local col_ONH = "H"

* Define the outcomes and their row positions
local outcomes "yrssch lnwagely workedly"

local row = 2

use "$output_B_matched_all_years", clear
		
* Loop through the four sections
foreach grp in NHW NHB HIS ONH {
    
    * Load the appropriate dataset
    use "$output_B_matched_all_years", clear
	
	keep if year == YEAR_PLACEHOLDER
    
    * Apply group-specific filtering
    if "`grp'" == "NHW" {
        keep if race_eth == 1
    }
    if "`grp'" == "NHB" {
        keep if race_eth == 2
    }
    if "`grp'" == "HIS" {
        keep if race_eth == 3
    }
    if "`grp'" == "ONH" {
        keep if race_eth == 4
    }
    
    * Ensure valid observations
    keep if eng ~= .
    
    * Generate necessary variables
    makedum1
    quietly tab age, gen(dumage)
    drop dumage1
    gen pwlinear = max(0, age_at_arrival-9)
    gen idvar = pwlinear * nengdom
    quietly tab bpld, gen(dbpld)
    drop dbpld1

    * Select the appropriate column for this group
    if "`grp'" == "NHW" {
        local col_ols = "`col_NHW'"
        local col_iv = "C"
    }
    else if "`grp'" == "NHB" {
        local col_ols = "`col_NHB'"
        local col_iv = "E"
    }
    else if "`grp'" == "HIS" {
        local col_ols = "`col_HIS'"
        local col_iv = "G"
    }
    else if "`grp'" == "ONH" {
        local col_ols = "`col_ONH'"
        local col_iv = "I"
    }
    
    * Loop through outcomes
    foreach outcome in `outcomes' {
        
        * OLS regression
        regress `outcome' eng agearr1-agearr14 dumage* female dbpld* [aw=perwt], cluster(bpld)
        
        local coef_ols = _b[eng]
        local se_ols = _se[eng]
        local r2_ols = e(r2)
        local p_ols = 2 * ttail(e(df_r), abs(_b[eng] / _se[eng]))  // Compute p-value

        * Format coefficient with significance level
        local coef_ols_str = string(`coef_ols', "%9.3f")
        
        if `p_ols' < 0.01 {
            local coef_ols_str = "`coef_ols_str'***"
        }
        else if `p_ols' < 0.05 {
            local coef_ols_str = "`coef_ols_str'**"
        }
        else if `p_ols' < 0.10 {
            local coef_ols_str = "`coef_ols_str'*"
        }

        * Store OLS results
        putexcel `col_ols'`row' = "`coef_ols_str'", overwrite
        putexcel `col_ols'`=`row'+1' = `"`=string(`se_ols', "%9.3f")'"', overwrite
        putexcel `col_ols'`=`row'+2' = `"`=string(`r2_ols', "%9.3f")'"', overwrite

        * IV regression
        ivreg `outcome' (eng = idvar) agearr1-agearr14 dumage* female dbpld* [aw=perwt], cluster(bpld)

        local coef_iv = _b[eng]
        local se_iv = _se[eng]
        local r2_iv = e(r2)
        local p_iv = 2 * ttail(e(df_r), abs(_b[eng] / _se[eng]))  // Compute p-value

        * Format coefficient with significance level
        local coef_iv_str = string(`coef_iv', "%9.3f")
        
        if `p_iv' < 0.01 {
            local coef_iv_str = "`coef_iv_str'***"
        }
        else if `p_iv' < 0.05 {
            local coef_iv_str = "`coef_iv_str'**"
        }
        else if `p_iv' < 0.10 {
            local coef_iv_str = "`coef_iv_str'*"
        }

        * Store IV results
        putexcel `col_iv'`row' = "`coef_iv_str'", overwrite
        putexcel `col_iv'`=`row'+1' = `"`=string(`se_iv', "%9.3f")'"', overwrite
        putexcel `col_iv'`=`row'+2' = `"`=string(`r2_iv', "%9.3f")'"', overwrite

        * Move to the next set of rows
        local row = `row' + 4
    }
    
    * Reset row counter for next section
    local row = 2
}

* Save and close the Excel file
putexcel B1 = "NHW"
putexcel D1 = "NHB"
putexcel F1 = "HIS"
putexcel H1 = "ONH"
putexcel A2 = "yrssch"
putexcel A6 = "lnwagely"
putexcel A10 = "workedly"
putexcel save
'''

    for year in years:
        # Replace YEAR_PLACEHOLDER with the actual year
        year_code = template_code.replace("YEAR_PLACEHOLDER", str(year))

        # Create filename
        filename = f"individual_{year}.do"
        filepath = os.path.join(base_dir, filename)

        # Write the file (UTF-8 because the embedded paths contain non-ASCII
        # characters)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(year_code)

        print(f"Created: {filename}")

    print(f"\nSuccessfully created {len(years)} Stata do files in:")
    print(f"{base_dir}")

    return len(years)

# Run the function
if __name__ == "__main__":
    # Script entry point: generate every do file and report how many were
    # written. Any failure is reported on stdout instead of propagating as a
    # traceback.
    try:
        num_files = create_stata_do_files()
        print(f"\nTotal files created: {num_files}")
    except Exception as exc:
        print(f"An error occurred: {exc}")

# Captured output of the cell above:
#
# Created: individual_2001.do
# Created: individual_2002.do
# Created: individual_2003.do
# Created: individual_2004.do
# Created: individual_2005.do
# Created: individual_2006.do
# Created: individual_2007.do
# Created: individual_2008.do
# Created: individual_2009.do
# Created: individual_2010.do
# Created: individual_2011.do
# Created: individual_2012.do
# Created: individual_2013.do
# Created: individual_2014.do
# Created: individual_2015.do
# Created: individual_2016.do
# Created: individual_2017.do
# Created: individual_2018.do
# Created: individual_2019.do
# Created: individual_2020.do
# Created: individual_2021.do
# Created: individual_2022.do
# Created: individual_2023.do
#
# Successfully created 23 Stata do files in:
# C:\Users\user\OneDrive - Alma Mater Studiorum Università di Bologna\Desktop\LMEC-2nd_year\3rd_period\research_methods\thesis\my_analysis\Code\Analysis\individual outcomes\all_years_race
#
# Total files created: 23
