In [10]:
*--------------------------------------------------------------*
* 1) Build the gravity panel
*    DoTS bilateral flows + PWT economic masses + RTA/CU dummies
*    + CEPII bilateral geography
*--------------------------------------------------------------*
clear all

tempfile PWT_o PWT_d

* Prepare PWT (origin): same PWT columns, renamed with the _o suffix
use pwt.dta, clear
keep iso3 year cgdp pop
rename (iso3 cgdp pop) (iso_o gdp_o pop_o)
save `PWT_o'

* Prepare PWT (destination): same PWT columns, renamed with the _d suffix
use pwt.dta, clear
keep iso3 year cgdp pop
rename (iso3 cgdp pop) (iso_d gdp_d pop_d)
save `PWT_d'

* Start from DoTS flows; these rows define the estimation panel
use dots1960_2005.dta, clear
rename flow_dots trade

* Merge economic masses; only flows with PWT data on both sides are kept
merge m:1 iso_o year using `PWT_o', keep(match) nogen
merge m:1 iso_d year using `PWT_d', keep(match) nogen

* Merge policy dummies (time-varying).
* keep(master match): rows that exist only in RTA.dta / CU.dta must not be
* appended to the panel. They would carry missing trade and missing GDP,
* inflate the pair counts used for the descriptive shares, and turn into
* artificial zero flows wherever missing trade is later recoded to 0.
merge m:1 iso_o iso_d year using RTA.dta, keep(master match) nogen
merge m:1 iso_o iso_d year using CU.dta,  keep(master match) nogen
rename comcur cu

* Replace missing policy dummies with 0 (pairs/years absent from the policy
* files are treated as having no agreement). The confirm guard skips a
* dummy that is not present under that name.
foreach v in rta cu {
    capture confirm variable `v'
    if !_rc replace `v' = 0 if missing(`v')
}

* Merge bilateral geography (time-invariant).
* keep(master match) again: a pair found only in dist_cepii.dta has no year
* and no flow, so it does not belong in the panel.
merge m:1 iso_o iso_d using dist_cepii.dta, keep(master match) nogen

*--------------------------------------------------------------*
* 2) Core transforms and housekeeping
*--------------------------------------------------------------*
* Log transforms for the OLS specifications. ln_trade is defined only for
* strictly positive flows; PPML works on the level of trade instead.
generate ln_trade = ln(trade) if trade > 0
foreach side in o d {
    generate ln_gdp_`side' = ln(gdp_`side')
}
generate ln_dist = ln(dist)     // swap in ln(distwces) if the weighted distance is preferred
* Optional: weighted distance (often preferred in CEPII)
* gen ln_distw = ln(distwces)

* Store the binary indicators compactly; names that are absent are skipped
foreach dummy in rta cu contig comlang_off colony curcol {
    capture confirm variable `dummy'
    if _rc == 0 {
        recast byte `dummy'
    }
}

* Numeric dyad identifier, used for clustering and pair fixed effects
egen long pair_id = group(iso_o iso_d)

* Two estimation frames:
*   GRAV_ALL - every observation (levels, used by PPML and the descriptives)
*   GRAV_LOG - only rows where all log variables are defined (log-OLS)
tempfile GRAV_ALL GRAV_LOG
save `GRAV_ALL'

preserve
drop if missing(ln_trade, ln_gdp_o, ln_gdp_d, ln_dist)
save `GRAV_LOG'
restore

*--------------------------------------------------------------*
* 3) Descriptives: shares of RTA/CU pairs and trade over time
*    (Question d in your notebook)
*--------------------------------------------------------------*
preserve
use `GRAV_ALL', clear

* Trade flowing inside CU / RTA pairs; missing elsewhere so sums skip it
generate double tr_cu  = trade if cu  == 1
generate double tr_rta = trade if rta == 1

* One row per year: the mean of each dummy is the share of pairs,
* and the sums give the totals needed for the trade-weighted shares
collapse (mean) share_cu_pairs=cu share_rta_pairs=rta ///
         (sum)  trade_total=trade trade_cu=tr_cu trade_rta=tr_rta, by(year)

generate double share_cu_trade  = trade_cu  / trade_total
generate double share_rta_trade = trade_rta / trade_total
keep year share_cu_pairs share_rta_pairs share_cu_trade share_rta_trade

* Plot (optional)
local line_opts lwidth(medthick)

twoway ///
    (line share_cu_pairs  year, `line_opts') ///
    (line share_rta_pairs year, `line_opts'), ///
    title("RTA & CU: Share of Pairs") ytitle("Share") xtitle("Year") ///
    legend(order(1 "CU (pairs)" 2 "RTA (pairs)"))

twoway ///
    (line share_cu_trade  year, `line_opts') ///
    (line share_rta_trade year, `line_opts'), ///
    title("RTA & CU: Trade-weighted Shares") ytitle("Share") xtitle("Year") ///
    legend(order(1 "CU (% of trade)" 2 "RTA (% of trade)"))
restore

*--------------------------------------------------------------*
* 4) Gravity OLS with year FE (Questions a–c–e)
*    – match your feols() sequence
*    – cluster at the dyad level (preferred by assignment)
*--------------------------------------------------------------*
* Year effects go through absorb(year) rather than i.year with no absorb():
* reghdfe is organised around absorb() (NOTE(review): older releases appear
* to stop when neither absorb() nor noabsorb is given - confirm against the
* installed version), and absorbing keeps the year dummies out of the
* esttab table. Slope estimates are the same either way.
use `GRAV_LOG', clear

eststo clear
* (a) Traditional gravity
reghdfe ln_trade ln_gdp_o ln_gdp_d ln_dist, absorb(year) vce(cluster pair_id)
eststo reg_a

* (b) + CU + RTA
reghdfe ln_trade ln_gdp_o ln_gdp_d ln_dist cu rta, absorb(year) vce(cluster pair_id)
eststo reg_b

* (c) + historical ties (language, colony)
reghdfe ln_trade ln_gdp_o ln_gdp_d ln_dist cu rta comlang_off colony, ///
    absorb(year) vce(cluster pair_id)
eststo reg_c

* (e) Add exporter & importer FE (+ year FE)
reghdfe ln_trade ln_gdp_o ln_gdp_d ln_dist cu rta comlang_off colony, ///
    absorb(iso_o iso_d year) vce(cluster pair_id)
eststo reg_e

* Side-by-side table of the four specifications
esttab reg_a reg_b reg_c reg_e, se star(* 0.10 ** 0.05 *** 0.01) ///
      stats(N r2, fmt(%9.0g %9.3f) labels("Obs" "R2")) ///
      title("Log-Linear OLS with Year/Country FE (clustered by pair)") ///
      label replace

*--------------------------------------------------------------*
* 5) Year-by-year regressions with exporter & importer FE (Question h)
*    Runs one cross-section per year and collects the CU coefficient and
*    its standard error in _yearly_cu.dta (one row per year).
*--------------------------------------------------------------*
use `GRAV_LOG', clear
levelsof year, local(Ys)

tempname memhold
* double(...) applies the storage type to both result columns; without the
* parentheses only the first variable after the type would be double.
postfile `memhold' int year double(coef_cu se_cu) using _yearly_cu, replace

foreach y of local Ys {
    * capture: a year in which the model cannot be fitted should not abort
    * the loop and leave the postfile handle open.
    capture quietly reghdfe ln_trade ln_gdp_o ln_gdp_d ln_dist cu rta if year==`y', ///
        absorb(iso_o iso_d) vce(cluster pair_id)
    if _rc {
        di as txt "year `y': reghdfe failed (rc = " _rc "); posting missing"
        post `memhold' (`y') (.) (.)
        continue
    }

    * _b[] / _se[] read the coefficient and its standard error straight from
    * the estimation results. colnumb() expects a matrix as its first
    * argument, so colnumb(colnames(V), "cu") is not a valid expression.
    * A zero standard error means cu was omitted in this year (for example,
    * no currency-union pair in the sample): record missing, not a zero.
    if _se[cu] > 0 & !missing(_se[cu]) {
        post `memhold' (`y') (_b[cu]) (_se[cu])
    }
    else {
        post `memhold' (`y') (.) (.)
    }
}
postclose `memhold'

use _yearly_cu, clear
label var coef_cu "CU coef"
label var se_cu   "SE(CU)"
* (Optional) plot with ci bands; or export table
* twoway (rcap coef_cu-1.96*se_cu coef_cu+1.96*se_cu year) (scatter coef_cu year), ...

*--------------------------------------------------------------*
* 6) Country-pair FE (Question i(1))
*    Pair effects soak up every time-invariant bilateral cost, so only
*    time-varying regressors appear on the right-hand side.
*--------------------------------------------------------------*
use `GRAV_LOG', clear

* Absorbed effects: exporter, importer and exporter-importer pair
local pair_fe iso_o iso_d iso_o#iso_d

eststo reg_pairfe: reghdfe ln_trade cu rta ln_gdp_o ln_gdp_d i.year, ///
    absorb(`pair_fe') vce(cluster pair_id)

*--------------------------------------------------------------*
* 7) Structural gravity with high-dimensional FE (Question j)
*    (1) importer×year and exporter×year FE
*    (2) + pair FE
*--------------------------------------------------------------*
use `GRAV_LOG', clear

* Country-by-year effects shared by both specifications
local fe_country_year iso_o#year iso_d#year

* (1) Country-year FE only: time-invariant bilateral costs (distance,
*     language, colony) are still identified and stay in the model
eststo reg_j1: reghdfe ln_trade cu rta ln_dist comlang_off colony, ///
    absorb(`fe_country_year') vce(cluster pair_id)

* (2) Country-year FE plus pair FE: time-invariant bilateral costs are
*     absorbed, leaving only the policy dummies
eststo reg_j2: reghdfe ln_trade cu rta, ///
    absorb(`fe_country_year' iso_o#iso_d) vce(cluster pair_id)

esttab reg_j1 reg_j2, se star(* 0.10 ** 0.05 *** 0.01) ///
      stats(N r2, fmt(%9.0g %9.3f) labels("Obs" "R2")) ///
      title("Structural Gravity (HDFE) with Pair FE") replace

*--------------------------------------------------------------*
* 8) Small-country heterogeneity (Question k)
*    Replicates your: small_o, small_d, both_small, and interactions
*--------------------------------------------------------------*
* Use overall median of cgdp (like notebook).
* The median is only returned by -summarize, detail-. With -meanonly-,
* r(p50) is not set, the scalar becomes missing, and every non-missing GDP
* compares as "< missing", which would flag all countries as small.
use pwt.dta, clear
quietly summarize cgdp, detail
scalar gdp_med = r(p50)

use `GRAV_LOG', clear
* scalar() makes explicit that gdp_med is the scalar, not a variable;
* the !missing() guards keep a missing GDP from being classified at all
gen byte small_o = (gdp_o < scalar(gdp_med)) if !missing(gdp_o)
gen byte small_d = (gdp_d < scalar(gdp_med)) if !missing(gdp_d)
gen byte both_small = small_o & small_d if !missing(small_o, small_d)

* CU interactions with the size indicators
gen cu_small_o  = cu*small_o
gen cu_small_d  = cu*small_d
gen cu_both_small = cu*both_small

eststo clear
* Year effects go through absorb(year) rather than i.year with no absorb()
* (NOTE(review): older reghdfe releases appear to require absorb() or
* noabsorb - confirm against the installed version).

* Separate small_o & small_d interactions
reghdfe ln_trade cu small_o small_d cu_small_o cu_small_d ln_gdp_o ln_gdp_d ln_dist rta, ///
    absorb(year) vce(cluster pair_id)
eststo reg_k1

* Both-small interaction + controls
reghdfe ln_trade cu both_small cu_both_small ln_gdp_o ln_gdp_d ln_dist rta, ///
    absorb(year) vce(cluster pair_id)
eststo reg_k2

* Add exporter/importer FE
reghdfe ln_trade cu both_small cu_both_small, absorb(iso_o iso_d year) vce(cluster pair_id)
eststo reg_k3

esttab reg_k1 reg_k2 reg_k3, se star(* 0.10 ** 0.05 *** 0.01) ///
      title("Do small countries benefit more from CU?") replace

*--------------------------------------------------------------*
* 9) Problems with log-linearization & PPML (Question l)
*    Preferred: PPML with iso_o#year, iso_d#year, and pair FE
*--------------------------------------------------------------*
use `GRAV_ALL', clear

* Missing flows are treated as zero trade so they enter the PPML sample
replace trade = 0 if trade >= .

* Exporter-year, importer-year and pair fixed effects
local fe_full iso_o#year iso_d#year iso_o#iso_d

eststo reg_ppml: ppmlhdfe trade cu rta, absorb(`fe_full') ///
    vce(cluster pair_id) d difficult

esttab reg_ppml, se star(* 0.10 ** 0.05 *** 0.01) ///
      title("PPML-HDFE (exporter-year, importer-year, and pair FE)") replace

*--------------------------------------------------------------*
* 10) Euro effect (Question m) – DID-style indicator
*     both_euro × post_1999, with HDFE; show OLS and PPML variants
*--------------------------------------------------------------*
use `GRAV_ALL', clear

* Euro-area membership of exporter and importer.
* inlist() takes at most 10 arguments when they are strings (the tested
* value plus 9 candidates), so the 12 codes are split into two calls of 6.
gen byte euro_o = inlist(iso_o,"AUT","BEL","DEU","ESP","FIN","FRA") ///
               | inlist(iso_o,"IRL","ITA","LUX","NLD","PRT","GRC")
gen byte euro_d = inlist(iso_d,"AUT","BEL","DEU","ESP","FIN","FRA") ///
               | inlist(iso_d,"IRL","ITA","LUX","NLD","PRT","GRC")
gen byte both_euro = euro_o & euro_d
gen byte post_1999 = year>=1999
* NOTE(review): GRC is switched on from 1999 like the other members, but
* Greece adopted the euro in 2001 - confirm whether a country-specific
* start year is wanted.
gen byte euro_cu   = both_euro*post_1999

* The saved panel already contains ln_trade, so a plain -gen ln_trade-
* stops with "already defined". Rebuild it once here. It is missing for
* non-positive flows, so the log regressions drop those rows by themselves
* and no preserve / keep if trade>0 / restore is needed.
capture drop ln_trade
gen ln_trade = ln(trade) if trade>0

* OLS with year FE (absorbed rather than i.year with no absorb();
* NOTE(review): older reghdfe releases appear to require absorb() or
* noabsorb - confirm against the installed version)
reghdfe ln_trade euro_cu ln_gdp_o ln_gdp_d ln_dist rta cu, absorb(year) vce(cluster pair_id)
eststo euro1

* OLS with exporter/importer + year FE
reghdfe ln_trade euro_cu rta cu, absorb(iso_o iso_d year) vce(cluster pair_id)
eststo euro2

* HDFE log-OLS with full FE (time-invariant dyad costs absorbed)
reghdfe ln_trade euro_cu rta cu, absorb(iso_o#year iso_d#year iso_o#iso_d) vce(cluster pair_id)
eststo euro3

* PPML-HDFE (preferred); estimated on trade in levels.
* NOTE(review): unlike section 9, missing trade is not recoded to 0 here,
* so rows with missing trade are left out - confirm this is intended.
ppmlhdfe trade euro_cu rta cu, absorb(iso_o#year iso_d#year iso_o#iso_d) ///
    vce(cluster pair_id) d difficult
eststo euro4

esttab euro1 euro2 euro3 euro4, se star(* 0.10 ** 0.05 *** 0.01) ///
      title("Euro effect: OLS vs HDFE vs PPML-HDFE") replace

*--------------------------------------------------------------*
* 11) (Optional) Export LaTeX tables
*     esttab using "tableX.tex", replace label booktabs fragment nolines
*--------------------------------------------------------------*
* The four export commands below are commented out; uncomment them to write
* the stored estimates to .tex files in the working directory.
* esttab reg_a reg_b reg_c reg_e using "table1_gravity.tex", replace label booktabs fragment
* esttab reg_j1 reg_j2              using "table2_structural.tex", replace label booktabs fragment
* esttab reg_ppml                   using "table3_ppml.tex",      replace label booktabs fragment
* esttab euro1 euro2 euro3 euro4    using "table4_euro.tex",      replace label booktabs fragment

* Completion marker printed once every section above has run
di as txt "Done."

(Note: The code below was run with echo enabled so that preserve/restore works.)

. clear all

. tempfile PWT_o PWT_d

. use pwt.dta, clear

. keep iso3 year cgdp pop

. rename (iso3 cgdp pop) (iso_o gdp_o pop_o)

. save `PWT_o'
file C:\Users\purga\AppData\Local\Temp\ST_4330_00000l.tmp saved as .dta
    format

. use pwt.dta, clear

. keep iso3 year cgdp pop

. rename (iso3 cgdp pop) (iso_d gdp_d pop_d)

. save `PWT_d'
file C:\Users\purga\AppData\Local\Temp\ST_4330_00000m.tmp saved as .dta
    format

. use dots1960_2005.dta, clear

. rename flow_dots trade

. merge m:1 iso_o year using `PWT_o', keep(match) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                           325,520  
    -----------------------------------------

. merge m:1 iso_d year using `PWT_d', keep(match) nogen

    Result                      Number of obs
    -----------------------------------------
  