# Instrumental Variable Analysis of Return to Education
This notebook replicates the OLS and IV (2SLS) estimates of the return to education, using `nearc4` as an instrument for `educ`.

In [None]:
# Load AER, which supplies ivreg(); install it first only when it is missing.
# requireNamespace() checks availability without attaching the package, and
# library() then stops with an error if the package still cannot be loaded.
if (!requireNamespace("AER", quietly = TRUE)) {
  install.packages("AER")
}
library(AER)

In [None]:
# 1. Import the education data set and append the natural log of wage
data <- read.csv(file = "Education_data.csv")
data$logwage <- log(data$wage)
# Show the first six rows as a quick look at the imported columns
head(data, n = 6L)

In [None]:
# 2. Baseline OLS: log wage on schooling and a quadratic in experience
ols <- lm(
  formula = logwage ~ educ + exper + I(exper^2),
  data = data
)
summary(ols)

In [None]:
# 3. First stage: schooling on the instrument (nearc4) and the same
#    experience controls used in the wage equation
first_stage <- lm(
  formula = educ ~ nearc4 + exper + I(exper^2),
  data = data
)
summary(first_stage)

In [None]:
# 4. Manual two-stage least squares (2SLS)
# 4a. Store the first-stage fitted values of educ. They are matched to `data`
#     by row name, so rows that lm() dropped because of missing values get NA
#     instead of causing a row-count mismatch error on assignment.
educ_fitted <- fitted(first_stage)
data$educ_hat <- NA_real_
data[names(educ_fitted), "educ_hat"] <- educ_fitted
# 4b. Second stage: regress log wage on the predicted schooling and controls.
#     NOTE: the standard errors reported by this lm() fit are not valid 2SLS
#     standard errors, because its residuals are computed from educ_hat rather
#     than from the observed educ. Use the ivreg() fit for inference.
second_stage <- lm(logwage ~ educ_hat + exper + I(exper^2), data = data)
summary(second_stage)

In [None]:
# 5. 2SLS in a single call with ivreg(): regressors go left of `|`,
#    instruments (nearc4 plus the exogenous controls) go right of it
iv_model <- ivreg(
  formula = logwage ~ educ + exper + I(exper^2) |
    nearc4 + exper + I(exper^2),
  data = data
)
# diagnostics = TRUE asks summary() to append the IV diagnostic tests
summary(iv_model, diagnostics = TRUE)

In [None]:
# 6. Put the schooling coefficient from the OLS and IV fits side by side
coef_compare <- data.frame(
  Model = c("OLS", "2SLS"),
  educ = vapply(
    list(ols, iv_model),
    function(fit) unname(coef(fit)["educ"]),
    numeric(1)
  )
)
print(coef_compare)