In [1]:
using CSV, DataFrames

# Loading CPS data

In [2]:
# Read the CPS extract from disk and materialize it as a DataFrame.
cps = CSV.File("c:\\data\\Bounds\\cps.csv") |> DataFrame # <-change this to the right directory

## Cleaning zero wage

# Keep only rows with a strictly positive wage (the log of the wage is taken later).
cps = filter(:wage => >(0), cps)
first(cps, 5)

Unnamed: 0_level_0,age,wage,educ
Unnamed: 0_level_1,Int64,Int64,String
1,22,12000,some college but no degree
2,21,3500,some college but no degree
3,49,30000,some college but no degree
4,31,32000,bachelor's degree
5,42,89630,doctorate degree
6,35,229339,doctorate degree
7,42,39000,high school diploma or equivalent
8,48,50000,high school diploma or equivalent
9,41,37500,some college but no degree
10,41,52000,"associate's degree, occupational/vocational program"


In [4]:
# Numerical value for education

# 1. Lookup table: CPS education label => numeric education value.
#    Entries are listed in increasing order of the numeric value.
# NOTE(review): "master's degree" and "professional school degree" are coded 14,
# i.e. below "bachelor's degree" (16). This looks like a miscoding; the values are
# kept unchanged here because the regressions that use educ_num depend on them.
# TODO confirm the intended coding.
educ_dict = Dict{String,Int}(
    "none or preschool" => 0,
    "grades 1, 2, 3, or 4" => 4,
    "grades 5 or 6" => 6,
    "grades 7 or 8" => 8,
    "grade 9" => 9,
    "grade 10" => 10,
    "grade 11" => 11,
    "12th grade, no diploma" => 12,
    "high school diploma or equivalent" => 12,
    "some college but no degree" => 13,
    "associate's degree, academic program" => 14,
    "associate's degree, occupational/vocational program" => 14,
    "master's degree" => 14,
    "professional school degree" => 14,
    "bachelor's degree" => 16,
    "doctorate degree" => 21,
)

Dict{String, Int64} with 16 entries:
  "grade 11"                                            => 11
  "some college but no degree"                          => 13
  "associate's degree, academic program"                => 14
  "none or preschool"                                   => 0
  "doctorate degree"                                    => 21
  "grade 10"                                            => 10
  "bachelor's degree"                                   => 16
  "master's degree"                                     => 14
  "grades 5 or 6"                                       => 6
  "grades 7 or 8"                                       => 8
  "grades 1, 2, 3, or 4"                                => 4
  "associate's degree, occupational/vocational program" => 14
  "professional school degree"                          => 14
  "high school diploma or equivalent"                   => 12
  "grade 9"                                             => 9
  "12th grade, no diploma"                              => 12

In [5]:
# 2. Translate every education label in cps.educ into its numeric value.
#    `f` looks the label up in educ_dict (an unknown label raises a KeyError).
f(x) = getindex(educ_dict, x)
cps.educ_num = [f(label) for label in cps.educ]

# 3. Natural logarithm of the wage.
cps.log_wage = [log(w) for w in cps.wage]
first(cps, 10)

Unnamed: 0_level_0,age,wage,educ,educ_num,log_wage
Unnamed: 0_level_1,Int64,Int64,String,Int64,Float64
1,22,12000,some college but no degree,13,9.39266
2,21,3500,some college but no degree,13,8.16052
3,49,30000,some college but no degree,13,10.309
4,31,32000,bachelor's degree,16,10.3735
5,42,89630,doctorate degree,21,11.4034
6,35,229339,doctorate degree,21,12.343
7,42,39000,high school diploma or equivalent,12,10.5713
8,48,50000,high school diploma or equivalent,12,10.8198
9,41,37500,some college but no degree,13,10.5321
10,41,52000,"associate's degree, occupational/vocational program",14,10.859


In [6]:
# Number of observations (rows of cps); the column count is discarded.
Nobs, _ = size(cps)

(22715, 5)

# Best Linear Predictor 

In [7]:
using FixedEffectModels

In [8]:
# Regress log_wage on educ_num, passing Vcov.robust() as the
# variance-covariance estimator for the reported standard errors.
reg(cps, @formula(log_wage ~ educ_num ), Vcov.robust())

                             Linear Model                             
Number of obs:               22715  Degrees of freedom:              1
R2:                          0.092  R2 Adjusted:                 0.092
F-Stat:                    2440.69  p-value:                     0.000
log_wage    | Estimate  Std.Error t value Pr(>|t|) Lower 95% Upper 95%
----------------------------------------------------------------------
educ_num    | 0.124936 0.00252889 49.4034    0.000  0.119979  0.129893
(Intercept) |  8.57109  0.0336583  254.65    0.000   8.50512   8.63707


In [9]:
# Same regression with age added as a second regressor, again with
# Vcov.robust() as the variance-covariance estimator.
reg(cps, @formula(log_wage ~ educ_num + age ), Vcov.robust())

                              Linear Model                              
Number of obs:                22715  Degrees of freedom:               2
R2:                           0.209  R2 Adjusted:                  0.209
F-Stat:                     2684.15  p-value:                      0.000
log_wage    |  Estimate   Std.Error t value Pr(>|t|) Lower 95% Upper 95%
------------------------------------------------------------------------
educ_num    |  0.110902   0.0024148  45.926    0.000  0.106169  0.115635
age         | 0.0377451 0.000712846 52.9498    0.000 0.0363478 0.0391423
(Intercept) |   7.41447   0.0394616 187.891    0.000   7.33712   7.49182


# Interval Data

In [13]:
using Statistics

In [37]:
# Bin edges for the interval data: the 0%, 10%, ..., 100% quantiles of the wage.
wage_quantiles = quantile(
    cps.wage,
    [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
);
# Push the two outer edges out by 1 so that the smallest and the largest wage
# lie strictly inside the outer edges. The upper shift keeps the largest wage
# from matching the last edge exactly. Note that the lowest edge becomes
# min(wage) - 1, which is 0 when the smallest wage is 1.
wage_quantiles[begin] -= 1
wage_quantiles[end] += 1

362303.0

In [40]:
"""
    lower(x, edges=wage_quantiles)

Return the largest bin edge that is `<= x`, i.e. the left end of the bin
`[edge_k, edge_{k+1})` that contains `x`.

`edges` defaults to the global `wage_quantiles`, so the one-argument call
`lower(x)` behaves as before. Throws a `BoundsError` when `x` is smaller than
every edge. Counting with a predicate avoids allocating a temporary Bool vector
on every call.
"""
lower(x, edges=wage_quantiles) = edges[count(<=(x), edges)]

"""
    upper(x, edges=wage_quantiles)

Return the bin edge that follows the last edge `<= x`, i.e. the right end of
the bin `[edge_k, edge_{k+1})` that contains `x`.

`edges` defaults to the global `wage_quantiles`. Throws a `BoundsError` when `x`
is greater than or equal to the last edge (there is no following edge).
"""
upper(x, edges=wage_quantiles) = edges[count(<=(x), edges) + 1]

# Attach to every observation the wage bin [wage_lower, wage_upper) it falls in.
cps.wage_lower = lower.(cps.wage)
cps.wage_upper = upper.(cps.wage)

# Log-scale bounds.
# The lowest bin edge is min(wage) - 1, which is 0 when the smallest wage is 1;
# log(0) = -Inf would make any mean or regression on the lower bound degenerate.
# Every wage is >= minimum(cps.wage), so clamping the lower bound there keeps it
# a valid bound while keeping the log finite. wage_lower itself is left untouched.
cps.logwage_lower = log.(max.(cps.wage_lower, minimum(cps.wage)))
cps.logwage_upper = log.(cps.wage_upper)

first(cps,5)

Unnamed: 0_level_0,age,wage,educ,educ_num,log_wage,wage_lower,wage_upper
Unnamed: 0_level_1,Int64,Int64,String,Int64,Float64,Float64,Float64
1,22,12000,some college but no degree,13,9.39266,9000.0,15000.0
2,21,3500,some college but no degree,13,8.16052,0.0,9000.0
3,49,30000,some college but no degree,13,10.309,30000.0,36000.0
4,31,32000,bachelor's degree,16,10.3735,30000.0,36000.0
5,42,89630,doctorate degree,21,11.4034,71000.0,362303.0


# Partial Identification

In [10]:
# Load the setBLP source file into Main.
# NOTE(review): this is a machine-specific absolute path; point it at the local
# copy of setBLP.jl before running.
include("C:\\Users\\The Group Leader\\Notebooks\\Research\\Bounds.jl-1\\src\\setBLP.jl")

Main.setBLP

In [12]:
using Main.setBLP