In [1]:
# install packages if necissary 
# ] add CSV
# ] add DataFrames
# ] add Optim

In [2]:
using CSV
using DataFrames
using WGLMakie
using Optim

In [3]:
alldata = CSV.read("data.csv", DataFrame);

In [4]:
size(alldata)

(20, 6)

In [5]:
data = alldata[5:end, :];

In [6]:
f = Figure()
Axis(f[1, 1])
errorbars!(data.x, data.y, data.sigma_y)
scatter!(data.x, data.y, markersize=20, color=:red)
f

In [7]:
b_eye = 50.
m_eye = 2.

2.0

In [8]:
xx = LinRange(50, 250, 20)

20-element LinRange{Float64, Int64}:
 50.0,60.5263,71.0526,81.5789,92.1053,…,207.895,218.421,228.947,239.474,250.0

In [9]:
f = Figure()
Axis(f[1, 1])
errorbars!(data.x, data.y, data.sigma_y)
scatter!(data.x, data.y, markersize=20, color=:red)

yy_eye = xx .* m_eye .+ b_eye
lines!(xx, yy_eye)

f

In [10]:
function objective(parameters)
    b = parameters[1]
    m = parameters[2]
    @show b, m
    return abs(b - 3.4) + abs(m - 17)
end

objective (generic function with 1 method)

In [11]:
function objective_2(parameters)
    b = parameters[1]
    m = parameters[2]

    y_pred = b .+ m .* data.x
    
    return sum(abs.(data.y .- y_pred))
end

objective_2 (generic function with 1 method)

In [12]:
starting_params = [b_eye + 0., m_eye]
result = optimize(objective_2, starting_params)

 * Status: success

 * Candidate solution
    Final objective value:     3.765701e+02

 * Found with
    Algorithm:     Nelder-Mead

 * Convergence measures
    √(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    97
    f(x) calls:    190


In [13]:
b_abs, m_abs = Optim.minimizer(result)

2-element Vector{Float64}:
 53.74766339105346
  2.056074767629348

In [14]:
f = Figure()
Axis(f[1, 1])
errorbars!(data.x, data.y, data.sigma_y)
scatter!(data.x, data.y, markersize=20, color=:red)

yy_eye = xx .* m_eye .+ b_eye
lines!(xx, yy_eye, color=:orange)

yy_abs = xx .* m_abs .+ b_abs
lines!(xx, yy_abs, color=:green)

f

# Problem 1 - Quadradic Fit

Here we will fit the data to a qadratic model instead of a linear model, we will do this by adding a quadractic term in the objective function as well as a new parameter to optimize. 

In [15]:
function objective_3(parameters)
    b = parameters[1]
    m = parameters[2]
    q = parameters[3]

    y_pred = b .+ m .* data.x .+ q .* data.x.^2
    
    return sum(abs.(data.y .- y_pred))
end

objective_3 (generic function with 1 method)

In [16]:
b_eye = 50.
m_eye = 2.
q_eye = 0.001

0.001

In [17]:
starting_params = [b_eye + 0., m_eye + 0., q_eye + 0.]
result = optimize(objective_3, starting_params)

 * Status: success

 * Candidate solution
    Final objective value:     3.339365e+02

 * Found with
    Algorithm:     Nelder-Mead

 * Convergence measures
    √(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    192
    f(x) calls:    355


In [18]:
b_abs, m_abs, q_abs = Optim.minimizer(result)

3-element Vector{Float64}:
 94.76027385840413
  1.1092347687349888
  0.0041332073587093

In [19]:
f = Figure()
Axis(f[1, 1])
errorbars!(data.x, data.y, data.sigma_y)
scatter!(data.x, data.y, markersize=20, color=:red)

yy_eye = xx.^2 .*q_eye .+ xx .* m_eye .+ b_eye
lines!(xx, yy_eye, color=:orange)

yy_abs = xx.^2 .*q_abs .+ xx .* m_abs .+ b_abs
lines!(xx, yy_abs, color=:green)

f

In [20]:
function objective_gauss(parameters, x, y, sigma)
    b = parameters[1]
    m = parameters[2]

    y_pred = b .+ m .* x
    
    return -sum(
        -log.(sigma * sqrt(2 * π)) .-0.5 .* (y .- y_pred).^2 / sigma.^2)
end

objective_gauss (generic function with 1 method)

In [21]:
#function shim(p)
#    return objective_abs(p, data.x, data.y, data.sigma_y)
#end

In [22]:
starting_params = [b_eye + 0., m_eye]
result = optimize(p -> objective_gauss(p, data.x, data.y, data.sigma_y),
                  starting_params)

 * Status: success

 * Candidate solution
    Final objective value:     1.044426e+03

 * Found with
    Algorithm:     Nelder-Mead

 * Convergence measures
    √(Σ(yᵢ-ȳ)²)/n ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    33
    f(x) calls:    68


In [23]:
b_gauss, m_gauss = Optim.minimizer(result)

2-element Vector{Float64}:
 28.84560565724924
  2.216646434317286

In [24]:
f = Figure()
Axis(f[1, 1])
errorbars!(data.x, data.y, data.sigma_y)
scatter!(data.x, data.y, markersize=20, color=:red)

yy_eye = xx .* m_eye .+ b_eye
lines!(xx, yy_eye, color=:orange)

yy_abs = xx .* m_abs .+ b_abs
lines!(xx, yy_abs, color=:green)

yy_gauss = xx .* m_gauss .+ b_gauss
lines!(xx, yy_gauss, color=:purple, linewidth=3)

f

In [25]:
@show data.x;

data.x = [203, 58, 210, 202, 198, 158, 165, 201, 157, 131, 166, 160, 186, 125, 218, 146]


In [26]:
size(data, 1)

16

In [27]:
ndata = size(data,1)

B_jack = zeros(ndata)
M_jack = zeros(ndata)

for i in 1:ndata
    
    xcopy = copy(data.x)
    deleteat!(xcopy, i)

    ycopy = copy(data.y)
    deleteat!(ycopy, i)

    scopy = copy(data.sigma_y)
    deleteat!(scopy, i)

    starting_params = [b_eye + 0., m_eye]
    result = optimize(p -> objective_gauss(p, xcopy, ycopy, scopy),
                      starting_params)
    @assert Optim.converged(result)
    b_jack, m_jack = Optim.minimizer(result)
    
    B_jack[i] = b_jack
    M_jack[i] = m_jack 
end

In [28]:
@assert 2+2 == 4

In [29]:
f = Figure()
Axis(f[1, 1])
errorbars!(data.x, data.y, data.sigma_y)
scatter!(data.x, data.y, markersize=20, color=:red)

for i in 1:ndata
    yy_jack = xx .* M_jack[i] .+ B_jack[i]
    lines!(xx, yy_jack, color=:green)
end

yy_gauss = xx .* m_gauss .+ b_gauss
lines!(xx, yy_gauss, color=:purple, linewidth=3)

f

In [30]:
f = Figure()
Axis(f[1, 1])
scatter!(B_jack, M_jack, markersize=20, color=:red)

plot!([b_gauss], [m_gauss], markersize=30)

f

In [31]:
using Statistics

In [32]:
mean(B_jack), std(B_jack) .* sqrt((ndata - 1) / ndata)

(27.982644701170038, 8.58069162338665)

In [33]:
mean(M_jack), std(M_jack) .* sqrt((ndata - 1) / ndata)

(2.2212591046386256, 0.04881277590712103)

In [34]:
f = Figure()
Axis(f[1, 1])
errorbars!(alldata.x, alldata.y, alldata.sigma_y)
scatter!(alldata.x, alldata.y, markersize=20, color=:red)
f

In [35]:
starting_params = [b_eye + 0., m_eye]
result = optimize(p -> objective_gauss(p, alldata.x, alldata.y, alldata.sigma_y),
                  starting_params)
@assert Optim.converged(result)
b_bad, m_bad = Optim.minimizer(result)

2-element Vector{Float64}:
 310.0184868511828
   0.6320089810388613

In [36]:
f = Figure()
Axis(f[1, 1])
errorbars!(alldata.x, alldata.y, alldata.sigma_y)
scatter!(alldata.x, alldata.y, markersize=20, color=:red)

yy_bad = xx .* m_bad .+ b_bad
lines!(xx, yy_bad, color=:purple, linewidth=3)

f

In [37]:
function objective_outliers(parameters, x, y, sigma)
    b = parameters[1]
    m = parameters[2]

    frac_bad = 0.01
    like_bad = frac_bad * (1. / 600.)

    y_pred = b .+ m .* x
    like_good = (1. - frac_bad) * 1 ./(sqrt(2*π) .* sigma) .* exp.(-0.5 * (y .- y_pred).^2 ./ sigma.^2)
    like = like_bad .+ like_good
    loglike = log.(like)

    return -sum(loglike)
end

objective_outliers (generic function with 1 method)

In [38]:
starting_params = [b_eye + 0., m_eye]
result = optimize(p -> objective_outliers(p, alldata.x, alldata.y, alldata.sigma_y),
                  starting_params)
@assert Optim.converged(result)
b_out, m_out = Optim.minimizer(result)

2-element Vector{Float64}:
 32.71335736732801
  2.252659398988028

In [39]:
starting_params = [700 + 0., -0.5]
result = optimize(p -> objective_outliers(p, alldata.x, alldata.y, alldata.sigma_y),
                  starting_params)
@assert Optim.converged(result)
b_punk, m_punk = Optim.minimizer(result)

2-element Vector{Float64}:
 627.9728284643311
  -0.7590088958472745

In [40]:
f = Figure()
Axis(f[1, 1])
errorbars!(alldata.x, alldata.y, alldata.sigma_y)
scatter!(alldata.x, alldata.y, markersize=20, color=:red)

yy_bad = xx .* m_bad .+ b_bad
lines!(xx, yy_bad, color=:purple, linewidth=3)

yy_out = xx .* m_out .+ b_out
lines!(xx, yy_out, color=:green, linewidth=3)

yy_punk = xx .* m_punk .+ b_punk
lines!(xx, yy_punk, color=:blue, linewidth=3)

f