In [None]:
using Plots, Random, LinearAlgebra, Statistics
pyplot()

In [None]:
"""
    plotline(w, b; xmin=-100, xmax=100, label="")

Draw the line `y = w*x + b` in black on the current plot, using only the two
endpoints `xmin` and `xmax`. `label` is the legend entry for the line.
"""
function plotline(w, b; xmin=-100, xmax=100, label="")
    endpoints = [xmin, xmax]
    heights = w .* endpoints .+ b
    return plot!(endpoints, heights; color=:black, label=label)
end

In [None]:
"""
    plotfunc(f; xmin=-100, xmax=100, nsamples=100, label="")

Draw `y = f(x)` in black on the current plot, evaluating `f` at `nsamples`
evenly spaced points from `xmin` to `xmax`. `label` is the legend entry.
"""
function plotfunc(f; xmin=-100, xmax=100, nsamples=100, label="")
    grid = LinRange(xmin, xmax, nsamples)
    fvals = map(f, grid)
    return plot!(grid, fvals; color=:black, label=label)
end

In [None]:
# Generate and plot data
# Seed the global RNG so the sampled data set is reproducible.
Random.seed!(1)
n = 15  # number of training points passed to sample_data
"""
    sample_data(num_points)

Draw `num_points` inputs uniformly from `[0, 10)` and return the tuple `(x, y)`.
`y` is a noisy nonlinear function of `x`: an offset plus a linear trend, a sinusoid,
Gaussian noise scaled by 0.1, and a small quartic term centred at `x = 5`.
"""
function sample_data(num_points)
    inputs = rand(num_points) .* 10
    # draw the noise after the inputs so the RNG is consumed in a fixed order
    noise = randn(num_points)
    outputs = .2 .+ .2 .* inputs .+ .1 .* sin.(inputs) .+ .1 .* noise .-
              .01 .* ((inputs .- 5) ./ 6) .^ 4
    return inputs, outputs
end
# Draw the n-point training set; later cells read the globals x and y.
x,y = sample_data(n)

"""
    plotdata(x=x, y=y; margin=.05)

Start a new scatter plot of the points `(x, y)` (by default the global `x` and `y`),
label both axes, and set each axis limit to the data range padded on both sides by
`margin` times that range.
"""
function plotdata(x=x, y=y; margin=.05)
    scatter(x, y; label="data", legend=:topleft)
    xlabel!("x")
    ylabel!("y")
    ylo, yhi = extrema(y)
    xlo, xhi = extrema(x)
    ypad = margin * (yhi - ylo)
    xpad = margin * (xhi - xlo)
    ylims!((ylo - ypad, yhi + ypad))
    return xlims!((xlo - xpad, xhi + xpad))
end
# Show the training data using the default (global) x and y.
plotdata()

# Approximating with the mean

In [None]:
# the mean solves a very simple least squares problem:
# with a single all-ones column, minimizing ||X*w - y||^2 over the scalar w gives mean(y)
X = ones(n,1)
w = X\y

In [None]:
# check the solution to our least squares problem is the mean
# (the displayed absolute difference should be zero up to floating-point roundoff)
abs(mean(y) - w[1])

In [None]:
# plot the fit: a horizontal line (slope 0) at height w[1]
plotdata()
plotline(0, w[1], label="mean")

Is this a good model? Would you trust this model to make predictions on new inputs $x$?

* (A) yes 
* (B) no

# Approximating with a line

In [None]:
# Design matrix for a line fit: column 1 holds the inputs, column 2 is the all-ones
# offset feature. hcat allocates a fresh matrix, so x itself is left untouched.
X = hcat(x, ones(length(x)))

In [None]:
# least squares fit of y on the columns of X; w[1] multiplies the first column and
# w[2] the second
w = X\y

In [None]:
# plot the fit: w[1] is passed as the slope and w[2] as the intercept
plotdata()
plotline(w[1], w[2], label="linear fit")

Is this a good model? Would you trust this model to make predictions on new inputs $x$?

* (A) yes 
* (B) no

In [None]:
# plot fit on out of sample data
plotdata()
plotline(w[1], w[2])

# draw 20 fresh points from the same generator and overlay them as a test set
xtest,ytest = sample_data(20)
scatter!(xtest,ytest,label="test")

# Approximating with a polynomial

In [None]:
# first, construct a Vandermonde matrix:
# column j holds x raised to the power j-1, for powers 0 through max_order
max_order = 10

X = zeros(n, max_order + 1)
for (col, k) in enumerate(0:max_order)
    X[:, col] .= x .^ k
end
X

In [None]:
# solve least squares problem
# (w[k+1] is the coefficient that multiplies column k+1 of X)
w = X\y

In [None]:
"""
    p(x; order=max_order, w=w)

Evaluate the polynomial `w[1] + w[2]*x + ... + w[order+1]*x^order` at the scalar `x`.

# Keywords
- `order`: highest power of `x` to include; defaults to the global `max_order`.
- `w`: coefficients, lowest degree first; defaults to the global `w`. Only the first
  `order + 1` entries are read.

# Returns
The value of the polynomial. The accumulator starts from a zero of the promoted
coefficient/input type, so the result type does not depend on `order`; an empty sum
(`order < 0`) returns that typed zero.
"""
function p(x; order = max_order, w = w)
    # typed zero instead of the Int literal 0, so the accumulator never changes type
    total = zero(eltype(w)) * zero(x)
    for k in 0:order
        total += w[k+1] * x^k
    end
    return total
end

In [None]:
# plot fit: draw the fitted polynomial over the input range 0 to 10
plotdata()
plotfunc(x -> p(x, order=max_order, w=w), xmin=0, xmax=10)

Is this a good model? Would you trust this model to make predictions on new inputs $x$?

* (A) yes 
* (B) no

In [None]:
# plot fit on out of sample data
plotdata()
plotfunc(x -> p(x, order=max_order, w=w), xmin=0, xmax=10)

# draw 100 fresh points from the same generator and overlay them as a test set
xtest,ytest = sample_data(100)
scatter!(xtest,ytest,label="test")

# Choosing the best model order

In [None]:
# Sweep the polynomial order from 0 to max_model_order: fit each model on the training
# set (x, y) and record its root-mean-square error on a freshly sampled test set.
max_model_order = 10
rmse = zeros(max_model_order+1) # rmse[d+1] holds the test RMSE of the order-d model
xtest,ytest = sample_data(50)   # generate test set

for model_order in 0:max_model_order
    # form the Vandermonde matrix with columns x.^0, x.^1, ..., x.^model_order
    # (loop-local names, so the sweep does not overwrite the global X and w)
    Xk = [xi^k for xi in x, k in 0:model_order]

    # solve least squares problem
    wk = Xk \ y

    # evaluate the fitted polynomial on the test inputs
    ptest = [p(xt, order=model_order, w=wk) for xt in xtest]

    # root-mean-square test error, matching the `rmse` name and the plot's axis label
    rmse[model_order+1] = sqrt(mean(abs2, ytest - ptest))
end
rmse

In [None]:
# Plot test error against model order. The ticks follow max_model_order so that every
# order evaluated in the sweep gets a tick, whatever the maximum order is set to.
plot(0:max_model_order, rmse)
xticks!(0:max_model_order)
xlabel!("model order")
ylabel!("rmse")

# Polynomial models for classification

In [None]:
# 1-D view of a classification problem: a point is labelled positive when
# (x - 1)^2 >= 7, so positive points can occur on both sides of the negative ones.
Random.seed!(0)
n = 20
xs = 10 .* (rand(n) .- .5)
ys = xs .^ 2                 # squared feature, plotted in the following cell
pos = (xs .- 1) .^ 2 .>= 7
zn = zeros(n)                # all points are drawn on the horizontal axis
pt = scatter(xs[pos], zn[pos]; color="blue", label="positive")
scatter!(xs[.!pos], zn[.!pos]; color="red", label="negative")
# vertical segment at x = -2 drawn as a candidate boundary
plot!([-2, -2], [-1, 1]; color="black", label="classification boundary")
yaxis!([-1, 1])
# savefig("poly-class-1d.pdf")
pt

In [None]:
# Same points plotted against ys. The labelling rule (x - 1)^2 >= 7 used for pos is
# equivalent to x^2 >= 2x + 6; the line drawn here is y = 1.5x + 7, a nearby line
# rather than that exact boundary.
pt = scatter(xs[pos], ys[pos]; color="blue", label="positive")
scatter!(xs[.!pos], ys[.!pos]; color="red", label="negative")
plot!(xs, 1.5 .* xs .+ 7; color="black", label="classification boundary")
# savefig("poly-class-2d.pdf")
pt

In [None]:
# Example 1: multivariate polynomial classification
# sample n points with both coordinates uniform on [-scale/2, scale/2)
n = 1000
scale = 50
x1 = scale .* rand(n) .- scale / 2
x2 = scale .* rand(n) .- scale / 2
# Quadratic decision function for Example 1. The zero cross-term coefficient is
# written as `0*x1*x2`: the juxtaposed form `0x1` is lexed by Julia as the
# hexadecimal literal 0x01, which would silently subtract x2 instead.
f(x1,x2) = -30 - 9x1 + 2x2 + x1^2 - 0*x1*x2 + x2^2
# Label each point by the sign of f and plot the two classes
# (a point where f is exactly 0 falls in neither mask).
# Note: this overwrites the globals y and pos.
y = f.(x1,x2)
pos = y.>0
neg = y.<0

# explicit labels so the legend names the classes instead of auto-generated series names
scatter(x1[pos], x2[pos], color="blue", label="positive")
scatter!(x1[neg], x2[neg], color="red", label="negative", legend=:topright)

In [None]:
# Example 2: multivariate polynomial classification
# sample n points with both coordinates uniform on [-scale/2, scale/2)
n = 1000
scale = 10
x1 = scale .* rand(n) .- scale / 2
x2 = scale .* rand(n) .- scale / 2
# Example 2 decision function: a quadratic in (x1, x2) with a cross term and a
# positive coefficient on x2^2.
function f(x1, x2)
    return -5 - 3 * x1 + 2 * x2 + x1^2 - x1 * x2 + 5 * x2^2
end
# Label each point by the sign of f and plot the two classes
# (a point where f is exactly 0 falls in neither mask).
# Note: this overwrites the globals y and pos.
y = f.(x1,x2)
pos = y.>0
neg = y.<0

# explicit labels so the legend names the classes instead of auto-generated series names
scatter(x1[pos], x2[pos], color="blue", label="positive")
scatter!(x1[neg], x2[neg], color="red", label="negative", legend=:topright)

In [None]:
# Example 3: multivariate polynomial classification
# sample n points with both coordinates uniform on [-scale/2, scale/2)
n = 1000
scale = 20
x1 = scale .* rand(n) .- scale / 2
x2 = scale .* rand(n) .- scale / 2
# Example 3 decision function: a quadratic in (x1, x2) with a cross term and a
# negative coefficient on x2^2.
function f(x1, x2)
    return -5 - 3 * x1 + 2 * x2 + x1^2 - x1 * x2 - 2 * x2^2
end
# Label each point by the sign of f and plot the two classes
# (a point where f is exactly 0 falls in neither mask).
# Note: this overwrites the globals y and pos.
y = f.(x1,x2)
pos = y.>0
neg = y.<0

# explicit labels so the legend names the classes instead of auto-generated series names
scatter(x1[pos], x2[pos], color="blue", label="positive")
scatter!(x1[neg], x2[neg], color="red", label="negative", legend=:topright)

# Bootstrap estimators

In [None]:
# sample K data sets of n samples each and compute a model on each
# see how the models vary
n = 50
K = 500

models = zeros(K, 2)   # row k holds the two coefficients fitted to data set k
for k in 1:K
    # this experiment samples from the true data distribution many many times
    # (not realistic)
    xk, yk = sample_data(n)
    models[k, :] .= hcat(xk, ones(n)) \ yk
end

In [None]:
# histogram of the distribution of the first coefficient
# could use to compute, eg, confidence intervals
# (column 1 of models is the coefficient on the x column of each design matrix)

histogram(models[:,1])

In [None]:
# average of each coefficient (column) across the K fitted models
mean(models, dims=1)

In [None]:
# variance of each coefficient (column) across the K fitted models
var(models,dims=1)

In [None]:
# can sample with replacement using rand
# (here: 5 independent draws from 1:20, so repeated values are possible)
rand(1:20, 5)

In [None]:
# eg, use the sampled positions to index a 20-element range;
# the displayed matrix pairs each sampled index with the value it selects
a = .1:.1:2.0
s = rand(1:20, 5)
hcat(s, a[s])

In [None]:
# resample K bootstrap data sets of n samples each and compute a model on each
# see how the models vary
# how does result depend on number of datapoints n? number of resamples K?
n = 50
K = 1000

x, y = sample_data(n) # this is the only data we've got!

models = zeros(K, 2)   # row k holds the two coefficients fitted to resample k
for k in 1:K
    picks = rand(1:n, n)   # n indices drawn with replacement from 1:n
    models[k, :] .= hcat(x[picks], ones(n)) \ y[picks]
end

In [None]:
# Small illustration of drawing n indices from 1:n with replacement (repeats are
# expected). `let` keeps this demo's n = 5 local, so the global n used to size the
# data set is not overwritten.
let n = 5
    rand(1:n, n)
end

In [None]:
# distribution of the first coefficient (column 1 of models) across the fitted models
histogram(models[:,1])

In [None]:
# average of each coefficient (column) across the fitted models
mean(models,dims=1)

In [None]:
# as K increases, mean of the bootstrap models should converge to
# the model fit on the original data set
# (the ones column is sized from x itself, so the fit does not depend on whatever
# value the global n currently holds)
X = [x ones(length(x))]
w = X \ y

In [None]:
# variance of each coefficient (column) across the fitted models
var(models,dims=1)