In [None]:
using PyPlot

In [None]:
"""
    plotline(w, b; xmin=-100, xmax=100)

Draw the straight line `y = w*x + b` in black. Only the two endpoints at
`x = xmin` and `x = xmax` are evaluated and joined.
"""
function plotline(w,b;
                  xmin=-100,xmax=100)
    endpoints = [xmin, xmax]
    heights = map(x -> w*x+b, endpoints)
    return plot(endpoints, heights, color="black")
end

In [None]:
"""
    plotfunc(f; xmin=-100, xmax=100, nsamples=100)

Draw the curve `y = f(x)` in black. `f` is evaluated one point at a time on the
grid `linspace(xmin, xmax, nsamples)`.
"""
function plotfunc(f;
                  xmin=-100,xmax=100,nsamples=100)
    grid = linspace(xmin,xmax,nsamples)
    heights = map(f, grid)
    return plot(grid, heights, color="black")
end

In [None]:
# Generate and plot data
n = 20    # number of data points to sample
srand(2)  # seed the global random number generator before sampling
"""
    sample_data(num_points)

Draw `num_points` noisy samples from a smooth nonlinear curve.

Returns the tuple `(x, y)`. `x` is `rand(num_points)` scaled by 10, and `y` is
computed elementwise as

    y = 0.2 + 0.2*x + 0.1*sin(x) + 0.03*noise - 0.1*(x/6)^2

where `noise` is `randn(num_points)`.
"""
function sample_data(num_points)
    x = rand(num_points) .* 10
    # Drawn after `x`, so the random numbers are consumed in the same order as before.
    noise = randn(num_points)
    # Every elementwise operation is dotted explicitly: calling `sin` on a vector and
    # adding a scalar to an array with plain `+`/`-` are errors on Julia 1.0 and later,
    # while the dotted forms work on old and new versions alike.
    y = 0.2 .+ 0.2 .* x .+ 0.1 .* sin.(x) .+ 0.03 .* noise .- 0.1 .* (x ./ 6).^2
    return x, y
end
x, y = sample_data(n)

"""
    plotdata(x=x, y=y; margin=.05)

Plot `y` against `x` with `o` markers, label the axes `x` and `y`, and set both
axis limits to the data range widened on each side by the fraction `margin` of
that range. Called without positional arguments, it plots the global `x` and `y`.
"""
function plotdata(x=x,y=y; margin=.05)
    hold(true)
    plot(x,y,"o")
    xlabel("x")
    ylabel("y")
    ylo, yhi = minimum(y), maximum(y)
    xlo, xhi = minimum(x), maximum(x)
    # padding added beyond the extreme data values on each axis
    ypad = margin*(yhi - ylo)
    xpad = margin*(xhi - xlo)
    ylim([ylo - ypad, yhi + ypad])
    return xlim([xlo - xpad, xhi + xpad])
end
plotdata()

# Approximating with the mean

In [None]:
# the mean solves a very simple least squares problem:
# regress y on a single column of ones
X = fill(1.0, n, 1)
w = X \ y

In [None]:
# check that the least squares solution is the mean:
# this absolute difference should be numerically zero
abs(w[1] - mean(y))

In [None]:
# plot the data together with the constant (zero-slope) fit
clf() # clears the previous figure
plotdata()
plotline(0, first(w))

# Approximating with a line

In [None]:
# design matrix: the inputs in the first column, a constant 1 in the second
X = hcat(copy(x), ones(length(x)))

In [None]:
# solve the least squares problem for the coefficient vector
w = X \ y

In [None]:
# plot the data together with the fitted line
clf()
plotdata()
let slope = w[1], intercept = w[2]
    plotline(slope, intercept)
end

In [None]:
# plot fit on out of sample data
clf()
plotdata()
let slope = w[1], intercept = w[2]
    plotline(slope, intercept)
end

# draw a fresh sample from the same generator and add it to the figure
xtest, ytest = sample_data(20)
plotdata(xtest, ytest)

# Approximating with a polynomial

In [None]:
# first, construct a Vandermonde matrix:
# one column per power of x, from x.^0 up to x.^max_order
max_order = 10

X = zeros(n, max_order+1)
for (col, order) in enumerate(0:max_order)
    X[:, col] = x.^order
end
X

In [None]:
# solve least squares problem for the coefficient vector
w = X \ y

In [None]:
"""
    p(x)

Evaluate our polynomial fit at `x`, i.e. the sum of `w[k+1]*x^k` for
`k = 0, ..., max_order`. Both the coefficient vector `w` and the degree
`max_order` are read from global scope.
"""
function p(x)
    total = 0
    for (i, coeff) in enumerate(w[1:max_order+1])
        # the i-th coefficient multiplies x^(i-1)
        total += coeff*x^(i-1)
    end
    return total
end

In [None]:
# plot the data together with the fitted polynomial
clf()
plotdata()
plotfunc(p; xmin=0, xmax=9)

In [None]:
# plot fit on out of sample data
clf()
plotdata()
plotfunc(p; xmin=0, xmax=9)

# draw a fresh sample from the same generator and add it to the figure
xtest, ytest = sample_data(20)
plotdata(xtest, ytest)