In [None]:
# JuliaBox only -- skip this cell in any other environment.
# Replaces the last entry of Julia's depot search path with the
# JuliaBox-specific depot directory.
let juliabox_depot = "/home/jrun/.julia2/"
    pop!(DEPOT_PATH)
    push!(DEPOT_PATH, juliabox_depot)
end

## Linear fit by differentiable programming

In [None]:
using Zygote, LinearAlgebra

In [None]:
# Define a data type representing our model (optional).
#
# Fields:
#   weights -- coefficient matrix that multiplies the input from the left
#   bias    -- zero-dimensional array holding the single scalar offset; it is
#              stored in an array so it can be updated in place even though
#              the struct itself is immutable
#
# Both fields are concretely typed (`Float64` elements) so that code reading
# `model.weights` / `model.bias` can be specialized by the compiler. Arguments
# of other element types passed to the default constructor are converted.
struct LinearRegression
    weights::Matrix{Float64}
    bias::Array{Float64,0}
end

# Convenience constructor: a model for `nparams` input features with
# normally-distributed random weights (one row, `nparams` columns) and a
# bias of zero.
LinearRegression(nparams::Integer) = LinearRegression(randn(1, nparams), fill(0.0))

In [None]:
# Forward pass of the model: multiply the inputs `X` by the weights and
# shift every resulting entry by the scalar bias
function predict(model::LinearRegression, X)
    offset = model.bias[]          # unwrap the zero-dimensional bias array
    projected = model.weights * X
    return projected .+ offset
end

In [None]:
# Objective to minimize: the Euclidean (2-)norm of the difference between
# the model's predictions for `X` and the targets `Y`
function loss(model::LinearRegression, X, Y)
    residual = predict(model, X) .- Y
    return norm(residual, 2)
end

In [None]:
# Sample problem setup.
# These are the reference ("ground truth") parameters used to generate the
# data; training should recover values close to them.
# The weights are stored as a 1×4 row (adjoint of a column vector).
weights_gt = adjoint([1.0, 2.7, 0.3, 1.2])
bias_gt = 0.4

In [None]:
# Build a synthetic dataset: every column of `X` is one observation (one row
# per ground-truth weight) and `Y` holds the targets computed from the
# ground-truth parameters
X = randn(length(weights_gt), 10000)
Y = bias_gt .+ weights_gt * X
# Perturb `X` slightly *after* `Y` has been computed, so the data no longer
# admits an exact solution and the model has to find a least-squares fit
X .= X .+ 0.01 .* randn(size(X)...)

In [None]:
# Training starts from a randomly initialized model with one weight per
# row (feature) of `X`
model = LinearRegression(first(size(X)))

In [None]:
# Differentiate the loss on the first training example (first column of `X`
# and first entry of `Y`) with respect to `model`
grads = Zygote.gradient(model) do m
    loss(m, X[:, 1], Y[1])
end

In [None]:
# `gradient` returns a tuple with one entry per differentiated argument.
# Only `model` was differentiated, so keep just the first entry.
grads = first(grads)

In [None]:
# Gradient-descent step: move each parameter of `model` in place by a small
# amount against its gradient.
#   model -- the model whose `weights` and `bias` arrays are modified
#   grads -- object with `weights` and `bias` fields holding the gradients
#   η     -- step size (learning rate), 0.001 by default
# Returns the updated `model.bias` array (the value of the last update).
function sgd_update!(model::LinearRegression, grads, η = 0.001)
    @. model.weights = model.weights - η * grads.weights
    @. model.bias = model.bias - η * grads.bias
    return model.bias
end

In [None]:
# One pass of stochastic gradient descent: visit each observation (column of
# `X`) once and update `model` in place from that single example's gradient
for idx in axes(X, 2)
    sample_loss = m -> loss(m, X[:, idx], Y[idx])
    grads = Zygote.gradient(sample_loss, model) |> first
    sgd_update!(model, grads)
end

In [None]:
# Display the trained model so its learned weights and bias can be inspected
model

In [None]:
# Display the ground-truth weights for comparison with the learned ones
weights_gt