In [31]:
using Lux, Random
using Pkg 

# Pkg.add("ComponentArrays")


using ComponentArrays, ForwardDiff, Zygote
using Optimisers, Printf

In [2]:
# Seed the default RNG with a fixed value so every random draw below is
# reproducible; `seed!` hands back the generator it just seeded.
rng = Random.seed!(Random.default_rng(), 0)

TaskLocalRNG()

In [52]:
# An interesting way to build a matrix: broadcasting a column-like range
# against a transposed (row-like) range yields their outer product.
broadcast(*, 2:2:10, transpose(1:5))

5×5 Matrix{Int64}:
  2   4   6   8  10
  4   8  12  16  20
  6  12  18  24  30
  8  16  24  32  40
 10  20  30  40  50

In [3]:
# Draw a 5×3 matrix of uniform Float64 samples from the seeded generator.
x = rand(rng, Float64, (5, 3))

5×3 Matrix{Float64}:
 0.455238   0.746943   0.193291
 0.547642   0.746801   0.116989
 0.773354   0.97667    0.899766
 0.940585   0.0869468  0.422918
 0.0296477  0.351491   0.707534

In [6]:
# Lay the range 1:8 out as a 2×4 array (filled column by column) without
# materialising it.
x = reshape(1:8, (2, 4))

2×4 reshape(::UnitRange{Int64}, 2, 4) with eltype Int64:
 1  3  5  7
 2  4  6  8

In [9]:
# Work on a copy so `x` itself is left untouched, then zero the first column
# in place through a view (no temporary slice is allocated).
x_copy = copy(x)
fill!(@view(x_copy[:, 1]), 0)

2-element view(::Matrix{Int64}, :, 1) with eltype Int64:
 0
 0

In [10]:
x_copy

2×4 Matrix{Int64}:
 0  3  5  7
 0  4  6  8

Calculando el gradiente

In [13]:
# Quadratic test function f(x) = xᵀx / 2.
function f(x)
    return (x' * x) / 2
end

# Hand-derived gradient of `f`, used as the reference value for the AD results.
function ∇f(x)
    return identity(x)
end
# Random Float32 point at which the gradients are evaluated.
v = rand(rng, Float32, (4,))

4-element Vector{Float32}:
 0.46487772
 0.8812782
 0.9124628
 0.9318977

In [14]:
# Compare the analytic gradient with the reverse-mode (Zygote) and
# forward-mode (ForwardDiff) results at the same point `v`.
println("Actual Gradient: $(∇f(v))")
println("Computed Gradient via Reverse Mode AD (Zygote): $(only(Zygote.gradient(f, v)))")
println("Computed Gradient via Forward Mode AD (ForwardDiff): $(ForwardDiff.gradient(f, v))")

Actual Gradient: Float32[0.46487772, 0.8812782, 0.9124628, 0.9318977]
Computed Gradient via Reverse Mode AD (Zygote): Float32[0.46487772, 0.8812782, 0.9124628, 0.9318977]
Computed Gradient via Forward Mode AD (ForwardDiff): Float32[0.46487772, 0.8812782, 0.9124628, 0.9318977]


In [15]:
# NOTE: this replaces the scalar-valued `f` defined earlier with an
# elementwise (vector-valued) version used for the JVP/VJP examples.
f(x) = @. x * x / 2
# Point at which the Jacobian products are evaluated.
x = randn(rng, Float32, (5,))
# Direction / cotangent vector of all ones.
v = fill(one(Float32), 5)

5-element Vector{Float32}:
 1.0
 1.0
 1.0
 1.0
 1.0

In [16]:
# Jacobian-vector product J(x) * v, computed with forward-mode AD.
jvp = jacobian_vector_product(f, AutoForwardDiff(), x, v)
println("JVP: $(jvp)")

JVP: Float32[0.79077834, -1.7732544, 0.7437802, 0.09222726, 0.7748581]


In [17]:
# Vector-Jacobian product vᵀ * J(x), computed with reverse-mode AD (Zygote).
vjp = vector_jacobian_product(f, AutoZygote(), x, v)
println("VJP: $(vjp)")

VJP: Float32[0.79077834, -1.7732544, 0.7437802, 0.09222726, 0.7748581]


In [19]:
# Single dense layer mapping 10 inputs to 5 outputs with the default
# (identity) activation, i.e. a plain affine map.
model = Dense(10 => 5, identity)

# Re-seed the default generator so the parameter initialisation below is
# reproducible.
rng = Random.seed!(Random.default_rng(), 0)

TaskLocalRNG()

In [20]:
# Initialise the layer's parameters and states from the seeded generator,
# then wrap the parameters in a ComponentArray.
ps, st = Lux.setup(rng, model)
ps = ps |> ComponentArray

[0mComponentVector{Float32}(weight = Float32[-0.48351598 0.29944375 … -0.30674052 0.034259234; -0.04903387 -0.4242767 … 0.1958431 0.23992883; … ; 0.05218964 -0.09701932 … 0.36829436 -0.3097294; 0.20277858 -0.51524514 … 0.071482725 -0.45247704], bias = Float32[-0.04199602, -0.093925126, -0.0007736237, -0.19397983, 0.0066712513])

In [21]:
# Problem dimensions for the synthetic regression task.
x_dim = 10      # number of input features
n_samples = 20  # number of generated samples
y_dim = 5       # number of output features

5

In [22]:
# Generate a random ground-truth weight matrix W and bias b: the "true"
# values used to produce the data, which training should recover.
W = randn(rng, Float32, (y_dim, x_dim))
b = randn(rng, Float32, (y_dim,))

5-element Vector{Float32}:
 -0.9436797
  1.5164032
  0.011937321
  1.4339262
 -0.2771789

In [23]:
# Inputs: one sample per column.
x_samples = randn(rng, Float32, (x_dim, n_samples))
# Targets: the ground-truth affine map plus a small amount of Gaussian noise.
y_samples = (W * x_samples) .+ b .+ 0.01f0 .* randn(rng, Float32, (y_dim, n_samples))
# Dimensions of the generated samples.
println("x shape: $(size(x_samples)); y shape: $(size(y_samples))")

x shape: (10, 20); y shape: (5, 20)


In [28]:
typeof(0.01f0)

Float32

To update the parameters we will use Optimisers.jl: gradient descent (the `Descent` rule, commonly referred to as SGD) with a learning rate of 0.01.


In [32]:
# Mean squared error is the training objective.
lossfn = MSELoss()

# Sanity check: the loss of the ground-truth parameters on the noisy targets,
# i.e. the level the trained model can be expected to approach.
println("Loss Value with ground true parameters: $(lossfn(W * x_samples .+ b, y_samples))")

Loss Value with ground true parameters: 9.37424e-5


In [33]:
"""
    train_model!(model, ps, st, opt, nepochs; loss=lossfn,
                 data=(x_samples, y_samples), log_every=1000)

Run `nepochs` optimisation steps on `model` and return the tuple
`(model, parameters, states)` taken from the final training state.

# Arguments
- `model`, `ps`, `st`: the model with its parameters and states, used to build
  the initial `Training.TrainState`.
- `opt`: optimiser rule handed to the train state (e.g. `Descent(0.01f0)`).
- `nepochs`: number of training steps; a value `<= 0` performs no steps.

# Keywords
- `loss`: objective passed to each training step. Defaults to the global
  `lossfn`, which is what this function always used before.
- `data`: data passed to each training step. Defaults to the global
  `(x_samples, y_samples)`, i.e. the whole dataset on every step.
- `log_every`: period of the progress messages. The loss is printed on steps
  `1, 1 + log_every, 1 + 2log_every, …` and on the last step.

# Throws
- `ArgumentError` if `log_every` is not positive.
"""
function train_model!(
    model, ps, st, opt, nepochs::Integer;
    loss=lossfn, data=(x_samples, y_samples), log_every::Integer=1000,
)
    log_every > 0 ||
        throw(ArgumentError("log_every must be positive, got $log_every"))

    tstate = Training.TrainState(model, ps, st, opt)
    for i in 1:nepochs
        # Only the loss value and the updated train state are needed here;
        # the gradients and extra statistics returned by the step are dropped.
        _, loss_val, _, tstate = Training.single_train_step!(
            AutoZygote(), loss, data, tstate
        )
        # For i >= 1, `(i - 1) % 1000 == 0` is the same test as `i % 1000 == 1`,
        # and it also behaves sensibly for `log_every == 1`.
        if (i - 1) % log_every == 0 || i == nepochs
            @printf "Loss Value after %6d iterations: %.8f\n" i loss_val
        end
    end
    return tstate.model, tstate.parameters, tstate.states
end

train_model! (generic function with 1 method)

In [34]:
# Train for 10 000 steps of gradient descent with learning rate 0.01 and keep
# the trained model, parameters and states.
model, ps, st = train_model!(model, ps, st, Descent(0.01f0), 10_000)

# Evaluate the trained model on the training data; the model call returns
# (prediction, state), of which only the prediction is needed.
println("Loss Value after training: $(lossfn(model(x_samples, ps, st)[1], y_samples))")

Loss Value after      1 iterations: 7.80465460
Loss Value after   1001 iterations: 0.12477564
Loss Value after   2001 iterations: 0.02535537
Loss Value after   3001 iterations: 0.00914141
Loss Value after   4001 iterations: 0.00407581
Loss Value after   5001 iterations: 0.00198415
Loss Value after   6001 iterations: 0.00101147
Loss Value after   7001 iterations: 0.00053332
Loss Value after   8001 iterations: 0.00029203
Loss Value after   9001 iterations: 0.00016878
Loss Value after  10000 iterations: 0.00010551
Loss Value after training: 0.000105468556


In [46]:
model

Dense(10 => 5)      [90m# 55 parameters[39m

In [47]:
ps

[0mComponentVector{Float32}(weight = Float32[0.7682156 0.7941856 … 0.29398134 -0.89615625; -0.8729844 -1.7722499 … 0.86862755 0.014939133; … ; -0.5534184 0.09342571 … 0.355172 -0.5844771; 1.7958173 0.77247226 … -0.90781474 0.085328616], bias = Float32[-0.9428906, 1.5168314, 0.012598767, 1.437549, -0.28315955])

In [48]:
st

NamedTuple()