# Admin

In [1]:
Pkg.add("Flux")
# Optional but recommended
Pkg.update() # Keep your packages up to date

[1m[36mINFO: [39m[22m[36mPackage Flux is already installed
[39m[1m[36mINFO: [39m[22m[36mUpdating METADATA...
[39m[1m[36mINFO: [39m[22m[36mComputing changes...
[39m

LoadError: [91mresolve is unable to satisfy package requirements.
  The problem was detected when trying to find a feasible version
  for package NullableArrays.
  However, this only means that package NullableArrays is involved in an
  unsatisfiable or difficult dependency relation, and the root of
  the problem may be elsewhere.
[39m

In [2]:
Pkg.test("Flux") # Check things installed correctly

[1m[36mINFO: [39m[22m[36mTesting Flux
[39m  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3629k  100 3629k    0     0  1537k      0  0:00:02  0:00:02 --:--:-- 1538k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   382  100   382    0     0   2671      0 --:--:-- --:--:-- --:--:--  2671
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   281  100   281    0     0   2007      0 --:--:-- --:--:-- --:--:--  1992


[1m[37mTest Summary: | [39m[22m[1m[32mPass  [39m[22m[1m[36mTotal[39m[22m
Flux          | [32m  86  [39m[36m   86[39m


[1m[36mINFO: [39m[22m[36mFlux tests passed
[39m

# Building Models

## Basics

### Gradients

In [3]:
W = rand(2, 5)
b = rand(2)

predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)

x, y = rand(5), rand(2) # Dummy data
loss(x, y) # ~ 3

4.4480426969766285

In [4]:
using Flux.Tracker

W = param(W)
b = param(b)

l = loss(x, y)

back!(l)

In [5]:
W.grad

# Update the parameter
W.data .-= 0.1(W.grad)

loss(x, y) # ~ 2.5

Tracked 0-dimensional Array{Float64,0}:
2.42937

### Layers

In [11]:
using Flux

W1 = param(rand(3, 5))
b1 = param(rand(3))
layer1(x) = W1 * x .+ b1

W2 = param(rand(2, 3))
b2 = param(rand(2))
layer2(x) = W2 * x .+ b2

model(x) = layer2(σ.(layer1(x)))

model(rand(5)) # => 2-element vector

Tracked 2-element Array{Float64,1}:
 1.07609
 2.69392

In [13]:
function linear(in, out)
  W = param(randn(out, in))
  b = param(randn(out))
  x -> W * x .+ b
end

linear1 = linear(5, 3) # we can access linear1.W etc
linear2 = linear(3, 2)

model(x) = linear2(σ.(linear1(x)))

model(x) # => 2-element vector

Tracked 2-element Array{Float64,1}:
  1.82401 
 -0.880741

In [14]:
# another way
struct Affine
  W
  b
end

Affine(in::Integer, out::Integer) =
  Affine(param(randn(out, in)), param(randn(out)))

# Overload call, so the object can be used as a function
(m::Affine)(x) = m.W * x .+ m.b

a = Affine(10, 5)

a(rand(10)) # => 5-element vector

Tracked 5-element Array{Float64,1}:
  3.73754 
  3.90584 
 -0.819231
  1.47741 
 -0.144513

### Stacking it up

In [17]:
using Flux
layer_1 = Dense(10, 5, σ)
# ...
model(x) = layer3(layer2(layer_1(x)))

model (generic function with 1 method)

In [18]:
using Flux

layers = [Dense(10, 5, σ), Dense(5, 2), softmax]

model(x) = foldl((x, m) -> m(x), x, layers)

model(rand(10)) # => 2-element vector

Tracked 2-element Array{Float64,1}:
 0.198587
 0.801413

In [19]:
model2 = Chain(
  Dense(10, 5, σ),
  Dense(5, 2),
  softmax)

model2(rand(10)) # => 2-element vector

Tracked 2-element Array{Float64,1}:
 0.673231
 0.326769

In [20]:
m = Dense(5, 2) ∘ Dense(10, 5, σ)

m(rand(10))

Tracked 2-element Array{Float64,1}:
 -0.245294
  0.603482

In [21]:
m = Chain(x -> x^2, x -> x+1)

m(5) # => 26

26

## Recurrence

### Recurrent Cells

In [22]:
Wxh = randn(5, 10)
Whh = randn(5, 5)
b   = randn(5)

function rnn(h, x)
  h = tanh.(Wxh * x .+ Whh * h .+ b)
  return h, h
end

x = rand(10) # dummy data
h = rand(5)  # initial hidden state

h, y = rnn(h, x)

([0.786098, 0.994451, -0.970166, 0.972303, -0.999449], [0.786098, 0.994451, -0.970166, 0.972303, -0.999449])

In [23]:
# more concise
using Flux

rnn2 = Flux.RNNCell(10, 5)

x = rand(10) # dummy data
h = rand(5)  # initial hidden state

h, y = rnn2(h, x)

(param([-0.740629, 0.832188, 0.049595, 0.391344, 0.108413]), param([-0.740629, 0.832188, 0.049595, 0.391344, 0.108413]))

### Stateful Models

In [24]:
x = rand(10)
h = rand(5)

m = Flux.Recur(rnn, h)

y = m(x)

5-element Array{Float64,1}:
  0.944022 
 -0.0331725
  0.269579 
  0.973758 
 -0.999189 

### Sequences

In [25]:
seq = [rand(10) for i = 1:10]

10-element Array{Array{Float64,1},1}:
 [0.648083, 0.595107, 0.839945, 0.739376, 0.990343, 0.339041, 0.962497, 0.844712, 0.337752, 0.388273]   
 [0.53186, 0.817248, 0.832853, 0.887418, 0.519195, 0.274859, 0.578825, 0.533091, 0.428315, 0.480524]    
 [0.285008, 0.785897, 0.909889, 0.546765, 0.974994, 0.730691, 0.358246, 0.543274, 0.893889, 0.122494]   
 [0.637238, 0.983072, 0.576636, 0.0425121, 0.226362, 0.623526, 0.591562, 0.0359528, 0.480548, 0.190922] 
 [0.263383, 0.452225, 0.699728, 0.0639004, 0.915173, 0.568517, 0.757429, 0.565975, 0.549652, 0.0336582] 
 [0.802022, 0.0263171, 0.676374, 0.788473, 0.127783, 0.0791591, 0.0976811, 0.308933, 0.064207, 0.656015]
 [0.568454, 0.0867206, 0.705805, 0.0724178, 0.496361, 0.524542, 0.664999, 0.528334, 0.10445, 0.169119]  
 [0.509781, 0.409487, 0.450865, 0.177006, 0.356838, 0.0415929, 0.763214, 0.43009, 0.123381, 0.958812]   
 [0.681299, 0.456266, 0.508522, 0.0334579, 0.194583, 0.756465, 0.851532, 0.0937034, 0.441508, 0.394023] 
 [0.540796, 0.928

In [26]:
m.(seq) # returns a list of 5-element vectors

10-element Array{Array{Float64,1},1}:
 [0.697384, 0.004087, -0.914507, 0.755987, -0.999839] 
 [0.561729, 0.39182, -0.900744, 0.641344, -0.99969]   
 [0.570325, 0.771587, -0.744579, 0.2353, -0.997611]   
 [-0.141873, 0.791483, 0.382622, 0.535097, -0.97649]  
 [-0.736799, 0.614253, 0.820003, 0.792835, -0.895037] 
 [-0.778259, -0.254554, 0.98142, 0.994847, -0.803188] 
 [-0.798405, -0.716291, 0.997126, 0.960138, -0.977139]
 [-0.864664, -0.194456, 0.999685, 0.996779, -0.976205]
 [-0.634608, -0.587306, 0.999912, 0.996222, -0.974303]
 [0.783383, -0.657033, 0.999993, 0.99896, -0.995225]  

In [27]:
m = Chain(LSTM(10, 15), Dense(15, 5))
m.(seq)

10-element Array{TrackedArray{…,Array{Float64,1}},1}:
 param([0.278205, 0.147351, 0.0432366, -0.233245, 0.0367112])
 param([0.309439, 0.183179, 0.162422, -0.177446, 0.115459])  
 param([0.38543, 0.129649, 0.394392, -0.180781, 0.160209])   
 param([0.278085, 0.223547, 0.363532, -0.2069, 0.212915])    
 param([0.338599, 0.268606, 0.487053, -0.263658, 0.169581])  
 param([0.37252, 0.281502, 0.351284, -0.055149, 0.188647])   
 param([0.347256, 0.256342, 0.424047, -0.131747, 0.254324])  
 param([0.306231, 0.263328, 0.294741, -0.0628453, 0.260427]) 
 param([0.278381, 0.250655, 0.387657, -0.135299, 0.324638])  
 param([0.211492, 0.326294, 0.442487, -0.160556, 0.229433])  

### Truncating Gradients

In [29]:
using Flux
truncate!(m)

LoadError: [91mUndefVarError: truncate! not defined[39m

## Regularisation

In [30]:
m = Dense(10, 5)
loss(x, y) = crossentropy(softmax(m(x)), y)

loss (generic function with 1 method)

In [31]:
penalty() = vecnorm(m.W) + vecnorm(m.b)
loss(x, y) = crossentropy(softmax(m(x)), y) + penalty()

loss (generic function with 1 method)

In [32]:
sum(vecnorm, params(m))

LoadError: [91mMethodError: no method matching norm(::TrackedArray{…,Array{Float64,0}})[0m
Closest candidates are:
  norm([91m::RowVector[39m) at linalg/generic.jl:595
  norm([91m::RowVector[39m, [91m::Real[39m) at linalg/generic.jl:607
  norm([91m::Union{Base.ReshapedArray{T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64},1,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:Union{DenseArray, SubArray{T,N,P,I,true} where I<:Tuple{Union{Base.Slice, UnitRange},Vararg{Any,N} where N} where P where N where T}, DenseArray{T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64},1}, SubArray{T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64},1,A,I,L} where L} where I<:Tuple{Vararg{Union{Base.AbstractCartesianIndex, Int64, Range{Int64}},N} where N} where A<:Union{Base.ReshapedArray{T,N,A,MI} where MI<:Tuple{Vararg{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64},N} where N} where A<:Union{DenseArray, SubArray{T,N,P,I,true} where I<:Tuple{Union{Base.Slice, UnitRange},Vararg{Any,N} where N} where P where N where T} where N where T, DenseArray}[39m, [91m::Union{Range{TI<:Integer}, UnitRange{TI<:Integer}}[39m) where {T<:Union{Complex{Float32}, Complex{Float64}, Float32, Float64}, TI<:Integer} at linalg/dense.jl:87
  ...[39m

In [33]:
m = Chain(
  Dense(28^2, 128, relu),
  Dense(128, 32, relu),
  Dense(32, 10), softmax)

loss(x, y) = crossentropy(m(x), y) + sum(vecnorm, params(m))

loss(rand(28^2), rand(10))

LoadError: [91mUndefVarError: crossentropy not defined[39m