In [2]:
using Statistics
using Flux, Flux.Optimise
using Flux: onehotbatch, onecold, flatten
using Base.Iterators: partition
using CUDA
using Distributions

# 1. Load Data

In [70]:
typeof([data[i] for i in 1:100_000])

Vector{Int64} (alias for Array{Int64, 1})

In [3]:
data=rand(DiscreteUniform(15),100_000)
X = onehotbatch([data[i] for i in 1:100_000],0:15)
y = X

16×100000 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  …  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅
 1  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  1  ⋅  ⋅  ⋅  ⋅
 ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  …  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  1  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  1
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  1  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  …  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅

In [5]:
#train = ([(cat(imgs[i]..., dims = 4), labels[:,i]) for i in partition(1:49000, 1000)]) |> gpu
train = ([(X[:,i], y[:,i]) for i in partition(1:100_000, 10000)]) |> gpu

10-element Vector{Tuple{Flux.OneHotArray{UInt32, 16, 1, 2, Vector{UInt32}}, Flux.OneHotArray{UInt32, 16, 1, 2, Vector{UInt32}}}}:
 ([0 0 … 1 0; 1 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 1], [0 0 … 1 0; 1 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 1])
 ([0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0], [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0])
 ([0 1 … 1 0; 0 0 … 0 0; … ; 0 0 … 0 0; 1 0 … 0 0], [0 1 … 1 0; 0 0 … 0 0; … ; 0 0 … 0 0; 1 0 … 0 0])
 ([0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 1], [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 1])
 ([0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 1 0; 0 0 … 0 0], [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 1 0; 0 0 … 0 0])
 ([0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 1; 0 0 … 0 0], [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 1; 0 0 … 0 0])
 ([0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0], [0 0 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 0])
 ([0 0 … 0 0; 0 1 … 0 0; … ; 0 0 … 1 0; 0 0 … 0 0], [0 0 … 0 0; 0 1 … 0 0; … ; 0 0 … 1 0; 0 0 … 0 0])
 ([0 1 … 0 0; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 1], [0

In [6]:
length(valX)

LoadError: UndefVarError: valX not defined

In [7]:
valset = 90_001:100_000
valX = X[:,valset] |> gpu
valY = y[:,valset] |> gpu;

# 2. Define Model

In [33]:
encoder = Dense(16,4,sigmoid)
decoder = Chain(Dense(4,16),softmax)
m = Chain(encoder,decoder)|> gpu

Chain(
  Dense(16, 4, σ),                      [90m# 68 parameters[39m
  Chain(
    Dense(4, 16),                       [90m# 80 parameters[39m
    NNlib.softmax,
  ),
)[90m                   # Total: 4 arrays, [39m148 parameters, 880 bytes.

# 3. Define Loss Function

In [34]:
using Flux: Momentum
loss(x, y) = Flux.Losses.crossentropy(m(x), y)
opt = ADAM(0.1)

AMSGrad(0.1, (0.9, 0.999), IdDict{Any, Any}())

In [35]:
accuracy(x, y) = mean(onecold(m(x), 1:16) .== onecold(y, 1:16))

accuracy (generic function with 1 method)

# 4. Train Network

In [36]:
epochs = 8

for epoch = 1:epochs
  for d in train
    gs = gradient(Flux.params(m)) do
      l = loss(d...)
    end
    update!(opt, Flux.params(m), gs)
  end
  @show accuracy(valX, valY)
end

accuracy(valX, valY) = 0.9395
accuracy(valX, valY) = 0.9351
accuracy(valX, valY) = 0.9351
accuracy(valX, valY) = 0.9351
accuracy(valX, valY) = 0.9395
accuracy(valX, valY) = 0.9395
accuracy(valX, valY) = 1.0
accuracy(valX, valY) = 1.0


# 5. Test Network

In [22]:
data[1:5]

5-element Vector{Int64}:
  1
  2
  4
  1
 11

In [23]:
round.(m[1](valX[:,1:10]))

4×10 Matrix{Float32}:
 1.0  1.0  1.0  1.0  1.0  1.0  0.0  0.0  0.0  0.0
 0.0  0.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0
 1.0  1.0  1.0  0.0  0.0  1.0  1.0  0.0  0.0  1.0
 1.0  0.0  1.0  0.0  0.0  1.0  0.0  1.0  1.0  0.0

In [24]:
valX[:,1:10]

16×10 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  1
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  1  ⋅
 ⋅  ⋅  ⋅  1  1  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  1  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅