In [None]:
%install '.package(path: "$cwd/FastaiNotebook_08a_heterogeneous_dictionary")' FastaiNotebook_08a_heterogeneous_dictionary

Installing packages:
	.package(path: "/usr/local/google/home/jekbradbury/fastai_docs/dev_swift/FastaiNotebook_08a_heterogeneous_dictionary")
		FastaiNotebook_08a_heterogeneous_dictionary
With SwiftPM flags: []
Working in: /tmp/tmpy8z2fmjk
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 5.60s
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'FastaiNotebook_08a_heterogeneous_dictionary' (13 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!


## Load data

In [None]:
import FastaiNotebook_08a_heterogeneous_dictionary
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


In [None]:
// export
import Path
import TensorFlow

In [None]:
// Download the dataset and keep the returned location in `path`; the cell output
// shows the imagenette-160 archive being fetched.
let path = downloadImagette()

Downloading https://s3.amazonaws.com/fast-ai-imageclas/imagenette-160.tgz...


In [None]:
// Build the image data pipeline from the downloaded folder.
// Collect the items found under `path`, restricted to the listed file extensions.
let il = ItemList(fromFolder: path, extensions: ["jpeg", "jpg"])
// Split the items with `grandParentSplitter`, passing "val" as the validation marker.
// NOTE(review): presumably the grandparent folder name selects the validation set — confirm.
let sd = SplitData(il, fromFunc: {grandParentSplitter(fName: $0, valid: "val")})
// The processors are declared `var` because they are passed `inout` on the next line.
var (procItem,procLabel) = (NoopProcessor<Path>(),CategoryProcessor())
// Label the split data with `parentLabeler`, handing over both processors.
let sld = SplitLabeledData(sd, fromFunc: parentLabeler, procItem: &procItem, procLabel: &procLabel)
// Turn the labeled data into a data bunch using the given item/label tensor converters.
var rawData = sld.toDataBunch(itemToTensor: pathsToTensor, labelToTensor: intsToTensor)
// Apply an item transform that opens each file via `openAndResize` with size 128.
let data = transformData(rawData, tfmItem: { openAndResize(fname: $0, size: 128) })

In [None]:
// Redeclares `data` with the MNIST data bunch; the learners in this notebook are built
// from this value (they also use `mnistStats`), not from the Imagenette pipeline.
// NOTE(review): `flat: true` presumably yields flattened 784-element images — confirm.
let data = mnistDataBunch(flat: true)

In [None]:
// Problem dimensions. `m`, `nHid` and `c` are the input, hidden and output sizes
// handed to `BasicModel` by `modelInit`.
// NOTE(review): `n` looks like the MNIST training-set size — confirm; it is not used in this notebook section.
let n = 60000
let m = 784
let c = 10
let nHid = 50

In [None]:
/// Creates a fresh `BasicModel` with `m` inputs, `nHid` hidden units and `c` outputs.
func modelInit() -> BasicModel {
    let model = BasicModel(nIn: m, nHid: nHid, nOut: c)
    return model
}

## Stateful optimizer

In [None]:
//export
/// Base class for delegates that maintain per-parameter optimizer statistics.
///
/// `StatefulOptimizer` keeps one state entry keyed by `name` for every stat delegate and
/// calls `update(state:for:along:config:)` once per parameter tensor, before any step
/// delegate runs. Every member has a do-nothing default so subclasses override only what
/// they need.
open class StatDelegate<Scalar: TensorFlowFloatingPoint> {
    /// Creates a delegate. Declared `public` so the class can be initialized and
    /// subclassed from other modules (the implicit initializer would be internal).
    public init() {}

    /// Key under which this delegate's statistic is stored in the optimizer state.
    open var name: String { return "" }

    /// Hyper-parameter defaults merged into every optimizer config; empty by default.
    open var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }

    /// Updates the statistics for one parameter tensor.
    ///
    /// - Parameters:
    ///   - state: Statistics for this parameter, keyed by delegate name; mutated in place.
    ///   - param: Current value of the parameter tensor.
    ///   - direction: Gradient for the parameter tensor.
    ///   - config: Hyper-parameters of the parameter's group; may be read and written.
    open func update(
        state: inout [String: Tensor<Scalar>],
        for param: Tensor<Scalar>,
        along direction: Tensor<Scalar>,
        config: inout HeterogeneousDictionary
    ) { }
}

//export
/// Base class for delegates that apply one piece of the parameter update.
///
/// `StatefulOptimizer` calls `update(param:along:state:config:)` on every step delegate,
/// in order, for each parameter tensor after the stat delegates have run. Every member
/// has a do-nothing default so subclasses override only what they need.
open class StepDelegate<Scalar: TensorFlowFloatingPoint> {
    /// Creates a delegate. Declared `public` so the class can be initialized and
    /// subclassed from other modules (the implicit initializer would be internal).
    public init() {}

    /// Hyper-parameter defaults merged into every optimizer config; empty by default.
    open var defaultConfig: HeterogeneousDictionary { return HeterogeneousDictionary() }

    /// Applies this delegate's step to one parameter tensor.
    ///
    /// - Parameters:
    ///   - param: Parameter tensor; mutated in place.
    ///   - direction: Gradient for the parameter; delegates may rewrite it for later delegates.
    ///   - state: Statistics for this parameter, keyed by stat-delegate name.
    ///   - config: Hyper-parameters of the parameter's group; may be read and written.
    open func update(
        param: inout Tensor<Scalar>,
        along direction: inout Tensor<Scalar>,
        state: [String: Tensor<Scalar>],
        config: inout HeterogeneousDictionary
    ) { }
}

In [10]:
//export
/// Optimizer assembled from stat delegates (which maintain per-parameter statistics)
/// and step delegates (which apply the actual parameter update).
///
/// Parameter tensors are assigned to groups by `splits`; each group has its own
/// hyper-parameter dictionary in `configs`.
class StatefulOptimizer<Model: Layer,
                        Scalar: TensorFlowFloatingPoint>: Optimizer
    where Model.AllDifferentiableVariables == Model.CotangentVector {
    /// One hyper-parameter dictionary per parameter group.
    var configs: [HeterogeneousDictionary]

    /// Reading returns the last group's learning rate (traps when `configs` is empty);
    /// writing stores the new value in every group.
    var learningRate: Float {
        get { return configs.last![LearningRate()] }
        set {
            for group in configs.indices {
                configs[group][LearningRate()] = newValue
            }
        }
    }

    /// Learning rate of every group, in group order. The setter indexes the new
    /// array by group index, so it needs at least `configs.count` elements.
    var learningRates: [Float] {
        get { return configs.map { $0[LearningRate()] } }
        set {
            for group in configs.indices {
                configs[group][LearningRate()] = newValue[group]
            }
        }
    }

    /// Maps the index of a parameter tensor to the index of its group in `configs`.
    var splits: (Int) -> Int
    /// One model-shaped set of statistics per stat delegate, keyed by delegate name.
    var states: [String: Model.AllDifferentiableVariables]
    var statDelegates: [StatDelegate<Scalar>]
    var stepDelegates: [StepDelegate<Scalar>]

    /// Builds the optimizer.
    ///
    /// Each group's config is assembled by merging, in this order, the defaults of
    /// the step delegates, the defaults of the stat delegates, and finally the
    /// caller's `configs` entry; on a key clash the value merged later wins.
    init(
        stepDelegates: [StepDelegate<Scalar>],
        statDelegates: [StatDelegate<Scalar>],
        configs: [HeterogeneousDictionary],
        splits: @escaping (Int) -> Int
    ) {
        var mergedConfigs = Array(repeating: HeterogeneousDictionary(), count: configs.count)
        var initialStates: [String: Model.AllDifferentiableVariables] = [:]

        for delegate in stepDelegates {
            for group in mergedConfigs.indices {
                mergedConfigs[group].merge(delegate.defaultConfig) { (_, new) in new }
            }
        }
        for delegate in statDelegates {
            for group in mergedConfigs.indices {
                mergedConfigs[group].merge(delegate.defaultConfig) { (_, new) in new }
            }
            // Every statistic starts from zero for the whole model.
            initialStates[delegate.name] = Model.AllDifferentiableVariables.zero
        }
        for (group, overrides) in configs.enumerated() {
            mergedConfigs[group].merge(overrides) { (_, new) in new }
        }

        self.configs = mergedConfigs
        self.states = initialStates
        self.stepDelegates = stepDelegates
        self.statDelegates = statDelegates
        self.splits = splits
    }

    /// Updates every `Tensor<Scalar>` in `model` using the gradients in `direction`.
    ///
    /// For each tensor: run all stat delegates, write the refreshed statistics back
    /// into `states`, then run all step delegates on the parameter.
    func update(
        _ model: inout Model.AllDifferentiableVariables,
        along direction: Model.CotangentVector
    ) {
        let tensorPaths = model.recursivelyAllWritableKeyPaths(to: Tensor<Scalar>.self)
        for (index, path) in tensorPaths.enumerated() {
            var gradient = direction[keyPath: path]
            // Per-tensor view of every statistic.
            var tensorState = states.mapValues { $0[keyPath: path] }
            // Local copy: delegate changes to the config are visible to the later
            // delegates for this tensor but are not stored back into `configs`.
            var groupConfig = configs[splits(index)]

            for delegate in statDelegates {
                delegate.update(
                    state: &tensorState,
                    for: model[keyPath: path],
                    along: gradient,
                    config: &groupConfig
                )
            }
            for name in states.keys {
                states[name]![keyPath: path] = tensorState[name]!
            }
            for delegate in stepDelegates {
                delegate.update(
                    param: &model[keyPath: path],
                    along: &gradient,
                    state: tensorState,
                    config: &groupConfig
                )
            }
        }
    }
}

In [11]:
//export
/// Step delegate for plain gradient descent: subtracts the gradient scaled by the
/// group's learning rate from the parameter.
class SGDStep: StepDelegate<Float> {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let learningRate: Float = config[LearningRate()]
        let scaledGradient = direction * learningRate
        param -= scaledGradient
    }
}

In [12]:
//export
/// Config key for the weight-decay coefficient; declares a default value of 0.0.
public struct WeightDecayKey: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.0
}

/// Step delegate that shrinks the parameter by the factor
/// `1 - learningRate * weightDecay`. The gradient is not touched.
class WeightDecay: StepDelegate<Float> {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let learningRate: Float = config[LearningRate()]
        let weightDecay: Float = config[WeightDecayKey()]
        let shrinkFactor: Float = 1 - learningRate * weightDecay
        param *= shrinkFactor
    }
}

In [13]:
//export

/// Step delegate that adds `weightDecay * param` to the gradient, so that step
/// delegates running after it see the regularized gradient. The parameter itself
/// is left unchanged.
class L2Regularization: StepDelegate<Float> {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let weightDecay: Float = config[WeightDecayKey()]
        let penalty = weightDecay * param
        direction += penalty
    }
}

In [14]:
//export

/// Config key for the momentum coefficient; declares a default value of 0.9.
public struct Momentum: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.9
}

/// Config key for the weight given to each new gradient in the running average.
/// `AverageGrad` overwrites this entry on every update.
public struct MomentumDampening: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.9
}

/// Stat delegate that keeps a running average of the gradients under the
/// "averageGrad" state key.
class AverageGrad: StatDelegate<Float> {
    /// When true, new gradients are weighted by `1 - momentum`; otherwise by 1.
    let dampened: Bool

    init(dampened: Bool = false) {
        self.dampened = dampened
    }

    override var name: String { return "averageGrad" }

    /// Decays the stored average by the momentum, records the dampening factor in
    /// `config`, and adds the dampened gradient.
    override func update(
        state: inout [String: Tensor<Float>],
        for param: Tensor<Float>,
        along direction: Tensor<Float>,
        config: inout HeterogeneousDictionary
    ) {
        let momentum: Float = config[Momentum()]
        let decayedAverage = state["averageGrad"]! * momentum
        let dampening: Float = dampened ? 1.0 - momentum : 1.0
        // Stored in the config so step delegates can debias with the same factor.
        config[MomentumDampening()] = dampening
        state["averageGrad"] = decayedAverage + dampening * direction
    }
}

In [15]:
/// Maps a parameter-tensor index to a group index: indices below 2 belong to
/// group 0, every other index to group 1.
func split_func(_ a: Int) -> Int {
    guard a >= 2 else { return 0 }
    return 1
}

In [22]:
// Per-group hyper-parameters: group 0 gets learning rate 0.0 and group 1 gets 0.01.
var configs = [
    HeterogeneousDictionary(LearningRate(), 0.0),
    HeterogeneousDictionary(LearningRate(), 0.01),
]

/// Optimizer factory for plain SGD: a single `SGDStep`, no statistics.
/// The `model` argument is not used.
func opt(_ model: BasicModel) -> StatefulOptimizer<BasicModel, Float> {
    let optimizer = StatefulOptimizer<BasicModel, Float>(
        stepDelegates: [SGDStep()],
        statDelegates: [],
        configs: configs,
        splits: split_func
    )
    return optimizer
}

In [23]:
// Build a learner on `data` with softmax cross-entropy loss; `opt` and `modelInit`
// are passed as the optimizer and model factories.
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
// Create the default delegates with accuracy as the metric and keep the returned recorder.
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
// Append a delegate built from the MNIST mean/std statistics.
// NOTE(review): presumably this normalizes the inputs — confirm in `makeNormalize`.
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))

In [24]:
// Capture the model's differentiable variables before training so they can be
// compared with the values printed after `fit`.
let params = learner.model.allDifferentiableVariables

In [25]:
// Print element 0 of every `TF` tensor reachable from `params`.
// NOTE(review): `TF` is presumably an alias for `Tensor<Float>` — confirm.
for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) { 
    print(params[keyPath: kp][0]) 
}

[    -0.046549, -0.0056212405,    0.04649705,   0.041535746,   -0.03871442,   -0.08179674,
   -0.03968917,  -0.013780484,   0.051365733,   0.042979345,   0.014209886,   -0.07570091,
   0.020541439,   -0.03300613,    0.04694891,   0.010562088,    0.03216306, -0.0078320345,
     0.0676105,   -0.02969341,   0.029745907,    0.06272062,   -0.04326095,   0.008575076,
   -0.07724032,  -0.039810453,   0.057117227,   0.030263104,   0.031303518,  -0.008565996,
    0.06568537,      0.059534,  0.0016873064,  -0.034330063,   0.013239545,   0.055974327,
  -0.021404885,   0.041099936,   0.038637787,  -0.016372196,  -0.014275174,   0.012478242,
    0.02541998,    0.07681102,    0.05248556,    0.07618754,  -0.045867424,   -0.06004807,
     0.0574056,   -0.03207307]
0.0
[ -0.1687389,  -0.2625489,  0.13653485,   0.2740094, -0.27290937,  0.17933364,  0.20196906,
  0.12957323, -0.28662503,  0.12529063]
0.0


In [26]:
// Train for 2 epochs. `configs` gives group 0 a learning rate of 0.0, and `split_func`
// puts the first two parameter tensors in group 0, so the values printed for those
// tensors after training are identical to the ones printed before.
learner.fit(2)

Epoch 0: [1.1812385, 0.6439]                                                    
Epoch 1: [0.98253614, 0.7015]                                                   
                                                                              

In [27]:
// Re-read the model's differentiable variables after training and print element 0
// of every `TF` tensor, for comparison with the pre-training printout.
let params = learner.model.allDifferentiableVariables
for kp in params.recursivelyAllWritableKeyPaths(to: TF.self) { 
    print(params[keyPath: kp][0]) 
}

[    -0.046549, -0.0056212405,    0.04649705,   0.041535746,   -0.03871442,   -0.08179674,
   -0.03968917,  -0.013780484,   0.051365733,   0.042979345,   0.014209886,   -0.07570091,
   0.020541439,   -0.03300613,    0.04694891,   0.010562088,    0.03216306, -0.0078320345,
     0.0676105,   -0.02969341,   0.029745907,    0.06272062,   -0.04326095,   0.008575076,
   -0.07724032,  -0.039810453,   0.057117227,   0.030263104,   0.031303518,  -0.008565996,
    0.06568537,      0.059534,  0.0016873064,  -0.034330063,   0.013239545,   0.055974327,
  -0.021404885,   0.041099936,   0.038637787,  -0.016372196,  -0.014275174,   0.012478242,
    0.02541998,    0.07681102,    0.05248556,    0.07618754,  -0.045867424,   -0.06004807,
     0.0574056,   -0.03207307]
0.0
[ -0.30932078,   0.18727356,   0.14484356, -0.049940858, -0.092392795,   0.12527509,
   0.23155186,    0.3126535,  -0.50158757,     0.007534]
-0.07913654


In [28]:
//export
/// Step delegate for SGD with momentum: subtracts the running gradient average
/// (state key "averageGrad"), scaled by the learning rate, from the parameter.
/// The raw gradient in `direction` is not used.
class MomentumStep: StepDelegate<Float> {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let learningRate: Float = config[LearningRate()]
        // Traps if no stat delegate has produced the "averageGrad" statistic.
        let averageGrad = state["averageGrad"]!
        param -= learningRate * averageGrad
    }
}

In [29]:
/// Optimizer factory for SGD with momentum: `AverageGrad` maintains the running
/// gradient average and `MomentumStep` applies it. The `model` argument is not used.
func opt(_ model: BasicModel) -> StatefulOptimizer<BasicModel, Float> {
    let optimizer = StatefulOptimizer<BasicModel, Float>(
        stepDelegates: [MomentumStep()],
        statDelegates: [AverageGrad()],
        configs: configs,
        splits: split_func
    )
    return optimizer
}

In [30]:
// Rebuild the learner so it picks up the momentum version of `opt` and a fresh model.
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
// Create the default delegates with accuracy as the metric and keep the returned recorder.
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
// Append a delegate built from the MNIST mean/std statistics.
// NOTE(review): presumably this normalizes the inputs — confirm in `makeNormalize`.
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))

In [31]:
// Train for 2 epochs with the momentum optimizer.
learner.fit(2)

Epoch 0: [0.8318388, 0.7328]                                                    
Epoch 1: [0.788731, 0.7465]                                                     
                                                                              

In [32]:
//export

/// Config key for the decay coefficient of the squared-gradient average; declares
/// a default value of 0.99.
public struct SquareMomentum: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.99
}

/// Config key for the weight given to each new squared gradient.
/// `AverageSquaredGrad` overwrites this entry on every update.
public struct SquareMomentumDampening: HetDictKey, Equatable {
    public static var defaultValue: Float = 0.99
}

/// Stat delegate that keeps a running average of the squared gradients under the
/// "averageSquaredGrad" state key.
class AverageSquaredGrad: StatDelegate<Float> {
    /// When true, new squared gradients are weighted by `1 - squareMomentum`; otherwise by 1.
    let dampened: Bool

    init(dampened: Bool = false) {
        self.dampened = dampened
    }

    override var name: String { return "averageSquaredGrad" }

    /// Decays the stored average by the square momentum, records the dampening
    /// factor in `config`, and adds the dampened squared gradient.
    override func update(
        state: inout [String: Tensor<Float>],
        for param: Tensor<Float>,
        along direction: Tensor<Float>,
        config: inout HeterogeneousDictionary
    ) {
        let squareMomentum: Float = config[SquareMomentum()]
        let decayedAverage = state["averageSquaredGrad"]! * squareMomentum
        let dampening: Float = dampened ? 1.0 - squareMomentum : 1.0
        // Stored in the config so step delegates can debias with the same factor.
        config[SquareMomentumDampening()] = dampening
        state["averageSquaredGrad"] = decayedAverage + dampening * direction.squared()
    }
}

In [33]:
//export
/// Stat delegate that counts updates: adds 1 to the "step" statistic of a
/// parameter each time it is called for that parameter.
class StepCount: StatDelegate<Float> {
    override var name: String { return "step" }

    override func update(
        state: inout [String: Tensor<Float>],
        for param: Tensor<Float>,
        along direction: Tensor<Float>,
        config: inout HeterogeneousDictionary
    ) {
        let previousCount = state["step"]!
        state["step"] = previousCount + 1.0
    }
}

In [34]:
//export
/// Computes `dampening * (1 - momentum^step) / (1 - momentum)`, the factor by which
/// a running average built with the given momentum and dampening is divided to
/// remove its bias after `step` updates.
///
/// - Parameters:
///   - momentum: Decay coefficient of the running average. A value of 1 makes the
///     denominator zero.
///   - dampening: Weight that was applied to each new value.
///   - step: Number of updates performed so far.
/// - Returns: The debiasing factor as a tensor.
func debias<Scalar: TensorFlowFloatingPoint>(
    momentum: Scalar,
    dampening: Scalar,
    step: Tensor<Scalar>
) -> Tensor<Scalar> {
    let decay = pow(momentum, step)
    let numerator: Tensor<Scalar> = dampening * (1 - decay)
    let denominator: Scalar = 1 - momentum
    return numerator / denominator
}

In [35]:
//export
/// Config key for the small constant added to denominators; declares a default
/// value of 1e-5.
public struct Epsilon: HetDictKey, Equatable {
    public static var defaultValue: Float = 1e-5
}

/// Step delegate for Adam. Reads the "averageGrad", "averageSquaredGrad" and "step"
/// statistics (and traps if any is missing), debiases both averages, and moves the
/// parameter by `learningRate * averageGrad / (sqrt(averageSquaredGrad) + epsilon)`
/// in debiased form.
class AdamStep: StepDelegate<Float> {
    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let gradDebias = debias(
            momentum: config[Momentum()],
            dampening: config[MomentumDampening()],
            step: state["step"]!
        )
        // Dividing the learning rate by the debias factor is equivalent to
        // debiasing the gradient average itself.
        let debiasedLearningRate = config[LearningRate()] / gradDebias

        let squaredGradDebias = debias(
            momentum: config[SquareMomentum()],
            dampening: config[SquareMomentumDampening()],
            step: state["step"]!
        )
        let debiasedRMSGrad = sqrt(state["averageSquaredGrad"]! / squaredGradDebias) + config[Epsilon()]

        param -= debiasedLearningRate * state["averageGrad"]! / debiasedRMSGrad
    }
}

In [36]:
/// Optimizer factory for Adam: gradient average, squared-gradient average and step
/// count feed a single `AdamStep`. The `model` argument is not used.
func opt(_ model: BasicModel) -> StatefulOptimizer<BasicModel, Float> {
    let optimizer = StatefulOptimizer<BasicModel, Float>(
        stepDelegates: [AdamStep()],
        statDelegates: [AverageGrad(), AverageSquaredGrad(), StepCount()],
        configs: configs,
        splits: split_func
    )
    return optimizer
}

In [37]:
// Rebuild the learner so it picks up the Adam version of `opt` and a fresh model.
let learner = Learner(data: data, lossFunction: softmaxCrossEntropy, optimizer: opt, initializingWith: modelInit)
// Create the default delegates with accuracy as the metric and keep the returned recorder.
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
// Append a delegate built from the MNIST mean/std statistics.
// NOTE(review): presumably this normalizes the inputs — confirm in `makeNormalize`.
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))

In [38]:
// Train for 2 epochs with the Adam optimizer.
learner.fit(2)

Epoch 0: [0.6520929, 0.7885]                                                    
Epoch 1: [0.6512625, 0.7906]                                                    
                                                                              

In [39]:
/// Step delegate for a LAMB-style update: an Adam-like step with weight decay,
/// rescaled per tensor by the ratio of the parameter's RMS to the step's RMS.
///
/// Reads the "averageGrad", "averageSquaredGrad" and "step" statistics and traps
/// if any of them is missing from `state`.
class LambStep: StepDelegate<Float> {
    /// Defaults contributed to every optimizer config: epsilon 1e-6, weight decay 0.0.
    override var defaultConfig: HeterogeneousDictionary {
        return HeterogeneousDictionary(Epsilon(), 1e-6, WeightDecayKey(), 0.0)
    }

    override func update(
        param: inout Tensor<Float>,
        along direction: inout Tensor<Float>,
        state: [String: Tensor<Float>],
        config: inout HeterogeneousDictionary
    ) {
        let debiasedAverageGrad = state["averageGrad"]! / debias(
            momentum: config[Momentum()],
            dampening: config[MomentumDampening()],
            step: state["step"]!
        )
        // Epsilon is added after the square root, matching `AdamStep` and the LAMB
        // formulation m / (sqrt(v) + eps). Adding it under the root would turn the
        // 1e-6 default into an effective floor of 1e-3.
        let debiasedRMSGrad = sqrt(state["averageSquaredGrad"]! / debias(
            momentum: config[SquareMomentum()],
            dampening: config[SquareMomentumDampening()],
            step: state["step"]!
        )) + config[Epsilon()]
        // Adam-like direction plus the weight-decay term.
        let step = debiasedAverageGrad / debiasedRMSGrad + config[WeightDecayKey()] * param
        // Root-mean-square of the parameter and of the proposed step.
        let r1 = sqrt((param * param).mean())
        let r2 = sqrt((step * step).mean())
        // Trust ratio, capped at 10.
        let factor = min(r1 / r2, Float(10.0))
        param -= config[LearningRate()] * factor * step
    }
}

## Export

In [40]:
// Run `notebookToScript` on this notebook's file in the current working directory.
// NOTE(review): presumably this writes the cells marked `//export` out as a script — confirm.
notebookToScript(fname: (Path.cwd / "09_optimizer.ipynb").string)