# Annealing

In [None]:
%install '.package(path: "$cwd/FastaiNotebooks")' FastaiNotebooks

Installing packages:
	.package(path: "/home/ubuntu/fastai_docs/dev_swift/FastaiNotebooks")
		FastaiNotebooks
With SwiftPM flags: []
Working in: /tmp/tmpvqpiqxhz
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 1.19s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'FastaiNotebooks' (6 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!


## Load data

In [None]:
import FastaiNotebooks

In [None]:
// export
import Path
import TensorFlow

In [None]:
let data = mnistDataBunch(flat: true)

In [None]:
// Problem dimensions shared by the cells below.
// `n` is not referenced anywhere else in the visible notebook — presumably the number of
// training examples; confirm before relying on it.
let n = 60000
// Passed as `nIn` when the model is built.
let m = 784
// Passed as `nOut` when the model is built.
let c = 10
// Passed as `nHid` when the model is built.
let nHid = 50

In [None]:
let opt = SGD<BasicModel, Float>(learningRate: 1e-2)

In [None]:
/// Builds a new `BasicModel`, passing the notebook globals `m`, `nHid` and `c`
/// as `nIn`, `nHid` and `nOut` respectively.
func modelInit() -> BasicModel {
    let freshModel = BasicModel(nIn: m, nHid: nHid, nOut: c)
    return freshModel
}

In [None]:
/// Computes the softmax cross-entropy loss of `model` on one batch, together with the
/// model's predictions and the gradient of the loss with respect to the model.
///
/// - Parameters:
///   - model: The model to apply and differentiate.
///   - context: Forwarded unchanged to `applied(to:in:)`.
///   - inputs: The batch fed to the model.
///   - labels: The targets handed to `softmaxCrossEntropy(logits:labels:)`.
/// - Returns: `(loss, predictions, gradient)`.
func lossOutputWithGrad(
    model: BasicModel,
    in context: Context,
    inputs: Tensor<Float>,
    labels: Tensor<Int32>
) -> (Tensor<Float>, BasicModel.Output, BasicModel.CotangentVector) {
    // The differentiated closure may only return the loss, so the predictions are
    // smuggled out through this captured variable.
    var capturedPredictions: BasicModel.Output? = nil
    let (lossValue, gradient) = model.valueWithGradient { net -> Tensor<Float> in
        let logits = net.applied(to: inputs, in: context)
        capturedPredictions = logits
        return softmaxCrossEntropy(logits: logits, labels: labels)
    }
    // The closure above assigns `capturedPredictions` every time it runs.
    return (lossValue, capturedPredictions!, gradient)
}

In [None]:
let learner = Learner(data: data, lossOutputWithGradient: lossOutputWithGrad, optimizer: opt, initializingWith: modelInit)

In [None]:
learner.delegates = [Learner.TrainEvalDelegate(), Learner.AvgMetric(metrics: [accuracy])]

In [None]:
learner.fit(2)

Epoch 0: [0.47702354, 0.8808]
Epoch 1: [0.35481605, 0.9056]


## Annealing

We define two new callbacks: the Recorder, which keeps track of the loss and our scheduled learning rate, and a ParamScheduler, which can schedule any hyperparameter as long as it's registered in the state_dict of the optimizer.

In [None]:
extension Learner {
    /// Delegate that records the loss and the optimizer's learning rate after every
    /// batch that finishes while the learner is in training mode.
    public class Recorder: Delegate {
        /// `currentLoss` of each recorded batch, in the order the batches finished.
        public var losses: [Loss] = []
        /// The optimizer's `learningRate` at the moment each loss was recorded.
        public var lrs: [O.Scalar] = []

        /// Appends one entry to `losses` and `lrs`; does nothing when `learner.inTrain` is false.
        public override func batchDidFinish(learner: Learner) throws {
            guard learner.inTrain else { return }
            lrs.append(learner.optimizer.learningRate)
            losses.append(learner.currentLoss)
        }
    }
}

In [None]:
let learner = Learner(data: data, lossOutputWithGradient: lossOutputWithGrad, optimizer: opt, initializingWith: modelInit)

In [None]:
learner.delegates = [Learner.TrainEvalDelegate(), Learner.AvgMetric(metrics: [accuracy]), Learner.Recorder()]

In [None]:
learner.fit(2)

Epoch 0: [0.4500633, 0.8821]
Epoch 1: [0.34907588, 0.9051]


In [None]:
(learner.delegates[2] as! Learner.Recorder).losses.count

1876


In [None]:
extension Learner{
    /// The first `Recorder` among this learner's delegates, or `nil` when none is installed.
    ///
    /// Looks at the receiver's own `delegates`, so it gives the right answer for any
    /// `Learner` instance rather than only for a notebook-level `learner` variable.
    public var recorder: Learner.Recorder? {
        for delegate in delegates {
            // A conditional cast both tests and converts, so no forced cast is needed.
            if let found = delegate as? Learner.Recorder {
                return found
            }
        }
        return nil
    }
}

In [None]:
learner.recorder!.losses.count

1876


### Progress bar

In [None]:
import Glibc
import Foundation

In [None]:
/// Formats a duration in seconds as `MM:SS`, or as `HH:MM:SS` once the hour part is non-zero.
///
/// - Parameter t: Duration in seconds; the fractional part is truncated.
/// - Returns: The zero-padded clock string.
func formatTime(_ t: Float) -> String {
    let totalSeconds = Int(t)
    let hours = totalSeconds / 3600
    let minutes = (totalSeconds / 60) % 60
    let seconds = totalSeconds % 60
    if hours != 0 {
        return String(format: "%02d:%02d:%02d", hours, minutes, seconds)
    }
    return String(format: "%02d:%02d", minutes, seconds)
}

In [None]:
formatTime(78.23)

"01:18"


In [None]:
/// A text progress bar that redraws itself in place on one console line.
///
/// Call `update(0)` to start the clock, then `update(_:)` with the running count; the bar
/// is only redrawn when enough steps have passed (see `showEvery`) or when the count
/// reaches `total`. Call `remove()` to blank the line afterwards.
public struct ProgressBar{
    /// Number of steps that corresponds to a full bar.
    let total: Int
    /// Width of the bar, in characters.
    let length: Int = 50
    /// Target interval between two redraws, in seconds.
    let showEvery: Float = 0.02
    /// Character used for the completed part of the bar.
    let fillChar: Character = "X"
    /// Free text shown inside the brackets, after the timings.
    public var comment: String = ""
    /// Step count at the last redraw.
    private var lastVal: Int = 0
    /// Minimum number of steps to let pass before the next redraw.
    private var waitFor: Int = 0
    /// Uptime (ns) captured by `update(0)`.
    private var startTime: UInt64 = 0
    /// Uptime (ns) captured at the last redraw.
    private var lastShow: UInt64 = 0
    /// Estimated duration of the whole run, in seconds.
    private var estimatedTotal: Float = 0.0
    /// The last rendered line; kept so `remove()` knows how many characters to blank.
    private var bar: String = ""
    
    /// Creates a bar for `c` steps.
    public init(_ c: Int) { total = c }
    
    /// Reports that `val` steps are done and redraws the bar if it is due.
    ///
    /// - Parameter val: Steps completed so far; `0` (re)starts the timer.
    public mutating func update(_ val: Int){
        if val == 0 {
            startTime = DispatchTime.now().uptimeNanoseconds
            lastShow = startTime
            waitFor = 1
            update_bar(0)
        } else if val >= lastVal + waitFor || val == total {
            lastShow = DispatchTime.now().uptimeNanoseconds
            // Average seconds per step since the timer was started.
            let averageTime = Float(lastShow - startTime) / (1e9 * Float(val))
            // Steps that fit in one `showEvery` interval: the faster the steps, the more
            // of them are skipped between redraws. The epsilon avoids dividing by zero.
            waitFor = max(Int(showEvery / (max(averageTime, 0) + 1e-8)), 1)
            estimatedTotal = Float(total) * averageTime
            update_bar(val)
        }
    }
    
    /// Renders the bar for `val` completed steps and prints it over the current line.
    ///
    /// - Parameter val: Steps completed so far.
    public mutating func update_bar(_ val: Int){
        lastVal = val
        // Clamp the filled width to 0...length so out-of-range counts cannot produce a
        // negative repeat count; a non-positive `total` is drawn as a full bar instead
        // of dividing by zero.
        let filled = total > 0 ? min(max((val * length) / total, 0), length) : length
        bar = String(repeating: fillChar, count: filled)
        bar += String(repeating: "-", count: length - filled)
        let percent: Float = total > 0 ? 100.0 * Float(val) / Float(total) : 100.0
        let pct = String(format: "%.2f", percent)
        let elapsedTime = Float(lastShow - startTime) / 1e9
        bar += " \(pct)% [\(val)/\(total) \(formatTime(elapsedTime))<\(formatTime(estimatedTotal))"
        bar += comment.isEmpty ? "]" : " \(comment)]"
        // A carriage return (not a newline) leaves the cursor on the same line so the
        // next redraw overwrites this one.
        print(bar, terminator:"\r")
        fflush(stdout)
    }
    
    /// Overwrites the last rendered line with spaces.
    public func remove(){
        print(String(repeating: " ", count: bar.count), terminator:"\r")
        fflush(stdout)
    }
}

In [None]:
// Demo: drive a 100-step bar from 0 through 100, sleeping 50 ms per step, then erase it.
var tst = ProgressBar(100)
for tick in stride(from: 0, through: 100, by: 1) {
    tst.update(tick)
    usleep(50000)
}
tst.remove()

                                                                                

In [None]:
extension Learner {
    /// Delegate that shows a console `ProgressBar` which advances by one after each batch.
    ///
    /// A new bar is started at the beginning of every epoch and again when validation
    /// starts; the bar is erased when the epoch finishes.
    public class ShowProgress: Delegate {
        /// The bar currently on screen, if any.
        var pbar: ProgressBar? = nil
        /// Batches finished since the current bar was started.
        var iter: Int = 0
        
        /// Starts a bar sized by the number of elements in `learner.data.train`.
        public override func epochWillStart(learner: Learner) throws{
            // NOTE(review): counts by walking the whole sequence; presumably one element
            // per batch — confirm against the data bunch.
            startBar(total: learner.data.train.count(where: {_ in true}))
        }
        
        /// Erases the training bar and starts one sized by `learner.data.valid`.
        public override func validationWillStart(learner: Learner) throws{
            pbar?.remove()
            startBar(total: learner.data.valid.count(where: {_ in true}))
        }
        
        /// Erases whichever bar is on screen.
        public override func epochDidFinish(learner: Learner) throws{
            pbar?.remove()
        }
        
        /// Advances the bar by one batch.
        public override func batchDidFinish(learner: Learner) throws{
            iter += 1
            // Optional chaining: a batch event that arrives before any bar exists is
            // ignored instead of trapping on a forced unwrap.
            pbar?.update(iter)
        }
        
        /// Resets the batch counter and draws a fresh, empty bar for `total` steps.
        private func startBar(total: Int) {
            iter = 0
            pbar = ProgressBar(total)
            pbar?.update(iter)
        }
    }
}

In [None]:
let learner = Learner(data: data, lossOutputWithGradient: lossOutputWithGrad, optimizer: opt, initializingWith: modelInit)

In [None]:
learner.delegates = [Learner.TrainEvalDelegate(), Learner.ShowProgress(), 
                     Learner.AvgMetric(metrics: [accuracy]), Learner.Recorder()]

In [None]:
learner.fit(2)

Epoch 0: [0.44530797, 0.8864]                                                   
Epoch 1: [0.3450528, 0.9055]                                                    


## Add Callbacks

The code below adds callbacks and defines a new training loop.

In [None]:
/// Simple SGD optimizer with a modifiable learning rate.
///
/// Each call to `update(_:along:)` moves every `Tensor<Scalar>` parameter one step
/// against the supplied gradient, scaled by `learningRate`.
public class SettableSGD<Model: Layer>: Optimizer
    where Model.AllDifferentiableVariables == Model.CotangentVector {
    /// The learning rate. Assigning a negative value is a precondition failure.
    public var learningRate: Float {
        willSet(newLearningRate) {
            precondition(newLearningRate >= 0, "Learning rate must be non-negative")
        }
    }

    /// Creates the optimizer.
    ///
    /// - Parameter learningRate: Initial step size; must be non-negative.
    public init(learningRate: Float = 0.01) {
        // `willSet` does not run for the assignment inside `init`, hence the explicit check.
        precondition(learningRate >= 0, "Learning rate must be non-negative")
        self.learningRate = learningRate
    }

    /// Applies one gradient-descent step to `model`.
    ///
    /// - Parameters:
    ///   - model: The differentiable variables to update in place.
    ///   - direction: The gradient of the loss with respect to those variables.
    public func update(_ model: inout Model.AllDifferentiableVariables,
                       along direction: Model.CotangentVector) {
        for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Scalar>.self) {
            // Callers pass the raw gradient, so descending the loss means subtracting it;
            // adding it would climb the loss instead.
            model[keyPath: kp] -= learningRate * direction[keyPath: kp]
        }
    }
}


In [None]:
let foo = SettableSGD<Dense<Float>>()

In [None]:
foo.learningRate

In [None]:
foo.learningRate = 0.2

In [None]:
foo.learningRate

In [None]:
/// A non-generalized learning rate scheduler: before every batch it overwrites the
/// learning rate of a `SettableSGD` optimizer with the value given by a schedule function.
class LearningRateScheduler<M, O: SettableSGD<M>>: TrainingCallbacks<M, O, Float>
    where O.Model == M,
          M.Input == Tensor<Float>, M.Output == Tensor<Float> {

    /// A learning rate schedule mapping the batch counter to a learning rate.
    /// The counter is incremented before the lookup, so the first value passed is 1.
    typealias ScheduleFunc = (Int) -> Float

    private let scheduler: ScheduleFunc
    private var optimizer: O?
    private var step = 0

    /// - Parameter scheduler: Called once per batch with the current step.
    init(scheduler: @escaping ScheduleFunc) {
        self.scheduler = scheduler
    }

    /// Remembers the optimizer so that `beforeBatch()` can adjust it later.
    override func beforeTrain(model: inout M, optimizer: inout O) -> CallbackResult {
        // `O` is constrained to a class type, so this stores a reference to the
        // caller's optimizer rather than a copy.
        self.optimizer = optimizer
        return .proceed
    }

    /// Advances the step counter and installs the scheduled learning rate.
    override func beforeBatch() -> CallbackResult {
        step += 1
        let scheduledRate = scheduler(step)
        // Traps if `beforeTrain` has not stored an optimizer yet.
        optimizer!.learningRate = scheduledRate
        return .proceed
    }

}

In [None]:
/// Composite callback that forwards each event to a list of child callbacks, in order.
///
/// Forwarding stops at the first child that answers `.stop` or `.skip`, and that answer
/// is returned; when every child answers `.proceed`, so does the composite.
class SequentialCallbacks< M, O: Optimizer, S>: TrainingCallbacks<M, O, S>
    where O.Model == M, O.Scalar == S,
          M.Input == Tensor<S>, M.Output == Tensor<S> {

    private let callbacks: [TrainingCallbacks<M, O, S>]

    /// - Parameter callbacks: The children, in the order they should be invoked.
    init(_ callbacks: [TrainingCallbacks<M, O, S>]) {
        self.callbacks = callbacks
    }
    /// Variadic spelling of `init(_:)`.
    convenience init(_ callbacks: TrainingCallbacks<M, O, S>...) {
        self.init(callbacks)
    }

    /// Sends one event to each child in turn and returns the first non-`.proceed` answer.
    private func forward(
        _ event: (TrainingCallbacks<M, O, S>) -> CallbackResult
    ) -> CallbackResult {
        for child in callbacks {
            let answer = event(child)
            switch answer {
                case .proceed: continue
                case .stop, .skip: return answer
            }
        }
        return .proceed
    }

    override func beforeTrain(model: inout M, optimizer: inout O) -> CallbackResult {
        return forward { $0.beforeTrain(model: &model, optimizer: &optimizer) }
    }

    // TODO: Figure out what to pass here!
    override func beforeBatch() -> CallbackResult {
        return forward { $0.beforeBatch() }
    }

    override func afterBatch(loss: inout Tensor<S>) -> CallbackResult {
        return forward { $0.afterBatch(loss: &loss) }
    }
}

In [None]:
/// A training loop, now improved with callbacks!
///
/// Makes a single pass over `dataset`. For every batch it calls `beforeBatch()`, computes
/// the loss and its gradient, calls `afterBatch(loss:)`, prints the loss and lets the
/// optimizer update the model.
///
/// - Parameters:
///   - model: The model to train; updated in place.
///   - variablesKeyPath: Key path from the model to the variables handed to the optimizer.
///   - dataset: The batches to iterate over; each one provides `data` and `labels`.
///   - optimizer: The optimizer applied after every batch.
///   - loss: Differentiable function called as `loss(predictions, labels)`.
///   - callbacks: Hooks invoked before training and around every batch.
///
/// NOTE(review): the `CallbackResult` returned by each hook is discarded here, so a
/// `.stop` or `.skip` answer currently has no effect on the loop — confirm whether that
/// is intended.
public func trainWithCallbacks<M, O: Optimizer, S>(
    _ model: inout M,
    at variablesKeyPath: WritableKeyPath<M, M.AllDifferentiableVariables>,
    on dataset: Dataset<Example<S, S>>,
    using optimizer: inout O,
    loss: @escaping @differentiable (Tensor<S>, Tensor<S>) -> Tensor<S>,
    callbacks: TrainingCallbacks<M, O, S>
) where O.Model == M, O.Scalar == S,
        M.Input == Tensor<S>, M.Output == Tensor<S>
{
    // The same context, created with `.training`, is reused for every batch.
    let context = Context(learningPhase: .training)
    callbacks.beforeTrain(model: &model, optimizer: &optimizer)
    for batch in dataset {
        callbacks.beforeBatch()  // TODO: pass in batch!
        let (x, y) = (batch.data, batch.labels)
        // The local `loss` declared here shadows the `loss` parameter for the rest of the
        // loop body. Differentiation is taken with respect to the model and `y`; the
        // second gradient component is ignored via `_`.
        var (loss, (𝛁model, _)) = model.valueWithGradient(at: y) { (model, y) -> Tensor<S> in
            let preds = model.applied(to: x, in: context)
            return loss(preds, y)
        }
        // `loss` is passed inout, so a callback may change the value printed below; the
        // gradient has already been computed at this point.
        callbacks.afterBatch(loss: &loss)
        print(loss)
        optimizer.update(&model[keyPath: variablesKeyPath], along: 𝛁model)
    }
}