In [1]:
import TensorFlow

/// A group of CIFAR samples: integer class labels paired with floating-point image data.
///
/// Conforms to `TensorGroup` so it can be used as the element type of a `Dataset`.
public struct CIFARExample: TensorGroup {
    /// Class labels of the samples.
    public var label: Tensor<Int32>
    /// Image data of the samples.
    public var data: Tensor<Float>

    /// Creates an example from an existing label tensor and data tensor.
    public init(label: Tensor<Int32>, data: Tensor<Float>) {
        self.label = label
        self.data = data
    }

    /// Creates an example from exactly two tensor handles.
    ///
    /// The first handle is interpreted as the `Int32` label tensor and the second as the
    /// `Float` data tensor. Traps if `_handles` does not contain exactly two elements.
    public init<C: RandomAccessCollection>(
        _handles: C
    ) where C.Element: _AnyTensorHandle {
        precondition(_handles.count == 2)
        let first = _handles.startIndex
        let second = _handles.index(after: first)
        self.init(
            label: Tensor<Int32>(handle: TensorHandle<Int32>(handle: _handles[first])),
            data: Tensor<Float>(handle: TensorHandle<Float>(handle: _handles[second])))
    }
}

In [2]:
import Foundation
import TensorFlow

#if canImport(FoundationNetworking)
    import FoundationNetworking
#endif

/// The CIFAR-10 dataset, split into a training set and a test set.
///
/// Both splits are loaded when the value is created; the loading helpers download and
/// unpack the archive first if it is not already present.
public struct CIFAR10 {
    /// Examples read from the training batch files.
    public let trainingDataset: Dataset<CIFARExample>
    /// Examples read from the test batch file.
    public let testDataset: Dataset<CIFARExample>

    /// Loads the training files first, then the test file.
    public init() {
        let trainingExamples = loadCIFARTrainingFiles()
        trainingDataset = Dataset<CIFARExample>(elements: trainingExamples)

        let testExamples = loadCIFARTestFile()
        testDataset = Dataset<CIFARExample>(elements: testExamples)
    }
}

/// Ensures the unpacked CIFAR-10 binary batches exist at `<directory>/cifar-10-batches-bin`.
///
/// If that folder already exists the function returns immediately. Otherwise it downloads
/// `cifar-10-binary.tar.gz` into `directory` (unless the archive is already there), unpacks
/// it into `directory` with `tar`, and deletes the archive afterwards.
///
/// Any failure (download, extraction, or archive removal) is reported on standard output
/// and terminates the process with exit code -1, because the callers cannot continue
/// without the dataset files.
///
/// - Parameter directory: Folder that holds (or will hold) the dataset. Defaults to the
///   current working directory.
func downloadCIFAR10IfNotPresent(to directory: String = ".") {
    let downloadPath = "\(directory)/cifar-10-batches-bin"
    guard !FileManager.default.fileExists(atPath: downloadPath) else { return }

    print("Downloading CIFAR dataset...")
    let archivePath = "\(directory)/cifar-10-binary.tar.gz"
    if !FileManager.default.fileExists(atPath: archivePath) {
        print("Archive missing, downloading...")
        do {
            let downloadedFile = try Data(
                contentsOf: URL(
                    string: "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz")!)
            try downloadedFile.write(to: URL(fileURLWithPath: archivePath))
        } catch {
            print("Could not download CIFAR dataset, error: \(error)")
            exit(-1)
        }
    }

    print("Archive downloaded, processing...")

    #if os(macOS)
        let tarLocation = "/usr/bin/tar"
    #else
        let tarLocation = "/bin/tar"
    #endif

    let task = Process()
    task.executableURL = URL(fileURLWithPath: tarLocation)
    // `-C` makes tar unpack into `directory`; without it the files land in the process's
    // working directory and are never found when `directory` is not ".".
    task.arguments = ["xzf", archivePath, "-C", directory]

    var extractionSucceeded = false
    do {
        try task.run()
        task.waitUntilExit()
        // A launched tar can still fail (corrupt archive, full disk, ...).
        extractionSucceeded = task.terminationStatus == 0
        if !extractionSucceeded {
            print("CIFAR extraction failed with exit status: \(task.terminationStatus)")
        }
    } catch {
        print("CIFAR extraction failed with error: \(error)")
    }

    // The archive is removed whether or not extraction worked, so a damaged download is
    // fetched again on the next run.
    do {
        try FileManager.default.removeItem(atPath: archivePath)
    } catch {
        print("Could not remove archive, error: \(error)")
        exit(-1)
    }

    guard extractionSucceeded else { exit(-1) }

    print("Unarchiving completed")
}

/// Reads one CIFAR-10 binary batch file and returns its labels and normalized images.
///
/// The file is expected to hold 10000 records of 3073 bytes each: one label byte followed
/// by 3 * 32 * 32 pixel bytes. Pixels are reshaped to `[10000, 3, 32, 32]`, transposed to
/// `[10000, 32, 32, 3]`, scaled to `0...1`, and normalized with fixed per-channel mean and
/// standard deviation.
///
/// The dataset is downloaded first if it is not present. If the file cannot be read or
/// has an unexpected size, a message is printed and the process exits with code -1.
///
/// - Parameters:
///   - name: File name inside `<directory>/cifar-10-batches-bin`.
///   - directory: Folder that contains the dataset folder. Defaults to ".".
/// - Returns: A `CIFARExample` holding every record of the file.
func loadCIFARFile(named name: String, in directory: String = ".") -> CIFARExample {
    downloadCIFAR10IfNotPresent(to: directory)
    let path = "\(directory)/cifar-10-batches-bin/\(name)"

    // Record layout, kept in one place so the size check and the parsing loop agree.
    let imageCount = 10000
    let pixelByteCount = 3 * 32 * 32
    let recordByteCount = 1 + pixelByteCount
    let expectedByteCount = imageCount * recordByteCount

    guard let fileContents = try? Data(contentsOf: URL(fileURLWithPath: path)) else {
        print("Could not read dataset file: \(name)")
        exit(-1)
    }
    guard fileContents.count == expectedByteCount else {
        print(
            "Dataset file \(name) should have \(expectedByteCount) bytes, instead had \(fileContents.count)")
        exit(-1)
    }

    // Final sizes are known up front; reserving avoids repeated reallocation of ~30 MB.
    var bytes: [UInt8] = []
    bytes.reserveCapacity(imageCount * pixelByteCount)
    var labels: [Int32] = []
    labels.reserveCapacity(imageCount)

    for imageIndex in 0..<imageCount {
        let recordStart = imageIndex * recordByteCount
        labels.append(Int32(fileContents[recordStart]))
        bytes.append(contentsOf: fileContents[(recordStart + 1)..<(recordStart + recordByteCount)])
    }

    let labelTensor = Tensor<Int32>(shape: [imageCount], scalars: labels)
    let images = Tensor<UInt8>(shape: [imageCount, 3, 32, 32], scalars: bytes)

    // Transpose from the CIFAR-provided N(CHW) to TF's default NHWC.
    let imageTensor = Tensor<Float>(images.transposed(withPermutations: [0, 2, 3, 1]))

    // Per-channel constants; they broadcast over the trailing channel axis.
    // NOTE(review): these look like the usual ImageNet statistics rather than values
    // computed from CIFAR-10 — confirm this is intended.
    let mean = Tensor<Float>([0.485, 0.456, 0.406])
    let std = Tensor<Float>([0.229, 0.224, 0.225])
    let imagesNormalized = ((imageTensor / 255.0) - mean) / std

    return CIFARExample(label: labelTensor, data: imagesNormalized)
}

/// Loads the five training batch files (`data_batch_1.bin` … `data_batch_5.bin`) and
/// concatenates their labels and images along the first axis into a single example.
func loadCIFARTrainingFiles() -> CIFARExample {
    var batches: [CIFARExample] = []
    for batchNumber in 1...5 {
        batches.append(loadCIFARFile(named: "data_batch_\(batchNumber).bin"))
    }

    let allLabels = Raw.concat(concatDim: Tensor<Int32>(0), batches.map { $0.label })
    let allImages = Raw.concat(concatDim: Tensor<Int32>(0), batches.map { $0.data })
    return CIFARExample(label: allLabels, data: allImages)
}

/// Loads the single test batch file, `test_batch.bin`.
func loadCIFARTestFile() -> CIFARExample {
    let testBatch = loadCIFARFile(named: "test_batch.bin")
    return testBatch
}

In [3]:
import TensorFlow

// Original Paper:
// "Deep Residual Learning for Image Recognition"
// Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
// https://arxiv.org/abs/1512.03385
// Basic blocks that change dimensions are bridged by a projection shortcut layer
// (option B in the paper).
/// The dataset family a network is configured for; it selects the input layer,
/// pooling sizes, and classifier width of the ResNet variants in this file.
public enum DataKind {
    /// CIFAR-style input.
    case cifar
    /// ImageNet-style input.
    case imagenet
}

/// A 2-D convolution immediately followed by batch normalization.
public struct ConvBN: Layer {
    /// The convolution applied first.
    public var conv: Conv2D<Float>
    /// Batch normalization applied to the convolution output.
    public var norm: BatchNorm<Float>

    /// Creates the pair of layers.
    ///
    /// - Parameters:
    ///   - filterShape: Shape passed to `Conv2D`; its last component is also used as the
    ///     feature count of the batch normalization.
    ///   - strides: Convolution strides. Defaults to `(1, 1)`.
    ///   - padding: Convolution padding. Defaults to `.valid`.
    public init(
        filterShape: (Int, Int, Int, Int),
        strides: (Int, Int) = (1, 1),
        padding: Padding = .valid
    ) {
        let outputFeatureCount = filterShape.3
        conv = Conv2D(filterShape: filterShape, strides: strides, padding: padding)
        norm = BatchNorm(featureCount: outputFeatureCount)
    }

    /// Returns `norm(conv(input))`.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        let convolved = conv(input)
        return norm(convolved)
    }
}

/// A two-convolution residual block whose shortcut is a strided 1x1 `ConvBN` projection.
///
/// `layer1` and the shortcut both use strides `(2, 2)`, so the main path and the shortcut
/// are downsampled identically before they are added.
public struct ResidualBasicBlockShortcut: Layer {
    /// First convolution of the main path (strides `(2, 2)`).
    public var layer1: ConvBN
    /// Second convolution of the main path (strides `(1, 1)`).
    public var layer2: ConvBN
    /// 1x1 projection applied to the block input (strides `(2, 2)`).
    public var shortcut: ConvBN

    /// Creates the block.
    ///
    /// - Parameters:
    ///   - featureCounts: `.0` is the input feature count, `.1` the output of `layer1`,
    ///     `.2` the output of `layer2`, and `.3` the output of the shortcut. `.2` and `.3`
    ///     must be equal for the addition in `callAsFunction` to be shape-compatible.
    ///   - kernelSize: Side length of the main-path kernels. Defaults to 3.
    public init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3) {
        self.layer1 = ConvBN(
            filterShape: (kernelSize, kernelSize, featureCounts.0, featureCounts.1),
            strides: (2, 2),
            padding: .same)
        self.layer2 = ConvBN(
            filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.2),
            strides: (1, 1),
            padding: .same)
        self.shortcut = ConvBN(
            filterShape: (1, 1, featureCounts.0, featureCounts.3),
            strides: (2, 2),
            padding: .same)
    }

    /// Returns `relu(mainPath(input) + shortcut(input))`.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        // The ReLU after the addition matches `ResidualConvBlock` in this file and the
        // block definition in the referenced paper.
        return relu(layer2(relu(layer1(input))) + shortcut(input))
    }
}

/// A two-convolution residual block with an identity shortcut.
///
/// The block computes `relu(layer2(relu(layer1(input))) + input)`. Because the input is
/// added back unchanged, the block must preserve the input's shape.
public struct ResidualBasicBlock: Layer {
    /// First convolution of the main path.
    public var layer1: ConvBN
    /// Second convolution of the main path.
    public var layer2: ConvBN

    /// Creates the block.
    ///
    /// - Parameters:
    ///   - featureCounts: `.0` is the input feature count, `.1` the output of `layer1`,
    ///     and `.3` the output of `layer2`; `.2` is not used. `.0` must equal `.3`.
    ///   - kernelSize: Side length of the kernels. Defaults to 3.
    ///   - strides: Strides applied to both convolutions. Must be `(1, 1)` (the default),
    ///     otherwise the identity shortcut would not match the main path's output shape.
    public init(
        featureCounts: (Int, Int, Int, Int),
        kernelSize: Int = 3,
        strides: (Int, Int) = (1, 1)
    ) {
        precondition(
            featureCounts.0 == featureCounts.3 && strides == (1, 1),
            "ResidualBasicBlock adds its input to its output, so it must preserve the input shape")
        self.layer1 = ConvBN(
            filterShape: (kernelSize, kernelSize, featureCounts.0, featureCounts.1),
            strides: strides,
            padding: .same)
        self.layer2 = ConvBN(
            filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.3),
            strides: strides,
            padding: .same)
    }

    /// Returns the main path's output added to `input`, followed by ReLU.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        // Without `+ input` this would be a plain convolution stack, not a residual block.
        return relu(layer2(relu(layer1(input))) + input)
    }
}

/// A sequence of identically configured `ResidualBasicBlock`s applied one after another.
public struct ResidualBasicBlockStack: Layer {
    /// The blocks, in application order.
    public var blocks: [ResidualBasicBlock] = []

    /// Creates `blockCount` blocks that all share `featureCounts` and `kernelSize`.
    public init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3, blockCount: Int) {
        blocks = (0..<blockCount).map { _ in
            ResidualBasicBlock(featureCounts: featureCounts, kernelSize: kernelSize)
        }
    }

    /// Feeds `input` through every block in order; returns `input` unchanged when the
    /// stack is empty.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        return blocks.differentiableReduce(input) { activation, block in
            block(activation)
        }
    }
}

/// A three-convolution residual block (1x1, then `kernelSize` x `kernelSize`, then 1x1)
/// whose shortcut is a 1x1 `ConvBN` projection of the input.
public struct ResidualConvBlock: Layer {
    /// Leading 1x1 convolution; carries the block's strides.
    public var layer1: ConvBN
    /// Middle convolution with `.same` padding.
    public var layer2: ConvBN
    /// Trailing 1x1 convolution.
    public var layer3: ConvBN
    /// 1x1 projection of the input; uses the same strides as `layer1`.
    public var shortcut: ConvBN

    /// Creates the block.
    ///
    /// - Parameters:
    ///   - featureCounts: `.0` is the input feature count; `.1`, `.2`, `.3` are the output
    ///     feature counts of `layer1`, `layer2`, and `layer3`. The shortcut maps `.0` to `.3`.
    ///   - kernelSize: Side length of the middle kernel. Defaults to 3.
    ///   - strides: Strides of `layer1` and of the shortcut. Defaults to `(2, 2)`.
    public init(
        featureCounts: (Int, Int, Int, Int),
        kernelSize: Int = 3,
        strides: (Int, Int) = (2, 2)
    ) {
        let (inputCount, firstCount, middleCount, outputCount) = featureCounts
        layer1 = ConvBN(
            filterShape: (1, 1, inputCount, firstCount),
            strides: strides)
        layer2 = ConvBN(
            filterShape: (kernelSize, kernelSize, firstCount, middleCount),
            padding: .same)
        layer3 = ConvBN(filterShape: (1, 1, middleCount, outputCount))
        shortcut = ConvBN(
            filterShape: (1, 1, inputCount, outputCount),
            strides: strides,
            padding: .same)
    }

    /// Returns `relu(layer3(relu(layer2(relu(layer1(input))))) + shortcut(input))`.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        let first = relu(layer1(input))
        let middle = relu(layer2(first))
        let mainPath = layer3(middle)
        let projected = shortcut(input)
        return relu(mainPath + projected)
    }
}

/// A three-convolution residual block (1x1, then `kernelSize` x `kernelSize`, then 1x1)
/// whose shortcut is the unmodified input.
public struct ResidualIdentityBlock: Layer {
    /// Leading 1x1 convolution.
    public var layer1: ConvBN
    /// Middle convolution with `.same` padding.
    public var layer2: ConvBN
    /// Trailing 1x1 convolution.
    public var layer3: ConvBN

    /// Creates the block.
    ///
    /// - Parameters:
    ///   - featureCounts: `.0` is the input feature count; `.1`, `.2`, `.3` are the output
    ///     feature counts of `layer1`, `layer2`, and `layer3`.
    ///   - kernelSize: Side length of the middle kernel. Defaults to 3.
    public init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3) {
        let (inputCount, firstCount, middleCount, outputCount) = featureCounts
        layer1 = ConvBN(filterShape: (1, 1, inputCount, firstCount))
        layer2 = ConvBN(
            filterShape: (kernelSize, kernelSize, firstCount, middleCount),
            padding: .same)
        layer3 = ConvBN(filterShape: (1, 1, middleCount, outputCount))
    }

    /// Returns `relu(layer3(relu(layer2(relu(layer1(input))))) + input)`.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        let first = relu(layer1(input))
        let middle = relu(layer2(first))
        let mainPath = layer3(middle)
        return relu(mainPath + input)
    }
}

/// A sequence of identically configured `ResidualIdentityBlock`s applied one after another.
public struct ResidualIdentityBlockStack: Layer {
    /// The blocks, in application order.
    public var blocks: [ResidualIdentityBlock] = []

    /// Creates `blockCount` blocks that all share `featureCounts` and `kernelSize`.
    public init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3, blockCount: Int) {
        blocks = (0..<blockCount).map { _ in
            ResidualIdentityBlock(featureCounts: featureCounts, kernelSize: kernelSize)
        }
    }

    /// Feeds `input` through every block in order; returns `input` unchanged when the
    /// stack is empty.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        return blocks.differentiableReduce(input) { activation, block in
            block(activation)
        }
    }
}

/// A ResNet assembled from two-convolution basic blocks.
///
/// The network is an input convolution with pooling, four stages (each a single leading
/// block followed by a stack of further blocks), average pooling, flattening, and a dense
/// classifier.
public struct ResNetBasic: Layer {
    /// Input convolution.
    public var l1: ConvBN
    /// Pooling applied after the input convolution.
    public var maxPool: MaxPool2D<Float>

    /// Stage 2: leading block and following stack (64 features).
    public var l2a = ResidualBasicBlock(featureCounts: (64, 64, 64, 64))
    public var l2b: ResidualBasicBlockStack

    /// Stage 3: leading shortcut block and following stack (128 features).
    public var l3a = ResidualBasicBlockShortcut(featureCounts: (64, 128, 128, 128))
    public var l3b: ResidualBasicBlockStack

    /// Stage 4: leading shortcut block and following stack (256 features).
    public var l4a = ResidualBasicBlockShortcut(featureCounts: (128, 256, 256, 256))
    public var l4b: ResidualBasicBlockStack

    /// Stage 5: leading shortcut block and following stack (512 features).
    public var l5a = ResidualBasicBlockShortcut(featureCounts: (256, 512, 512, 512))
    public var l5b: ResidualBasicBlockStack

    /// Pooling applied after the last stage.
    public var avgPool: AvgPool2D<Float>
    public var flatten = Flatten<Float>()
    /// Final dense layer producing one output per class.
    public var classifier: Dense<Float>

    /// Creates the network.
    ///
    /// - Parameters:
    ///   - dataKind: Selects the input convolution, the pooling sizes, and the classifier
    ///     output size (10 for `.cifar`, 1000 for `.imagenet`).
    ///   - layerBlockCounts: Number of blocks in the stacks `l2b`, `l3b`, `l4b`, `l5b`.
    ///     These are in addition to the single leading block of each stage.
    public init(dataKind: DataKind, layerBlockCounts: (Int, Int, Int, Int)) {
        switch dataKind {
        case .cifar:
            l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same)
            maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1))  // no-op
            avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4))
            classifier = Dense(inputSize: 512, outputSize: 10)
        case .imagenet:
            l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same)
            maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2))
            avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7))
            classifier = Dense(inputSize: 512, outputSize: 1000)
        }

        let (stage2Count, stage3Count, stage4Count, stage5Count) = layerBlockCounts
        l2b = ResidualBasicBlockStack(featureCounts: (64, 64, 64, 64), blockCount: stage2Count)
        l3b = ResidualBasicBlockStack(
            featureCounts: (128, 128, 128, 128), blockCount: stage3Count)
        l4b = ResidualBasicBlockStack(
            featureCounts: (256, 256, 256, 256), blockCount: stage4Count)
        l5b = ResidualBasicBlockStack(
            featureCounts: (512, 512, 512, 512), blockCount: stage5Count)
    }

    /// Runs the input convolution, the four stages, pooling, and the classifier in order.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        let stem = maxPool(relu(l1(input)))
        let stage2 = l2b(l2a(stem))
        let stage3 = l3b(l3a(stage2))
        let stage4 = l4b(l4a(stage3))
        let stage5 = l5b(l5a(stage4))
        let pooled = avgPool(stage5)
        return classifier(flatten(pooled))
    }
}

extension ResNetBasic {
    /// The named basic-block ResNet depths.
    public enum Kind {
        case resNet18
        case resNet34
    }

    /// Creates one of the named architectures.
    ///
    /// - Parameters:
    ///   - inputKind: Which depth to build.
    ///   - dataKind: Dataset family the network is configured for.
    public init(inputKind: Kind, dataKind: DataKind) {
        // Total residual blocks per stage for each named depth.
        let blocksPerStage: (Int, Int, Int, Int)
        switch inputKind {
        case .resNet18:
            blocksPerStage = (2, 2, 2, 2)
        case .resNet34:
            blocksPerStage = (3, 4, 6, 3)
        }

        // Each stage already has one leading block (`l2a` … `l5a`), and `layerBlockCounts`
        // only sizes the stack that follows it, so one block per stage is subtracted.
        // Passing the totals directly would build one extra block in every stage.
        self.init(
            dataKind: dataKind,
            layerBlockCounts: (
                blocksPerStage.0 - 1,
                blocksPerStage.1 - 1,
                blocksPerStage.2 - 1,
                blocksPerStage.3 - 1
            ))
    }
}

/// A ResNet assembled from three-convolution blocks.
///
/// The network is an input convolution with pooling, four stages (each a projection block
/// followed by a stack of identity blocks), average pooling, flattening, and a dense
/// classifier.
public struct ResNet: Layer {
    /// Input convolution.
    public var l1: ConvBN
    /// Pooling applied after the input convolution.
    public var maxPool: MaxPool2D<Float>

    /// Stage 2: projection block (strides `(1, 1)`) and identity stack, 256 output features.
    public var l2a = ResidualConvBlock(featureCounts: (64, 64, 64, 256), strides: (1, 1))
    public var l2b: ResidualIdentityBlockStack

    /// Stage 3: projection block and identity stack, 512 output features.
    public var l3a = ResidualConvBlock(featureCounts: (256, 128, 128, 512))
    public var l3b: ResidualIdentityBlockStack

    /// Stage 4: projection block and identity stack, 1024 output features.
    public var l4a = ResidualConvBlock(featureCounts: (512, 256, 256, 1024))
    public var l4b: ResidualIdentityBlockStack

    /// Stage 5: projection block and identity stack, 2048 output features.
    public var l5a = ResidualConvBlock(featureCounts: (1024, 512, 512, 2048))
    public var l5b: ResidualIdentityBlockStack

    /// Pooling applied after the last stage.
    public var avgPool: AvgPool2D<Float>
    public var flatten = Flatten<Float>()
    /// Final dense layer producing one output per class.
    public var classifier: Dense<Float>

    /// Creates the network.
    ///
    /// - Parameters:
    ///   - dataKind: Selects the input convolution, the pooling sizes, and the classifier
    ///     output size (10 for `.cifar`, 1000 for `.imagenet`).
    ///   - layerBlockCounts: Number of identity blocks in the stacks `l2b`, `l3b`, `l4b`,
    ///     `l5b`. These are in addition to the projection block that opens each stage.
    public init(dataKind: DataKind, layerBlockCounts: (Int, Int, Int, Int)) {
        switch dataKind {
        case .cifar:
            l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same)
            maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1))  // no-op
            avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4))
            classifier = Dense(inputSize: 2048, outputSize: 10)
        case .imagenet:
            l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same)
            maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2))
            avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7))
            classifier = Dense(inputSize: 2048, outputSize: 1000)
        }

        let (stage2Count, stage3Count, stage4Count, stage5Count) = layerBlockCounts
        l2b = ResidualIdentityBlockStack(
            featureCounts: (256, 64, 64, 256), blockCount: stage2Count)
        l3b = ResidualIdentityBlockStack(
            featureCounts: (512, 128, 128, 512), blockCount: stage3Count)
        l4b = ResidualIdentityBlockStack(
            featureCounts: (1024, 256, 256, 1024), blockCount: stage4Count)
        l5b = ResidualIdentityBlockStack(
            featureCounts: (2048, 512, 512, 2048), blockCount: stage5Count)
    }

    /// Runs the input convolution, the four stages, pooling, and the classifier in order.
    @differentiable
    public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        let stem = maxPool(relu(l1(input)))
        let stage2 = l2b(l2a(stem))
        let stage3 = l3b(l3a(stage2))
        let stage4 = l4b(l4a(stage3))
        let stage5 = l5b(l5a(stage4))
        let pooled = avgPool(stage5)
        return classifier(flatten(pooled))
    }
}

extension ResNet {
    /// The named three-convolution-block ResNet depths.
    public enum Kind {
        case resNet50
        case resNet101
        case resNet152
    }

    /// Creates one of the named architectures.
    ///
    /// - Parameters:
    ///   - inputKind: Which depth to build.
    ///   - dataKind: Dataset family the network is configured for.
    public init(inputKind: Kind, dataKind: DataKind) {
        // Total residual blocks per stage for each named depth.
        let blocksPerStage: (Int, Int, Int, Int)
        switch inputKind {
        case .resNet50:
            blocksPerStage = (3, 4, 6, 3)
        case .resNet101:
            blocksPerStage = (3, 4, 23, 3)
        case .resNet152:
            blocksPerStage = (3, 8, 36, 3)
        }

        // Each stage already opens with one projection block (`l2a` … `l5a`), and
        // `layerBlockCounts` only sizes the identity stack that follows it, so one block
        // per stage is subtracted. Passing the totals directly would build one extra
        // block in every stage.
        self.init(
            dataKind: dataKind,
            layerBlockCounts: (
                blocksPerStage.0 - 1,
                blocksPerStage.1 - 1,
                blocksPerStage.2 - 1,
                blocksPerStage.3 - 1
            ))
    }
}

In [1]:
import Datasets
import ImageClassificationModels
import TensorFlow

// Trains a ResNet-50 on CIFAR-10 for ten epochs and reports test accuracy and loss
// after every epoch.
let batchSize = 100

let dataset = CIFAR10()
let testBatches = dataset.testDataset.batched(batchSize)

// Use the network sized for CIFAR-10
var model = ResNet(inputKind: .resNet50, dataKind: .cifar)

// the classic ImageNet optimizer setting diverges on CIFAR-10
// let optimizer = SGD(for: model, learningRate: 0.1, momentum: 0.9)
let optimizer = SGD(for: model, learningRate: 0.001)

print("Starting training...")

for epoch in 1...10 {
    // The phase is switched to inference for evaluation below, so it has to be set back
    // to training at the start of every epoch.
    Context.local.learningPhase = .training

    // NOTE(review): the training loss is accumulated but not reported anywhere yet.
    var trainingLossSum: Float = 0
    var trainingBatchCount = 0
    let trainingShuffled = dataset.trainingDataset.shuffled(
        sampleCount: 50000, randomSeed: Int64(epoch))
    for batch in trainingShuffled.batched(batchSize) {
        let (labels, images) = (batch.label, batch.data)
        let (loss, gradients) = valueWithGradient(at: model) { model -> Tensor<Float> in
            let logits = model(images)
            return softmaxCrossEntropy(logits: logits, labels: labels)
        }
        trainingLossSum += loss.scalarized()
        trainingBatchCount += 1
        optimizer.update(&model, along: gradients)
    }

    // Evaluate in inference mode so phase-dependent layers (batch normalization) do not
    // treat the test batches as training data.
    Context.local.learningPhase = .inference

    var testLossSum: Float = 0
    var testBatchCount = 0
    var correctGuessCount = 0
    var totalGuessCount = 0
    for batch in testBatches {
        let (labels, images) = (batch.label, batch.data)
        let logits = model(images)
        testLossSum += softmaxCrossEntropy(logits: logits, labels: labels).scalarized()
        testBatchCount += 1

        let correctPredictions = logits.argmax(squeezingAxis: 1) .== labels
        correctGuessCount = correctGuessCount + Int(
            Tensor<Int32>(correctPredictions).sum().scalarized())
        // Count the samples actually present so a short final batch is not over-counted.
        totalGuessCount = totalGuessCount + labels.shape[0]
    }

    let accuracy = Float(correctGuessCount) / Float(totalGuessCount)
    print(
        """
          [Epoch \(epoch)] \
          Accuracy: \(correctGuessCount)/\(totalGuessCount) (\(accuracy)) \
          Loss: \(testLossSum / Float(testBatchCount))
          """
    )
}

: 