# Import

In [1]:
// %install-swiftpm-flags -c release
// %install '.package(url: "https://github.com/JacopoMangiavacchi/SwiftCoreMLTools.git", from: "0.0.5")' SwiftCoreMLTools
// %install '.package(url: "https://github.com/dduan/Just.git", from: "0.8.0")' Just

In [2]:
import Foundation
import TensorFlow
// import SwiftCoreMLTools
// import Just

# Data Download

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per ten thousand dollars
        - PTRATIO  pupil-teacher ratio by town
        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
        - LSTAT    % lower status of the population
        - MEDV     Median value of owner-occupied homes in a thousand dollar

    :Missing Attribute Values: None

    :Creator: Harrison, D. and Rubinfeld, D.L.

This is a copy of UCI ML housing dataset.
https://archive.ics.uci.edu/ml/machine-learning-databases/housing/


In [3]:
// if let cts = Just.get(URL(string: "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")!).content {
//     try! cts.write(to: URL(fileURLWithPath:"../data/housing.csv"))
// }

# Data Ingestion

In [4]:
let data = try String(contentsOfFile:"./data/housing.csv", encoding: String.Encoding.utf8)
let dataRecords: [[Float]] = data.split(separator: "\n").map{ String($0).split(separator: " ").compactMap{ Float(String($0)) } }

let numRecords = dataRecords.count
let numColumns = dataRecords[0].count

let dataFeatures = dataRecords.map{ Array($0[0..<numColumns-1]) }
let dataLabels = dataRecords.map{ Array($0[(numColumns-1)...]) }

# Data Transformation

## Split Numerical Categorical Features

In [5]:
let categoricalColumns = [3, 8]
let numericalColumns = [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12]
let numCategoricalFeatures = categoricalColumns.count
let numNumericalFeatures = numericalColumns.count
let numLabels = 1

assert(numColumns == numCategoricalFeatures + numNumericalFeatures + 1)

// Get Categorical Features
let allCategoriesValues = dataFeatures.map{ row in categoricalColumns.map{ Int32(row[$0]) } }
                                .reduce(into: Array(repeating: [Int32](), count: 2)){ total, value in
                                    total[0].append(value[0])
                                    total[1].append(value[1]) }
                                .map{ Set($0).sorted() }

let embeddingSizes = allCategoriesValues.map{ $0.count }

let categoricalFeatures = dataFeatures.map{ row in categoricalColumns.map{ Int32(row[$0]) } }
let oneHotCategoricalFeatures:[[[Int32]]] = categoricalFeatures.map{ catArray in
    var oneHotArray = [[Int32]]()
    
    for i in 0..<catArray.count {
        var oneHot = Array(repeating: Int32(0), count: allCategoriesValues[i].count)
        if let pos = allCategoriesValues[i].firstIndex(where: { $0 == catArray[i] }){
            oneHot[pos] = 1
        }
        oneHotArray.append(oneHot)
    }
    
    return oneHotArray
}

// Get Numerical Features
let numericalFeatures = dataFeatures.map{ row in numericalColumns.map{ row[$0] } }

## Split Train and Test

In [6]:
let trainPercentage:Float = 0.8
let numTrainRecords = Int(ceil(Float(numRecords) * trainPercentage))
let numTestRecords = numRecords - numTrainRecords

func matrixTranspose<T>(_ matrix: [[T]]) -> [[T]] {
    if matrix.isEmpty {return matrix}
    var result = [[T]]()
    for index in 0..<matrix.first!.count {
        result.append(matrix.map{$0[index]})
    }
    return result
}

let xCategoricalAllTrain = matrixTranspose(Array(oneHotCategoricalFeatures[0..<numTrainRecords])).map{ Array($0.joined()) }
let xCategoricalAllTest = matrixTranspose(Array(oneHotCategoricalFeatures[numTrainRecords...])).map{ Array($0.joined()) }
let xNumericalAllTrain = Array(Array(numericalFeatures[0..<numTrainRecords]).joined())
let xNumericalAllTest = Array(Array(numericalFeatures[numTrainRecords...]).joined())
let yAllTrain = Array(Array(dataLabels[0..<numTrainRecords]).joined())
let yAllTest = Array(Array(dataLabels[numTrainRecords...]).joined())

let XCategoricalTrain = xCategoricalAllTrain.enumerated().map{ (offset, element) in 
    Tensor<Int32>(element).reshaped(to: TensorShape([numTrainRecords, embeddingSizes[offset]]))
}
let XCategoricalTest = xCategoricalAllTest.enumerated().map{ (offset, element) in 
    Tensor<Int32>(element).reshaped(to: TensorShape([numTestRecords, embeddingSizes[offset]]))
}
let XNumericalTrainDeNorm = Tensor<Float>(xNumericalAllTrain).reshaped(to: TensorShape([numTrainRecords, numNumericalFeatures]))
let XNumericalTestDeNorm = Tensor<Float>(xNumericalAllTest).reshaped(to: TensorShape([numTestRecords, numNumericalFeatures]))
let YTrain = Tensor<Float>(yAllTrain).reshaped(to: TensorShape([numTrainRecords, numLabels]))
let YTest = Tensor<Float>(yAllTest).reshaped(to: TensorShape([numTestRecords, numLabels]))

## Normalize Numerical Features

In [7]:
let mean = XNumericalTrainDeNorm.mean(alongAxes: 0)
let std = XNumericalTrainDeNorm.standardDeviation(alongAxes: 0)

print(mean, std)

[[ 2.0137098,  14.197531,   9.523555, 0.53213036,  6.3311296,   64.47929,  4.1678762,  353.68396,
    18.03163,  379.84735,  11.394517]] [[ 6.5076075,  25.258776,   6.534038, 0.11449408,  0.7311985,  29.000755,  2.1797554,  132.14561,
    2.217345,  40.494495,   6.852825]]


In [8]:
let XNumericalTrain = (XNumericalTrainDeNorm - mean)/std
let XNumericalTest = (XNumericalTestDeNorm - mean)/std

In [9]:
print("Training shapes \(XNumericalTrain.shape) \(XCategoricalTrain[0].shape) \(XCategoricalTrain[1].shape) \(YTrain.shape)")
print("Testing shapes  \(XNumericalTest.shape) \(XCategoricalTest[0].shape) \(XCategoricalTest[1].shape) \(YTest.shape)")

Training shapes [405, 11] [405, 2] [405, 9] [405, 1]
Testing shapes  [101, 11] [101, 2] [101, 9] [101, 1]


# Model

In [10]:
struct MultiInputs<N: Differentiable, C>: Differentiable {
  var numerical: N
  
  @noDerivative
  var categorical: C

  @differentiable
  init(numerical: N, categorical: C) {
    self.numerical = numerical
    self.categorical = categorical
  }
}

struct RegressionModel: Module {
//     var numericalLayer = Dense<Float>(inputSize: 11, outputSize: 32, activation: relu)
    var embedding1 = Embedding<Float>(vocabularySize: 2, embeddingSize: 2)
    var embedding2 = Embedding<Float>(vocabularySize: 9, embeddingSize: 5)
//     var embeddingLayer = Dense<Float>(inputSize: (4 + 45), outputSize: 64, activation: relu)
//     var allInputConcatLayer = Dense<Float>(inputSize: (32 + 64), outputSize: 128, activation: relu)
    var allInputConcatLayer = Dense<Float>(inputSize: (11 + 4 + 45), outputSize: 128, activation: relu)
    var hiddenLayer = Dense<Float>(inputSize: 128, outputSize: 32, activation: relu)
    var outputLayer = Dense<Float>(inputSize: 32, outputSize: 1)
    
    @differentiable
    func callAsFunction(_ input: MultiInputs<[Tensor<Float>], [Tensor<Int32>]>) -> Tensor<Float> {
//         let numericalInput = numericalLayer(input.numerical[0])
        let embeddingOutput1 = embedding1(input.categorical[0])
        let embeddingOutput1Reshaped = embeddingOutput1.reshaped(to: 
            TensorShape([embeddingOutput1.shape[0], embeddingOutput1.shape[1] * embeddingOutput1.shape[2]]))
        let embeddingOutput2 = embedding2(input.categorical[1])
        let embeddingOutput2Reshaped = embeddingOutput2.reshaped(to: 
            TensorShape([embeddingOutput2.shape[0], embeddingOutput2.shape[1] * embeddingOutput2.shape[2]]))
//         let embeddingConcat = Tensor<Float>(concatenating: [embeddingOutput1Reshaped, embeddingOutput2Reshaped], alongAxis: 1)
//         let embeddingInput = embeddingLayer(embeddingConcat)
//         let allConcat = Tensor<Float>(concatenating: [numericalInput, embeddingInput], alongAxis: 1)
        let allConcat = Tensor<Float>(concatenating: [input.numerical[0], embeddingOutput1Reshaped, embeddingOutput2Reshaped], alongAxis: 1)
        return allConcat.sequenced(through: allInputConcatLayer, hiddenLayer, outputLayer)
    }
}

var model = RegressionModel()

# Training

In [11]:
let optimizer = RMSProp(for: model, learningRate: 0.001)
Context.local.learningPhase = .training

In [12]:
let epochCount = 500
let batchSize = 32
let numberOfBatch = Int(ceil(Double(numTrainRecords) / Double(batchSize)))
let shuffle = true

func mae(predictions: Tensor<Float>, truths: Tensor<Float>) -> Float {
    return abs(Tensor<Float>(predictions - truths)).mean().scalarized()
}

In [13]:
for epoch in 1...epochCount {
    var epochLoss: Float = 0
    var epochMAE: Float = 0
    var batchCount: Int = 0
    var batchArray = Array(repeating: false, count: numberOfBatch)
    for batch in 0..<numberOfBatch {
        var r = batch
        if shuffle {
            while true {
                r = Int.random(in: 0..<numberOfBatch)
                if !batchArray[r] {
                    batchArray[r] = true
                    break
                }
            }
        }
        
        let batchStart = r * batchSize
        let batchEnd = min(numTrainRecords, batchStart + batchSize)
        let (loss, grad) = model.valueWithGradient { (model: RegressionModel) -> Tensor<Float> in
            let multiInput = MultiInputs(numerical: [XNumericalTrain[batchStart..<batchEnd]],
                                         categorical: [XCategoricalTrain[0][batchStart..<batchEnd],
                                                       XCategoricalTrain[1][batchStart..<batchEnd]])
            let logits = model(multiInput)
            return meanSquaredError(predicted: logits, expected: YTrain[batchStart..<batchEnd])
        }
        optimizer.update(&model, along: grad)
        
        let multiInput = MultiInputs(numerical: [XNumericalTrain[batchStart..<batchEnd]],
                                     categorical: [XCategoricalTrain[0][batchStart..<batchEnd],
                                                   XCategoricalTrain[1][batchStart..<batchEnd]])
        let logits = model(multiInput)
        epochMAE += mae(predictions: logits, truths: YTrain[batchStart..<batchEnd])
        epochLoss += loss.scalarized()
        batchCount += 1
    }
    epochMAE /= Float(batchCount)
    epochLoss /= Float(batchCount)

    print("Epoch \(epoch): MSE: \(epochLoss), MAE: \(epochMAE)")
}

Epoch 1: MSE: 502.0456, MAE: 19.801373
Epoch 2: MSE: 234.11732, MAE: 11.567073
Epoch 3: MSE: 92.90376, MAE: 6.2636995
Epoch 4: MSE: 65.79809, MAE: 5.104972
Epoch 5: MSE: 55.88944, MAE: 4.8351364
Epoch 6: MSE: 51.318428, MAE: 4.2889614
Epoch 7: MSE: 45.5352, MAE: 4.097953
Epoch 8: MSE: 40.751522, MAE: 3.595095
Epoch 9: MSE: 38.35515, MAE: 3.5714211
Epoch 10: MSE: 34.48596, MAE: 3.397207
Epoch 11: MSE: 29.352333, MAE: 3.2578378
Epoch 12: MSE: 29.699827, MAE: 3.0485804
Epoch 13: MSE: 27.916904, MAE: 3.0622892
Epoch 14: MSE: 28.149239, MAE: 3.0177972
Epoch 15: MSE: 25.466373, MAE: 2.9360785
Epoch 16: MSE: 25.76882, MAE: 2.9714775
Epoch 17: MSE: 23.245653, MAE: 2.8534093
Epoch 18: MSE: 25.04813, MAE: 2.8597603
Epoch 19: MSE: 24.774015, MAE: 2.858098
Epoch 20: MSE: 23.56269, MAE: 2.854866
Epoch 21: MSE: 23.217655, MAE: 2.803007
Epoch 22: MSE: 22.488848, MAE: 2.8177464
Epoch 23: MSE: 22.359364, MAE: 2.7257943
Epoch 24: MSE: 22.495338, MAE: 2.7968454
Epoch 25: MSE: 22.431416, MAE: 2.7096899
Ep

Epoch 204: MSE: 5.8629656, MAE: 1.5529401
Epoch 205: MSE: 4.417165, MAE: 1.4902012
Epoch 206: MSE: 5.704977, MAE: 1.4926358
Epoch 207: MSE: 5.9223166, MAE: 1.6661497
Epoch 208: MSE: 5.144955, MAE: 1.4987527
Epoch 209: MSE: 4.945585, MAE: 1.4947363
Epoch 210: MSE: 5.53083, MAE: 1.5694433
Epoch 211: MSE: 5.7050476, MAE: 1.505933
Epoch 212: MSE: 4.429266, MAE: 1.4932778
Epoch 213: MSE: 4.484904, MAE: 1.5041156
Epoch 214: MSE: 5.63726, MAE: 1.6001498
Epoch 215: MSE: 4.7366033, MAE: 1.4974208
Epoch 216: MSE: 4.743142, MAE: 1.5536658
Epoch 217: MSE: 4.5767946, MAE: 1.5780773
Epoch 218: MSE: 4.9849277, MAE: 1.5846045
Epoch 219: MSE: 5.520872, MAE: 1.4766389
Epoch 220: MSE: 5.373663, MAE: 1.5041344
Epoch 221: MSE: 4.6598024, MAE: 1.536872
Epoch 222: MSE: 5.2643347, MAE: 1.5900631
Epoch 223: MSE: 4.2423515, MAE: 1.4604989
Epoch 224: MSE: 6.490338, MAE: 1.5707607
Epoch 225: MSE: 4.6870074, MAE: 1.4319619
Epoch 226: MSE: 4.6928105, MAE: 1.4971993
Epoch 227: MSE: 4.9387407, MAE: 1.6457143
Epoch 22

Epoch 403: MSE: 3.4096918, MAE: 1.2548369
Epoch 404: MSE: 2.9269202, MAE: 1.244082
Epoch 405: MSE: 3.844939, MAE: 1.488626
Epoch 406: MSE: 2.9949586, MAE: 1.2515607
Epoch 407: MSE: 3.779837, MAE: 1.3573115
Epoch 408: MSE: 3.4122252, MAE: 1.2713642
Epoch 409: MSE: 2.892384, MAE: 1.3365561
Epoch 410: MSE: 3.2657328, MAE: 1.3454946
Epoch 411: MSE: 2.6580122, MAE: 1.2607694
Epoch 412: MSE: 3.0445685, MAE: 1.5545933
Epoch 413: MSE: 3.7501073, MAE: 1.2351229
Epoch 414: MSE: 3.8364525, MAE: 1.3822888
Epoch 415: MSE: 2.7059512, MAE: 1.1257997
Epoch 416: MSE: 3.18781, MAE: 1.292966
Epoch 417: MSE: 3.3766801, MAE: 1.3980644
Epoch 418: MSE: 3.3134542, MAE: 1.3437563
Epoch 419: MSE: 2.9553287, MAE: 1.2430589
Epoch 420: MSE: 3.4636464, MAE: 1.3513833
Epoch 421: MSE: 2.9403396, MAE: 1.3400298
Epoch 422: MSE: 3.486006, MAE: 1.3258051
Epoch 423: MSE: 4.116498, MAE: 1.2646537
Epoch 424: MSE: 2.6261592, MAE: 1.1723425
Epoch 425: MSE: 2.8367643, MAE: 1.3372365
Epoch 426: MSE: 3.361268, MAE: 1.4404712
Epo

# Test

In [14]:
Context.local.learningPhase = .inference

let multiInputTest = MultiInputs(numerical: [XNumericalTest],
                                 categorical: [XCategoricalTest[0],
                                               XCategoricalTest[1]])

let prediction = model(multiInputTest)

let predictionMse = meanSquaredError(predicted: prediction, expected: YTest).scalarized()/Float(numTestRecords)
let predictionMae = mae(predictions: prediction, truths: YTest)/Float(numTestRecords)

print("MSE: \(predictionMse), MAE: \(predictionMae)")

MSE: 0.39759186, MAE: 0.044550754


# Export

In [15]:
// print(model.layer1.weight.shape, model.layer2.weight.shape, model.layer3.weight.shape)
// print(model.layer1.bias.shape, model.layer2.bias.shape, model.layer3.bias.shape)

In [16]:
// let coremlModel = Model(version: 4,
//                         shortDescription: "Regression",
//                         author: "Jacopo Mangiavacchi",
//                         license: "MIT",
//                         userDefined: ["SwiftCoremltoolsVersion" : "0.0.3"]) {
//     Input(name: "input", shape: [13])
//     Output(name: "output", shape: [1])
//     NeuralNetwork {
//         InnerProduct(name: "dense1",
//                      input: ["input"],
//                      output: ["outDense1"],
//                      weight: model.layer1.weight.transposed().flattened().scalars,
//                      bias: model.layer1.bias.flattened().scalars,
//                      inputChannels: 13,
//                      outputChannels: 64)
//         ReLu(name: "Relu1",
//              input: ["outDense1"],
//              output: ["outRelu1"])
//         InnerProduct(name: "dense2",
//                      input: ["outRelu1"],
//                      output: ["outDense2"],
//                      weight: model.layer2.weight.transposed().flattened().scalars,
//                      bias: model.layer2.bias.flattened().scalars,
//                      inputChannels: 64,
//                      outputChannels: 32)
//         ReLu(name: "Relu2",
//              input: ["outDense2"],
//              output: ["outRelu2"])
//         InnerProduct(name: "dense3",
//                      input: ["outRelu2"],
//                      output: ["output"],
//                      weight: model.layer3.weight.transposed().flattened().scalars,
//                      bias: model.layer3.bias.flattened().scalars,
//                      inputChannels: 32,
//                      outputChannels: 1)
//     }
// }

In [17]:
// let coreMLData = coremlModel.coreMLData
// try! coreMLData!.write(to: URL(fileURLWithPath: "../model/s4tf_train_model.mlmodel"))