In [1]:
%install-location $cwd/swift-install
%install-swiftpm-flags -c release
%install '.package(url: "https://github.com/tensorflow/swift-models", .branch("master"))' Batcher ModelSupport Datasets

Installing packages:
	.package(url: "https://github.com/tensorflow/swift-models", .branch("master"))
		Batcher
		ModelSupport
		Datasets
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmp2cddk25u/swift-install
[1/2] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
Initializing Swift...
Installation complete!


In [0]:
import Foundation
import TensorFlow
import Datasets

# Dataset

In [0]:
extension Sequence where Element: Collection {
    /// Extracts one "column" from a sequence of rows.
    ///
    /// - Parameter column: The index to read from every row. Each row must
    ///   contain this index; otherwise the row's own subscript traps.
    /// - Returns: The element at `column` of each row, in row order.
    subscript(column column: Element.Index) -> [Element.Iterator.Element] {
        var picked: [Element.Iterator.Element] = []
        for row in self {
            picked.append(row[column])
        }
        return picked
    }
}
extension Sequence where Iterator.Element: Hashable {
    /// Returns the distinct elements of the sequence, keeping only the first
    /// occurrence of each value and preserving the original order.
    func unique() -> [Iterator.Element] {
        var alreadySeen = Set<Iterator.Element>()
        var distinct: [Iterator.Element] = []
        for candidate in self {
            // `insert` reports whether the value was new to the set.
            let (isNew, _) = alreadySeen.insert(candidate)
            if isNew {
                distinct.append(candidate)
            }
        }
        return distinct
    }
}

In [4]:
// List the device names reported by the global TensorFlow execution context.
print(_ExecutionContext.global.deviceNames)

["/job:localhost/replica:0/task:0/device:CPU:0", "/job:localhost/replica:0/task:0/device:XLA_CPU:0", "/job:localhost/replica:0/task:0/device:GPU:0", "/job:localhost/replica:0/task:0/device:XLA_GPU:0"]


In [0]:
/// Implicit-feedback training data built from the MovieLens 100k `u1.base` split.
///
/// Every parsed row yields one positive example (label 1) plus up to four
/// randomly sampled negative examples (label 0) for the same user, drawn from
/// items the user has no recorded interaction with.
public struct MovieLens {

    /// Distinct raw user ids (column 0), in order of first appearance.
    public let users: [Float]
    /// Distinct raw item ids (column 1), in order of first appearance.
    public let items: [Float]
    /// Number of distinct users.
    public let num_users: Int
    /// Number of distinct items.
    public let num_items: Int
    /// Training examples: `first` is `[userIndex, itemIndex]`, `second` is
    /// `[1]` for an observed row and `[0]` for a sampled negative.
    public let user_item_rating: [TensorPair<Int32,Float>]
    /// Column 2 (the rating) of every parsed row, in file order.
    public let rating: [Float]
    /// Raw user id -> dense user index.
    public let user2id: [Float:Int]
    /// Dense user index -> raw user id.
    public let id2user: [Int:Float]
    /// Raw item id -> dense item index.
    public let item2id: [Float:Int]
    /// Dense item index -> raw item id.
    public let id2item: [Int:Float]
    /// Interaction mask of shape `[num_users, num_items]`: 1 where a row with
    /// rating > 0 exists for that (user, item) pair, 0 otherwise.
    public let neg_sampling: Tensor<Float>

    /// Downloads the `ml-100k` archive into `./data/` (via `DatasetUtilities`)
    /// and returns the text of `u1.base`.
    ///
    /// Terminates the process with a descriptive message if the file cannot
    /// be read, because the dataset cannot be built without it.
    static func downloadMovieLensDatasetIfNotPresent() -> String {
        let localURL = URL(fileURLWithPath: FileManager.default.currentDirectoryPath)
        let dataFolder = DatasetUtilities.downloadResource(
            filename: "ml-100k",
            fileExtension: "zip",
            remoteRoot: URL(string: "http://files.grouplens.org/datasets/movielens/")!,
            localStorageDirectory: localURL.appendingPathComponent("data/", isDirectory: true))

        let ratingsFile = dataFolder.appendingPathComponent("u1.base")
        do {
            return try String(contentsOf: ratingsFile, encoding: .utf8)
        } catch {
            fatalError("Unable to read MovieLens ratings at \(ratingsFile.path): \(error)")
        }
    }

    /// Loads `u1.base`, indexes users and items, and builds the training set.
    ///
    /// The first three tab-separated fields of each line are read as user id,
    /// item id and rating. Lines with fewer than three numeric fields are
    /// skipped instead of crashing the initializer.
    public init() {
        // Number of negative examples sampled for every observed row.
        let negativesPerPositive = 4

        let dataFiles = MovieLens.downloadMovieLensDatasetIfNotPresent()
        let data: [[Float]] = dataFiles.split(separator: "\n").compactMap { line -> [Float]? in
            let fields = line.split(separator: "\t").compactMap { Float(String($0)) }
            return fields.count >= 3 ? fields : nil
        }

        let users = data[column: 0].unique()
        let items = data[column: 1].unique()
        let rating = data[column: 2]

        // Half-open ranges stay valid (empty) when there is no data, whereas
        // `0...count-1` traps for `count == 0`.
        let user_index = 0..<users.count
        let user2id = Dictionary(uniqueKeysWithValues: zip(users, user_index))
        let id2user = Dictionary(uniqueKeysWithValues: zip(user_index, users))

        let item_index = 0..<items.count
        let item2id = Dictionary(uniqueKeysWithValues: zip(items, item_index))
        let id2item = Dictionary(uniqueKeysWithValues: zip(item_index, items))

        // The interaction mask is kept in a flat host-side array while it is
        // being built and queried; touching a Tensor element by element is
        // far slower. It is converted to a Tensor once, at the end.
        let itemCount = items.count
        var interacted = [Bool](repeating: false, count: users.count * itemCount)
        var interactionsPerUser = [Int](repeating: 0, count: users.count)
        var encodedRows: [(Int, Int)] = []
        encodedRows.reserveCapacity(data.count)

        for element in data {
            // Both lookups always succeed: the dictionaries were built from
            // these very columns.
            let u_index = user2id[element[0]]!
            let i_index = item2id[element[1]]!
            encodedRows.append((u_index, i_index))

            let slot = u_index * itemCount + i_index
            if element[2] > 0 && !interacted[slot] {
                interacted[slot] = true
                interactionsPerUser[u_index] += 1
            }
        }

        var dataset: [TensorPair<Int32,Float>] = []
        dataset.reserveCapacity(encodedRows.count * (1 + negativesPerPositive))

        for (u_index, i_index) in encodedRows {
            let positive = Tensor<Int32>([Int32(u_index), Int32(i_index)])
            dataset.append(TensorPair<Int32, Float>(first: positive, second: [1]))

            // A user who interacted with every item has no negative to draw;
            // rejection sampling would spin forever, so skip sampling.
            guard interactionsPerUser[u_index] < itemCount else { continue }

            for _ in 0..<negativesPerPositive {
                var candidate = Int.random(in: item_index)
                while interacted[u_index * itemCount + candidate] {
                    candidate = Int.random(in: item_index)
                }
                let negative = Tensor<Int32>([Int32(u_index), Int32(candidate)])
                dataset.append(TensorPair<Int32, Float>(first: negative, second: [0]))
            }
        }

        let maskScalars: [Float] = interacted.map { $0 ? 1 : 0 }
        let neg_sampling = Tensor<Float>(shape: [users.count, itemCount], scalars: maskScalars)

        self.num_users = users.count
        self.num_items = items.count
        self.users = users
        self.items = items
        self.rating = rating
        self.user2id = user2id
        self.id2user = id2user
        self.item2id = item2id
        self.id2item = id2item
        self.user_item_rating = dataset
        self.neg_sampling = neg_sampling
    }
}

# Model

In [0]:
/// NeuMF-style recommender that combines two branches over (user, item) pairs:
/// an element-wise product of matrix-factorization embeddings, and a
/// three-layer MLP over concatenated user/item embeddings. The two branch
/// outputs are concatenated and projected to a single un-activated score.
public struct NeuMF: Module {

    public typealias Scalar = Float

    /// Number of rows in the user embedding tables.
    @noDerivative public let num_users: Int
    /// Number of rows in the item embedding tables.
    @noDerivative public let num_items: Int
    /// Embedding width of the matrix-factorization branch.
    @noDerivative public let mf_dim: Int
    /// Stored but not applied yet (see the regularization TODO in `init`).
    @noDerivative public let mf_reg: Scalar
    /// MLP layer widths; entries 0...3 are used. Entry 0 is the width of the
    /// concatenated user+item MLP embedding.
    @noDerivative public var mlp_layer_sizes : [Int] = [64,32,16,8]
    /// Stored but not applied yet (see the regularization TODO in `init`).
    @noDerivative public var mlp_layer_regs: [Scalar] = [0,0,0,0]

    public var mf_user_embed: Embedding<Scalar>
    public var mf_item_embed: Embedding<Scalar>
    public var mlp_user_embed: Embedding<Scalar>
    public var mlp_item_embed: Embedding<Scalar>
    public var dense1: Dense<Scalar>
    public var dense2: Dense<Scalar>
    public var dense3: Dense<Scalar>
    public var final_dense: Dense<Scalar>

    /// Creates the embeddings and dense layers.
    ///
    /// - Parameters:
    ///   - num_users: Vocabulary size of the user embeddings.
    ///   - num_items: Vocabulary size of the item embeddings.
    ///   - mf_dim: Embedding size of the matrix-factorization branch.
    ///   - mf_reg: Regularization strength for the MF branch (currently unused).
    ///   - mlp_layer_sizes: At least four layer widths; the first must be even
    ///     because it is split evenly between the user and item embeddings.
    ///   - mlp_layer_regs: Per-layer regularization strengths (currently unused).
    public init(
        num_users: Int,
        num_items: Int,
        mf_dim: Int,
        mf_reg: Float,
        mlp_layer_sizes: [Int],
        mlp_layer_regs: [Float]
    ) {
        // Fail fast with a clear message instead of an index-out-of-range
        // trap below or a shape mismatch on the first forward pass.
        precondition(
            mlp_layer_sizes.count >= 4,
            "mlp_layer_sizes must contain at least 4 entries, got \(mlp_layer_sizes.count)")
        precondition(
            mlp_layer_sizes[0] % 2 == 0,
            "mlp_layer_sizes[0] must be even so it can be split between user and item embeddings")

        self.num_users = num_users
        self.num_items = num_items
        self.mf_dim = mf_dim
        self.mf_reg = mf_reg
        self.mlp_layer_sizes = mlp_layer_sizes
        self.mlp_layer_regs = mlp_layer_regs

        //TODO: regularization
        let mlpEmbeddingSize = mlp_layer_sizes[0] / 2
        self.mf_user_embed = Embedding<Scalar>(vocabularySize: num_users, embeddingSize: mf_dim)
        self.mf_item_embed = Embedding<Scalar>(vocabularySize: num_items, embeddingSize: mf_dim)
        self.mlp_user_embed = Embedding<Scalar>(vocabularySize: num_users, embeddingSize: mlpEmbeddingSize)
        self.mlp_item_embed = Embedding<Scalar>(vocabularySize: num_items, embeddingSize: mlpEmbeddingSize)

        //TODO: Extend it for n layers by using for loop
        //Currently only for 3 layers
        dense1 = Dense(inputSize: mlp_layer_sizes[0], outputSize: mlp_layer_sizes[1], activation: relu)
        dense2 = Dense(inputSize: mlp_layer_sizes[1], outputSize: mlp_layer_sizes[2], activation: relu)
        dense3 = Dense(inputSize: mlp_layer_sizes[2], outputSize: mlp_layer_sizes[3], activation: relu)
        final_dense = Dense(inputSize: (mlp_layer_sizes[3] + mf_dim), outputSize: 1)
    }

    /// Scores a batch of (user, item) index pairs.
    ///
    /// - Parameter input: Index tensor whose axis 1 holds the user index at
    ///   position 0 and the item index at position 1.
    /// - Returns: The output of the final dense layer, which has output size 1
    ///   and no activation.
    @differentiable
    public func callAsFunction(_ input: Tensor<Int32>) -> Tensor<Scalar> {
        // Unstack once and reuse both columns.
        let columns = input.unstacked(alongAxis: 1)
        let user_indices = columns[0]
        let item_indices = columns[1]

        let user_embed_mlp = mlp_user_embed(user_indices)
        let item_embed_mlp = mlp_item_embed(item_indices)
        let user_embed_mf = mf_user_embed(user_indices)
        let item_embed_mf = mf_item_embed(item_indices)

        // MF branch: element-wise product of the user and item embeddings.
        let mf_vector = user_embed_mf * item_embed_mf

        // MLP branch: concatenated embeddings pushed through three dense layers.
        let mlp_input = user_embed_mlp.concatenated(with: item_embed_mlp, alongAxis: -1)
        let mlp_vector = mlp_input.sequenced(through: dense1, dense2, dense3)

        let vector = mlp_vector.concatenated(with: mf_vector, alongAxis: -1)
        return final_dense(vector)
    }
}


# Training

In [0]:
import Batcher

In [8]:
// Build the dataset; the initializer downloads/reads the ratings file and
// generates the positive and sampled-negative training pairs.
let dataset = MovieLens()

Loading resource: ml-100k


In [0]:
// Distinct user/item counts; used below to size the model and to iterate items.
let num_users = dataset.num_users
let num_items = dataset.num_items

In [10]:
// Summary of the dataset: the datapoint count includes the sampled negatives.
print("Number of datapoints", dataset.user_item_rating.count)
print("Number of users", num_users)
print("Number of items", num_items)

Number of datapoints 400000
Number of users 943
Number of items 1650


In [0]:
// Shuffled mini-batches of 1024 training pairs.
let batcher = Batcher(on: dataset.user_item_rating, batchSize: 1024, shuffle: true)

In [0]:
// MLP layer widths; the first entry is split evenly between the user and
// item MLP embeddings inside NeuMF.
var size:[Int] = [64, 32, 16, 8]
// Per-layer regularization strengths; NeuMF stores them but does not apply them yet.
var regs:[Float] = [0, 0, 0, 0]
// `var` because the optimizer updates the model in place.
var model = NeuMF(num_users: num_users, num_items: num_items, mf_dim: 8, mf_reg: 0.0, mlp_layer_sizes: size, mlp_layer_regs: regs)

In [0]:
// Adam optimizer for the model's parameters, learning rate 0.001.
let optimizer = Adam(for: model, learningRate: 0.001)

In [14]:
// Train for 60 epochs with sigmoid cross-entropy on the model's raw scores.
for epoch in 1...60 {
    // Running SUM of the per-batch losses for this epoch (it is not divided
    // by the number of batches before being printed).
    var epochLossSum: Float = 0.0
    Context.local.learningPhase = .training

    for batch in batcher.sequenced() {
        // `first` holds the [user index, item index] pairs, `second` the 0/1 labels.
        let indexPairs = batch.first
        let labels = batch.second

        let (batchLoss, gradients) = valueWithGradient(at: model) { net -> Tensor<Float> in
            sigmoidCrossEntropy(logits: net(indexPairs), labels: labels)
        }

        optimizer.update(&model, along: gradients)
        epochLossSum += batchLoss.scalarized()
    }

    print("Epoch: \(epoch)", "Current loss: \(epochLossSum)")
}


Epoch: 1 Current loss: 173.22163
Epoch: 2 Current loss: 144.26732
Epoch: 3 Current loss: 142.3186
Epoch: 4 Current loss: 141.08649
Epoch: 5 Current loss: 140.19893
Epoch: 6 Current loss: 139.49968
Epoch: 7 Current loss: 138.74817
Epoch: 8 Current loss: 138.0404
Epoch: 9 Current loss: 137.13118
Epoch: 10 Current loss: 135.83781
Epoch: 11 Current loss: 134.2333
Epoch: 12 Current loss: 131.41345
Epoch: 13 Current loss: 127.53943
Epoch: 14 Current loss: 123.29612
Epoch: 15 Current loss: 119.51312
Epoch: 16 Current loss: 116.1993
Epoch: 17 Current loss: 113.30507
Epoch: 18 Current loss: 110.635925
Epoch: 19 Current loss: 108.21609
Epoch: 20 Current loss: 106.15353
Epoch: 21 Current loss: 104.150635
Epoch: 22 Current loss: 102.30159
Epoch: 23 Current loss: 100.48774
Epoch: 24 Current loss: 98.76922
Epoch: 25 Current loss: 97.07339
Epoch: 26 Current loss: 95.74557
Epoch: 27 Current loss: 94.33093
Epoch: 28 Current loss: 92.942955
Epoch: 29 Current loss: 91.69779
Epoch: 30 Current loss: 90.279

## Testing the result and then verifying it on the test dataset

In [227]:
// Score every item that the chosen user has no recorded interaction with.
// `item` collects the candidate item indices and `output` the model score
// for each of them, position by position.
var item:[Int] = []
var output:[Float] = []
let user: Float = 150.0

// Traps if the raw user id is not present in the dataset.
let user_index = dataset.user2id[user]!
print(user_index)

// Read the user's interaction row once instead of extracting one tensor
// element per item.
let interactionRow = dataset.neg_sampling[user_index].scalars

// Half-open range: `0...num_items-1` traps when there are no items.
for i in 0..<num_items where interactionRow[i] == 0 {
  let input = Tensor<Int32>(shape: [1, 2], scalars: [Int32(user_index), Int32(i)])
  output.append(model(input).scalarized())
  item.append(i)
}

149


In [0]:
// Map each candidate item index to the score the model gave it.
var item_score = Dictionary(uniqueKeysWithValues: zip(item,output))

In [0]:
// Sort the (item index, score) entries by descending score. Despite the name,
// `sorted` on a Dictionary returns an array of key/value tuples.
let sortedByValueDictionary = item_score.sorted { $0.1 > $1.1 }

In [0]:
// Keep the ten highest-scoring entries.
let first10 = sortedByValueDictionary.prefix(10)

In [231]:
// Print the raw item id behind each of the ten best-scoring item indices.
for entry in first10 {
  let itemIndex = entry.key
  print(dataset.id2item[itemIndex]!)
}

50.0
98.0
69.0
222.0
117.0
1.0
181.0
258.0
7.0
237.0


In [0]:
// let x = exp(Tensor(output))/(exp(Tensor(output)).sum())

In [164]:
// x.argmax()
// Look up the item index stored at position 194 of the candidate list `item`.
item[194]

194


In [173]:
// Translate the dense item index 669 back to its raw item id.
dataset.id2item[669]!

845.0


In [40]:
// NOTE(review): the only definition of `x` visible in this notebook is the
// commented-out softmax line in an earlier cell; confirm `x` is defined
// before re-running this cell.
x.argmin()

1308


In [0]:
// Look up the item index stored at position 70 of the candidate list `item`.
item[70]

0
