In [1]:
%install-location $cwd/swift-install
%install-swiftpm-flags -c release
%install '.package(url: "https://github.com/tensorflow/swift-models", .branch("master"))' Batcher ModelSupport Datasets

Installing packages:
	.package(url: "https://github.com/tensorflow/swift-models", .branch("master"))
		Batcher
		ModelSupport
		Datasets
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmpb19wsmuk/swift-install
[1/2] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
Initializing Swift...
Installation complete!


In [0]:
import Foundation
import ModelSupport
import TensorFlow
import Datasets

# Dataset

In [0]:
/// Column access for a sequence whose elements are themselves collections (rows).
extension Sequence where Element: Collection {
    /// Returns, in row order, the value stored at index `column` of every row.
    ///
    /// Each row is indexed with its own subscript, so a row that does not contain
    /// `column` as a valid index traps exactly as a direct subscript would.
    subscript(column column: Element.Index) -> [Element.Element] {
        var values: [Element.Element] = []
        for row in self {
            values.append(row[column])
        }
        return values
    }
}
extension Sequence where Element: Hashable {
    /// Returns the distinct elements of the sequence, keeping the position of
    /// each element's first occurrence.
    func unique() -> [Element] {
        var alreadySeen = Set<Element>()
        var distinct: [Element] = []
        for candidate in self {
            // `insert` reports whether the value was new, so one set operation
            // both records it and decides whether to keep it.
            if alreadySeen.insert(candidate).inserted {
                distinct.append(candidate)
            }
        }
        return distinct
    }
}

In [0]:
/// Implicit-feedback samples built from the `u1.base` file of the MovieLens `ml-100k` archive.
///
/// Every parsed row whose rating is positive becomes a `[user, item, 1]` sample. In addition,
/// five items drawn at random among those the user has no positive sample for are appended per
/// user as `[user, item, 0]` samples. User and item values are dense indices, not raw ids.
public struct MovieLens {

    /// Distinct raw user ids, in order of first appearance in the file.
    public let users: [Float]
    /// Distinct raw item ids, in order of first appearance in the file.
    public let items: [Float]
    /// Number of distinct users (`users.count`).
    public let num_users: Int
    /// Number of distinct items (`items.count`).
    public let num_items: Int
    /// Samples as `[userIndex, itemIndex, label]`, positives first, then the sampled negatives.
    public let user_item_rating: [[Int]]
    /// Third column (the rating) of every parsed row, in file order.
    public let rating: [Float]
    /// Raw user id -> dense user index.
    public let user2id: [Float:Int]
    /// Dense user index -> raw user id.
    public let id2user: [Int:Float]
    /// Raw item id -> dense item index.
    public let item2id: [Float:Int]
    /// Dense item index -> raw item id.
    public let id2item: [Int:Float]
    /// `[num_users, num_items]` tensor holding 1 where a positive sample exists and 0 elsewhere.
    public let neg_sampling: Tensor<Float>

    /// Fetches the `ml-100k` resource through `DatasetUtilities.downloadResource`, using a
    /// `data/` directory under the current working directory as local storage, and returns the
    /// text of its `u1.base` file.
    ///
    /// Terminates the process with a descriptive message when the file cannot be read.
    static func downloadMovieLensDatasetIfNotPresent() -> String {
        let localURL = URL(fileURLWithPath: FileManager.default.currentDirectoryPath)
        let dataFolder = DatasetUtilities.downloadResource(
            filename: "ml-100k",
            fileExtension: "zip",
            remoteRoot: URL(string: "http://files.grouplens.org/datasets/movielens/")!,
            localStorageDirectory: localURL.appendingPathComponent("data/", isDirectory: true))

        let ratingsFile = dataFolder.appendingPathComponent("u1.base")
        do {
            return try String(contentsOf: ratingsFile, encoding: .utf8)
        } catch {
            fatalError("Could not read MovieLens ratings at \(ratingsFile.path): \(error)")
        }
    }

    /// Loads the ratings file, indexes users and items, and builds the positive and
    /// randomly sampled negative training samples.
    public init() {
        let dataFiles = MovieLens.downloadMovieLensDatasetIfNotPresent()
        // Rows that do not yield at least user, item and rating are dropped, so the column
        // accesses below cannot trap on a malformed or truncated line.
        let data: [[Float]] = dataFiles
            .split(separator: "\n")
            .map { line in line.split(separator: "\t").compactMap { Float(String($0)) } }
            .filter { $0.count >= 3 }

        let users = data[column: 0].unique()
        let items = data[column: 1].unique()
        let rating = data[column: 2]

        // Half-open ranges stay valid (empty) when the file contains no usable rows,
        // whereas `0...count-1` would trap.
        let user_index = 0..<users.count
        let user2id: [Float:Int] = Dictionary(uniqueKeysWithValues: zip(users, user_index))
        let id2user: [Int:Float] = Dictionary(uniqueKeysWithValues: zip(user_index, users))

        let item_index = 0..<items.count
        let item2id: [Float:Int] = Dictionary(uniqueKeysWithValues: zip(items, item_index))
        let id2item: [Int:Float] = Dictionary(uniqueKeysWithValues: zip(item_index, items))

        // The interaction mask lives in a flat Swift array (row-major, one row per user) while
        // it is being built and queried; the tensor is created once at the end. Element-wise
        // tensor writes and `scalarized()` reads inside these loops are far more expensive.
        let itemCount = items.count
        var interacted = [Bool](repeating: false, count: users.count * itemCount)
        var positivesPerUser = [Int](repeating: 0, count: users.count)

        let negativesPerUser = 5
        var dataset: [[Int]] = []
        dataset.reserveCapacity(data.count + negativesPerUser * users.count)

        for element in data where element[2] > 0 {
            guard let u_index = user2id[element[0]], let i_index = item2id[element[1]] else {
                continue
            }
            dataset.append([u_index, i_index, 1])
            let slot = u_index * itemCount + i_index
            if !interacted[slot] {
                interacted[slot] = true
                positivesPerUser[u_index] += 1
            }
        }

        // A user with a positive sample for every item has no negative to draw; skipping them
        // keeps the rejection-sampling loop below from spinning forever.
        for u_index in user_index where positivesPerUser[u_index] < itemCount {
            for _ in 0..<negativesPerUser {
                var i_index = Int.random(in: item_index)
                while interacted[u_index * itemCount + i_index] {
                    i_index = Int.random(in: item_index)
                }
                dataset.append([u_index, i_index, 0])
            }
        }

        let neg_sampling = Tensor<Float>(
            shape: [users.count, itemCount],
            scalars: interacted.map { $0 ? 1 : 0 })

        self.num_users = users.count
        self.num_items = items.count
        self.users = users
        self.items = items
        self.rating = rating
        self.user2id = user2id
        self.id2user = id2user
        self.item2id = item2id
        self.id2item = id2item
        self.user_item_rating = dataset
        self.neg_sampling = neg_sampling
    }
}

# Model

In [0]:
/// Two-branch recommender model over (user, item) index pairs.
///
/// One branch multiplies a user embedding and an item embedding element-wise. The other
/// concatenates a second pair of embeddings and passes them through three ReLU dense layers.
/// Both branch outputs are concatenated and fed to a single-unit dense layer that is built
/// without an explicit activation.
public struct NeuMF: Module {

    public typealias Scalar = Float

    /// Vocabulary size of the user embeddings.
    @noDerivative public let num_users: Int
    /// Vocabulary size of the item embeddings.
    @noDerivative public let num_items: Int
    /// Width of the embeddings used by the element-wise product branch.
    @noDerivative public let mf_dim: Int
    /// Stored only; no regularization is applied yet (see the TODO in `init`).
    @noDerivative public let mf_reg: Scalar
    /// `[0]` is the width of the concatenated MLP embeddings; `[1...3]` are the dense layer widths.
    @noDerivative public var mlp_layer_sizes : [Int] = [64,32,16,8]
    /// Stored only; no regularization is applied yet (see the TODO in `init`).
    @noDerivative public var mlp_layer_regs: [Scalar] = [0,0,0,0]

    public var mf_user_embed: Embedding<Scalar>
    public var mf_item_embed: Embedding<Scalar>
    public var mlp_user_embed: Embedding<Scalar>
    public var mlp_item_embed: Embedding<Scalar>
    public var dense1: Dense<Scalar>
    public var dense2: Dense<Scalar>
    public var dense3: Dense<Scalar>
    public var final_dense: Dense<Scalar>

    /// Creates the embeddings and dense layers.
    ///
    /// - Parameters:
    ///   - num_users: Number of distinct user indices.
    ///   - num_items: Number of distinct item indices.
    ///   - mf_dim: Embedding width of the element-wise product branch.
    ///   - mf_reg: Stored, currently unused.
    ///   - mlp_layer_sizes: At least four widths; the first must be even because it is split
    ///     equally between the user and the item embedding of the MLP branch. Entries beyond
    ///     the fourth are ignored.
    ///   - mlp_layer_regs: Stored, currently unused.
    public init(
        num_users: Int,
        num_items: Int,
        mf_dim: Int,
        mf_reg: Float,
        mlp_layer_sizes: [Int],
        mlp_layer_regs: [Float]
    ) {
        // Fail fast with a readable message instead of an index-out-of-range trap below or a
        // shape mismatch on the first forward pass.
        precondition(
            mlp_layer_sizes.count >= 4,
            "mlp_layer_sizes needs at least 4 entries, got \(mlp_layer_sizes.count)")
        precondition(
            mlp_layer_sizes[0] % 2 == 0,
            "mlp_layer_sizes[0] must be even to be split between user and item embeddings, got \(mlp_layer_sizes[0])")

        self.num_users = num_users
        self.num_items = num_items
        self.mf_dim = mf_dim
        self.mf_reg = mf_reg
        self.mlp_layer_sizes = mlp_layer_sizes
        self.mlp_layer_regs = mlp_layer_regs

        //TODO: regularization
        let mlpEmbeddingSize = mlp_layer_sizes[0] / 2
        mf_user_embed = Embedding<Scalar>(vocabularySize: num_users, embeddingSize: mf_dim)
        mf_item_embed = Embedding<Scalar>(vocabularySize: num_items, embeddingSize: mf_dim)
        mlp_user_embed = Embedding<Scalar>(vocabularySize: num_users, embeddingSize: mlpEmbeddingSize)
        mlp_item_embed = Embedding<Scalar>(vocabularySize: num_items, embeddingSize: mlpEmbeddingSize)

        //TODO: Extend it for n layers by using for loop
        //Currently only for 3 layers
        dense1 = Dense(inputSize: mlp_layer_sizes[0], outputSize: mlp_layer_sizes[1], activation: relu)
        dense2 = Dense(inputSize: mlp_layer_sizes[1], outputSize: mlp_layer_sizes[2], activation: relu)
        dense3 = Dense(inputSize: mlp_layer_sizes[2], outputSize: mlp_layer_sizes[3], activation: relu)
        final_dense = Dense(inputSize: mlp_layer_sizes[3] + mf_dim, outputSize: 1)
    }

    /// Scores (user, item) pairs.
    ///
    /// - Parameter input: Index tensor whose element 0 along the first axis holds user indices
    ///   and whose element 1 holds item indices.
    /// - Returns: The output of the final single-unit dense layer.
    @differentiable
    public func callAsFunction(_ input: Tensor<Int32>) -> Tensor<Scalar> {
        let user_indices = input[0]
        let item_indices = input[1]

        let user_embed_mlp = mlp_user_embed(user_indices)
        let item_embed_mlp = mlp_item_embed(item_indices)
        let user_embed_mf = mf_user_embed(user_indices)
        let item_embed_mf = mf_item_embed(item_indices)

        // Element-wise product branch.
        let mf_vector = user_embed_mf * item_embed_mf

        // MLP branch over the concatenated embeddings.
        let mlp_input = user_embed_mlp.concatenated(with: item_embed_mlp, alongAxis: -1)
        let mlp_vector = mlp_input.sequenced(through: dense1, dense2, dense3)

        let vector = mlp_vector.concatenated(with: mf_vector, alongAxis: -1)
        return final_dense(vector)
    }
}


# Training

In [7]:
// Build the dataset: fetches/reads the ml-100k ratings and generates the training samples.
let dataset = MovieLens()

Loading resource: ml-100k


In [0]:
// Vocabulary sizes taken from the dataset; they size the model's embeddings below.
let num_users = dataset.num_users
let num_items = dataset.num_items

In [20]:
// Shuffle the [user, item, label] samples and display how many there are.
let x = dataset.user_item_rating.shuffled()
x.count

84715


In [0]:
// MLP widths: size[0] is split between the user and item embeddings, size[1...3] are the
// dense layer widths. `regs` and `mf_reg` are passed through but NeuMF does not apply them yet.
var size:[Int] = [64, 32, 16, 8]
var regs:[Float] = [0, 0, 0, 0]
var model = NeuMF(num_users: num_users, num_items: num_items, mf_dim: 8, mf_reg: 0.0, mlp_layer_sizes: size, mlp_layer_regs: regs)

In [0]:
// Train for three epochs with Adam, taking one optimizer step per (user, item, label) sample.
let optimizer = Adam(for: model, learningRate: 0.001)
withDevice(.gpu) {
  for epoch in 1...3 {
      var avg_loss: Float = 0.0
      Context.local.learningPhase = .training
      // Keep the shuffled samples in a local so the epoch average can use their exact count.
      let samples = dataset.user_item_rating.shuffled()
      for (i, sample) in samples.enumerated() {
          let user_id = sample[0]
          let item_id = sample[1]
          let rating = Tensor(Float(sample[2]))

          // Shape [2, 1]: row 0 is the user index, row 1 the item index.
          let input = Tensor<Int32>(shape: [2, 1], scalars: [Int32(user_id), Int32(item_id)])
          let (loss, grad) = valueWithGradient(at: model) { model -> Tensor<Float> in
              let logits = model(input)
              return sigmoidCrossEntropy(logits: logits, labels: rating)
          }
          optimizer.update(&model, along: grad)
          avg_loss = avg_loss + loss.scalarized()
          // Running mean over the samples seen so far in this epoch.
          if (i + 1) % 20000 == 0 {
              print(avg_loss / Float(i + 1))
          }
      }
      // Average over the samples actually processed. Dividing by num_users * num_items (the
      // size of the full user-item grid) would not give the mean loss per sample.
      print("Epoch: \(epoch)", "Current loss: \(avg_loss / Float(max(samples.count, 1)))")
  }
}

0.18833874


# Testing

In [34]:
// Score a single pair: row 0 holds the user index (4), row 1 the item index (1).
var input: Tensor<Int32> = [[4],[1]]
model(input)

[[-4.2725706]]


In [0]:
// Score every item index for user index 3.
var output: [Float] = []
// Half-open range: yields no iterations (instead of trapping) when there are no items.
for i in 0..<num_items {
    let input = Tensor<Int32>(shape: [2, 1], scalars: [3, Int32(i)])
    output.append(model(input).scalarized())
}

In [0]:
// Softmax over the item scores. The maximum is subtracted before exponentiating so large
// scores cannot overflow; the result is mathematically the same distribution.
let x: Tensor<Float> = {
    let scores = Tensor(output)
    let shifted = exp(scores - scores.max())
    return shifted / shifted.sum()
}()

In [37]:
// Item index with the highest probability for the scored user.
x.argmax()

33


In [38]:
// Item index with the lowest probability for the scored user.
x.argmin()

1639


# Testing on Dummy Data

In [0]:
// Sizes of the toy problem: valid user indices are 0...3 and valid item indices 0...4.
let num_users = 4
let num_items = 5

In [0]:
// Fresh model sized for the toy problem, with the same layer widths as the full run.
var size:[Int] = [64, 32, 16, 8]
var regs:[Float] = [0, 0, 0, 0]
var model = NeuMF(num_users: num_users, num_items: num_items, mf_dim: 8, mf_reg: 0.0, mlp_layer_sizes: size, mlp_layer_regs: regs)

In [0]:
// Toy samples as [user, item, label], one for every pair of the 4 users and 5 items.
var data:[[Int]] = [[0,0,1], [0,1,1], [0,2,1], [0,3,0], [0,4,1],
                    [1,0,0], [1,1,1], [1,2,1], [1,3,0], [1,4,0],
                    [2,0,0], [2,1,1], [2,2,1], [2,3,1], [2,4,0],
                    [3,0,1], [3,1,0], [3,2,1], [3,3,1], [3,4,1]]

In [0]:
// Alternative toy samples as [user, item, label]; running this cell replaces the previous `data`.
// NOTE(review): user index 4 and item indices 5...14 fall outside the num_users = 4 /
// num_items = 5 sizes the toy model is built with, so its embedding lookups would be out of
// range for these rows — confirm whether this cell is meant to be run with that model.
var data:[[Int]] = [[0,5,1], [2,6,1], [1,7,1], [3,8,0], [4,9,1],
                    [3,14,1], [3,13,0], [3,12,1], [3,11,1], [3,10,1]]

In [0]:
// Train the toy model for 50 epochs with Adam, taking one optimizer step per sample.
let optimizer = Adam(for: model, learningRate: 0.001)
for epoch in 1...50 {
    var avg_loss: Float = 0.0
    for sample in data {
        let user_id = sample[0]
        let item_id = sample[1]
        let rating = Tensor(Float(sample[2]))

        // Shape [2, 1]: row 0 is the user index, row 1 the item index.
        let input = Tensor<Int32>(shape: [2, 1], scalars: [Int32(user_id), Int32(item_id)])
        let (loss, grad) = valueWithGradient(at: model) { model -> Tensor<Float> in
            let logits = model(input)
            return sigmoidCrossEntropy(logits: logits, labels: rating)
        }
        optimizer.update(&model, along: grad)
        avg_loss = avg_loss + loss.scalarized()
    }
    // Average over the samples actually processed; num_users * num_items only equals that
    // count when `data` happens to contain exactly one row per (user, item) pair.
    print("Epoch: \(epoch)", "Current loss: \(avg_loss / Float(max(data.count, 1)))")
}


Epoch: 1 Current loss: 0.6526973
Epoch: 2 Current loss: 0.61954325
Epoch: 3 Current loss: 0.6023137
Epoch: 4 Current loss: 0.5998179
Epoch: 5 Current loss: 0.5859607
Epoch: 6 Current loss: 0.5725097
Epoch: 7 Current loss: 0.5623292
Epoch: 8 Current loss: 0.55179363
Epoch: 9 Current loss: 0.5393158
Epoch: 10 Current loss: 0.5358568
Epoch: 11 Current loss: 0.52070105
Epoch: 12 Current loss: 0.50869924
Epoch: 13 Current loss: 0.49550062
Epoch: 14 Current loss: 0.4862957
Epoch: 15 Current loss: 0.47034225
Epoch: 16 Current loss: 0.47229204
Epoch: 17 Current loss: 0.45568734
Epoch: 18 Current loss: 0.43813866
Epoch: 19 Current loss: 0.4247455
Epoch: 20 Current loss: 0.41159487
Epoch: 21 Current loss: 0.3978243
Epoch: 22 Current loss: 0.3862557
Epoch: 23 Current loss: 0.37105322
Epoch: 24 Current loss: 0.36510608
Epoch: 25 Current loss: 0.3532523
Epoch: 26 Current loss: 0.34468272
Epoch: 27 Current loss: 0.3255597
Epoch: 28 Current loss: 0.31359676
Epoch: 29 Current loss: 0.30134034
Epoch: 3

In [0]:
// Score every item index for user index 2.
var output: [Float] = []
// Half-open range: yields no iterations (instead of trapping) when there are no items.
for i in 0..<num_items {
    let input = Tensor<Int32>(shape: [2, 1], scalars: [2, Int32(i)])
    output.append(model(input).scalarized())
}

In [0]:
// Softmax over the item scores. The maximum is subtracted before exponentiating so large
// scores cannot overflow; the result is mathematically the same distribution.
let x: Tensor<Float> = {
    let scores = Tensor(output)
    let shifted = exp(scores - scores.max())
    return shifted / shifted.sum()
}()

In [0]:
// Item index with the highest probability for the scored user.
x.argmax()

2


In [0]:
// Display the full probability vector over the items.
x

[8.960172e-06, 0.0031344146,   0.99529314, 0.0015539423, 9.463736e-06]
