# Importing all necessary Packages

In [27]:
import os
!pip install mxnet-cu92
import mxnet as mx
from mxnet import gluon, nd, ndarray

import pandas as pd
import numpy as np

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting mxnet-cu92
  Downloading mxnet_cu92-1.7.0-py2.py3-none-manylinux2014_x86_64.whl (789.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m789.8/789.8 MB[0m [31m834.3 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: mxnet-cu92
Successfully installed mxnet-cu92-1.7.0


## Hyper Parameters

In [13]:
data_path = 'ml-100k'
num_emb = 128 # Numerical Embedding value
opt = 'Adam' # Optimizer - Adam // can be SGD(Stochastic Gradient Descent)
lr = 0.02 # Learning Rate
mnmtm = 0. # Momentum
wd = 0.4 
batch_size = 50
ctx = mx.gpu(4) 

## Downloading Data from Movielens

In [14]:
def download_ml_data(prefix):
  if not os.path.exists("%s.zip" % prefix):
    print("Downloading Movielens Data %s" % prefix)
    os.system("wget http://files.grouplens.org/datasets/movielens/%s.zip" % prefix)
    os.system("unzip %s.zip" % prefix)

In [15]:
download_ml_data(data_path)

Downloading Movielens Data ml-100k


--2023-04-21 11:37:44--  http://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’

     0K .......... .......... .......... .......... ..........  1%  114K 42s
    50K .......... .......... .......... .......... ..........  2%  228K 31s
   100K .......... .......... .......... .......... ..........  3%  228K 27s
   150K .......... .......... .......... .......... ..........  4%  228K 25s
   200K .......... .......... .......... .......... ..........  5%  228K 24s
   250K .......... .......... .......... .......... ..........  6% 59.9M 20s
   300K .......... .......... .......... .......... ..........  7% 72.7M 17s
   350K .......... .......... .......... .......... ..........  8%  229K 17s
   400K .......... .........

Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base         
  inflating: ml-100k/u3.test         
  inflating: ml-100k/u4.base         
  inflating: ml-100k/u4.test         
  inflating: ml-100k/u5.base         
  inflating: ml-100k/u5.test         
  inflating: ml-100k/ua.base         
  inflating: ml-100k/ua.test         
  inflating: ml-100k/ub.base         
  inflating: ml-100k/ub.test         


In [17]:
## Finding the No of users and no of items 
from re import M
def max_id(fname):
  mu = 0
  mi = 0
  with open(fname) as f:
    for line in f:
      tks = line.strip().split('\t')
      if len(tks) != 4:
        continue
      mu = max(mu, int(tks[0]))
      mi = max(mi, int(tks[1]))
  return mu+1,mi+1
max_users, max_items = max_id(data_path + '/u.data')

## Splitting train data, test data, labels

In [47]:
train_df = pd.read_csv(data_path+'/u1.base', header = None, sep = '\t')
test_df = pd.read_csv(data_path+'/u1.test',header = None, sep = '\t')

train_data = nd.array(train_df[[0,1]].values, dtype = np.float32)
train_label = nd.array(train_df[2].values, dtype = np.float32)

test_data = nd.array(test_df[[0,1]].values, dtype = np.float32)
test_label = nd.array(test_df[2].values, dtype = np.float32)

## Creating a SparseMatrixDataset
This class inherits the gluon dataset class. 

In [22]:
class SparseMatrixDataset(gluon.data.Dataset):
  def __init__(self, data, label):
    assert data.shape[0] == len(label) ## Checks whether the first dimension in the shape of data is equal to len of label, else raise Assertion Error
    self.data = data
    self.label = label
    ## Checking whether the label is of the type ndarray using isinstance()
    if isinstance(label, ndarray.NDArray) and len(label.shape) == 1:
      # using self._ will be available in the entire instance of the class
      # using only label will be scoped only within the method itself
      self._label = label.asnumpy()
    else:
      self._label = label
    
  def __getitem__(self,idx):
    return self.data[idx,0], self.data[idx,1], self.label[idx]
  
  def __len__(self):
    return self.data.shape[0]


In [54]:
class MFBlock(gluon.Block):
  def __init__(self, max_users, max_items, num_emb, dropout_p = 0.5):
    super(MFBlock,self).__init__()

    self.max_users = max_users
    self.max_items = max_items
    self.dropout_p = dropout_p
    self.num_emb = num_emb

    with self.name_scope(): 
      #To manage the names of nested Blocks, each Block has a name_scope attached to it. All Blocks created within a name scope will have its parent Block’s prefix prepended to its name.
      self.user_embeddings = gluon.nn.Embedding(max_users, num_emb)
      self.item_embeddings = gluon.nn.Embedding(max_items, num_emb)
      self.dropout = gluon.nn.Dropout(dropout_p)
    
  def forward(self, users, items):
    a = self.user_embeddings(users)
    b = self.item_embeddings(items)
    predictions = self.dropout(a) * self.dropout(b)
    predictions = nd.sum(predictions, axis = 1) ## Calculate the sum of each elements in the array    
    return predictions


In [55]:
net = MFBlock(max_users = max_users, max_items = max_items, num_emb = num_emb, dropout_p = 0.)
net.collect_params()  

mfblock2_ (
  Parameter mfblock2_embedding0_weight (shape=(944, 128), dtype=float32)
  Parameter mfblock2_embedding1_weight (shape=(1683, 128), dtype=float32)
)

In [57]:
loss_function = gluon.loss.L2Loss()

In [58]:
ctx = mx.cpu()

In [59]:
net.collect_params().initialize(mx.init.Xavier(magnitude = 2.24), ctx = mx.cpu(), force_reinit = True)

In [60]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'learning_rate' : lr, 'wd':wd, 'momentum': 0.9
})

In [61]:
train_data_iter = gluon.data.DataLoader(SparseMatrixDataset(train_data, train_label),
                                        shuffle = True, batch_size = batch_size)
test_data_iter =  gluon.data.DataLoader(SparseMatrixDataset(test_data, test_label),
                                       shuffle = True, batch_size = batch_size)

In [64]:
def eval_net(data, net):
  acc = mx.metric.RMSE()
  for i, (user,item,label) in enumerate(data):
    user = user.as_in_context(ctx).reshape((batch_size,))
    item = item.as_in_context(ctx).reshape((batch_size,))
    label = label.as_in_context(ctx).reshape((batch_size,))
    predictions = net(user, item)
    loss = loss_function(predictions, label)
    acc.update(preds = predictions, labels = label)
  return acc.get()[1]  

In [65]:
eval_net(test_data_iter, net)

3.7163775902986527

In [68]:
epochs = 10
def train(data_iter, net):
  a = []
  b = []
  for e in range(epochs):
    print(f'Epoch: {format(e)} ')
    for i, (user,item,label) in enumerate(data_iter):
      user = user.as_in_context(ctx).reshape((batch_size,))
      item = item.as_in_context(ctx).reshape((batch_size,))
      label = label.as_in_context(ctx).reshape((batch_size,))
      with mx.autograd.record():
        output = net(user,item)
        loss = loss_function(output,label)
        loss.backward()
      net.collect_params().values()
      trainer.step(batch_size)
    a = eval_net(train_data_iter, net)
    b = eval_net(test_data_iter, net)
    print("EPOCH {}: RMSE ON TRAINING DATA AND TEST DATA: {}. {}".format(e,a,b))
  return a, b




In [69]:
(a,b) = train(train_data_iter, net)
print("The final RMSE of training data and testing data are",(a,b))

Epoch: 0 
EPOCH 0: RMSE ON TRAINING DATA AND TEST DATA: 3.698545118868351. 3.7165595012903214
Epoch: 1 
EPOCH 1: RMSE ON TRAINING DATA AND TEST DATA: 3.6985192470252515. 3.716602285504341
Epoch: 2 
EPOCH 2: RMSE ON TRAINING DATA AND TEST DATA: 3.6985854786634444. 3.716644257903099
Epoch: 3 
EPOCH 3: RMSE ON TRAINING DATA AND TEST DATA: 3.698873323947191. 3.7165798515081407
Epoch: 4 
EPOCH 4: RMSE ON TRAINING DATA AND TEST DATA: 3.6987005624175073. 3.7166377264261246
Epoch: 5 
EPOCH 5: RMSE ON TRAINING DATA AND TEST DATA: 3.6988156653940676. 3.7167279362678527
Epoch: 6 
EPOCH 6: RMSE ON TRAINING DATA AND TEST DATA: 3.6987258034944532. 3.716428287625313
Epoch: 7 
EPOCH 7: RMSE ON TRAINING DATA AND TEST DATA: 3.6988907086849214. 3.7164703798294068
Epoch: 8 
EPOCH 8: RMSE ON TRAINING DATA AND TEST DATA: 3.6986256209015846. 3.716779120564461
Epoch: 9 
EPOCH 9: RMSE ON TRAINING DATA AND TEST DATA: 3.6987649576365946. 3.716369916200638
The final RMSE of training data and testing data are (3.6

## Let' change the Model 
## Let's add some Dense Layer and Make it as Fully Connected layer, and check the performance

In [126]:
class MFBlock1(gluon.Block):
  def __init__(self, max_users, max_items, num_emb, dropout_p = 0.):
    super(MFBlock1,self).__init__()

    self.max_users = max_users
    self.max_items = max_items
    self.dropout_p = dropout_p
    self.num_emb = num_emb

    with self.name_scope(): 
      #To manage the names of nested Blocks, each Block has a name_scope attached to it. All Blocks created within a name scope will have its parent Block’s prefix prepended to its name.
      self.user_embeddings = gluon.nn.Embedding(max_users, num_emb)
      self.item_embeddings = gluon.nn.Embedding(max_items, num_emb)
      self.dropout = gluon.nn.Dropout(dropout_p)
      self.dense = gluon.nn.Dense(num_emb)
    
  def forward(self, users, items):
    a = self.user_embeddings(users)
    a = self.dense(a)


    b = self.item_embeddings(items)
    b = self.dense(b)


    predictions = self.dropout(a) * self.dropout(b)
    predictions = nd.sum(predictions, axis = 1) ## Calculate the sum of each elements in the array    
    return predictions

In [127]:
net1 = MFBlock1(max_users = max_users, max_items = max_items, num_emb = num_emb, dropout_p = 0.)
net1.collect_params()  

mfblock12_ (
  Parameter mfblock12_embedding0_weight (shape=(944, 128), dtype=float32)
  Parameter mfblock12_embedding1_weight (shape=(1683, 128), dtype=float32)
  Parameter mfblock12_dense0_weight (shape=(128, 0), dtype=float32)
  Parameter mfblock12_dense0_bias (shape=(128,), dtype=float32)
)

In [128]:
loss_function = gluon.loss.L2Loss()

In [129]:
net1.collect_params().initialize(mx.init.Xavier(magnitude = 2.24), ctx = mx.cpu(), force_reinit = True)

In [130]:
trainer = gluon.Trainer(net1.collect_params(), 'sgd', {
    'learning_rate' : lr, 'wd':wd, 'momentum': 0.9
})

In [131]:
eval_net(test_data_iter, net1)

3.7166581296920778

In [133]:
epochs = 10
def train1(data_iter, net):
  a = []
  b = []
  for e in range(epochs):
    print(f'Epoch: {format(e)} ')
    for i, (user,item,label) in enumerate(data_iter):
      user = user.as_in_context(ctx).reshape((batch_size,))
      item = item.as_in_context(ctx).reshape((batch_size,))
      label = label.as_in_context(ctx).reshape((batch_size,))
      with mx.autograd.record():
        output = net(user,item)
        loss = loss_function(output,label)
        loss.backward()
      net.collect_params().values()
      trainer.step(batch_size,ignore_stale_grad = True)
    a = eval_net(train_data_iter, net)
    b = eval_net(test_data_iter, net)
    print("EPOCH {}: RMSE ON TRAINING DATA AND TEST DATA: {}. {}".format(e,a,b))
  return a, b




In [134]:
(a,b) = train1(train_data_iter, net)
print("The final RMSE of training data and testing data are",(a,b))

Epoch: 0 
EPOCH 0: RMSE ON TRAINING DATA AND TEST DATA: 3.6986864763498306. 3.7164743614196776
Epoch: 1 
EPOCH 1: RMSE ON TRAINING DATA AND TEST DATA: 3.698616882264614. 3.716383241415024
Epoch: 2 
EPOCH 2: RMSE ON TRAINING DATA AND TEST DATA: 3.698664338588715. 3.7170433419942857
Epoch: 3 
EPOCH 3: RMSE ON TRAINING DATA AND TEST DATA: 3.6985878494381903. 3.7162007969617843
Epoch: 4 
EPOCH 4: RMSE ON TRAINING DATA AND TEST DATA: 3.6986929927766323. 3.7165655320882798
Epoch: 5 
EPOCH 5: RMSE ON TRAINING DATA AND TEST DATA: 3.698709747046232. 3.7163346296548845
Epoch: 6 
EPOCH 6: RMSE ON TRAINING DATA AND TEST DATA: 3.6987730634212492. 3.7163832676410675
Epoch: 7 
EPOCH 7: RMSE ON TRAINING DATA AND TEST DATA: 3.6987247894704343. 3.7162336218357086
Epoch: 8 
EPOCH 8: RMSE ON TRAINING DATA AND TEST DATA: 3.6987047734856606. 3.7167682909965514
Epoch: 9 
EPOCH 9: RMSE ON TRAINING DATA AND TEST DATA: 3.698653819859028. 3.7161995333433153
The final RMSE of training data and testing data are (3

In [163]:
class MFBlock2(gluon.Block):
  def __init__(self, max_users, max_items, num_emb, dropout_p = 0.5):
    super(MFBlock2,self).__init__()

    self.max_users = max_users
    self.max_items = max_items
    self.dropout_p = dropout_p
    self.num_emb = num_emb

    with self.name_scope(): 
      #To manage the names of nested Blocks, each Block has a name_scope attached to it. All Blocks created within a name scope will have its parent Block’s prefix prepended to its name.
      self.user_embeddings = gluon.nn.Embedding(max_users, num_emb)
      self.item_embeddings = gluon.nn.Embedding(max_items, num_emb)
      self.dropout = gluon.nn.Dropout(dropout_p)
      # self.conv = gluon.nn.Conv1D(num_emb, kernel_size = (1),activation = "relu")
      self.dense = gluon.nn.Dense(num_emb, activation = 'sigmoid')
    
  def forward(self, users, items):
    a = self.user_embeddings(users)
    # a = self.conv(a)
    a = self.dense(a)
    a = self.dense(a)

    b = self.item_embeddings(items)
    # b = self.conv(b)
    b = self.dense(b)
    b = self.dense(b)

    predictions = self.dropout(a) * self.dropout(b)
    predictions = nd.sum(predictions, axis = 1) ## Calculate the sum of each elements in the array    
    return predictions

In [164]:
net2 = MFBlock2(max_users = max_users, max_items = max_items, num_emb = num_emb, dropout_p = 0.)
net2.collect_params() 

mfblock210_ (
  Parameter mfblock210_embedding0_weight (shape=(944, 128), dtype=float32)
  Parameter mfblock210_embedding1_weight (shape=(1683, 128), dtype=float32)
  Parameter mfblock210_dense0_weight (shape=(128, 0), dtype=float32)
  Parameter mfblock210_dense0_bias (shape=(128,), dtype=float32)
)

In [165]:
loss_function = gluon.loss.L2Loss()

In [166]:
net2.collect_params().initialize(mx.init.Xavier(magnitude = 2.24), ctx = mx.cpu(), force_reinit = True)

In [167]:
trainer = gluon.Trainer(net2.collect_params(), 'sgd', {
    'learning_rate' : lr, 'wd':wd, 'momentum': 0.9
})

In [168]:
eval_net(test_data_iter, net2)

31.97821319580078

In [169]:
epochs = 10
def train2(data_iter, net):
  a = []
  b = []
  for e in range(epochs):
    print(f'Epoch: {format(e)} ')
    for i, (user,item,label) in enumerate(data_iter):
      user = user.as_in_context(ctx).reshape((batch_size,))
      item = item.as_in_context(ctx).reshape((batch_size,))
      label = label.as_in_context(ctx).reshape((batch_size,))
      with mx.autograd.record():
        output = net(user,item)
        loss = loss_function(output,label)
        loss.backward()
      net.collect_params().values()
      trainer.step(batch_size,ignore_stale_grad = True)
    a = eval_net(train_data_iter, net)
    b = eval_net(test_data_iter, net)
    print("EPOCH {}: RMSE ON TRAINING DATA AND TEST DATA: {}. {}".format(e,a,b))
  return a, b

In [170]:
(a,b) = train2(train_data_iter, net2)
print("The final RMSE of training data and testing data are",(a,b))

Epoch: 0 
EPOCH 0: RMSE ON TRAINING DATA AND TEST DATA: 1.1483189037442207. 1.1797329393029212
Epoch: 1 
EPOCH 1: RMSE ON TRAINING DATA AND TEST DATA: 1.1710994287580252. 1.2020826146006585
Epoch: 2 
EPOCH 2: RMSE ON TRAINING DATA AND TEST DATA: 1.1955617877840996. 1.2255912278592587
Epoch: 3 
EPOCH 3: RMSE ON TRAINING DATA AND TEST DATA: 1.1465987780690192. 1.1786295545101166
Epoch: 4 
EPOCH 4: RMSE ON TRAINING DATA AND TEST DATA: 1.214436404928565. 1.2448888936638831
Epoch: 5 
EPOCH 5: RMSE ON TRAINING DATA AND TEST DATA: 1.1238364177942275. 1.1573218400776386
Epoch: 6 
EPOCH 6: RMSE ON TRAINING DATA AND TEST DATA: 1.1341331158950925. 1.1675885209441186
Epoch: 7 
EPOCH 7: RMSE ON TRAINING DATA AND TEST DATA: 1.2676864267140626. 1.2953204399347304
Epoch: 8 
EPOCH 8: RMSE ON TRAINING DATA AND TEST DATA: 1.1401097257435322. 1.1723342721164227
Epoch: 9 
EPOCH 9: RMSE ON TRAINING DATA AND TEST DATA: 1.13102034997195. 1.1643961435556411
The final RMSE of training data and testing data are 

# Amazon SageMaker Built in Algorithm 
## Factorization Machines for Recommendation System

In [175]:
import os
import csv
from urllib.parse import urlparse
import boto3
import numpy as np
from scipy.sparse import lil_matrix

BUCKET  = 'recommendationbuck'
s3 = boto3.client('s3')

In [177]:
def download_file(s3_source, dest):
    if not os.path.exists(dest):
        os.makedirs(dest)
    
    url = urlparse(s3_source)
    bucket,key = url.netloc, url.path.lstrip('/')
    filename = key.split('/')[-1]
    with open ('%s/%s' %(dest,filename), 'wb') as data:
        s3.download_fileobj(bucket, key,data)
    

def loadDataset(filename, lines, columns):
    X = lil_matrix((lines,columns)).astype('float32')
    Y = []
    line=0
    with open(filename,'r') as f:
        samples = csv.reader(f,delimiter='\t')
        for userId,movieId,rating,timestamp in samples:
            X[line, int(userId)-1] = 1
            X[line, int(nbUsers)+int(movieId)-1] = 1
            Y.append(int(rating))
            line = line+1
    Y = np.array(Y).astype('float32')
    return X,Y

nbUsers = 943
nbMovies = 1682
nbFeatures = nbUsers+nbMovies
nbRatingsTrain = 80000
nbRatingsTest = 20000

input_dir = 'ml-100k'
X_train, Y_train = loadDataset('%s/u1.base' % input_dir, nbRatingsTrain, nbFeatures)
X_test, Y_test = loadDataset('%s/u1.test' % input_dir, nbRatingsTest, nbFeatures)

In [178]:
prefix = 'exercies4/fm-movielens100k'
train_key = 'train.protobuf'
train_prefix = '{}/{}'.format(prefix,'train')
test_key = 'test.protobuf'
test_prefix = '{}/{}'.format(prefix,'test')
output_prefix = 's3://{}/{}/output'.format(BUCKET, prefix)

def writeDatasetToProtobuf(X,Y,bucket,prefix,key):
    import io,boto3
    import sagemaker.amazon.common as smac
    buf = io.BytesIO()
    smac.write_spmatrix_to_sparse_tensor(buf,X,Y)
    buf.seek(0)
    print(buf)
    obj = '{}/{}' .format(prefix,key)
    boto3.resource('s3').Bucket(bucket).Object(obj).upload_fileobj(buf)
    print('Wrote dataset: {}/{}'.format(bucket,obj))

writeDatasetToProtobuf(X_train,Y_train,BUCKET,train_prefix,train_key)
writeDatasetToProtobuf(X_test,Y_test,BUCKET,test_prefix,test_key)
print('Output: {}'.format(output_prefix))

<_io.BytesIO object at 0x7f447e8d9710>
Wrote dataset: recommendationbuck/exercies4/fm-movielens100k/train/train.protobuf
<_io.BytesIO object at 0x7f447e8d9110>
Wrote dataset: recommendationbuck/exercies4/fm-movielens100k/test/test.protobuf
Output: s3://recommendationbuck/exercies4/fm-movielens100k/output


In [182]:
import sagemaker
from sagemaker import get_execution_role

train_data = 's3://%s/exercies4/fm-movielens100k/train/train.protobuf' %BUCKET
test_data = 's3://%s/exercies4/fm-movielens100k/test/test.protobuf' %BUCKET

print(boto3.Session().region_name)
region = boto3.Session.region_name

ap-south-1


In [184]:
containers = {
    region:'991648021394.dkr.ecr.ap-south-1.amazonaws.com/factorization-machines:latest'
}
fm = sagemaker.estimator.Estimator(containers[region],
                                   get_execution_role(),
                                   train_instance_count = 1,
                                   train_instance_type = 'ml.c4.xlarge',
                                   output_path = output_prefix,
                                   sagemaker_session = sagemaker.Session())

fm.set_hyperparameters(feature_dim = nbFeatures,
                       predictor_type = 'regressor',
                       mini_batch_size = 1000,
                       num_factors = 64,
                       epochs = 50)

fm.fit({'train':train_data,'test':test_data})

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker:Creating training-job with name: factorization-machines-2023-04-21-14-23-41-245


2023-04-21 14:23:41 Starting - Starting the training job...
2023-04-21 14:23:58 Starting - Preparing the instances for training......
2023-04-21 14:25:08 Downloading - Downloading input data
2023-04-21 14:25:08 Training - Downloading the training image.........
2023-04-21 14:26:34 Training - Training image download completed. Training in progress...[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[04/21/2023 14:26:45 INFO 139893122135872] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-conf.json: {'epochs': 1, 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'normal', 'linear_init_sigma': '0.01', 'factors_init_method': 'normal', 'factor

In [207]:
import json
import numpy as np
# from sagemaker.predictor import json_deserializer
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

nbUsers = 943
nbMovies = 1682
nbFeatures = nbUsers+nbMovies

class FMSerializer(JSONSerializer):
    def serializer(self,data):
        js = {'instances':[]}
        # fm_predictor.content_type('application/json')
        for row in data:
            keys = np.argwhere(row == np.amax(row)).flatten().tolist()
            js['instances'].append({
                'data':{
                'features':{
                'keys':keys,
                'shape':[nbFeatures],
                'values':[1]*len(keys)
                }
                }
            })
        print(js)
        return json.dumps(js)

# fm_predictor.content_type ='application/json'
# fm_predictor.serializer = fm_serializer
# fm_predictor.deserializer = json_deserializer

In [208]:
fm_predictor = fm.deploy(instance_type = 'ml.m4.xlarge', initial_instance_count = 1,serializer = FMSerializer(), deserializer = JSONDeserializer)

INFO:sagemaker:Creating model with name: factorization-machines-2023-04-21-15-11-57-455
INFO:sagemaker:Creating endpoint-config with name factorization-machines-2023-04-21-15-11-57-455
INFO:sagemaker:Creating endpoint with name factorization-machines-2023-04-21-15-11-57-455


--------!

In [None]:
result = fm_predictor.predict(X_test[1000:1010].toarray())
print(result)
print(Y_test[1000:1010])