## Import Packages

In [4]:
!pip install git+https://github.com/maciejkula/spotlight.git@master#egg=spotlight

  Running command git clone -q https://github.com/maciejkula/spotlight.git 'C:\Users\Ria\AppData\Local\Temp\pip-install-rthw7ad9\spotlight_ab4019eed63a4a609a4546211fae77be'


Collecting spotlight
  Cloning https://github.com/maciejkula/spotlight.git (to revision master) to c:\users\ria\appdata\local\temp\pip-install-rthw7ad9\spotlight_ab4019eed63a4a609a4546211fae77be
  Resolved https://github.com/maciejkula/spotlight.git to commit 75f4c8c55090771b52b88ef1a00f75bb39f9f2a9
Building wheels for collected packages: spotlight
  Building wheel for spotlight (setup.py): started
  Building wheel for spotlight (setup.py): finished with status 'done'
  Created wheel for spotlight: filename=spotlight-0.1.6-py3-none-any.whl size=34200 sha256=81de18eeb5bcb99a90c903e6b9df97c15db09abacbd91be6960aede4e9ef7dbd
  Stored in directory: C:\Users\Ria\AppData\Local\Temp\pip-ephem-wheel-cache-01sf630n\wheels\b9\f3\fe\92d82f9670bddfb144c00f90c895f4ef990d7812627e23d8f3
Successfully built spotlight
Installing collected packages: spotlight
  Attempting uninstall: spotlight
    Found existing installation: spotlight 3.3.0
    Uninstalling spotlight-3.3.0:
      Successfully uninstalled 

In [5]:
import numpy as np
import pandas as pd
from spotlight.cross_validation import user_based_train_test_split
from spotlight.evaluation import *
from spotlight.interactions import Interactions
from spotlight.factorization.implicit import ImplicitFactorizationModel
from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.factorization.explicit import ExplicitFactorizationModel
from spotlight.factorization.representations import BilinearNet
from spotlight.layers import BloomEmbedding
import torch
from torch.optim import sparse_adam
from spotlight.evaluation import mrr_score
from scipy.stats import describe

## Reading Data

In [6]:
dataset = pd.read_csv('dataset_aggr.csv')

In [7]:
user_ids = dataset.UserID.values
item_ids = dataset.ItemID.values
ratings = dataset.avg_rating.values

In [8]:
interactions = Interactions(user_ids = np.array(user_ids, dtype=np.int32),
                            item_ids = np.array(item_ids, dtype=np.int32),
                            ratings = np.array(ratings, dtype=np.float32))

In [9]:
train, test = user_based_train_test_split(interactions, test_percentage=0.20)

### Check train-test Proportions

In [10]:
train, test

(<Interactions dataset (1043 users x 763 items x 767 interactions)>,
 <Interactions dataset (1043 users x 763 items x 163 interactions)>)

## Train "implicit factorization model":

### Define parameters:

In [11]:
model = ImplicitFactorizationModel(loss='adaptive_hinge', 
                                   embedding_dim=128, 
                                   n_iter=100, 
                                   batch_size=32,
                                   learning_rate=0.005,
                                   l2=1e-6,
                                   optimizer_func=sparse_adam.SparseAdam,
                                   sparse=True,
                                   num_negative_samples=10)

### Fit model

In [13]:
model.fit(train, verbose=True)

Epoch 0: loss 0.4447501270721356
Epoch 1: loss 0.4262576376398404
Epoch 2: loss 0.44228221227725345
Epoch 3: loss 0.3707310526321332
Epoch 4: loss 0.4426347290476163
Epoch 5: loss 0.42974982783198357
Epoch 6: loss 0.4396854527294636
Epoch 7: loss 0.4607029954592387
Epoch 8: loss 0.42780869950850803
Epoch 9: loss 0.43639396503567696
Epoch 10: loss 0.4694180463751157
Epoch 11: loss 0.408333258703351
Epoch 12: loss 0.3984564213703076
Epoch 13: loss 0.45889636998375255
Epoch 14: loss 0.4675477209190528
Epoch 15: loss 0.43974290912350017
Epoch 16: loss 0.3987838067114353
Epoch 17: loss 0.4398944415152073
Epoch 18: loss 0.44062893092632294
Epoch 19: loss 0.4394723102450371
Epoch 20: loss 0.4203199439992507
Epoch 21: loss 0.4694607723504305
Epoch 22: loss 0.41739627594749135
Epoch 23: loss 0.4523144339521726
Epoch 24: loss 0.3983288376281659
Epoch 25: loss 0.4205563335369031
Epoch 26: loss 0.4062804337590933
Epoch 27: loss 0.4314301609992981
Epoch 28: loss 0.45942317321896553
Epoch 29: loss 0

### Average MRR:

In [14]:
mrr = mrr_score(model, test)

In [15]:
avg_mrr = mrr.mean()

In [16]:
f'Avg MRR score (for test data): {avg_mrr}'

'Avg MRR score (for test data): 0.04516683011237628'

### RMSE score

In [17]:
rmse = rmse_score(model, test)

In [18]:
f'RMSE score (for test data): {rmse}'

'RMSE score (for test data): 2.4094338417053223'

### Precision-Recall at k:

#### On the train set,

In [19]:
precision, recall = precision_recall_score(model, train)

In [20]:
print(f'Precision per user: {precision}')

Precision per user: [1.  1.  1.  0.5 1.  0.6 1.  1.  1.  0.5 1.  0.2 1.  0.5 0.5 0.1 1.  1.
 1.  1.  1.  1.  1.  0.4 1.  1.  0.5 1.  1.  1.  0.5 0.5 1.  1.  0.5]


In [21]:
describe(precision)

DescribeResult(nobs=35, minmax=(0.1, 1.0), mean=0.8085714285714285, variance=0.0790420168067227, skewness=-0.9831397513594327, kurtosis=-0.46830606854289325)

In [22]:
print(f'Recall per user: {recall}')

Recall per user: [1.         0.33333333 0.83333333 1.         0.47619048 1.
 0.4        0.14285714 1.         1.         0.14285714 1.
 0.66666667 1.         1.         1.         0.66666667 0.07194245
 0.4        1.         1.         0.25       1.         1.
 0.5        1.         1.         1.         0.5        0.0862069
 1.         1.         0.625      1.         1.        ]


In [23]:
describe(recall)

DescribeResult(nobs=35, minmax=(0.07194244604316546, 1.0), mean=0.7455729744142758, variance=0.11152892291035478, skewness=-0.8223172797515813, kurtosis=-0.8743828602342836)

#### On the test set

In [24]:
precision, recall = precision_recall_score(model, test)

In [25]:
print(f'Precision per user: {precision}')

Precision per user: [0.8 0.  0.2 0.  0.1 0.3 0. ]


In [26]:
describe(precision)

DescribeResult(nobs=7, minmax=(0.0, 0.8), mean=0.2, variance=0.08333333333333337, skewness=1.436796436521193, kurtosis=0.7687999999999966)

In [27]:
print(f'Recall per user: {recall}')

Recall per user: [0.08080808 0.         0.4        0.         0.05       0.12
 0.        ]


In [28]:
describe(recall)

DescribeResult(nobs=7, minmax=(0.0, 0.4), mean=0.09297258297258297, variance=0.020487106271954755, skewness=1.6249972607888974, kurtosis=1.2223200671000667)

## Train "explicit factorization model":

In [29]:
model = ExplicitFactorizationModel(loss='poisson', 
                                   embedding_dim=128, 
                                   n_iter=150, 
                                   batch_size=32,
                                   learning_rate=0.005,
                                   l2=1e-6,
                                   optimizer_func=sparse_adam.SparseAdam,
                                   sparse=True)

### Fit the model:

In [30]:
model.fit(train, verbose=True)

Epoch 0: loss 0.9886945361892382
Epoch 1: loss 0.9572712853550911
Epoch 2: loss 0.9152253742019335
Epoch 3: loss 0.8521202057600021
Epoch 4: loss 0.7540346110860506
Epoch 5: loss 0.6198508950571219
Epoch 6: loss 0.47021986668308574
Epoch 7: loss 0.3294908568883936
Epoch 8: loss 0.23237766899789372
Epoch 9: loss 0.16765623477598032
Epoch 10: loss 0.11715756054036319
Epoch 11: loss 0.07552980119362473
Epoch 12: loss 0.04367559403181076
Epoch 13: loss 0.021272726046542328
Epoch 14: loss 0.002850098283185313
Epoch 15: loss -0.011957877781242132
Epoch 16: loss -0.023910506318012874
Epoch 17: loss -0.03260742500424385
Epoch 18: loss -0.04073225846514106
Epoch 19: loss -0.04711195221170783
Epoch 20: loss -0.05247923064356049
Epoch 21: loss -0.057306456845253706
Epoch 22: loss -0.060858188662678
Epoch 23: loss -0.06391689057151477
Epoch 24: loss -0.06628733462033172
Epoch 25: loss -0.06891396626209219
Epoch 26: loss -0.07036111402946214
Epoch 27: loss -0.07243925201085706
Epoch 28: loss -0.073

### Average MRR:

In [31]:
mrr = mrr_score(model, test)

In [32]:
avg_mrr = mrr.mean()

In [33]:
f'Avg MRR score (for test data): {avg_mrr}'

'Avg MRR score (for test data): 0.05533418069794644'

### RMSE score:

In [34]:
rmse = rmse_score(model, test)

In [35]:
f'RMSE score (for test data): {rmse}'

'RMSE score (for test data): 1.5767426490783691'

### Precision-Recall at k:

In [36]:
precision, recall = precision_recall_score(model, test)

In [37]:
print(f'Precision per user: {precision}')
print(describe(precision))
print(f'Recall per user: {recall}')
print(describe(recall))

Precision per user: [0.7 0.  0.  0.1 0.2 0.2 0. ]
DescribeResult(nobs=7, minmax=(0.0, 0.7), mean=0.17142857142857143, variance=0.06238095238095238, skewness=1.5279989960437383, kurtosis=1.0312044752636762)
Recall per user: [0.07070707 0.         0.         1.         0.1        0.08
 0.        ]
DescribeResult(nobs=7, minmax=(0.0, 1.0), mean=0.1786724386724387, variance=0.13298876790998002, skewness=1.9830330394777387, kurtosis=2.0395790085717636)


## Model Bloom Embeddings:

In [38]:
compression_ratio = 1.5

In [40]:
user_embeddings = BloomEmbedding(interactions.num_users, 32,
                                 compression_ratio=compression_ratio,
                                 num_hash_functions=2)

item_embeddings = BloomEmbedding(interactions.num_items, 32,
                                 compression_ratio=compression_ratio,
                                 num_hash_functions=2)

network = BilinearNet(interactions.num_users,
                      interactions.num_items,
                      user_embedding_layer=user_embeddings,
                      item_embedding_layer=item_embeddings)

model = ExplicitFactorizationModel(loss='poisson',
                                   n_iter=150,
                                   batch_size=32,
                                   learning_rate=1e-2,
                                   l2=1e-6,
                                   representation=network,
                                   sparse=True,
                                   use_cuda=False)

In [41]:
model.fit(train)

In [42]:
mrr = mrr_score(model, test, train=train)
f'Avg MRR score: {np.mean(mrr)}'

'Avg MRR score: 0.1662496893217771'

In [43]:
rmse = rmse_score(model, test)
f'RMSE score (for test data): {rmse}'

'RMSE score (for test data): 1.410247802734375'

In [44]:
precision, recall = precision_recall_score(model, test, train=train)

In [45]:
print(f'Precision per user: {precision}')
print(describe(precision))
print(f'Recall per user: {recall}')
print(describe(recall))

Precision per user: [0.8 0.  0.1 0.1 0.1 0.1 0. ]
DescribeResult(nobs=7, minmax=(0.0, 0.8), mean=0.17142857142857146, variance=0.07904761904761905, skewness=1.9182296278921176, kurtosis=1.9147554071708504)
Recall per user: [0.08080808 0.         0.2        1.         0.05       0.04
 0.        ]
DescribeResult(nobs=7, minmax=(0.0, 1.0), mean=0.19582972582972585, variance=0.13036416254901104, skewness=1.8993750770276816, kurtosis=1.8324578093532988)


## Unaggregated set, explicit model (with ratings):

In [46]:
dataset = pd.read_csv('dataset.csv')

In [47]:
user_ids = dataset.UserID.values
item_ids = dataset.ItemID.values

ratings = dataset.Rating.values

interactions = Interactions(user_ids = np.array(user_ids, dtype=np.int32),
                            item_ids = np.array(item_ids, dtype=np.int32),
                            ratings = np.array(ratings, dtype=np.float32))

train, test = user_based_train_test_split(interactions, test_percentage=0.20)

In [48]:
train, test 

(<Interactions dataset (1043 users x 763 items x 2764 interactions)>,
 <Interactions dataset (1043 users x 763 items x 1248 interactions)>)

In [49]:
model = ExplicitFactorizationModel(loss='poisson', 
                                   embedding_dim=256, 
                                   n_iter=200, 
                                   batch_size=32,
                                   learning_rate=0.005,
                                   l2=1e-6)

In [50]:
model.fit(train)

In [51]:
mrr = mrr_score(model, test, train=train)
f'Avg MRR score: {np.mean(mrr)}'

'Avg MRR score: 0.028765395806446412'

In [52]:
rmse = rmse_score(model, test)
f'RMSE score (for test data): {rmse}'

'RMSE score (for test data): 2.0668251514434814'

In [53]:
precision, recall = precision_recall_score(model, test, train=train)

In [54]:
print(f'Precision per user: {precision}')
print(describe(precision))
print(f'Recall per user: {recall}')
print(describe(recall))

Precision per user: [0.  0.1 0.  0.1 0.  0.  0.  0.5 0.2 0.8 0.3 0.1 0.3]
DescribeResult(nobs=13, minmax=(0.0, 0.8), mean=0.1846153846153846, variance=0.058076923076923075, skewness=1.4514612235559854, kurtosis=1.2723637852140994)
Recall per user: [0.         0.03333333 0.         0.16666667 0.         0.
 0.         0.125      0.1        0.06896552 0.12       0.2
 0.1875    ]
DescribeResult(nobs=13, minmax=(0.0, 0.2), mean=0.07703580901856764, variance=0.0059731395943643266, skewness=0.3305205617997852, kurtosis=-1.4039245990705647)


## Conclusions:

Despite no user and item features used by the algorithms, we managed to get 1.5 RMSE and 20% precision at k-10. For RMSE, the performance is below algorithms like ALS or SVD.