TODO:
1. Use with implicit info - games in collection + ranked above K
2. Add features to games: category, designer
3. Check how does this work under the hood
4. Do more epochs while training (we are probably underfitting), test WARP loss

Interesting: 
https://towardsdatascience.com/solving-business-usecases-by-recommender-system-using-lightfm-4ba7b3ac8e62 
https://github.com/lyst/lightfm/issues/462 https://github.com/lyst/lightfm/issues/255 https://github.com/lyst/lightfm/issues/50

In [27]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, recall_at_k, reciprocal_rank, auc_score

### Data loading and creating mapping

In [13]:
df = pd.read_csv('../ratings.csv.gz',  usecols=[1,2,3])

In [17]:
df

Unnamed: 0,bgg_user_name,bgg_id,bgg_user_rating
0,beastvol,13,8.0
1,beastvol,118,7.0
2,beastvol,278,7.0
3,beastvol,823,6.0
4,beastvol,3076,9.0
...,...,...,...
17354938,æleksandr þræð,2399,7.0
17354939,æleksandr þræð,2932,6.0
17354940,æleksandr þræð,5451,8.0
17354941,æleksandr þræð,5554,7.0


In [26]:
cleaned_df.groupby('bgg_user_name')['bgg_id'].nunique()

bgg_user_name
 mycroft           14
-=yod@=-          282
-johnny-          497
-loren-            80
-lucas-            23
                 ... 
zzzxxxyyy          20
zzzzz              33
zzzzzane          148
zzzzzyy            15
æleksandr þræð     12
Name: bgg_id, Length: 206219, dtype: int64

In [18]:
dataset = Dataset()
dataset.fit((x for x in df['bgg_user_name']), (x for x in df['bgg_id']))

In [44]:
dataset.mapping()

({' beastvol': 0,
  ' fu_koios ': 1,
  ' mycroft': 2,
  ' woh': 3,
  '(mostly) harmless': 4,
  '- v -': 5,
  '-=yod@=-': 6,
  '-de-': 7,
  '-grizzly-': 8,
  '-johnny-': 9,
  '-loren-': 10,
  '-lucas-': 11,
  '-mal-': 12,
  '-mide-': 13,
  '-mik-': 14,
  '-morphling-': 15,
  '-pedropablo-': 16,
  '-pj-': 17,
  '-snarf-': 18,
  '-toni-': 19,
  '-xxx-': 20,
  '...hammer': 21,
  '.::clarté::.': 22,
  '.jck.': 23,
  '0 1 1 2 3 5 8': 24,
  '0 hr': 25,
  '000ryuk000': 26,
  '000undo000': 27,
  '0010': 28,
  '005734': 29,
  '007_ulf': 30,
  '007alex': 31,
  '007design': 32,
  '007king_kong': 33,
  '007mrbond': 34,
  '007poptart': 35,
  '007purpleroses': 36,
  '00_fire_00': 37,
  '00_mr_floppy': 38,
  '00arak00': 39,
  '00bogey': 40,
  '00daniel00': 41,
  '00matej00': 42,
  '00mccracken': 43,
  '00schneider': 44,
  '00skip': 45,
  '00vito': 46,
  '00zaphod': 47,
  '00zero': 48,
  '010dennis': 49,
  '0112358': 50,
  '01151125': 51,
  '0122lostchild2201': 52,
  '015599m': 53,
  '0165miked': 54,
 

In [19]:
num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items {}.'.format(num_users, num_items))

Num users: 370050, num_items 73339.


In [20]:
df.groupby('bgg_user_name')['bgg_id'].nunique()

bgg_user_name
 beastvol              9
 fu_koios              2
 mycroft              14
 woh                   4
(mostly) harmless      1
                    ... 
zzzzz                 33
zzzzzane             148
zzzzzyy               15
zébulon                2
æleksandr þræð        12
Name: bgg_id, Length: 370049, dtype: int64

In [63]:
df.groupby('bgg_id')['bgg_user_name'].nunique()

bgg_id
1          5111
2           546
3         14653
4           340
5         17804
          ...  
330645        1
330714        1
330806        1
331028        1
331106        3
Name: bgg_user_name, Length: 73339, dtype: int64

### Training on full dataset

In [28]:
(interactions, weights) = dataset.build_interactions(((val['bgg_user_name'], val['bgg_id'], val['bgg_user_rating']) for idx, val in df.iterrows()))

In [30]:
print(repr(interactions))
print(repr(weights))

<370050x73339 sparse matrix of type '<class 'numpy.int32'>'
	with 17354943 stored elements in COOrdinate format>
<370050x73339 sparse matrix of type '<class 'numpy.float32'>'
	with 17354943 stored elements in COOrdinate format>


In [59]:
model = LightFM(loss='bpr')
model.fit(weights, verbose=True)

<lightfm.lightfm.LightFM at 0x7fc40e630730>

### What does the output look like?

In [93]:
def return_top_5(user_name, model, user_mapping, games_mapping):
    user_id = user_mapping[user_name]
    ratings = model.predict(user_id, np.arange(73339))
    games_ids = np.argsort(ratings)[::-1][:5]
    print(games_ids)
    top_5 = []
    for idx in games_ids:
        top_5.append(list(games_mapping.keys())[list(games_mapping.values()).index(idx)])
    return top_5
    
return_top_5(" beastvol", model, dataset.mapping()[0], dataset.mapping()[2])

[  4 107   0  28  58]


[3076, 31260, 13, 822, 2651]

In [19]:
df.head(15)

Unnamed: 0,bgg_user_name,bgg_id,bgg_user_rating
0,beastvol,13,8.0
1,beastvol,118,7.0
2,beastvol,278,7.0
3,beastvol,823,6.0
4,beastvol,3076,9.0
5,beastvol,5737,8.0
6,beastvol,9209,7.0
7,beastvol,12004,5.0
8,beastvol,18602,9.0
9,fu_koios,112092,9.0


In [105]:
print(model.predict(0, np.arange(73339))[4])
print(model.predict(0, np.arange(73339))[107])
print(model.predict(0, np.arange(73339))[0])
print(model.predict(0, np.arange(73339))[28])
print(model.predict(0, np.arange(73339))[58])

1.9241098
1.7616148
1.7266395
1.700431
1.687653


In [106]:
idx = dataset.mapping()[2][18602]
print(idx)
print(model.predict(0, np.arange(73339))[8])

8
1.196267


### Dataset splitted into train and test
Looks like LightFM has a function for that - random_train_test_split, refactor later

In [4]:
users = df['bgg_user_name'].unique()
np.random.shuffle(users)
train_size = int(0.7*users.shape[0])
train_df = df[df['bgg_user_name'].isin(users[:train_size])]
test_df = df[df['bgg_user_name'].isin(users[train_size:])]

In [5]:
def split_test(test_df, seed=42, frac=0.8):
    grouped = test_df.groupby(by='bgg_user_name')
    test_known = []
    test_unknown = []
    for user, df in grouped:
        df_size = df.shape[0]
        known_size = int(round(frac*df_size))
        known_indices = np.random.choice(df_size, known_size, replace=False)
        known_data = df.iloc[known_indices]
        test_known.append(known_data)

        unknown_indices = np.setdiff1d(np.arange(df_size), known_indices)
        unknown_data = df.iloc[unknown_indices]
        test_unknown.append(unknown_data)

    return pd.concat(test_known), pd.concat(test_unknown)

In [6]:
test_known, test_unknown = split_test(test_df)

In [7]:
# 4160243
test_known

Unnamed: 0,bgg_user_name,bgg_id,bgg_user_rating
9,fu_koios,112092,9.0
10,fu_koios,223033,9.0
14,mycroft,823,7.0
11,mycroft,5,8.0
20,mycroft,9829,7.0
...,...,...,...
17354940,æleksandr þræð,5451,8.0
17354934,æleksandr þræð,1540,9.5
17354939,æleksandr þræð,2932,6.0
17354942,æleksandr þræð,9962,8.0


In [9]:
interactions_df = train_df.append(test_known)

In [10]:
(interactions, weights) = dataset.build_interactions(((val['bgg_user_name'], val['bgg_id'], val['bgg_user_rating']) for idx, val in interactions_df.iterrows()))

In [11]:
(test_interactions, test_weights) = dataset.build_interactions(((val['bgg_user_name'], val['bgg_id'], val['bgg_user_rating']) for idx, val in test_unknown.iterrows()))

In [12]:
model = LightFM(loss='bpr')
model.fit(weights, verbose=True)

Epoch 0


<lightfm.lightfm.LightFM at 0x7f7c2e1ef040>

### Model evaluation
We should probably remove users with very small amout of interactions from dataset.

Measure the precision at k metric for a model: the fraction of known positives in the first k positions of the ranked list of results. A perfect score is 1.0.

In [14]:
train_precision = precision_at_k(model, weights, k=10, num_threads=8).mean()
print('Precision: train %.2f' % (train_precision))

test_precision = precision_at_k(model, test_weights, train_interactions = weights, k=10, num_threads=8).mean()
print('Precision: test %.2f' % (test_precision))

Precision: train 0.24
Precision: test 0.06


Measure the recall at k metric for a model: the number of positive items in the first k positions of the ranked list of results divided by the number of positive items in the test period. 

In [15]:
train_recall = recall_at_k(model, weights, k=10, num_threads=8).mean()
print('Recall: train %.2f' % (train_recall))

test_recall = recall_at_k(model, test_weights, train_interactions = weights, k=10, num_threads=8).mean()
print('Recall: test %.2f' % (test_recall))

Recall: train 0.09
Recall: test 0.10


Measure the ROC AUC metric for a model: the probability that a randomly chosen positive example has a higher score than a randomly chosen negative example. A perfect score is 1.0.

In [16]:
train_auc = auc_score(model, weights, num_threads=8).mean()
print('AUC: train %.2f' % (train_auc))

test_auc = auc_score(model, test_weights, train_interactions = weights, num_threads=8).mean()
print('AUC: test %.2f' % (test_auc))

AUC: train 0.85
AUC: test 0.87


Measure the reciprocal rank metric for a model: 1 / the rank of the highest ranked positive example. A perfect score is 1.0.

In [17]:
train_recip = reciprocal_rank(model, weights, num_threads=8).mean()
print('Reciprocal rank: train %.2f' % (train_recip))

test_recip = reciprocal_rank(model, test_weights, train_interactions = weights, num_threads=8).mean()
print('Reciprocal rank: test %.2f' % (test_recip))

Reciprocal rank: train 0.42
Reciprocal rank: test 0.18


### Evaluation on cleaned dataset

In [28]:
grouped = df.groupby(by='bgg_user_name')
cleaned_df = []
for user, user_df in grouped:
    if user_df.shape[0] < 10:
        continue
    cleaned_df.append(user_df[:])
cleaned_df = pd.concat(cleaned_df)

In [30]:
cleaned_dataset = Dataset()
cleaned_dataset.fit((x for x in cleaned_df['bgg_user_name']), (x for x in cleaned_df['bgg_id']))

In [46]:
(cleaned_interactions, cleaned_weights) = cleaned_dataset.build_interactions(((val['bgg_user_name'], val['bgg_id'], val['bgg_user_rating']) for idx, val in cleaned_df.iterrows()))

In [47]:
cleaned_train, cleaned_test = random_train_test_split(cleaned_weights, test_percentage = 0.2, random_state = 42)

In [52]:
cleaned_model = LightFM(loss='warp')
cleaned_model.fit(cleaned_train, verbose=True, epochs=20, num_threads=8)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19


<lightfm.lightfm.LightFM at 0x7fda89ccc460>

In [53]:
train_precision = precision_at_k(cleaned_model, cleaned_train, k=10, num_threads=8).mean()
print('Precision: train %.2f' % (train_precision))

test_precision = precision_at_k(cleaned_model, cleaned_test, train_interactions = cleaned_train, k=10, num_threads=8).mean()
print('Precision: test %.2f' % (test_precision))

Precision: train 0.36
Precision: test 0.16


In [33]:
#This metric doesn't make sense for us. 

train_recall = recall_at_k(cleaned_model, cleaned_train, k=10, num_threads=8).mean()
print('Recall: train %.2f' % (train_recall))

test_recall = recall_at_k(cleaned_model, cleaned_test, train_interactions = cleaned_train, k=10, num_threads=8).mean()
print('Recall: test %.2f' % (test_recall))

Recall: train 0.09
Recall: test 0.09


In [39]:
train_auc = auc_score(cleaned_model, cleaned_train, num_threads=8).mean()
print('AUC: train %.2f' % (train_auc))

test_auc = auc_score(cleaned_model, cleaned_test, train_interactions = cleaned_train, num_threads=8).mean()
print('AUC: test %.2f' % (test_auc))

AUC: train 0.87
AUC: test 0.86


In [None]:
train_recip = reciprocal_rank(cleaned_model, cleaned_train, num_threads=8).mean()
print('Reciprocal rank: train %.2f' % (train_recip))

test_recip = reciprocal_rank(cleaned_model, cleaned_test, train_interactions = cleaned_train, num_threads=8).mean()
print('Reciprocal rank: test %.2f' % (test_recip))