In [None]:
import numpy as np
import pandas as pd

from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from lightfm.evaluation import precision_at_k, auc_score, recall_at_k

# Load the MovieLens data through LightFM's dataset helper. The returned object
# is indexed by 'train', 'test' and 'item_labels' further down in this notebook.
movielens = fetch_movielens()

In [None]:
# Pull the train / test interaction matrices out of the loaded dataset.
train, test = movielens['train'], movielens['test']

## Попробуем подобрать лучшие параметры

In [None]:
# Hyper-parameter grid explored by the search below.
# Loss functions passed to LightFM(loss=...).
loss = [
    'logistic',
    'bpr',
    'warp',
    'warp-kos',
]
# Learning schedules passed to LightFM(learning_schedule=...).
lr_sc = [
    'adagrad',
    'adadelta',
]

In [None]:
# Grid search over learning schedule, loss function and learning rate.
# Every fitted model is scored with precision@k and recall@k for k in {10, 15}.
# Result keys have the form '<schedule>_<loss>_<k>_<learning_rate>'.
#
# NOTE(review): per the LightFM docs `learning_rate` applies to the adagrad
# schedule (adadelta is driven by `rho` / `epsilon`), so the learning-rate sweep
# is presumably a no-op for 'adadelta' -- confirm before interpreting those columns.
dict_ = {}
for lr in lr_sc:
  for l in loss:
    # k only affects evaluation, so fit one model per learning rate and reuse it
    # for every k instead of retraining an identical configuration twice.
    models = {}
    for l_r in [0.05, 0.005, 0.0005]:
      model = LightFM(learning_schedule=lr, loss=l, learning_rate=l_r)
      model.fit(train, epochs=10)
      models[l_r] = model

    for k in range(10, 20, 5):
      for l_r, model in models.items():
        train_p = precision_at_k(model, train, k=k).mean()
        test_p = precision_at_k(model, test, k=k, train_interactions=train).mean()

        # Recall has to come from recall_at_k: the previous version called
        # precision_at_k here, so "recall" merely duplicated precision.
        # train_interactions is passed for the test split so that known training
        # positives are excluded, matching how test precision is computed.
        train_r = recall_at_k(model, train, k=k).mean()
        test_r = recall_at_k(model, test, k=k, train_interactions=train).mean()

        dict_[lr + '_' + l + '_' + str(k) + '_' + str(l_r)] = [f'Train precision: {train_p}', f'test precision: {test_p}', f'Train recall: {train_r}', f'test recall: {test_r}']

In [None]:
# One column per grid-search configuration, one row per reported metric.
df = pd.DataFrame(dict_)
df

Unnamed: 0,adagrad_logistic_10_0.05,adagrad_logistic_10_0.005,adagrad_logistic_10_0.0005,adagrad_logistic_15_0.05,adagrad_logistic_15_0.005,adagrad_logistic_15_0.0005,adagrad_bpr_10_0.05,adagrad_bpr_10_0.005,adagrad_bpr_10_0.0005,adagrad_bpr_15_0.05,...,adadelta_warp_10_0.0005,adadelta_warp_15_0.05,adadelta_warp_15_0.005,adadelta_warp_15_0.0005,adadelta_warp-kos_10_0.05,adadelta_warp-kos_10_0.005,adadelta_warp-kos_10_0.0005,adadelta_warp-kos_15_0.05,adadelta_warp-kos_15_0.005,adadelta_warp-kos_15_0.0005
0,Train precision: 0.4370095133781433,Train precision: 0.437751829624176,Train precision: 0.42969247698783875,Train precision: 0.4155532419681549,Train precision: 0.4151996970176697,Train precision: 0.41025102138519287,Train precision: 0.5880169868469238,Train precision: 0.41049838066101074,Train precision: 0.40296924114227295,Train precision: 0.5729233622550964,...,Train precision: 0.6416754722595215,Train precision: 0.6180983185768127,Train precision: 0.6193707585334778,Train precision: 0.6162601709365845,Train precision: 0.6392365097999573,Train precision: 0.6317073702812195,Train precision: 0.6435843110084534,Train precision: 0.6136444807052612,Train precision: 0.6212796568870544,Train precision: 0.6203605532646179
1,test precision: 0.12714740633964539,test precision: 0.12428421527147293,test precision: 0.1158006489276886,test precision: 0.10357017815113068,test precision: 0.10569106042385101,test precision: 0.09812655299901962,test precision: 0.19427359104156494,test precision: 0.0960763618350029,test precision: 0.0922587513923645,test precision: 0.1717214733362198,...,test precision: 0.23881229758262634,test precision: 0.20459528267383575,test precision: 0.20261578261852264,test precision: 0.20219160616397858,test precision: 0.22672322392463684,test precision: 0.22619301080703735,test precision: 0.22852598130702972,test precision: 0.19377872347831726,test precision: 0.192082017660141,test precision: 0.1934252679347992
2,Train recall: 0.4370095133781433,Train recall: 0.437751829624176,Train recall: 0.42969247698783875,Train recall: 0.4155532419681549,Train recall: 0.4151996970176697,Train recall: 0.41025102138519287,Train recall: 0.5880169868469238,Train recall: 0.41049838066101074,Train recall: 0.40296924114227295,Train recall: 0.5729233622550964,...,Train recall: 0.6416754722595215,Train recall: 0.6180983185768127,Train recall: 0.6193707585334778,Train recall: 0.6162601709365845,Train recall: 0.6392365097999573,Train recall: 0.6317073702812195,Train recall: 0.6435843110084534,Train recall: 0.6136444807052612,Train recall: 0.6212796568870544,Train recall: 0.6203605532646179
3,test recall: 0.08133616298437119,test recall: 0.07677625864744186,test recall: 0.07285259664058685,test recall: 0.07104983925819397,test recall: 0.07090844959020615,test recall: 0.0668080598115921,test recall: 0.09904561191797256,test recall: 0.054612938314676285,test recall: 0.05312831699848175,test recall: 0.09487452358007431,...,test recall: 0.1100742444396019,test recall: 0.10526689141988754,test recall: 0.10236833244562149,test recall: 0.10279250890016556,test recall: 0.10148462653160095,test recall: 0.10381760448217392,test recall: 0.09851537644863129,test recall: 0.0941675528883934,test recall: 0.09240014851093292,test recall: 0.09508661180734634


In [None]:
# Refit a single configuration chosen from the grid search and evaluate it.
# NOTE(review): with learning_schedule='adadelta' the learning_rate argument is
# presumably unused by LightFM (it applies to adagrad) -- confirm against the docs.
model_1 = LightFM(learning_schedule='adadelta', loss='warp', learning_rate=0.0005)
model_1.fit(train, epochs=10)

# Evaluation cut-off, set explicitly. This cell used to read `k` left over from
# the grid-search loop (whose last value is 15), which breaks when the cell is
# run on its own or after the loop bounds change.
eval_k = 15

train_p = precision_at_k(model_1, train, k=eval_k).mean()
test_p = precision_at_k(model_1, test, k=eval_k, train_interactions=train).mean()
# Exclude known training positives from the test ranking, as done for precision.
auc = auc_score(model_1, test, train_interactions=train).mean()

print(train_p, test_p, auc)

0.6160481 0.2003535 0.9084524


In [None]:
def sample_recommendation(model, data, user_ids, n_shown=3):
    """Print a few known positives and the top-scored items for each user.

    Args:
        model: Object exposing ``predict(user_id, item_ids)`` that returns one
            score per requested item id (higher means ranked earlier).
        data: Mapping with a ``'train'`` sparse interaction matrix (rows are
            indexed by user id, columns by item id) and ``'item_labels'``, an
            array of item names that can be indexed with an integer array.
        user_ids: Iterable of row indices into ``data['train']``.
        n_shown: How many known positives and how many recommendations to
            print per user. Defaults to 3.

    Returns:
        None. The report is written to stdout.
    """
    n_items = data['train'].shape[1]
    # The CSR conversion and the item id range do not depend on the user, so
    # build them once instead of on every loop iteration.
    train_csr = data['train'].tocsr()
    item_labels = data['item_labels']
    item_ids = np.arange(n_items)

    for user_id in user_ids:
        # Items that have a stored entry in this user's training row.
        known_positives = item_labels[train_csr[user_id].indices]

        scores = model.predict(user_id, item_ids)

        # Negate the scores so that argsort yields highest-scored items first.
        top_items = item_labels[np.argsort(-scores)]

        print("User %s" % user_id)
        print("     Known positives:")

        for x in known_positives[:n_shown]:
            print("        %s" % x)

        print("     Recommended:")

        for x in top_items[:n_shown]:
            print("        %s" % x)

In [None]:
# Show known positives and top recommendations for three example users.
sample_recommendation(model_1, movielens, user_ids=[10, 25, 451])

User 10
     Known positives:
        Babe (1995)
        Dead Man Walking (1995)
        Seven (Se7en) (1995)
     Recommended:
        Return of the Jedi (1983)
        Four Weddings and a Funeral (1994)
        Star Wars (1977)
User 25
     Known positives:
        Toy Story (1995)
        Twelve Monkeys (1995)
        Dead Man Walking (1995)
     Recommended:
        Rock, The (1996)
        Fargo (1996)
        Contact (1997)
User 451
     Known positives:
        Twelve Monkeys (1995)
        Babe (1995)
        Postino, Il (1994)
     Recommended:
        Raiders of the Lost Ark (1981)
        Casablanca (1942)
        Empire Strikes Back, The (1980)
