In [1]:
import io 
import pandas as pd
from collections import defaultdict
from surprise import Dataset
from surprise import NormalPredictor
from surprise import SVD
from surprise import KNNBasic
from surprise import get_dataset_dir
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

In [2]:
data = Dataset.load_builtin('ml-100k')
K = 30
trainset, testset = train_test_split(data, test_size=.25)

In [3]:
# ассоциативный массив (названия алгоритмов - ключи, функиции - значения)
algorithms = {
    "NP": NormalPredictor(),
    "KNN_cos": KNNBasic(k = K, sim_options = { 'name': 'cosine' }),
    "KNN_MSD": KNNBasic(k = K),
    "KNN_Pearson": KNNBasic(k = K, sim_options = { 'name': 'pearson' }),
    "SVD" : SVD()
}
RSMA = {}

In [4]:
# для каждого алгоритма с помошбю функции cross_validate оцениваем метрику RMSE (среднеквадратичную ошибку модели)
for [name, algo] in algorithms.items():
    crv = cross_validate(algo, data, measures=['RMSE'], verbose=False)
    RSMA[name] = round(crv['test_rmse'].mean(), 3)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson si

In [5]:
# выберем среди всех алгоритмов лучший
print(RSMA)
bestAlgoName = min(RSMA.items(), key=lambda x: x[1])[0]
print(bestAlgoName)
bestAlgo = algorithms[bestAlgoName]
# тренируем алгоритм на тренировочном наборе данных, даём предсказание для тестового
bestAlgo.fit(trainset)
predictions = bestAlgo.test(testset)
for prediction in predictions:
    print(prediction)

{'NP': 1.522, 'KNN_cos': 1.019, 'KNN_MSD': 0.977, 'KNN_Pearson': 1.013, 'SVD': 0.936}
SVD
user: 648        item: 367        r_ui = 3.00   est = 3.25   {'was_impossible': False}
user: 286        item: 288        r_ui = 5.00   est = 3.50   {'was_impossible': False}
user: 13         item: 869        r_ui = 3.00   est = 2.59   {'was_impossible': False}
user: 405        item: 1228       r_ui = 1.00   est = 1.12   {'was_impossible': False}
user: 300        item: 881        r_ui = 5.00   est = 3.62   {'was_impossible': False}
user: 927        item: 158        r_ui = 2.00   est = 2.80   {'was_impossible': False}
user: 561        item: 229        r_ui = 3.00   est = 2.04   {'was_impossible': False}
user: 385        item: 1159       r_ui = 4.00   est = 3.08   {'was_impossible': False}
user: 846        item: 41         r_ui = 3.00   est = 3.50   {'was_impossible': False}
user: 62         item: 401        r_ui = 3.00   est = 2.76   {'was_impossible': False}
user: 23         item: 98         r_ui =

user: 363        item: 428        r_ui = 5.00   est = 3.60   {'was_impossible': False}
user: 705        item: 685        r_ui = 5.00   est = 3.74   {'was_impossible': False}
user: 279        item: 301        r_ui = 4.00   est = 3.21   {'was_impossible': False}
user: 537        item: 186        r_ui = 4.00   est = 2.97   {'was_impossible': False}
user: 380        item: 61         r_ui = 4.00   est = 3.37   {'was_impossible': False}
user: 764        item: 866        r_ui = 4.00   est = 3.11   {'was_impossible': False}
user: 315        item: 48         r_ui = 4.00   est = 4.22   {'was_impossible': False}
user: 46         item: 181        r_ui = 4.00   est = 4.45   {'was_impossible': False}
user: 416        item: 1139       r_ui = 3.00   est = 3.12   {'was_impossible': False}
user: 430        item: 547        r_ui = 2.00   est = 3.34   {'was_impossible': False}
user: 343        item: 527        r_ui = 5.00   est = 4.26   {'was_impossible': False}
user: 586        item: 405        r_ui = 5.

user: 598        item: 323        r_ui = 4.00   est = 3.27   {'was_impossible': False}
user: 92         item: 168        r_ui = 4.00   est = 3.92   {'was_impossible': False}
user: 699        item: 523        r_ui = 2.00   est = 3.69   {'was_impossible': False}
user: 237        item: 28         r_ui = 4.00   est = 3.78   {'was_impossible': False}
user: 943        item: 200        r_ui = 4.00   est = 3.85   {'was_impossible': False}
user: 797        item: 1023       r_ui = 3.00   est = 2.13   {'was_impossible': False}
user: 456        item: 382        r_ui = 1.00   est = 3.65   {'was_impossible': False}
user: 555        item: 302        r_ui = 3.00   est = 4.68   {'was_impossible': False}
user: 773        item: 462        r_ui = 5.00   est = 3.81   {'was_impossible': False}
user: 298        item: 294        r_ui = 3.00   est = 3.66   {'was_impossible': False}
user: 23         item: 216        r_ui = 4.00   est = 3.64   {'was_impossible': False}
user: 126        item: 313        r_ui = 5.

user: 254        item: 1091       r_ui = 3.00   est = 2.40   {'was_impossible': False}
user: 518        item: 628        r_ui = 5.00   est = 3.81   {'was_impossible': False}
user: 698        item: 50         r_ui = 5.00   est = 3.41   {'was_impossible': False}
user: 653        item: 245        r_ui = 4.00   est = 2.06   {'was_impossible': False}
user: 446        item: 300        r_ui = 3.00   est = 3.01   {'was_impossible': False}
user: 293        item: 566        r_ui = 3.00   est = 3.01   {'was_impossible': False}
user: 488        item: 491        r_ui = 4.00   est = 3.42   {'was_impossible': False}
user: 64         item: 633        r_ui = 5.00   est = 3.80   {'was_impossible': False}
user: 837        item: 472        r_ui = 3.00   est = 2.46   {'was_impossible': False}
user: 181        item: 1367       r_ui = 2.00   est = 1.88   {'was_impossible': False}
user: 478        item: 93         r_ui = 4.00   est = 3.51   {'was_impossible': False}
user: 862        item: 462        r_ui = 4.

user: 144        item: 328        r_ui = 3.00   est = 3.23   {'was_impossible': False}
user: 5          item: 369        r_ui = 1.00   est = 2.00   {'was_impossible': False}
user: 294        item: 508        r_ui = 4.00   est = 3.76   {'was_impossible': False}
user: 236        item: 1039       r_ui = 2.00   est = 3.92   {'was_impossible': False}
user: 837        item: 285        r_ui = 4.00   est = 3.61   {'was_impossible': False}
user: 301        item: 33         r_ui = 4.00   est = 3.49   {'was_impossible': False}
user: 164        item: 685        r_ui = 5.00   est = 3.95   {'was_impossible': False}
user: 621        item: 405        r_ui = 5.00   est = 3.30   {'was_impossible': False}
user: 860        item: 269        r_ui = 2.00   est = 3.62   {'was_impossible': False}
user: 694        item: 489        r_ui = 4.00   est = 4.45   {'was_impossible': False}
user: 285        item: 288        r_ui = 5.00   est = 3.92   {'was_impossible': False}
user: 13         item: 158        r_ui = 1.

user: 627        item: 55         r_ui = 4.00   est = 3.41   {'was_impossible': False}
user: 488        item: 127        r_ui = 4.00   est = 3.47   {'was_impossible': False}
user: 318        item: 404        r_ui = 3.00   est = 3.68   {'was_impossible': False}
user: 537        item: 504        r_ui = 3.00   est = 3.10   {'was_impossible': False}
user: 275        item: 630        r_ui = 3.00   est = 2.65   {'was_impossible': False}
user: 846        item: 226        r_ui = 4.00   est = 3.66   {'was_impossible': False}
user: 326        item: 202        r_ui = 4.00   est = 3.74   {'was_impossible': False}
user: 298        item: 430        r_ui = 5.00   est = 4.08   {'was_impossible': False}
user: 716        item: 1039       r_ui = 5.00   est = 3.92   {'was_impossible': False}
user: 379        item: 173        r_ui = 5.00   est = 4.85   {'was_impossible': False}
user: 804        item: 192        r_ui = 4.00   est = 4.30   {'was_impossible': False}
user: 890        item: 313        r_ui = 5.

user: 690        item: 790        r_ui = 3.00   est = 2.81   {'was_impossible': False}
user: 786        item: 416        r_ui = 4.00   est = 3.70   {'was_impossible': False}
user: 222        item: 409        r_ui = 3.00   est = 2.86   {'was_impossible': False}
user: 453        item: 151        r_ui = 3.00   est = 3.34   {'was_impossible': False}
user: 561        item: 436        r_ui = 4.00   est = 3.10   {'was_impossible': False}
user: 821        item: 79         r_ui = 5.00   est = 4.60   {'was_impossible': False}
user: 429        item: 22         r_ui = 5.00   est = 3.80   {'was_impossible': False}
user: 299        item: 965        r_ui = 4.00   est = 3.16   {'was_impossible': False}
user: 416        item: 1041       r_ui = 3.00   est = 3.64   {'was_impossible': False}
user: 293        item: 176        r_ui = 4.00   est = 3.30   {'was_impossible': False}
user: 305        item: 268        r_ui = 3.00   est = 3.36   {'was_impossible': False}
user: 316        item: 276        r_ui = 2.

user: 440        item: 751        r_ui = 3.00   est = 4.07   {'was_impossible': False}
user: 916        item: 381        r_ui = 3.00   est = 3.36   {'was_impossible': False}
user: 40         item: 333        r_ui = 4.00   est = 3.28   {'was_impossible': False}
user: 456        item: 98         r_ui = 3.00   est = 3.98   {'was_impossible': False}
user: 328        item: 693        r_ui = 2.00   est = 3.56   {'was_impossible': False}
user: 787        item: 333        r_ui = 3.00   est = 3.21   {'was_impossible': False}
user: 745        item: 531        r_ui = 3.00   est = 3.19   {'was_impossible': False}
user: 146        item: 346        r_ui = 4.00   est = 3.76   {'was_impossible': False}
user: 77         item: 172        r_ui = 3.00   est = 4.36   {'was_impossible': False}
user: 218        item: 186        r_ui = 3.00   est = 4.04   {'was_impossible': False}
user: 869        item: 312        r_ui = 2.00   est = 2.61   {'was_impossible': False}
user: 267        item: 187        r_ui = 5.

user: 22         item: 878        r_ui = 1.00   est = 2.66   {'was_impossible': False}
user: 145        item: 407        r_ui = 2.00   est = 2.30   {'was_impossible': False}
user: 314        item: 742        r_ui = 4.00   est = 4.36   {'was_impossible': False}
user: 378        item: 295        r_ui = 3.00   est = 3.36   {'was_impossible': False}
user: 270        item: 781        r_ui = 5.00   est = 4.19   {'was_impossible': False}
user: 897        item: 265        r_ui = 3.00   est = 4.35   {'was_impossible': False}
user: 174        item: 255        r_ui = 5.00   est = 3.64   {'was_impossible': False}
user: 414        item: 346        r_ui = 5.00   est = 4.24   {'was_impossible': False}
user: 864        item: 408        r_ui = 5.00   est = 4.95   {'was_impossible': False}
user: 387        item: 181        r_ui = 4.00   est = 3.36   {'was_impossible': False}
user: 517        item: 294        r_ui = 1.00   est = 3.05   {'was_impossible': False}
user: 749        item: 56         r_ui = 2.

user: 406        item: 13         r_ui = 2.00   est = 3.19   {'was_impossible': False}
user: 235        item: 511        r_ui = 5.00   est = 4.22   {'was_impossible': False}
user: 467        item: 1011       r_ui = 2.00   est = 3.34   {'was_impossible': False}
user: 543        item: 748        r_ui = 3.00   est = 2.68   {'was_impossible': False}
user: 327        item: 269        r_ui = 3.00   est = 3.72   {'was_impossible': False}
user: 130        item: 257        r_ui = 4.00   est = 4.07   {'was_impossible': False}
user: 755        item: 875        r_ui = 1.00   est = 2.60   {'was_impossible': False}
user: 869        item: 1132       r_ui = 1.00   est = 2.69   {'was_impossible': False}
user: 332        item: 125        r_ui = 5.00   est = 4.16   {'was_impossible': False}
user: 313        item: 516        r_ui = 4.00   est = 3.98   {'was_impossible': False}
user: 897        item: 179        r_ui = 3.00   est = 3.92   {'was_impossible': False}
user: 425        item: 180        r_ui = 4.

user: 62         item: 527        r_ui = 4.00   est = 3.89   {'was_impossible': False}
user: 339        item: 661        r_ui = 5.00   est = 4.45   {'was_impossible': False}
user: 864        item: 238        r_ui = 5.00   est = 4.22   {'was_impossible': False}
user: 121        item: 291        r_ui = 3.00   est = 3.15   {'was_impossible': False}
user: 60         item: 28         r_ui = 5.00   est = 4.13   {'was_impossible': False}
user: 374        item: 193        r_ui = 4.00   est = 3.58   {'was_impossible': False}
user: 753        item: 483        r_ui = 5.00   est = 4.18   {'was_impossible': False}
user: 182        item: 845        r_ui = 3.00   est = 3.49   {'was_impossible': False}
user: 504        item: 185        r_ui = 5.00   est = 3.83   {'was_impossible': False}
user: 62         item: 167        r_ui = 2.00   est = 2.74   {'was_impossible': False}
user: 559        item: 188        r_ui = 5.00   est = 3.49   {'was_impossible': False}
user: 181        item: 1001       r_ui = 1.

user: 592        item: 1609       r_ui = 1.00   est = 3.32   {'was_impossible': False}
user: 880        item: 566        r_ui = 3.00   est = 3.77   {'was_impossible': False}
user: 569        item: 508        r_ui = 3.00   est = 3.98   {'was_impossible': False}
user: 474        item: 603        r_ui = 5.00   est = 4.82   {'was_impossible': False}
user: 311        item: 131        r_ui = 3.00   est = 3.96   {'was_impossible': False}
user: 880        item: 249        r_ui = 4.00   est = 3.58   {'was_impossible': False}
user: 399        item: 1170       r_ui = 3.00   est = 2.53   {'was_impossible': False}
user: 923        item: 827        r_ui = 3.00   est = 3.64   {'was_impossible': False}
user: 373        item: 1133       r_ui = 3.00   est = 3.10   {'was_impossible': False}
user: 804        item: 235        r_ui = 5.00   est = 2.71   {'was_impossible': False}
user: 296        item: 100        r_ui = 5.00   est = 4.46   {'was_impossible': False}
user: 59         item: 313        r_ui = 5.

user: 393        item: 173        r_ui = 5.00   est = 3.99   {'was_impossible': False}
user: 655        item: 166        r_ui = 3.00   est = 3.34   {'was_impossible': False}
user: 929        item: 483        r_ui = 4.00   est = 4.00   {'was_impossible': False}
user: 934        item: 1203       r_ui = 5.00   est = 3.87   {'was_impossible': False}
user: 128        item: 501        r_ui = 3.00   est = 3.46   {'was_impossible': False}
user: 790        item: 154        r_ui = 4.00   est = 3.33   {'was_impossible': False}
user: 919        item: 147        r_ui = 4.00   est = 3.77   {'was_impossible': False}
user: 488        item: 211        r_ui = 4.00   est = 3.65   {'was_impossible': False}
user: 56         item: 64         r_ui = 5.00   est = 4.59   {'was_impossible': False}
user: 499        item: 174        r_ui = 3.00   est = 4.06   {'was_impossible': False}
user: 343        item: 1140       r_ui = 3.00   est = 3.03   {'was_impossible': False}
user: 314        item: 1289       r_ui = 2.

user: 381        item: 95         r_ui = 4.00   est = 3.73   {'was_impossible': False}
user: 758        item: 1016       r_ui = 4.00   est = 3.70   {'was_impossible': False}
user: 526        item: 343        r_ui = 3.00   est = 2.73   {'was_impossible': False}
user: 640        item: 689        r_ui = 4.00   est = 3.74   {'was_impossible': False}
user: 243        item: 632        r_ui = 5.00   est = 3.88   {'was_impossible': False}
user: 589        item: 678        r_ui = 4.00   est = 3.06   {'was_impossible': False}
user: 346        item: 1110       r_ui = 1.00   est = 2.95   {'was_impossible': False}
user: 314        item: 406        r_ui = 3.00   est = 3.07   {'was_impossible': False}
user: 87         item: 70         r_ui = 5.00   est = 3.82   {'was_impossible': False}
user: 871        item: 324        r_ui = 3.00   est = 3.01   {'was_impossible': False}
user: 246        item: 570        r_ui = 1.00   est = 2.41   {'was_impossible': False}
user: 395        item: 286        r_ui = 4.

user: 336        item: 154        r_ui = 5.00   est = 3.34   {'was_impossible': False}
user: 23         item: 13         r_ui = 4.00   est = 3.09   {'was_impossible': False}
user: 524        item: 1560       r_ui = 4.00   est = 3.05   {'was_impossible': False}
user: 934        item: 190        r_ui = 4.00   est = 4.50   {'was_impossible': False}
user: 311        item: 205        r_ui = 5.00   est = 4.08   {'was_impossible': False}
user: 313        item: 742        r_ui = 3.00   est = 3.77   {'was_impossible': False}
user: 217        item: 17         r_ui = 3.00   est = 2.72   {'was_impossible': False}
user: 925        item: 773        r_ui = 1.00   est = 3.32   {'was_impossible': False}
user: 498        item: 447        r_ui = 3.00   est = 3.24   {'was_impossible': False}
user: 479        item: 490        r_ui = 4.00   est = 3.86   {'was_impossible': False}
user: 474        item: 471        r_ui = 3.00   est = 4.00   {'was_impossible': False}
user: 249        item: 455        r_ui = 4.

user: 385        item: 419        r_ui = 2.00   est = 2.48   {'was_impossible': False}
user: 85         item: 64         r_ui = 5.00   est = 3.92   {'was_impossible': False}
user: 889        item: 943        r_ui = 3.00   est = 3.72   {'was_impossible': False}
user: 943        item: 231        r_ui = 2.00   est = 2.53   {'was_impossible': False}
user: 387        item: 200        r_ui = 5.00   est = 3.70   {'was_impossible': False}
user: 773        item: 1529       r_ui = 5.00   est = 2.82   {'was_impossible': False}
user: 303        item: 866        r_ui = 2.00   est = 2.48   {'was_impossible': False}
user: 561        item: 317        r_ui = 3.00   est = 3.27   {'was_impossible': False}
user: 843        item: 161        r_ui = 2.00   est = 2.92   {'was_impossible': False}
user: 707        item: 382        r_ui = 3.00   est = 3.59   {'was_impossible': False}
user: 396        item: 329        r_ui = 2.00   est = 3.36   {'was_impossible': False}
user: 22         item: 692        r_ui = 4.

user: 747        item: 12         r_ui = 4.00   est = 4.84   {'was_impossible': False}
user: 843        item: 182        r_ui = 2.00   est = 3.27   {'was_impossible': False}
user: 721        item: 266        r_ui = 3.00   est = 3.10   {'was_impossible': False}
user: 606        item: 96         r_ui = 5.00   est = 4.40   {'was_impossible': False}
user: 308        item: 928        r_ui = 4.00   est = 3.10   {'was_impossible': False}
user: 468        item: 772        r_ui = 4.00   est = 3.57   {'was_impossible': False}
user: 802        item: 444        r_ui = 4.00   est = 3.03   {'was_impossible': False}
user: 643        item: 215        r_ui = 3.00   est = 3.56   {'was_impossible': False}
user: 445        item: 433        r_ui = 2.00   est = 2.20   {'was_impossible': False}
user: 436        item: 215        r_ui = 4.00   est = 4.03   {'was_impossible': False}
user: 659        item: 494        r_ui = 4.00   est = 4.28   {'was_impossible': False}
user: 637        item: 1102       r_ui = 3.

user: 73         item: 1          r_ui = 2.00   est = 3.48   {'was_impossible': False}
user: 673        item: 288        r_ui = 4.00   est = 3.57   {'was_impossible': False}
user: 96         item: 265        r_ui = 5.00   est = 3.96   {'was_impossible': False}
user: 308        item: 853        r_ui = 5.00   est = 3.82   {'was_impossible': False}
user: 85         item: 1113       r_ui = 2.00   est = 3.21   {'was_impossible': False}
user: 634        item: 919        r_ui = 2.00   est = 3.33   {'was_impossible': False}
user: 711        item: 82         r_ui = 3.00   est = 3.34   {'was_impossible': False}
user: 864        item: 128        r_ui = 4.00   est = 3.68   {'was_impossible': False}
user: 569        item: 25         r_ui = 4.00   est = 3.57   {'was_impossible': False}
user: 399        item: 452        r_ui = 3.00   est = 2.12   {'was_impossible': False}
user: 549        item: 50         r_ui = 5.00   est = 4.66   {'was_impossible': False}
user: 749        item: 595        r_ui = 4.

user: 912        item: 654        r_ui = 3.00   est = 3.88   {'was_impossible': False}
user: 119        item: 40         r_ui = 4.00   est = 3.48   {'was_impossible': False}
user: 464        item: 328        r_ui = 3.00   est = 3.72   {'was_impossible': False}
user: 610        item: 591        r_ui = 3.00   est = 3.55   {'was_impossible': False}
user: 686        item: 185        r_ui = 5.00   est = 4.67   {'was_impossible': False}
user: 307        item: 71         r_ui = 5.00   est = 3.56   {'was_impossible': False}
user: 76         item: 223        r_ui = 2.00   est = 4.17   {'was_impossible': False}
user: 49         item: 385        r_ui = 1.00   est = 2.43   {'was_impossible': False}
user: 881        item: 11         r_ui = 4.00   est = 3.72   {'was_impossible': False}
user: 559        item: 311        r_ui = 3.00   est = 3.48   {'was_impossible': False}
user: 774        item: 406        r_ui = 1.00   est = 1.22   {'was_impossible': False}
user: 328        item: 97         r_ui = 3.

user: 538        item: 710        r_ui = 3.00   est = 3.46   {'was_impossible': False}
user: 835        item: 187        r_ui = 4.00   est = 4.03   {'was_impossible': False}
user: 660        item: 161        r_ui = 1.00   est = 2.44   {'was_impossible': False}
user: 332        item: 302        r_ui = 5.00   est = 5.00   {'was_impossible': False}
user: 725        item: 245        r_ui = 4.00   est = 3.28   {'was_impossible': False}
user: 222        item: 87         r_ui = 3.00   est = 3.47   {'was_impossible': False}
user: 634        item: 285        r_ui = 4.00   est = 4.17   {'was_impossible': False}
user: 892        item: 477        r_ui = 4.00   est = 3.66   {'was_impossible': False}
user: 805        item: 8          r_ui = 3.00   est = 3.36   {'was_impossible': False}
user: 450        item: 467        r_ui = 4.00   est = 4.10   {'was_impossible': False}
user: 555        item: 47         r_ui = 2.00   est = 4.14   {'was_impossible': False}
user: 577        item: 436        r_ui = 4.

user: 110        item: 2          r_ui = 3.00   est = 3.21   {'was_impossible': False}
user: 881        item: 63         r_ui = 4.00   est = 2.57   {'was_impossible': False}
user: 227        item: 250        r_ui = 2.00   est = 3.71   {'was_impossible': False}
user: 486        item: 1014       r_ui = 3.00   est = 2.90   {'was_impossible': False}
user: 314        item: 1276       r_ui = 4.00   est = 3.68   {'was_impossible': False}
user: 468        item: 647        r_ui = 5.00   est = 4.48   {'was_impossible': False}
user: 211        item: 890        r_ui = 2.00   est = 1.78   {'was_impossible': False}
user: 796        item: 96         r_ui = 4.00   est = 4.23   {'was_impossible': False}
user: 387        item: 679        r_ui = 5.00   est = 3.36   {'was_impossible': False}
user: 417        item: 804        r_ui = 3.00   est = 2.95   {'was_impossible': False}
user: 189        item: 13         r_ui = 4.00   est = 3.48   {'was_impossible': False}
user: 286        item: 417        r_ui = 3.

user: 586        item: 240        r_ui = 3.00   est = 2.45   {'was_impossible': False}
user: 321        item: 491        r_ui = 3.00   est = 3.84   {'was_impossible': False}
user: 200        item: 756        r_ui = 3.00   est = 3.51   {'was_impossible': False}
user: 795        item: 186        r_ui = 3.00   est = 3.42   {'was_impossible': False}
user: 405        item: 449        r_ui = 1.00   est = 1.00   {'was_impossible': False}
user: 49         item: 147        r_ui = 1.00   est = 2.80   {'was_impossible': False}
user: 239        item: 179        r_ui = 5.00   est = 4.34   {'was_impossible': False}
user: 1          item: 265        r_ui = 4.00   est = 3.80   {'was_impossible': False}
user: 712        item: 728        r_ui = 4.00   est = 3.76   {'was_impossible': False}
user: 561        item: 513        r_ui = 3.00   est = 3.56   {'was_impossible': False}
user: 561        item: 1139       r_ui = 1.00   est = 1.95   {'was_impossible': False}
user: 676        item: 546        r_ui = 3.

user: 760        item: 873        r_ui = 4.00   est = 2.69   {'was_impossible': False}
user: 911        item: 1060       r_ui = 4.00   est = 2.99   {'was_impossible': False}
user: 833        item: 227        r_ui = 2.00   est = 2.42   {'was_impossible': False}
user: 450        item: 589        r_ui = 3.00   est = 4.36   {'was_impossible': False}
user: 109        item: 441        r_ui = 2.00   est = 2.41   {'was_impossible': False}
user: 26         item: 150        r_ui = 3.00   est = 3.10   {'was_impossible': False}
user: 422        item: 276        r_ui = 5.00   est = 3.35   {'was_impossible': False}
user: 705        item: 97         r_ui = 3.00   est = 4.24   {'was_impossible': False}
user: 897        item: 183        r_ui = 5.00   est = 4.03   {'was_impossible': False}
user: 286        item: 95         r_ui = 5.00   est = 3.85   {'was_impossible': False}
user: 566        item: 192        r_ui = 5.00   est = 4.18   {'was_impossible': False}
user: 405        item: 1499       r_ui = 1.

user: 268        item: 161        r_ui = 3.00   est = 3.02   {'was_impossible': False}
user: 293        item: 401        r_ui = 1.00   est = 2.41   {'was_impossible': False}
user: 632        item: 735        r_ui = 4.00   est = 4.04   {'was_impossible': False}
user: 406        item: 418        r_ui = 5.00   est = 3.07   {'was_impossible': False}
user: 476        item: 1036       r_ui = 2.00   est = 2.57   {'was_impossible': False}
user: 437        item: 69         r_ui = 2.00   est = 3.22   {'was_impossible': False}
user: 764        item: 633        r_ui = 5.00   est = 4.09   {'was_impossible': False}
user: 894        item: 316        r_ui = 4.00   est = 4.18   {'was_impossible': False}
user: 378        item: 606        r_ui = 5.00   est = 4.12   {'was_impossible': False}
user: 796        item: 480        r_ui = 4.00   est = 4.08   {'was_impossible': False}
user: 727        item: 226        r_ui = 3.00   est = 2.99   {'was_impossible': False}
user: 416        item: 248        r_ui = 5.

user: 541        item: 1409       r_ui = 4.00   est = 3.22   {'was_impossible': False}
user: 308        item: 1252       r_ui = 3.00   est = 3.66   {'was_impossible': False}
user: 744        item: 276        r_ui = 4.00   est = 3.77   {'was_impossible': False}
user: 622        item: 162        r_ui = 3.00   est = 3.66   {'was_impossible': False}
user: 10         item: 603        r_ui = 5.00   est = 4.76   {'was_impossible': False}
user: 154        item: 143        r_ui = 3.00   est = 3.71   {'was_impossible': False}
user: 714        item: 1          r_ui = 3.00   est = 4.16   {'was_impossible': False}
user: 682        item: 216        r_ui = 4.00   est = 3.67   {'was_impossible': False}
user: 766        item: 40         r_ui = 3.00   est = 2.68   {'was_impossible': False}
user: 389        item: 386        r_ui = 3.00   est = 3.09   {'was_impossible': False}
user: 852        item: 323        r_ui = 3.00   est = 2.92   {'was_impossible': False}
user: 257        item: 237        r_ui = 2.

user: 874        item: 311        r_ui = 4.00   est = 3.76   {'was_impossible': False}
user: 201        item: 447        r_ui = 5.00   est = 2.99   {'was_impossible': False}
user: 943        item: 721        r_ui = 5.00   est = 3.14   {'was_impossible': False}
user: 663        item: 96         r_ui = 5.00   est = 4.08   {'was_impossible': False}
user: 18         item: 212        r_ui = 5.00   est = 3.73   {'was_impossible': False}
user: 385        item: 529        r_ui = 4.00   est = 4.06   {'was_impossible': False}
user: 316        item: 190        r_ui = 5.00   est = 3.60   {'was_impossible': False}
user: 883        item: 269        r_ui = 3.00   est = 3.95   {'was_impossible': False}
user: 213        item: 218        r_ui = 4.00   est = 3.99   {'was_impossible': False}
user: 660        item: 652        r_ui = 4.00   est = 3.18   {'was_impossible': False}
user: 6          item: 526        r_ui = 3.00   est = 3.80   {'was_impossible': False}
user: 206        item: 346        r_ui = 5.

user: 429        item: 231        r_ui = 2.00   est = 2.32   {'was_impossible': False}
user: 174        item: 41         r_ui = 1.00   est = 3.17   {'was_impossible': False}
user: 716        item: 153        r_ui = 4.00   est = 3.63   {'was_impossible': False}
user: 122        item: 212        r_ui = 5.00   est = 4.28   {'was_impossible': False}
user: 710        item: 483        r_ui = 5.00   est = 4.52   {'was_impossible': False}
user: 429        item: 223        r_ui = 4.00   est = 3.94   {'was_impossible': False}
user: 867        item: 211        r_ui = 3.00   est = 4.43   {'was_impossible': False}
user: 254        item: 451        r_ui = 2.00   est = 3.22   {'was_impossible': False}
user: 323        item: 151        r_ui = 4.00   est = 3.74   {'was_impossible': False}
user: 592        item: 1623       r_ui = 4.00   est = 3.66   {'was_impossible': False}
user: 199        item: 294        r_ui = 1.00   est = 2.78   {'was_impossible': False}
user: 200        item: 609        r_ui = 3.

user: 367        item: 559        r_ui = 4.00   est = 3.85   {'was_impossible': False}
user: 56         item: 1091       r_ui = 2.00   est = 3.04   {'was_impossible': False}
user: 886        item: 388        r_ui = 1.00   est = 2.32   {'was_impossible': False}
user: 591        item: 66         r_ui = 2.00   est = 3.41   {'was_impossible': False}
user: 893        item: 264        r_ui = 3.00   est = 2.92   {'was_impossible': False}
user: 305        item: 180        r_ui = 4.00   est = 3.32   {'was_impossible': False}
user: 524        item: 204        r_ui = 3.00   est = 3.59   {'was_impossible': False}
user: 254        item: 230        r_ui = 4.00   est = 3.31   {'was_impossible': False}
user: 338        item: 52         r_ui = 5.00   est = 3.76   {'was_impossible': False}
user: 655        item: 1174       r_ui = 3.00   est = 2.97   {'was_impossible': False}
user: 357        item: 275        r_ui = 5.00   est = 4.79   {'was_impossible': False}
user: 535        item: 275        r_ui = 4.

user: 385        item: 129        r_ui = 3.00   est = 3.27   {'was_impossible': False}
user: 425        item: 455        r_ui = 2.00   est = 2.94   {'was_impossible': False}
user: 380        item: 498        r_ui = 4.00   est = 3.47   {'was_impossible': False}
user: 60         item: 755        r_ui = 4.00   est = 3.53   {'was_impossible': False}
user: 615        item: 1192       r_ui = 4.00   est = 3.61   {'was_impossible': False}
user: 527        item: 657        r_ui = 4.00   est = 4.43   {'was_impossible': False}
user: 417        item: 228        r_ui = 3.00   est = 3.27   {'was_impossible': False}
user: 654        item: 215        r_ui = 4.00   est = 4.05   {'was_impossible': False}
user: 425        item: 190        r_ui = 3.00   est = 3.71   {'was_impossible': False}
user: 286        item: 174        r_ui = 4.00   est = 3.67   {'was_impossible': False}
user: 109        item: 425        r_ui = 2.00   est = 3.78   {'was_impossible': False}
user: 243        item: 737        r_ui = 3.

user: 243        item: 813        r_ui = 4.00   est = 3.57   {'was_impossible': False}
user: 749        item: 211        r_ui = 5.00   est = 4.26   {'was_impossible': False}
user: 840        item: 639        r_ui = 4.00   est = 4.11   {'was_impossible': False}
user: 90         item: 185        r_ui = 5.00   est = 4.31   {'was_impossible': False}
user: 901        item: 662        r_ui = 4.00   est = 3.79   {'was_impossible': False}
user: 42         item: 501        r_ui = 5.00   est = 3.87   {'was_impossible': False}
user: 181        item: 1387       r_ui = 1.00   est = 1.60   {'was_impossible': False}
user: 304        item: 259        r_ui = 1.00   est = 2.94   {'was_impossible': False}
user: 868        item: 153        r_ui = 2.00   est = 3.41   {'was_impossible': False}
user: 7          item: 594        r_ui = 3.00   est = 3.64   {'was_impossible': False}
user: 401        item: 26         r_ui = 3.00   est = 3.00   {'was_impossible': False}
user: 685        item: 882        r_ui = 3.

user: 416        item: 477        r_ui = 4.00   est = 3.37   {'was_impossible': False}
user: 87         item: 154        r_ui = 4.00   est = 3.97   {'was_impossible': False}
user: 326        item: 447        r_ui = 4.00   est = 2.59   {'was_impossible': False}
user: 881        item: 542        r_ui = 1.00   est = 2.74   {'was_impossible': False}
user: 886        item: 5          r_ui = 3.00   est = 3.21   {'was_impossible': False}
user: 617        item: 396        r_ui = 1.00   est = 2.57   {'was_impossible': False}
user: 580        item: 282        r_ui = 5.00   est = 3.69   {'was_impossible': False}
user: 475        item: 539        r_ui = 3.00   est = 2.58   {'was_impossible': False}
user: 437        item: 473        r_ui = 5.00   est = 3.29   {'was_impossible': False}
user: 838        item: 121        r_ui = 2.00   est = 3.88   {'was_impossible': False}
user: 586        item: 241        r_ui = 4.00   est = 3.39   {'was_impossible': False}
user: 606        item: 108        r_ui = 1.

user: 6          item: 180        r_ui = 4.00   est = 3.87   {'was_impossible': False}
user: 62         item: 405        r_ui = 3.00   est = 2.89   {'was_impossible': False}
user: 189        item: 175        r_ui = 5.00   est = 4.24   {'was_impossible': False}
user: 135        item: 203        r_ui = 4.00   est = 3.51   {'was_impossible': False}
user: 190        item: 282        r_ui = 3.00   est = 3.51   {'was_impossible': False}
user: 531        item: 312        r_ui = 5.00   est = 3.17   {'was_impossible': False}
user: 271        item: 659        r_ui = 3.00   est = 3.98   {'was_impossible': False}
user: 59         item: 323        r_ui = 4.00   est = 3.04   {'was_impossible': False}
user: 236        item: 411        r_ui = 1.00   est = 2.44   {'was_impossible': False}
user: 293        item: 895        r_ui = 3.00   est = 2.36   {'was_impossible': False}
user: 653        item: 712        r_ui = 3.00   est = 2.11   {'was_impossible': False}
user: 530        item: 255        r_ui = 4.

user: 227        item: 276        r_ui = 4.00   est = 3.81   {'was_impossible': False}
user: 679        item: 83         r_ui = 5.00   est = 3.95   {'was_impossible': False}
user: 519        item: 335        r_ui = 5.00   est = 3.54   {'was_impossible': False}
user: 334        item: 937        r_ui = 3.00   est = 3.36   {'was_impossible': False}
user: 712        item: 699        r_ui = 5.00   est = 4.09   {'was_impossible': False}
user: 456        item: 174        r_ui = 4.00   est = 4.02   {'was_impossible': False}
user: 554        item: 756        r_ui = 3.00   est = 3.14   {'was_impossible': False}
user: 181        item: 129        r_ui = 2.00   est = 2.16   {'was_impossible': False}
user: 43         item: 1          r_ui = 5.00   est = 3.75   {'was_impossible': False}
user: 807        item: 404        r_ui = 3.00   est = 3.86   {'was_impossible': False}
user: 566        item: 49         r_ui = 2.00   est = 2.81   {'was_impossible': False}
user: 435        item: 665        r_ui = 3.

In [16]:
# посчитаем метрики precision@k and recall@k для k=5 и порога отсечения 3.52
# https://github.com/NicolasHug/Surprise/blob/master/examples/precision_recall_at_k.py - готовая функция
# precision@k - отношение релевантных и рекомендованных фильмов, к количеству рекомендованных фильмов
# recall@k - отношение релевантных и рекомендованных фильмов, к количеству релевантных (тех, которые человек захочет посмотреть) фильмов
def precision_recall_at_k(predictions, k = 5, threshold = 3.52):
    """Return precision and recall at k metrics for each user"""

    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)

        # Number of relevant items
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])

        # Precision@K: Proportion of recommended items that are relevant
        # When n_rec_k is 0, Precision is undefined. We here set it to 0.

        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0

        # Recall@K: Proportion of relevant items that are recommended
        # When n_rel is 0, Recall is undefined. We here set it to 0.

        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0

    return precisions, recalls

precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=3.52)

# усредняем по всем пользователям
precision_at_k = sum(prec for prec in precisions.values()) / len(precisions)
recall_at_k = sum(rec for rec in recalls.values()) / len(recalls)
print(precision_at_k)
print(recall_at_k)

0.7433828733191795
0.370768337830131


In [6]:
# получим рекомендации для человека 14
USER_INDEX = '14'
N = 5
userPredictions = list(filter(lambda x: x.uid == USER_INDEX, predictions))
top = sorted(userPredictions, key=lambda x: x.est, reverse=True)[0:N]
top

[Prediction(uid='14', iid='50', r_ui=5.0, est=4.774747708506592, details={'was_impossible': False}),
 Prediction(uid='14', iid='172', r_ui=5.0, est=4.724461503712341, details={'was_impossible': False}),
 Prediction(uid='14', iid='176', r_ui=1.0, est=4.6507253803560165, details={'was_impossible': False}),
 Prediction(uid='14', iid='603', r_ui=4.0, est=4.54496916723333, details={'was_impossible': False}),
 Prediction(uid='14', iid='127', r_ui=2.0, est=4.533140042757135, details={'was_impossible': False})]

In [7]:
# получим информацию о рекомендованных фильмах по их id
def getInfoForList(l):
    print(l)
    data_path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    info = {}
    data = pd.read_csv(data_path, sep='|',encoding='ISO-8859-1', header = None) 
    for id in l:
        row = data.iloc[int(id)]
        info[id] = (row[1], row[2])

    return info
info = getInfoForList(list(map(lambda x: x.iid, top)))
print(info)

['50', '172', '176', '603', '127']
{'50': ('Legends of the Fall (1994)', '01-Jan-1994'), '172': ('Princess Bride, The (1987)', '01-Jan-1987'), '176': ('Good, The Bad and The Ugly, The (1966)', '01-Jan-1966'), '603': ('It Happened One Night (1934)', '01-Jan-1934'), '127': ('Supercop (1992)', '26-Jul-1996')}


In [8]:
# выведем результат
print('User {}'.format(USER_INDEX))
for pred in top:
    print('{}, {}, {}'.format(pred.iid, info[pred.iid], round(pred.est, 3)))

User 14
50, ('Legends of the Fall (1994)', '01-Jan-1994'), 4.775
172, ('Princess Bride, The (1987)', '01-Jan-1987'), 4.724
176, ('Good, The Bad and The Ugly, The (1966)', '01-Jan-1966'), 4.651
603, ('It Happened One Night (1934)', '01-Jan-1934'), 4.545
127, ('Supercop (1992)', '26-Jul-1996'), 4.533
