In [1]:
from lenskit.util import init_rng
from lenskit.batch import predict, recommend, MultiEval
from lenskit.crossfold import partition_users,partition_rows,sample_rows, SampleN, SampleFrac
from lenskit.algorithms import basic,als,svd,funksvd, user_knn, item_knn
from lenskit.datasets import MovieLens, ML100K
from lenskit import topn, util, Recommender
from lenskit.metrics.predict import rmse, mae
from myItemBasedImplementation import MyItemBasedImplementation
#from bbcf import BiclusterBasedCF
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:

# Use the notebook (widget) flavour of tqdm for progress bars.
from tqdm.notebook import tqdm_notebook as tqdm
# Register tqdm with pandas so the progress_* variants (e.g. progress_apply) are available.
tqdm.pandas()

  from pandas import Panel


In [3]:
# Configure LensKit logging so that log messages show up in the notebook output.
util.log_to_notebook()

[   INFO] lenskit.util.log notebook logging configured


# Load data - ML-20M, ML-Latest, ML-Latest-Small, ML100K, ML1M, ML10M

In [None]:
# Load the MovieLens data stored under ../Datasets/ml-latest-small and preview its ratings table.
mlsmall = MovieLens('../Datasets/ml-latest-small')
mlsmall.ratings.head()

# Algorithms

In [None]:
# Baselines
# Bias: personalized mean rating prediction
algo_bias = basic.Bias()
# Random baseline; rng_spec=99 fixes the seed
algo_random = basic.Random(rng_spec=99)
# Popularity baseline
algo_pop = basic.Popular()
# Neighbourhood models, both with nnbrs=20
algo_knnuu = user_knn.UserUser(nnbrs=20)
algo_knnii = item_knn.ItemItem(nnbrs=20)
# Factorization models; the positional 20 is presumably the number of latent features — TODO confirm
# Pure SVD
algo_svd = svd.BiasedSVD(20)
# Iterative SVD (ALS): biased and implicit variants
algo_biasedmf = als.BiasedMF(20)
algo_implicitmf = als.ImplicitMF(20)
# FunkSVD; random_state=99 fixes the seed
algo_funksvd =funksvd.FunkSVD(20,random_state=99)

# Generate Recommendations

User-based splitting makes sure each user is tested with the same number of ratings.
100836 / 5 ≈ 20167

In [None]:
# Set up the batch evaluation; its output directory is ../Results/ml-latest-small.
# NOTE(review): the name `eval` shadows Python's built-in eval() for the rest of the notebook.
eval = MultiEval('../Results/ml-latest-small', recommend=20)
# Row-based cross-validation: the rating rows are split into 10 partitions
crossfold_rows = list(partition_rows(mlsmall.ratings, 10,rng_spec=99))
eval.add_datasets(crossfold_rows, name="ML-Small-crossvalrows-10folds")
# User-based cross-validation: users are split into 10 partitions, with SampleN(10) selecting each test user's test rows
crossfold_users = list(partition_users(mlsmall.ratings, 10, SampleN(10),rng_spec=99))
eval.add_datasets(crossfold_users, name='ML-Small-crossvalusers-10folds-10users')

In [None]:
# Random is left out: it currently raises an error
#eval.add_algorithms(Recommender.adapt(algo_random), name="Random")
eval.add_algorithms(algo_bias, name="Bias")
eval.add_algorithms(algo_pop, name='Pop')
eval.add_algorithms(algo_biasedmf, name='BiasedMF')
eval.add_algorithms(algo_implicitmf, name='ImplicitMF')
eval.add_algorithms(algo_funksvd,name="Funksvd")
# SVD is left out: it currently raises an error
#eval.add_algorithms(algo_svd,name="Svd")
# The kNN models are wrapped with Recommender.adapt before being registered
eval.add_algorithms(Recommender.adapt(algo_knnuu),name="Knn-User")
eval.add_algorithms(Recommender.adapt(algo_knnii),name="Knn-Item")


In [None]:
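# Disabled: uncomment to (re)run the evaluation. The analysis cells below expect
# runs.csv and recommendations.parquet to already exist in ../Results/ml-latest-small.
#eval.run(progress=tqdm)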

# Analysis

In [None]:

# Read the run metadata from the results directory and index it by run id.
runs = pd.read_csv('../Results/ml-latest-small/runs.csv').set_index('RunId')
runs.head()

In [None]:
# Read the stored recommendation lists from the results directory and preview them.
recs = pd.read_parquet('../Results/ml-latest-small/recommendations.parquet')
recs.head()

In [None]:
# Stack the test split of every partition into one ground-truth frame per splitting strategy.
ground_truth_rows = pd.concat([fold.test for fold in crossfold_rows], ignore_index=True)
ground_truth_users = pd.concat([fold.test for fold in crossfold_users], ignore_index=True)

ground_truth_rows

In [None]:
# Score the recommendation lists with recall, precision and nDCG.
rla = topn.RecListAnalysis()
for metric in (topn.recall, topn.precision, topn.ndcg):
    rla.add_metric(metric)
# NOTE(review): the same `recs` frame is scored against both ground truths without
# filtering by data set — confirm this is intended.
raw_results_rows = rla.compute(recs, ground_truth_rows)
raw_results_users = rla.compute(recs, ground_truth_users)

raw_results_rows.head()

In [None]:
# Attach each run's `name` column to its metric rows, matching on RunId.
run_names = runs[['name']]
results_rows = raw_results_rows.join(run_names, on='RunId')
results_users = raw_results_users.join(run_names, on='RunId')
results_rows.head()

We can compute the overall average performance for each algorithm configuration. `fillna(0)` replaces missing metric values with 0 before the results are grouped by algorithm name:


In [None]:
# Mean precision / recall / nDCG per algorithm name, for each splitting strategy.
# A cell only renders its last expression automatically, so the row-based table
# used to be computed and silently discarded; both tables are now displayed
# explicitly (`display` is available without an import inside IPython/Jupyter).
metric_cols = ['precision', 'recall', 'ndcg']
for split_results in (results_rows, results_users):
    display(split_results.fillna(0).groupby(['name'])[metric_cols].mean())

In [None]:
# One bar chart of the mean metrics per algorithm name for each splitting strategy.
# The titles make the two otherwise identical-looking charts distinguishable.
metric_cols = ['precision', 'recall', 'ndcg']
for split_label, split_results in (('row-based folds', results_rows),
                                   ('user-based folds', results_users)):
    mean_metrics = split_results.fillna(0).groupby(['name'])[metric_cols].mean()
    mean_metrics.plot.bar(title='Mean top-N metrics per algorithm (' + split_label + ')')


# Test of MyAlgorithm

In [4]:
# Seed the LensKit RNG (the log output shows numpy.random and random being seeded as well).
init_rng(99)
# NOTE(review): `mlsmall` is rebound here to the ML-100K data set, replacing the
# ml-latest-small object loaded earlier in the notebook.
mlsmall = ML100K('../Datasets/ml-100k')
ratings = mlsmall.ratings

[   INFO] lenskit.util.random initialized LensKit RNG with seed SeedSequence(
    entropy=99,
)
[   INFO] lenskit.util.random initializing numpy.random and random with seed 3328269970


In [None]:
# Every configuration below uses min_nbrs=1 and min_sim=1e-7.

# LensKit user-user kNN with 30 neighbours: center=False vs. the default centering
algo_useruser = user_knn.UserUser(nnbrs=30, min_nbrs=1, min_sim=1e-7, center=False)
algo_useruser_center = user_knn.UserUser(nnbrs=30, min_nbrs=1, min_sim=1e-7)

# LensKit item-item kNN with 10 neighbours: center=False vs. the default centering
algo_itemitem = item_knn.ItemItem(nnbrs=10, min_nbrs=1, min_sim=1e-7, center=False)
algo_itemitem_center = item_knn.ItemItem(nnbrs=10, min_nbrs=1, min_sim=1e-7)

# Custom item-based kNN with 10 neighbours, varying the similarity metric and the `sarwar` flag
algo_ibknn_adjustedcosine = MyItemBasedImplementation(
    nnbrs=10, min_nbrs=1, min_sim=1e-7, sim_metric="adjusted_cosine")
algo_ibknn_cosine_corates_sarwar = MyItemBasedImplementation(
    nnbrs=10, min_nbrs=1, min_sim=1e-7, sarwar=True, sim_metric="cosine_corates")
algo_ibknn_adjustedcosine_sarwar = MyItemBasedImplementation(
    nnbrs=10, min_nbrs=1, min_sim=1e-7, sarwar=True, sim_metric="adjusted_cosine")

# Display name -> algorithm instance, in the order the algorithms are evaluated
algo_dict = {
    "UserUser-cosine": algo_useruser,
    "UserUser-meancentered": algo_useruser_center,
    "ItemItem-cosine": algo_itemitem,
    "ItemItem-meancentered": algo_itemitem_center,
    "MIBKNN-adjustedcosine": algo_ibknn_adjustedcosine,
    "MIBKNN-cosine-corates-sarwar": algo_ibknn_cosine_corates_sarwar,
    "MIBKNN-adjustedcosine-sarwar": algo_ibknn_adjustedcosine_sarwar,
}

In [None]:
def eval_prediction(aname, algo, train, test):
    """Fit ``algo`` on ``train`` and predict the rows of ``test``.

    Returns the frame produced by ``predict`` with an extra ``Algorithm``
    column set to ``aname``.
    """
    predictions = predict(algo.fit(train), test)
    # Tag every row with the algorithm name.
    predictions['Algorithm'] = aname
    return predictions


#### User-based 5-fold cross-validation with 5 test rows per user

In [None]:
# all_preds maps each algorithm name to a list with one prediction frame per partition;
# test_data collects the test frame of each partition.
all_preds = dict()
test_data = []
# NOTE(review): the heading above says 5-fold, but partition_users is called with
# 1 partition here — confirm which one is intended.
for train, test in partition_users(ratings, 1, SampleN(5), rng_spec = 99):
    test_data.append(test)
    for name, algorithm in algo_dict.items():
        # Print the algorithm name as a simple progress indicator.
        print(name)
        all_preds.setdefault(name, []).append(eval_prediction(name, algorithm, train, test))

In [None]:
def eval_accuracy_results(results):
    """Summarise prediction accuracy over a sequence of prediction frames.

    For each frame in ``results`` the RMSE and MAE are computed per ``user``
    group (from the ``prediction`` and ``rating`` columns) and then averaged
    over users.

    Returns ``(mean_rmse, mean_mae, rmse_scores, mae_scores)``: the two
    per-frame score lists and their means.
    """
    def _mean_user_score(frame, metric):
        # Score each user's rows separately, then average over users.
        per_user = frame.groupby('user').apply(lambda df: metric(df.prediction, df.rating))
        return per_user.mean()

    # Single pass over `results`, scoring RMSE then MAE for each frame.
    score_pairs = [(_mean_user_score(frame, rmse), _mean_user_score(frame, mae))
                   for frame in results]
    rmse_scores = [pair[0] for pair in score_pairs]
    mae_scores = [pair[1] for pair in score_pairs]
    return np.mean(rmse_scores), np.mean(mae_scores), rmse_scores, mae_scores

In [None]:
# Print the mean per-user RMSE and MAE of every algorithm.
for algo_name, algo_preds in all_preds.items():
    print(algo_name + ":")
    # Score once per algorithm: the original called eval_accuracy_results twice,
    # repeating all of the per-user groupby work just to read a second tuple item.
    accuracy = eval_accuracy_results(algo_preds)
    print("rmse - "  + str(round(accuracy[0],3)) +
         ", mae - " + str(round(accuracy[1],3)))


In [None]:
def eval_coverage(results_algo):
    """Compute prediction coverage over a collection of prediction frames.

    Parameters
    ----------
    results_algo : iterable of pandas.DataFrame
        One frame per partition; each needs a ``prediction`` column.

    Returns
    -------
    tuple
        ``(coverage, counts)``. ``coverage`` is the fraction of rows, over all
        partitions, whose prediction is not missing; it is ``nan`` when there
        are no rows at all. ``counts`` holds one ``(missing, rows)`` pair per
        partition.
    """
    counts = []
    for partition in results_algo:
        # Plain ints keep the printed counts readable whatever the NumPy version.
        missing = int(partition.prediction.isna().sum())
        counts.append((missing, len(partition)))

    total_missing = sum(missing for missing, _rows in counts)
    total_rows = sum(rows for _missing, rows in counts)
    if total_rows == 0:
        # No rows to measure: report nan instead of raising ZeroDivisionError.
        return float("nan"), counts
    return (total_rows - total_missing) / total_rows, counts

In [None]:
# Report the overall coverage of every algorithm, rounded to 3 decimals.
print("COVERAGE EVAL")
for algo_name, algo_preds in all_preds.items():
    print(algo_name + ":")
    overall_coverage = eval_coverage(algo_preds)[0]
    print("coverage - " + str(round(overall_coverage, 3)))

In [None]:
# Report the per-partition (missing, rows) counts of every algorithm.
for algo_name, algo_preds in all_preds.items():
    print(algo_name + ":")
    partition_counts = eval_coverage(algo_preds)[1]
    print("coverage - " + str(partition_counts))

### BBCF analysis