In [30]:
from surprise import SVD
from surprise import KNNBaseline
from surprise import Dataset
from surprise import Reader
from surprise.similarities import cosine
import numpy as np
import pandas as pd

In [32]:
df = pd.read_csv('ml-latest-small/ratings.csv')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [33]:
moviesDF = pd.read_csv("ml-latest-small/movies.csv", sep=",")
moviesDF.movieId = moviesDF.movieId.astype(int)
moviesDF.set_index("movieId", inplace=True)

In [34]:
# simplification of the evaluation case: predict last-k for each user
dfTrain = pd.DataFrame()
dfTest = pd.DataFrame()
for currUser in df.userId.unique():
    dataCurrUser = df[df.userId == currUser]
    currUserTrain = dataCurrUser.iloc[:-10]
    currUserTest = dataCurrUser.iloc[-10:]
    dfTrain = dfTrain.append(currUserTrain)
    dfTest = dfTest.append(currUserTest)

In [35]:
allObjects = df.movieId.unique().astype(str)
allTestSetUsers = dfTest.userId.unique().astype(str)
allObjects

array(['1', '3', '6', ..., '160836', '163937', '163981'], dtype='<U21')

In [36]:
dfTrain.to_csv('ml-latest-small/ratingsTrain.csv', index=False)
dfTest.to_csv('ml-latest-small/ratingsTest.csv', index=False)

In [37]:
file_path = 'ml-latest-small/ratingsTrain.csv'
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)

data = Dataset.load_from_file(file_path, reader=reader)
trainset = data.build_full_trainset()

In [None]:
def novelty(perUserRecommendations):
  # novelty = -log(num_users who have rated the item / num_users)
  num_users = len(dfTrain.userId.unique())
  acc_novelty = 0
  for pred in perUserRecommendations:
    num_rated = len(dfTrain[dfTrain["movieId"] == int(pred.iid)])
    acc_novelty -= np.log2(num_rated/num_users)
  # return average novelty
  return acc_novelty / len(perUserRecommendations)

In [None]:
def hit_rate(perUserRecommendations):
  hits = 0
  for pred in perUserRecommendations:
    if ((dfTest['userId'] == int(pred.uid)) & (dfTest['movieId'] == int(pred.iid))).any():
      hits += 1
  return hits / len(perUserRecommendations)

In [None]:
def binary_relevance_scores(recommendations: list) -> list:
  result = []
  for recommendation in recommendations:
    ratingDf = dfTest[dfTest.userId == int(recommendation.uid)]
    ratingDf = ratingDf[ratingDf.movieId == int(recommendation.iid)]

    # recommendation iif is relevant if has been reviewed by the user uid
    relevance = 1.0 if not ratingDf.empty else 0.0
    result.append(relevance)

  return np.array(result)

def ndcg(perUserRecommendations):

  def dcg(rel_scores):
    pos = np.arange(1, rel_scores.shape[0] + 1)
    return np.sum(
      rel_scores / np.log2(pos + 1)
    )

  relevance_scores = binary_relevance_scores(perUserRecommendations)
  hits = int(np.sum(relevance_scores))
  # all relevant movies are placed first
  ideal_relevance_scores = np.array(
    ([1] * hits) + ([0] * (len(perUserRecommendations) - hits))
  )
  actual_dcg, ideal_dcg = dcg(relevance_scores), dcg(ideal_relevance_scores)

  if actual_dcg == 0.0:
    return 0.0

  return actual_dcg / ideal_dcg

In [1]:
def evaluate(perUserRecommendations):
    #implement evaluation metrics here
    # some accuracy metric is a baseline (precision@k, nDCG, MAP,...)
    # then implement some beyond-accuracy metric (diversity, novelty, coverage, popularity bias,...)    
    # some metrics already implemented somewhere:-) 
    # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html

    m1 = hit_rate(perUserRecommendations)
    m2 = novelty(perUserRecommendations)
    m3 = ndcg(perUserRecommendations)

    return (m1, m2, m3)

def recommend(perUserPredictions, top_k):
    # select which items should be recommended
    # baseline is selection of top-k items with highest estimated ratingpredict
    # you can implement some diversity / novelty / coverage enhancements here
    return sorted(perUserPredictions, key=lambda x: x.est, reverse=True)[:top_k]

def metricStatistics(perUserMetrics):
    # aggregate per-user metrics into an overall statistic
    # baseline is mean, but you can be more creative
    # one other option (needs results of all hyperparam settings) is to compare how many times the algorithm provided better / worse recommendation than other alternatives
    mean = np.mean(perUserMetrics, axis=0)
    median = np.median(perUserMetrics, axis=0)
    var = np.var(perUserMetrics, axis=0)
    std = np.std(perUserMetrics, axis=0)

    return np.array([mean, median, var, std])

def pickBestVariant(results):
    # based on the results of the evaluation, select best-performing method
    # do the selection based on individual metrics as baseline
    # or think about how to make an aggregated selection based on multiple metrics
    # ideally, visualize the results to see the tradeoff between metrics
    pass


In [40]:
algs = [SVD(), KNNBaseline()]
for alg in algs:
    alg.fit(trainset)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.


In [41]:
# todo use some hyperparameter tuning loop here
# ideally, try more than one algorithm

results = []

for alg in algs:
    metricsPerUser = []
    for uid in allTestSetUsers:
        perUserPredictions = []
        for oid in allObjects:
            perUserPredictions.append(alg.predict(uid,oid, clip=False))

        recs = recommend(perUserPredictions, 20)
        #for rec in recs:
        #    print(rec)
        #break

        (m1, m2, m3) = evaluate(recs)
        print(f"{m1} || {m2} || {m3}")
        # it may be necessary to collect additional information for evaluate() e.g. known ratings similarity matrix etc.
        metricsPerUser.append(np.array([m1, m2, m3]))

    metricsPerUser = np.array(metricsPerUser)
    results_per_alg = metricStatistics(metricsPerUser)
    
    # accumulate m1, m2 to sth. like metricsPerUser
    results.append(results_per_alg)

pickBestVariant(np.array(results))
# results = metricStatistics(metricsPerUser)

user: 1          item: 246        r_ui = None   est = 5.22   {'was_impossible': False}
user: 1          item: 541        r_ui = None   est = 5.22   {'was_impossible': False}
user: 1          item: 1276       r_ui = None   est = 5.19   {'was_impossible': False}
user: 1          item: 2160       r_ui = None   est = 5.18   {'was_impossible': False}
user: 1          item: 7361       r_ui = None   est = 5.13   {'was_impossible': False}
user: 1          item: 5690       r_ui = None   est = 5.13   {'was_impossible': False}
user: 1          item: 318        r_ui = None   est = 5.11   {'was_impossible': False}
user: 1          item: 750        r_ui = None   est = 5.10   {'was_impossible': False}
user: 1          item: 3468       r_ui = None   est = 5.08   {'was_impossible': False}
user: 1          item: 1225       r_ui = None   est = 5.07   {'was_impossible': False}
user: 1          item: 2019       r_ui = None   est = 5.07   {'was_impossible': False}
user: 1          item: 4993       r_ui = No

In [51]:
# Use the knn.
algoKNN = KNNBaseline()
algoKNN.fit(trainset)
pred = algoKNN.predict("1","2", clip=False)
print(pred)
print(pred.uid, pred.iid, pred.est)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
user: 1          item: 2          r_ui = None   est = 4.15   {'actual_k': 40, 'was_impossible': False}
1 2 4.148609180175668
