# Laboratorium 6 - rekomendacje grupowe

## Przygotowanie

 * pobierz i wypakuj dataset: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * więcej możesz poczytać tutaj: https://grouplens.org/datasets/movielens/
 * [opcjonalnie] Utwórz wirtualne środowisko
 `python3 -m venv ./recsyslab6`
 * zainstaluj potrzebne biblioteki:
 `pip install numpy pandas matplotlib`

## Część 1. - przygotowanie danych

In [2]:
# importujemy wszystkie potrzebne pakiety

import math
import numpy as np
import pandas

from collections import defaultdict

from random import choice, sample
from statistics import mean, stdev

from reco_utils import *

In [21]:
# wczytujemy oceny uytkownikow i obliczamy (za pomocą collaborative filtering) wszystkie przewidywane oceny filmow

raw_ratings = pandas.read_csv('data/ml-latest-small/ratings.csv').drop(columns=['timestamp'])
movies = list(raw_ratings['movieId'].unique())
users = list(raw_ratings['userId'].unique())
ratings = get_predicted_ratings(raw_ratings)
ratings

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,10,9,7,9,8,9,9,8,9,9,...,8,8,9,8,9,8,8,8,9,8
2,6,5,4,5,5,5,5,4,5,5,...,4,5,5,5,5,5,4,4,5,5
3,4,4,3,4,4,4,4,3,4,4,...,4,4,4,4,4,4,4,4,4,4
4,8,7,6,7,7,7,7,6,6,7,...,6,7,7,6,7,7,7,6,7,6
5,6,5,4,5,5,6,5,4,5,5,...,5,5,5,5,5,5,5,5,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,9,7,7,8,7,8,7,7,7,8,...,6,7,7,7,8,7,7,7,8,7
607,8,7,6,8,7,8,7,6,7,8,...,7,7,7,7,8,7,7,7,7,7
608,7,6,6,7,6,7,6,5,6,7,...,6,6,6,6,7,6,6,6,7,6
609,5,4,4,5,4,5,4,3,4,4,...,4,4,4,4,4,4,5,4,5,4


In [4]:
# definiujemy testowe grupy uzytkownikow, dla ktorych bedziemy generowac rekomendacje

groups_no = 50
group_size = 5
groups = [sample(users, group_size) for i in range(groups_no)]
groups

[[372, 598, 173, 414, 227],
 [351, 547, 22, 428, 429],
 [539, 421, 258, 594, 368],
 [192, 62, 342, 53, 72],
 [304, 476, 485, 409, 16],
 [499, 226, 212, 253, 356],
 [242, 575, 262, 596, 149],
 [156, 429, 154, 280, 3],
 [45, 365, 43, 112, 540],
 [270, 328, 374, 472, 359],
 [177, 454, 8, 321, 332],
 [226, 281, 314, 6, 7],
 [424, 145, 261, 592, 500],
 [108, 242, 519, 76, 225],
 [357, 135, 417, 65, 327],
 [419, 157, 271, 59, 79],
 [523, 207, 231, 574, 396],
 [486, 72, 456, 224, 607],
 [159, 103, 578, 527, 381],
 [353, 577, 180, 334, 157],
 [402, 259, 534, 375, 18],
 [518, 569, 215, 241, 101],
 [26, 508, 342, 146, 537],
 [564, 69, 588, 376, 346],
 [267, 351, 141, 331, 147],
 [361, 92, 228, 80, 188],
 [47, 195, 341, 133, 333],
 [330, 230, 418, 478, 265],
 [20, 457, 207, 270, 237],
 [293, 36, 309, 248, 302],
 [195, 143, 26, 149, 174],
 [156, 560, 524, 117, 571],
 [312, 167, 186, 603, 491],
 [480, 451, 360, 39, 228],
 [395, 415, 394, 542, 511],
 [347, 237, 496, 229, 439],
 [2, 400, 182, 233, 55

## Część 2. - algorytmy proste

In [5]:
# zdefiniujmy interfejs dla wszystkich algorytmow rekomendacyjnych

class Recommender:
    def recommend(self, movies, ratings, group, size):
        pass


# jako pierwszy zaimplementujemy algorytm losowy - dla porownania
    
class RandomRecommender(Recommender):
    def __init__(self):
        self.name = 'random'
        
    def recommend(self, movies, ratings, group, size):
        return sample(movies, size)

In [6]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen

class AverageRecommender(Recommender):
    def __init__(self):
        self.name = 'average'
    
    def recommend(self, movies, ratings, group, size):
        movie_means = defaultdict(lambda: 0)
        for m in movies:
            mean = 0
            for u in group:
                mean += ratings[m][u]
            mean /= len(group)
            movie_means[m] = mean

        selected_movies = [
            x[0] for x in sorted(movie_means.items(), key=lambda x: x[1], reverse=True)
        ][:size]
        return selected_movies

In [7]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen,
#   ale rownoczesnie wykluczajacy te filmy, ktore otrzymaly choc jedna ocene ponizej thresholdu

class AverageWithoutMiseryRecommender(Recommender):
    def __init__(self, score_threshold):
        self.name = 'average_without_misery'
        self.score_threshold = score_threshold

    def recommend(self, movies, ratings, group, size):
        def threshold(m, u):
            return self.score_threshold < ratings[m][u]
        movie_means = defaultdict(lambda: 0)
        for m in movies:
            mean = 0
            skip = False
            for u in group:
                if threshold(m, u):
                    mean += ratings[m][u]
                else:
                    skip = True
            mean /= len(group)
            if not skip:
                movie_means[m] = mean

        selected_movies = [
            x[0] for x in sorted(movie_means.items(), key=lambda x: x[1], reverse=True)
        ][:size]
        return selected_movies


In [8]:
# algorytm uwzgledniajacy preferencje tylko jednego uzytkownika

class DictatorshipRecommender(Recommender):
    def __init__(self, dictator_id):
        self.name = 'dictatorship'
        self.dictator_id = dictator_id
        
    def recommend(self, movies, ratings, group, size):
        def key(x):
            return ratings[x][self.dictator_id]

        selected_movies = [
            x for x in sorted(movies, key=key, reverse=True)
        ][:size]
        return selected_movies

In [9]:
# algorytm, ktory w kazdej turze uwzglednia preferencje tylko jednego, kolejnego uzytkownika

class FairnessRecommender(Recommender):
    def __init__(self):
        self.name = 'fairness'
    
    def recommend(self, movies, ratings, group, size):
        def key(u):
            return lambda m: ratings[m][u]
        preferences = {u: [m for m in sorted(movies,key=key(u), reverse=True)] for u in group}
        selected_movies = []
        while len(selected_movies) < size:
            for user in group:
                preferences[user] -= selected_movies
                selected_movies += [preferences[user][0]]
        return selected_movies[:size]



In [10]:
# algorytm zachlanny, aproksymujacy metode Proportional Approval Voting
#   w kazdej iteracji wybieramy ten film, ktory najbardziej zwieksza zadowolenie zgodnie z punktacja PAV

class PAVRecommender(Recommender):
    def __init__(self, threshold):
        self.threshold = threshold
        self.name = 'PAV'
        
    def recommend(self, movies, ratings, group, size):
        user_count = defaultdict(lambda: 1)
        def key(x):
            score = 0
            for user in group:
                if ratings[x][user] > self.threshold:
                    score += 1/user_count[user]
            return score
        
        selected_movies = []
        while len(selected_movies) < size:
            movie = [m for m in sorted(movies, key=key, reverse=True)][0]
            movies -= [movie]
            selected_movies += [movie]
            for user in group:
                if ratings[movie][user] > self.threshold:
                    user_count[user] += 1
        return selected_movies

## Część 3. - funkcje celu

In [28]:
# dwie funkcje pomocnicze:
#  - znajdujaca ulubione filmy danego uzytkownika
#  - obliczajaca sume ocen wystawionych przez uzytkownika wszystkim filmom w rekomendacji

def top_n_movies_for_user(ratings, movies, user_id, n):
    def key(m):
        return ratings[m][user_id]
    sorted_movies = [m for m in sorted(movies, key=key, reverse=True)][:n]
    return sorted_movies

def total_score(recommendation, user_id, ratings):
    sum = 0
    for movie in recommendation:
        sum += ratings[movie][user_id]
    return sum

In [12]:
# funkcja obliczajaca zadowolenie pojedynczego uzytkownika
#  - iloraz zadowolenia z wygenerowanej rekomendacji oraz zadowolenia z hipotetycznej rekomendacji idealnej
def overall_user_satisfaction(recommendation, user_id, movies, ratings):
    top_n = top_n_movies_for_user(ratings, movies, user_id, len(recommendation))
    top_n_score = total_score(top_n, user_id, ratings)
    recommendation_score = total_score(recommendation, user_id, ratings)
    return recommendation_score/top_n_score

# funkcja celu - srednia z zadowolenia wszystkich uzytkownikow w grupie
def overall_group_satisfaction(recommendation, group, movies, ratings):
    return 1.0 * sum([overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]) / len(group)

# funkcja celu - roznica miedzy maksymalnym i minimalnym zadowolenie w grupie
def group_dissatisfaction(recommendation, group, movies, ratings):
    satisfaction_scores = [overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]
    return max(satisfaction_scores) - min(satisfaction_scores)

## Część 4. - Sequential Hybrid Aggregation

In [13]:
# algorytm balansujacy pomiedzy wyborem elementow o najwyzszej sredniej ocen
#   i o najwyzszej minimalnej ocenie
#   wyliczajacy w kazdej iteracji parametr alfa - jak na wykladzie
class SequentialHybridAggregationRecommender(Recommender):
    def __init__(self):
        self.name = 'sequential_hybrid_aggregation'

    def recommend(self, movies, ratings, group, size):
        selected_movies = []
        alpha = 0.5
        user_list_score = defaultdict(lambda: [])
        for user in group:
            user_list_score[user] = sorted(
                map(lambda m: ratings[m][user], movies),
                reverse=True
            )

        def key(a):
            return lambda x: (1-a)*mean(ratings[x]) + a * min(ratings[x])

        def sat(user):
            gls = sum([ratings[m][user] for m in selected_movies])
            uls = sum(user_list_score[user][:len(selected_movies)])
            return gls/uls

        while len(selected_movies) < size:
            movie = [m for m in sorted(
                movies, key=key(alpha), reverse=True)][0]
            user_satisfactions = sorted([sat(u) for u in group], reverse=True)
            alpha = user_satisfactions[0] - user_satisfactions[-1]
            movies -= [movie]
            selected_movies += [movie]

        return selected_movies


## Część 5. - porównanie algorytmów

In [29]:
recommenders = [
    RandomRecommender(),
    AverageRecommender(),
    AverageWithoutMiseryRecommender(2),
    DictatorshipRecommender(1),
    FairnessRecommender(),
    PAVRecommender(4),
    SequentialHybridAggregationRecommender()
]

recommendation_size = 10

# dla kazdego algorytmu:
#  - wygenerujmy jedna rekomendacje dla kazdej grupy
#  - obliczmy wartosci funkcji celu dla kazdej rekomendacji
#  - obliczmy srednia i odchylenie standardowe wartosci funkcji celu
#  - wypiszmy wyniki na konsole

for recommender in recommenders:
    recommendations = [
        (g, recommender.recommend(movies, ratings, g, recommendation_size))
        for g in groups
    ]
    obj_fun_values = [
        overall_group_satisfaction(r, g, movies, ratings)
        for (r, g) in recommendations
    ]
    obj_fun_mean = mean(obj_fun_values)
    obj_fun_std = stdev(obj_fun_values)
    print(f'{recommender.name} results:\n mean: {obj_fun_mean} stdev: {obj_fun_std}')


KeyError: 88932

In [27]:
print(sorted(movies, key=lambda x: ratings[x][1]))

[940, 74789, 3551, 2990, 31116, 63876, 142997, 167370, 50685, 52287, 112450, 3576, 106927, 164707, 27317, 51939, 3150, 3711, 91483, 101076, 6337, 27822, 71379, 2460, 32314, 4012, 4453, 2523, 86593, 116207, 2930, 4704, 4860, 8153, 8465, 8730, 25769, 2483, 160422, 3353, 44238, 93208, 121035, 144522, 803, 47384, 67168, 1009, 1030, 1377, 2193, 2616, 3053, 3247, 3386, 2851, 3210, 5746, 417, 2921, 87, 237, 276, 330, 355, 374, 477, 835, 8965, 33836, 38388, 6535, 51834, 58047, 59421, 63992, 193, 3156, 71057, 120466, 2412, 47200, 53318, 59784, 66934, 68319, 73321, 88094, 103772, 112334, 372, 486, 1081, 1373, 1550, 1556, 1591, 1862, 1970, 2123, 2245, 2294, 2374, 2458, 2883, 3005, 3400, 3401, 3536, 3584, 3686, 3692, 5785, 6156, 6358, 7569, 46972, 52730, 61160, 102716, 103372, 109864, 111781, 173307, 334, 2579, 6234, 1083, 2358, 5055, 27408, 44761, 54736, 55116, 56607, 1672, 1841, 3766, 65230, 413, 433, 1513, 3694, 4132, 4322, 4477, 833, 1390, 1866, 3196, 391, 82767, 137337, 115203, 122898, 135456