# Laboratorium 6 - rekomendacje grupowe

## Przygotowanie

 * pobierz i wypakuj dataset: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * więcej możesz poczytać tutaj: https://grouplens.org/datasets/movielens/
 * [opcjonalnie] Utwórz wirtualne środowisko
 `python3 -m venv ./recsyslab6`
 * zainstaluj potrzebne biblioteki:
 `pip install numpy pandas matplotlib`

## Część 1. - przygotowanie danych

In [1]:
# importujemy wszystkie potrzebne pakiety

import math
import numpy as np
import pandas

from collections import defaultdict

from random import choice, sample
from statistics import mean, stdev

from reco_utils import *

In [2]:
# wczytujemy oceny uytkownikow i obliczamy (za pomocą collaborative filtering) wszystkie przewidywane oceny filmow

raw_ratings = pandas.read_csv('data/ml-latest-small/ratings.csv').drop(columns=['timestamp'])
movies = list(raw_ratings['movieId'].unique())
users = list(raw_ratings['userId'].unique())
ratings = get_predicted_ratings(raw_ratings)
ratings

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,10,8,9,9,8,9,9,8,8,10,...,9,8,8,8,8,8,8,8,7,9
2,5,4,5,5,5,4,4,4,4,5,...,5,4,4,5,4,4,4,4,3,4
3,5,3,4,4,4,4,4,4,4,5,...,4,3,4,4,4,4,4,4,3,4
4,9,7,7,8,7,8,8,7,7,8,...,7,6,6,7,7,6,6,7,6,8
5,6,4,5,5,5,5,5,4,4,5,...,5,5,4,5,4,4,5,4,4,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,9,7,8,8,7,8,8,7,7,8,...,7,7,7,7,7,7,7,7,6,8
607,9,7,8,8,7,7,8,7,7,8,...,7,7,6,7,6,7,7,7,5,7
608,8,6,7,7,7,7,7,6,6,7,...,6,6,6,6,6,6,6,6,5,7
609,6,4,5,5,5,5,5,4,4,5,...,4,4,4,4,4,4,4,4,4,5


In [3]:
# definiujemy testowe grupy uzytkownikow, dla ktorych bedziemy generowac rekomendacje

groups_no = 50
group_size = 5
groups = [sample(users, group_size) for i in range(groups_no)]
groups

[[530, 102, 482, 554, 557],
 [599, 25, 385, 507, 154],
 [610, 583, 495, 329, 234],
 [431, 393, 409, 161, 312],
 [2, 62, 57, 324, 280],
 [346, 392, 523, 515, 32],
 [545, 408, 191, 579, 574],
 [144, 283, 83, 273, 316],
 [595, 477, 213, 419, 371],
 [36, 542, 563, 483, 556],
 [281, 505, 241, 352, 285],
 [386, 118, 382, 25, 171],
 [116, 420, 133, 546, 417],
 [492, 257, 288, 121, 217],
 [67, 353, 417, 388, 409],
 [504, 86, 400, 48, 441],
 [494, 356, 522, 325, 495],
 [601, 107, 50, 7, 264],
 [394, 451, 431, 293, 442],
 [433, 534, 70, 547, 315],
 [43, 35, 291, 519, 564],
 [253, 490, 489, 493, 477],
 [347, 404, 28, 423, 402],
 [420, 316, 112, 18, 447],
 [130, 43, 62, 155, 196],
 [517, 407, 311, 330, 359],
 [224, 21, 497, 17, 211],
 [441, 191, 585, 403, 51],
 [469, 349, 549, 243, 547],
 [475, 165, 331, 551, 483],
 [571, 592, 94, 10, 96],
 [400, 289, 594, 424, 260],
 [366, 17, 448, 167, 125],
 [508, 340, 480, 202, 551],
 [505, 296, 510, 43, 403],
 [422, 277, 358, 149, 260],
 [338, 12, 353, 281, 4

## Część 2. - algorytmy proste

In [4]:
# zdefiniujmy interfejs dla wszystkich algorytmow rekomendacyjnych

class Recommender:
    def recommend(self, movies, ratings, group, size):
        pass


# jako pierwszy zaimplementujemy algorytm losowy - dla porownania
    
class RandomRecommender(Recommender):
    def __init__(self):
        self.name = 'random'
        
    def recommend(self, movies, ratings, group, size):
        return sample(movies, size)

In [5]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen

class AverageRecommender(Recommender):
    def __init__(self):
        self.name = 'average'
    
    def recommend(self, movies, ratings, group, size):
        movie_means = defaultdict(lambda: 0)
        for m in movies:
            mean = 0
            for u in group:
                mean += ratings[m][u]
            mean /= len(group)
            movie_means[m] = mean

        selected_movies = [
            x[0] for x in sorted(movie_means.items(), key=lambda x: x[1], reverse=True)
        ][:size]
        return selected_movies

In [6]:
# algorytm rekomendujacy filmy o najwyzszej sredniej ocen,
#   ale rownoczesnie wykluczajacy te filmy, ktore otrzymaly choc jedna ocene ponizej thresholdu

class AverageWithoutMiseryRecommender(Recommender):
    def __init__(self, score_threshold):
        self.name = 'average_without_misery'
        self.score_threshold = score_threshold

    def recommend(self, movies, ratings, group, size):
        def threshold(m, u):
            return self.score_threshold < ratings[m][u]
        movie_means = defaultdict(lambda: 0)
        for m in movies:
            mean = 0
            skip = False
            for u in group:
                if threshold(m, u):
                    mean += ratings[m][u]
                else:
                    skip = True
            mean /= len(group)
            if not skip:
                movie_means[m] = mean

        selected_movies = [
            x[0] for x in sorted(movie_means.items(), key=lambda x: x[1], reverse=True)
        ][:size]
        return selected_movies


In [7]:
# algorytm uwzgledniajacy preferencje tylko jednego uzytkownika

class DictatorshipRecommender(Recommender):
    def __init__(self, dictator_id):
        self.name = 'dictatorship'
        self.dictator_id = dictator_id
        
    def recommend(self, movies, ratings, group, size):
        def key(x):
            return ratings[x][self.dictator_id]

        selected_movies = [
            x for x in sorted(movies, key=key, reverse=True)
        ][:size]
        return selected_movies

In [23]:
# algorytm, ktory w kazdej turze uwzglednia preferencje tylko jednego, kolejnego uzytkownika

class FairnessRecommender(Recommender):
    def __init__(self):
        self.name = 'fairness'
    
    def recommend(self, movies, ratings, group, size):
        def key(u):
            return lambda m: ratings[m][u]
        preferences = {u: [m for m in sorted(movies,key=key(u), reverse=True)] for u in group}
        selected_movies = []
        while len(selected_movies) < size:
            for user in group:
                for m in selected_movies:
                    if m in preferences[user]: preferences[user].remove(m)
                selected_movies += [preferences[user][0]]
        return selected_movies[:size]



In [20]:
# algorytm zachlanny, aproksymujacy metode Proportional Approval Voting
#   w kazdej iteracji wybieramy ten film, ktory najbardziej zwieksza zadowolenie zgodnie z punktacja PAV

class PAVRecommender(Recommender):
    def __init__(self, threshold):
        self.threshold = threshold
        self.name = 'PAV'
        
    def recommend(self, movies, ratings, group, size):
        user_count = defaultdict(lambda: 1)
        def key(x):
            score = 0
            for user in group:
                if ratings[x][user] > self.threshold:
                    score += 1/user_count[user]
            return score
        
        selected_movies = []
        while len(selected_movies) < size:
            movie = [m for m in sorted(movies, key=key, reverse=True)][0]
            movies.remove(movie)
            selected_movies += [movie]
            for user in group:
                if ratings[movie][user] > self.threshold:
                    user_count[user] += 1
        return selected_movies

## Część 3. - funkcje celu

In [16]:
# dwie funkcje pomocnicze:
#  - znajdujaca ulubione filmy danego uzytkownika
#  - obliczajaca sume ocen wystawionych przez uzytkownika wszystkim filmom w rekomendacji

def top_n_movies_for_user(ratings, movies, user_id, n):
    def key(m):
        return ratings[m][user_id]
    sorted_movies = [m for m in sorted(movies, key=key, reverse=True)][:n]
    return sorted_movies

def total_score(recommendation, user_id, ratings):
    sum = 0
    for movie in recommendation:
        sum += ratings[movie][user_id]
    return sum

In [11]:
# funkcja obliczajaca zadowolenie pojedynczego uzytkownika
#  - iloraz zadowolenia z wygenerowanej rekomendacji oraz zadowolenia z hipotetycznej rekomendacji idealnej
def overall_user_satisfaction(recommendation, user_id, movies, ratings):
    top_n = top_n_movies_for_user(ratings, movies, user_id, len(recommendation))
    top_n_score = total_score(top_n, user_id, ratings)
    recommendation_score = total_score(recommendation, user_id, ratings)
    return recommendation_score/top_n_score

# funkcja celu - srednia z zadowolenia wszystkich uzytkownikow w grupie
def overall_group_satisfaction(recommendation, group, movies, ratings):
    return 1.0 * sum([overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]) / len(group)

# funkcja celu - roznica miedzy maksymalnym i minimalnym zadowolenie w grupie
def group_dissatisfaction(recommendation, group, movies, ratings):
    satisfaction_scores = [overall_user_satisfaction(recommendation, user_id, movies, ratings) for user_id in group]
    return max(satisfaction_scores) - min(satisfaction_scores)

## Część 4. - Sequential Hybrid Aggregation

In [28]:
# algorytm balansujacy pomiedzy wyborem elementow o najwyzszej sredniej ocen
#   i o najwyzszej minimalnej ocenie
#   wyliczajacy w kazdej iteracji parametr alfa - jak na wykladzie
class SequentialHybridAggregationRecommender(Recommender):
    def __init__(self):
        self.name = 'sequential_hybrid_aggregation'
        self.user_list_score = defaultdict(lambda: [])
        for user in users:
            self.user_list_score[user] = sorted(
                map(lambda m: ratings[m][user], movies),
                reverse=True
            )

    def recommend(self, movies, ratings, group, size):
        selected_movies = []
        alpha = 0.5

        def key(a):
            return lambda x: (1-a)*mean(ratings[x]) + a * min(ratings[x])

        def sat(user):
            gls = sum([ratings[m][user] for m in selected_movies])
            uls = sum(self.user_list_score[user][:len(selected_movies)])
            return gls/uls if uls != 0 else gls

        while len(selected_movies) < size:
            movie = [m for m in sorted(
                movies, key=key(alpha), reverse=True)][0]
            user_satisfactions = sorted([sat(u) for u in group], reverse=True)
            alpha = user_satisfactions[0] - user_satisfactions[-1]
            movies.remove(movie)
            selected_movies += [movie]

        return selected_movies


## Część 5. - porównanie algorytmów

In [38]:
recommenders = [
    RandomRecommender(),
    AverageRecommender(),
    AverageWithoutMiseryRecommender(2),
    DictatorshipRecommender(1),
    FairnessRecommender(),
    PAVRecommender(4),
    SequentialHybridAggregationRecommender()
]

recommendation_size = 20

# dla kazdego algorytmu:
#  - wygenerujmy jedna rekomendacje dla kazdej grupy
#  - obliczmy wartosci funkcji celu dla kazdej rekomendacji
#  - obliczmy srednia i odchylenie standardowe wartosci funkcji celu
#  - wypiszmy wyniki na konsole

for recommender in recommenders:
    recommendations = [
        (g, recommender.recommend(movies, ratings, g, recommendation_size))
        for g in groups
    ]
    ogs_values = [
        overall_group_satisfaction(r, g, movies, ratings)
        for (g, r) in recommendations
    ]
    ogs_mean = mean(ogs_values)
    ogs_std = stdev(ogs_values)
    gd_values = [
        group_dissatisfaction(r, g, movies, ratings)
        for (g, r) in recommendations
    ]
    gd_mean = mean(gd_values)
    gd_std = stdev(gd_values)
    print(f'''{recommender.name} results:
    overall group satisfaction:              group dissatisfaction:
    mean: {ogs_mean:.7f}    stdev: {ogs_std:.7f}      mean: {gd_mean:.7f}     stdev: {gd_std:.7f}
    ''')


random results:
    overall group satisfaction:              group dissatisfaction:
    mean: 0.8065868    stdev: 0.0240862      mean: 0.0884410     stdev: 0.0361738
    
average results:
    overall group satisfaction:              group dissatisfaction:
    mean: 0.9810668    stdev: 0.0175623      mean: 0.0582530     stdev: 0.0465903
    
average_without_misery results:
    overall group satisfaction:              group dissatisfaction:
    mean: 0.9810456    stdev: 0.0176502      mean: 0.0588590     stdev: 0.0481075
    
dictatorship results:
    overall group satisfaction:              group dissatisfaction:
    mean: 0.8975765    stdev: 0.0279877      mean: 0.1265888     stdev: 0.0480380
    
fairness results:
    overall group satisfaction:              group dissatisfaction:
    mean: 0.9137147    stdev: 0.0187721      mean: 0.1030793     stdev: 0.0330228
    
PAV results:
    overall group satisfaction:              group dissatisfaction:
    mean: 0.8879309    stdev: 0.0631348