# Laboratorium 6 - rekomendacje grupowe

## Przygotowanie

 * pobierz i wypakuj dataset: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * więcej możesz poczytać tutaj: https://grouplens.org/datasets/movielens/
 * [opcjonalnie] Utwórz wirtualne środowisko
 `python3 -m venv ./recsyslab6`
 * zainstaluj potrzebne biblioteki:
 `pip install numpy pandas matplotlib`

## Część 1. - przygotowanie danych

In [1]:
# importujemy wszystkie potrzebne pakiety

import math
import numpy as np
import pandas

from collections import defaultdict

from random import choice, sample
from statistics import mean, stdev

from reco_utils import *

In [3]:
# Load the users' ratings and compute (via collaborative filtering) the predicted
# rating of every movie for every user.

raw_ratings = pandas.read_csv('data/ml-latest-small/ratings.csv')
raw_ratings = raw_ratings.drop(columns=['timestamp'])  # timestamps are not used
movies = list(raw_ratings['movieId'].unique())
users = list(raw_ratings['userId'].unique())
ratings = get_predicted_ratings(raw_ratings)
ratings

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,9,8,8,7,10,10,7,9,9,9,...,8,7,8,7,8,8,9,7,7,9
2,5,5,5,4,6,6,4,5,5,6,...,5,4,5,4,4,5,6,4,4,5
3,4,4,3,3,5,5,3,4,4,4,...,4,3,4,3,4,4,4,3,3,4
4,8,7,7,6,8,8,6,7,7,8,...,6,6,7,6,6,7,7,5,6,8
5,5,5,5,4,6,6,4,6,5,6,...,5,5,5,4,5,5,6,4,4,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,8,7,7,6,9,9,6,8,8,8,...,7,6,7,6,7,7,8,6,6,8
607,8,7,7,6,9,8,6,7,7,8,...,7,6,6,6,7,7,8,6,6,8
608,6,6,6,5,8,7,5,7,7,7,...,6,5,6,5,6,6,7,5,5,7
609,5,4,4,4,6,5,3,5,4,5,...,4,4,4,4,4,4,5,4,4,5


In [4]:
# Define the test groups of users for which recommendations will be generated.

groups_no = 50   # number of groups
group_size = 5   # users per group
groups = [sample(users, k=group_size) for _ in range(groups_no)]
groups

[[539, 187, 415, 467, 369],
 [464, 426, 550, 248, 288],
 [37, 545, 401, 535, 268],
 [26, 408, 148, 304, 345],
 [326, 289, 390, 253, 517],
 [471, 382, 555, 410, 347],
 [235, 370, 450, 486, 154],
 [380, 540, 51, 550, 217],
 [159, 20, 179, 412, 249],
 [204, 78, 530, 206, 100],
 [599, 333, 205, 242, 393],
 [605, 553, 254, 226, 231],
 [460, 62, 491, 98, 366],
 [91, 468, 22, 265, 570],
 [38, 77, 147, 513, 407],
 [451, 134, 177, 264, 578],
 [75, 106, 128, 95, 379],
 [249, 295, 537, 538, 278],
 [540, 246, 578, 316, 201],
 [562, 320, 574, 501, 437],
 [234, 372, 263, 605, 496],
 [276, 393, 95, 531, 521],
 [583, 50, 153, 91, 21],
 [560, 34, 454, 393, 259],
 [515, 420, 208, 138, 179],
 [349, 544, 69, 307, 447],
 [454, 123, 373, 240, 507],
 [532, 498, 578, 439, 512],
 [536, 164, 74, 415, 226],
 [182, 130, 346, 8, 297],
 [230, 52, 410, 99, 96],
 [462, 318, 225, 605, 579],
 [489, 164, 558, 581, 197],
 [121, 600, 491, 106, 409],
 [196, 78, 295, 3, 554],
 [169, 126, 357, 353, 402],
 [302, 440, 403, 391

## Część 2. - algorytmy proste

In [5]:
# Common interface shared by all the recommendation algorithms.

class Recommender:
    """Interface of a group recommender."""

    def recommend(self, movies, ratings, group, size):
        """Recommend movies to a group of users.

        Args:
            movies: candidate movies.
            ratings: rating lookup, indexed by subclasses as
                ``ratings[movie][user]``.
            group: users the recommendation is generated for.
            size: requested number of recommended movies.

        Returns:
            ``None`` -- this base implementation is a no-op placeholder that
            subclasses override.
        """
        return None


# The random algorithm comes first -- it serves as a baseline for comparison.

class RandomRecommender(Recommender):
    """Baseline recommender that draws movies at random."""

    def __init__(self):
        self.name = 'random'

    def recommend(self, movies, ratings, group, size):
        """Return ``size`` movies sampled without replacement from ``movies``.

        ``ratings`` and ``group`` are accepted only to satisfy the common
        interface; they are not used.
        """
        drawn = sample(movies, k=size)
        return drawn

In [6]:
# Algorithm recommending the movies with the highest average rating.

class AverageRecommender(Recommender):
    """Recommend the movies whose mean rating within the group is highest."""

    def __init__(self):
        self.name = 'average'

    def recommend(self, movies, ratings, group, size):
        """Return up to ``size`` movies ordered by descending group average.

        Args:
            movies: candidate movies.
            ratings: rating lookup read as ``ratings[movie][user]``.
            group: users whose ratings are averaged.
            size: maximum number of movies to return.

        Movies with equal averages keep their relative order from ``movies``.
        """
        members = len(group)
        averages = {}
        for movie in movies:
            total = 0
            for user in group:
                total += ratings[movie][user]
            averages[movie] = total / members

        # sorted() is stable, so ties keep the insertion order of `averages`.
        ranking = sorted(averages, key=averages.get, reverse=True)
        return ranking[:size]

In [7]:
# Algorithm recommending the movies with the highest average rating while
#   excluding the movies for which some group member's rating does not exceed the threshold.

class AverageWithoutMiseryRecommender(Recommender):
    """Average-rating recommender with a per-user minimum rating."""

    def __init__(self, score_threshold):
        self.name = 'average_without_misery'
        self.score_threshold = score_threshold

    def recommend(self, movies, ratings, group, size):
        """Return up to ``size`` acceptable movies by descending group average.

        A movie is acceptable only when every group member's rating is
        strictly greater than ``self.score_threshold``. Fewer than ``size``
        movies (possibly none) are returned when not enough movies qualify.

        Args:
            movies: candidate movies.
            ratings: rating lookup read as ``ratings[movie][user]``.
            group: users whose ratings are considered.
            size: maximum number of movies to return.
        """
        floor = self.score_threshold
        acceptable = {}
        for movie in movies:
            scores = [ratings[movie][user] for user in group]
            if not all(floor < score for score in scores):
                continue  # at least one member would be unhappy with this movie
            total = 0
            for score in scores:
                total += score
            acceptable[movie] = total / len(group)

        ranking = sorted(acceptable, key=acceptable.get, reverse=True)
        return ranking[:size]


In [None]:
# Algorithm that takes into account the preferences of a single user only.

class DictatorshipRecommender(Recommender):
    """Recommend whatever one fixed user (the "dictator") rates highest."""

    def __init__(self, dictator_id):
        self.name = 'dictatorship'
        self.dictator_id = dictator_id

    def recommend(self, movies, ratings, group, size):
        """Return up to ``size`` movies by the dictator's descending rating.

        ``group`` is not consulted: the ranking always uses
        ``self.dictator_id``, whether or not that user belongs to the group.

        Args:
            movies: candidate movies.
            ratings: rating lookup read as ``ratings[movie][user]``.
            group: unused.
            size: maximum number of movies to return.
        """
        ranking = sorted(
            movies,
            key=lambda movie: ratings[movie][self.dictator_id],
            reverse=True,
        )
        return ranking[:size]

In [8]:
# Algorithm that, in every turn, takes into account the preferences of only one
#   user -- the next one in the group.

class FairnessRecommender(Recommender):
    """Round-robin recommender: group members take turns picking a movie."""

    def __init__(self):
        self.name = 'fairness'

    def recommend(self, movies, ratings, group, size):
        """Return up to ``size`` movies chosen by the members in turns.

        Users are visited in ``group`` order, cycling as long as needed. On
        their turn a user contributes the movie they rate highest among those
        not selected yet.

        Args:
            movies: candidate movies (must be hashable).
            ratings: rating lookup read as ``ratings[movie][user]``.
            group: users taking turns.
            size: requested number of movies.

        Returns:
            A list of distinct movies in pick order. It is shorter than
            ``size`` when there are fewer distinct movies, and empty when the
            group is empty.
        """
        def rating_of(user):
            return lambda movie: ratings[movie][user]

        # Never ask for more picks than there are distinct movies; this also
        # guarantees the cursor scan below always finds a free movie.
        target = min(size, len(set(movies)))
        if not group or target <= 0:
            return []

        rankings = {
            user: sorted(movies, key=rating_of(user), reverse=True)
            for user in group
        }
        # Position in each user's ranking from which the next pick is searched,
        # so already-consumed prefixes are never rescanned.
        cursors = dict.fromkeys(rankings, 0)

        selected_movies = []
        taken = set()
        while len(selected_movies) < target:
            for user in group:
                if len(selected_movies) == target:
                    break
                ranking = rankings[user]
                position = cursors[user]
                while ranking[position] in taken:
                    position += 1
                pick = ranking[position]
                cursors[user] = position + 1
                taken.add(pick)
                selected_movies.append(pick)
        return selected_movies



In [9]:
# Greedy algorithm approximating the Proportional Approval Voting method:
#   in every iteration pick the movie that increases the satisfaction the most
#   according to the PAV scoring.

class PAVRecommender(Recommender):
    """Greedy approximation of Proportional Approval Voting (PAV)."""

    def __init__(self, threshold):
        self.threshold = threshold
        self.name = 'PAV'

    def recommend(self, movies, ratings, group, size):
        """Return up to ``size`` movies selected greedily by PAV gain.

        A user approves a movie when their rating is strictly greater than
        ``self.threshold``. A movie's gain is the sum, over its approvers, of
        ``1 / k`` where ``k`` is one plus the number of already selected
        movies that user approves. Ties go to the movie appearing first in
        ``movies``.

        Args:
            movies: candidate movies (must be hashable); not modified.
            ratings: rating lookup read as ``ratings[movie][user]``.
            group: voting users.
            size: requested number of movies.

        Returns:
            A list of distinct movies in selection order, shorter than
            ``size`` when there are fewer distinct candidates.
        """
        # Work on a de-duplicated copy so the caller's list is left untouched.
        candidates = list(dict.fromkeys(movies))
        # Approvals never change between rounds, so look them up only once.
        approvers = {
            movie: [user for user in group if ratings[movie][user] > self.threshold]
            for movie in candidates
        }
        # Denominator of the weight a user contributes in the current round.
        divisor = {user: 1 for user in group}

        def marginal_gain(movie):
            return sum(1 / divisor[user] for user in approvers[movie])

        selected_movies = []
        while candidates and len(selected_movies) < size:
            # max() returns the first maximal element, i.e. the earliest movie.
            best = max(candidates, key=marginal_gain)
            candidates.remove(best)
            selected_movies.append(best)
            for user in approvers[best]:
                divisor[user] += 1
        return selected_movies

## Część 3. - funkcje celu

In [10]:
# dwie funkcje pomocnicze:
#  - znajdujaca ulubione filmy danego uzytkownika
#  - obliczajaca sume ocen wystawionych przez uzytkownika wszystkim filmom w rekomendacji

def top_n_movies_for_user(ratings, movies, user_id, n):
    """Return the ``n`` movies that ``user_id`` rates highest.

    Args:
        ratings: rating lookup read as ``ratings[movie][user_id]``.
        movies: candidate movies.
        user_id: user whose ratings define the order.
        n: maximum number of movies to return.

    Returns:
        A list in descending rating order; equally rated movies keep their
        relative order from ``movies``.
    """
    ranking = sorted(movies, key=lambda movie: ratings[movie][user_id], reverse=True)
    return ranking[:n]

def total_score(recommendation, user_id, ratings):
    """Return the sum of ``user_id``'s ratings over the recommended movies.

    Args:
        recommendation: movies to score.
        user_id: user whose ratings are summed.
        ratings: rating lookup read as ``ratings[movie][user_id]``.

    Returns:
        The accumulated rating; ``0`` for an empty recommendation.
    """
    accumulated = 0
    for score in (ratings[movie][user_id] for movie in recommendation):
        accumulated = accumulated + score
    return accumulated

In [11]:
# Satisfaction of a single user:
#  - the ratio of the satisfaction with the generated recommendation to the
#    satisfaction with a hypothetical ideal recommendation
def overall_user_satisfaction(recommendation, user_id, movies, ratings):
    """Return how close ``recommendation`` is to the user's ideal list.

    The ideal list holds the user's ``len(recommendation)`` top-rated movies;
    the result is the user's total score for ``recommendation`` divided by
    their total score for that ideal list.

    Args:
        recommendation: recommended movies.
        user_id: user being evaluated.
        movies: all candidate movies the ideal list is drawn from.
        ratings: rating lookup passed through to the scoring helpers.

    Returns:
        The satisfaction ratio, or ``0.0`` when the ideal score is zero
        (for example for an empty recommendation) instead of dividing by zero.
    """
    ideal = top_n_movies_for_user(ratings, movies, user_id, len(recommendation))
    ideal_score = total_score(ideal, user_id, ratings)
    if ideal_score == 0:
        # Nothing could have been gained, e.g. a recommender returned no movies.
        return 0.0
    return total_score(recommendation, user_id, ratings) / ideal_score

# Objective function - the mean satisfaction of all the users in the group.
def overall_group_satisfaction(recommendation, group, movies, ratings):
    """Return the average of ``overall_user_satisfaction`` over ``group``."""
    per_user = [
        overall_user_satisfaction(recommendation, member, movies, ratings)
        for member in group
    ]
    return 1.0 * sum(per_user) / len(group)

# Objective function - the difference between the maximum and the minimum
# satisfaction within the group.
def group_dissatisfaction(recommendation, group, movies, ratings):
    """Return the gap between the most and the least satisfied group member."""
    per_user = []
    for member in group:
        per_user.append(overall_user_satisfaction(recommendation, member, movies, ratings))
    happiest = max(per_user)
    unhappiest = min(per_user)
    return happiest - unhappiest

## Część 4. - Sequential Hybrid Aggregation

In [None]:
# Algorithm balancing between picking the items with the highest average rating
#   and the items with the highest minimum rating,
#   computing the alpha parameter in every iteration - as presented in the lecture.
class SequentialHybridAggregationRecommender(Recommender):
    """Sequential Hybrid Aggregation recommender (not implemented yet)."""

    def __init__(self):
        self.name = 'sequential_hybrid_aggregation'

    def recommend(self, movies, ratings, group, size):
        """Placeholder for the algorithm; currently returns ``None``."""
        # TODO: implement the algorithm described in the comment above the class.
        return None

## Część 5. - porównanie algorytmów

In [None]:
# Algorithms to compare; constructor arguments are spelled out by keyword.
recommenders = [
    RandomRecommender(),
    AverageRecommender(),
    AverageWithoutMiseryRecommender(score_threshold=2),
    DictatorshipRecommender(dictator_id=1),
    FairnessRecommender(),
    PAVRecommender(threshold=4),
    SequentialHybridAggregationRecommender(),
]

# Number of movies requested from every recommender.
recommendation_size = 10

# dla kazdego algorytmu:
#  - wygenerujmy jedna rekomendacje dla kazdej grupy
#  - obliczmy wartosci funkcji celu dla kazdej rekomendacji
#  - obliczmy srednia i odchylenie standardowe wartosci funkcji celu
#  - wypiszmy wyniki na konsole

for recommender in recommenders:
    # ...