In [None]:
import numpy as np
import pandas as pd
from typing import List

from collections import defaultdict
import itertools
import math

from tqdm import tqdm

# Load the pre-computed user and item factor matrices (presumably produced by
# matrix factorisation, judging by the 'mf' directory — TODO confirm).
# Both are indexed by column in the functions below: u_features[:, user_id]
# selects one user's vector, and the columns of i_features are treated as item ids.
u_features = np.load('../datasets/movie_lens/mf/U_features.npy')
i_features = np.load('../datasets/movie_lens/mf/I_features.npy')
# Print both shapes as a quick sanity check of the loaded arrays.
print(u_features.shape)
print(i_features.shape)

def get_items_for_user(user_id):
    """Rank every item for a single user, best predicted rating first.

    The score of each item is the product of the user's column in the
    module-level ``u_features`` with the module-level ``i_features``. Unlike
    ``get_items_for_users`` the scores are returned as-is (no clipping).

    Args:
        user_id: Column index of the user in ``u_features``.

    Returns:
        A list of ``(item_id, rating)`` tuples sorted by descending rating.
    """
    user_vector = u_features[:, user_id]
    predicted = user_vector @ i_features
    # sorted() is stable just like list.sort(), so ties keep item-id order.
    return sorted(enumerate(predicted), key=lambda pair: pair[1], reverse=True)

def get_items_for_users(users_id: List):
    """Predict ratings of every item for several users at once.

    Args:
        users_id: Column indices of the users in the module-level ``u_features``.

    Returns:
        A 2-D array with one row per item and one column per requested user,
        with every predicted rating limited to the range [0, 5].
    """
    member_factors = u_features[:, users_id]
    raw_scores = i_features.T @ member_factors
    # Apply the lower bound first, then the upper bound.
    non_negative = np.maximum(0, raw_scores)
    return np.minimum(5, non_negative)
    
# Smoke test: predicted ratings for three arbitrary user ids.
ratings = get_items_for_users([10,20,30])
# NOTE(review): this bare expression is not the last statement of the cell,
# so the notebook will not display it; it only evaluates the shape.
ratings.shape

def select_top_n_idx(score_list, top_n, top='max', sort=True, exclude_idx=()):
    """Return the indices of the ``top_n`` best entries of ``score_list``.

    Args:
        score_list: 1-D sequence of numeric scores (list or NumPy array).
        top_n: Number of indices to return.
        top: ``'max'`` to select the largest scores, ``'min'`` for the smallest.
        sort: If True the result is ordered best-first; if False the order of
            the returned indices is unspecified.
        exclude_idx: Indices that must not appear in the result. May be a
            list, tuple, NumPy array or None. It is never mutated.

    Returns:
        The selected indices. A NumPy array is returned when nothing is
        excluded and a plain list otherwise (same as before). Fewer than
        ``top_n`` indices are returned when not enough candidates remain.

    Raises:
        ValueError: If ``top`` is neither ``'max'`` nor ``'min'``.
    """
    if top != 'max' and top != 'min':
        raise ValueError('top must be either Max or Min')

    # Accept plain Python sequences as well as arrays.
    score_list = np.asarray(score_list)
    if top == 'max':
        # Negate so that "largest" becomes "smallest" for the selection below.
        score_list = -score_list

    if exclude_idx is None:
        exclude_idx = ()
    # Set membership is O(1) and also works when exclude_idx is an array.
    excluded = set(exclude_idx)

    # Over-select by the number of exclusions so top_n entries survive the filter.
    select_top_n = top_n + len(exclude_idx)

    if select_top_n >= len(score_list):
        # np.argpartition requires kth < len(score_list). Every index is a
        # candidate here, so fall back to a full (best-first) sort.
        top_n_ind = np.argsort(score_list)
    else:
        top_n_ind = np.argpartition(score_list, select_top_n)[:select_top_n]
        if sort:
            top_n_ind = top_n_ind[np.argsort(score_list[top_n_ind])]

    if excluded:
        top_n_ind = [idx for idx in top_n_ind if idx not in excluded]
    return top_n_ind[0:top_n]


# Inline checks for select_top_n_idx on a small array with distinct values.
a = np.array([2,1,6,7,8,9,3,4,5,10])
# Sorted results must come back best-first.
assert np.array_equal(select_top_n_idx(a, 3, top='max'), [9, 5, 4])
assert np.array_equal(select_top_n_idx(a, 3, top='min'), [1, 0, 6])
# With sort=False only the membership of the result is checked, not its order.
assert set(select_top_n_idx(a, 3, top='max', sort=False)) == {9, 5, 4}
assert set(select_top_n_idx(a, 3, top='min', sort=False)) == {0, 1, 6}

# Excluding index 1 (the smallest value) leaves the 'max' result unchanged
# and shifts the 'min' result to the next-smallest entries.
assert np.array_equal(select_top_n_idx(a, 3, top='max', exclude_idx=[1]), [9, 5, 4])
assert np.array_equal(select_top_n_idx(a, 3, top='min', exclude_idx=[1]), [0, 6, 7])
assert set(select_top_n_idx(a, 3, top='max', sort=False, exclude_idx=[1])) == {9, 5, 4}
assert set(select_top_n_idx(a, 3, top='min', sort=False, exclude_idx=[1])) == {0, 6, 7}


In [None]:
from time import sleep
from matplotlib.pyplot import axis


def ep_fuzzdhondt_algorithm(group_items, top_n, n_candidates=None, member_weights=None):
    """Greedily select ``top_n`` items for a group, one item per iteration.

    In every iteration each member is entitled to a share (their weight) of
    the total utility that would have been awarded if a candidate were added.
    A candidate's relevance is the sum over members of
    ``min(candidate utility, member's still-unfulfilled entitlement)``; the
    not-yet-selected candidate with the highest relevance is picked.

    Args:
        group_items: 2-D array of utilities with one row per item and one
            column per group member.
        top_n: Number of items to select.
        n_candidates: Currently unused; candidate pre-filtering is not
            implemented, so every row of ``group_items`` is a candidate.
        member_weights: Optional per-member weights (list, tuple or NumPy
            array). ``None`` or an empty sequence means uniform weights of
            ``1 / group_size``.

    Returns:
        A list of row indices into ``group_items`` in the order they were
        selected.
    """
    n_items = group_items.shape[0]
    group_size = group_items.shape[1]

    # Test against None explicitly: `not member_weights` raises for NumPy
    # arrays holding more than one element.
    if member_weights is not None:
        member_weights = np.array(member_weights)
    if member_weights is None or member_weights.size == 0:
        member_weights = np.array([1. / group_size] * group_size)

    # Total utility of each candidate summed over all members.
    candidate_sum_utility = group_items.sum(axis=1)

    total_user_utility_awarded = np.zeros(group_size)
    total_utility_awarded = 0.

    selected_items = []
    # An item can be selected only once, so cap the loop at the item count.
    for _ in range(min(top_n, n_items)):
        # Total utility awarded so far plus what each candidate would add.
        prospected_total_utility = total_utility_awarded + candidate_sum_utility

        # Broadcast (n_items, 1) against (group_size,) to get every member's
        # entitlement for every candidate.
        allowed_utility_for_users = prospected_total_utility[:, None] * member_weights
        unfulfilled_utility_for_users = np.maximum(
            0, allowed_utility_for_users - total_user_utility_awarded
        )

        # A member contributes at most what they are still entitled to.
        candidate_relevance = np.minimum(unfulfilled_utility_for_users, group_items).sum(axis=1)

        # Relevance is recomputed for all items each round, so items that were
        # already selected must be excluded from the choice.
        idx_of_top_item = list(
            select_top_n_idx(candidate_relevance, 1, exclude_idx=selected_items)
        )[0]
        selected_items.append(idx_of_top_item)

        total_user_utility_awarded += group_items[idx_of_top_item]
        total_utility_awarded += group_items[idx_of_top_item].sum()

    return selected_items

# Number of leading CSV columns read as group members.
group_size = 5

# load groups
# NOTE(review): the groups come from a 'kgrec' directory while the factor
# matrices are loaded from 'movie_lens' — confirm the ids refer to the same users.
groups = pd.read_csv('../notebooks/dfs/groups/kgrec/top_k_10.csv')
# Keep the first `group_size` columns as a 2-D array, one row per group
# (assumed to hold the members' user ids — TODO confirm).
groups = groups.iloc[:,:group_size].values
# Recommended item indices, one list per group, in the same order as `groups`.
rec_it = []

for group_members in tqdm(groups):
    # Predicted ratings of every item (rows) for the members of this group (columns).
    items = get_items_for_users(group_members)
    # Select 10 items for the group.
    top_n_items = ep_fuzzdhondt_algorithm(items, 10)
    # print(items[top_n_items, :])
    rec_it.append(top_n_items)