In [8]:
import numpy as np
import pandas as pd
from typing import List

from collections import defaultdict
import itertools
import math

from tqdm import tqdm

# Matrix-factorisation factors loaded from disk. Judging by how they are
# indexed further down (u_features[:, user_id], i_features.T @ ...), both are
# laid out as (n_factors, n_entities) -- presumably users and items.
u_features = np.load('../datasets/movie_lens/mf/U_features.npy')
i_features = np.load('../datasets/movie_lens/mf/I_features.npy')
# One shape per line: user factors first, item factors second.
print(u_features.shape, i_features.shape, sep='\n')

def get_items_for_user(user_id):
    """Score every item for one user and rank the items by that score.

    The score of an item is the dot product between column ``user_id`` of the
    module-level ``u_features`` matrix and the matching column of
    ``i_features``.

    Args:
        user_id: Column index into ``u_features``.

    Returns:
        A list of ``(item_id, score)`` tuples covering every item, ordered
        from the highest score to the lowest. ``item_id`` is the column index
        of the item in ``i_features``.
    """
    predicted_scores = u_features[:, user_id] @ i_features
    # enumerate() pairs each score with its item index; a reversed sort keeps
    # the original relative order of equal scores, like list.sort(reverse=True).
    return sorted(enumerate(predicted_scores), key=lambda pair: pair[1], reverse=True)

def get_items_for_users(users_id: List):
    """Score every item for several users at once.

    Args:
        users_id: Column indices into the module-level ``u_features`` matrix,
            one per user.

    Returns:
        A 2-D array with one row per item and one column per requested user
        (in the order given), holding ``i_features.T @ u_features[:, users_id]``.
        Scores are returned raw; nothing is clipped.
    """
    # Clamping the scores into [0, 5] was tried and is currently disabled:
    #   np.minimum(5, np.maximum(0, <scores>))
    selected_user_factors = u_features[:, users_id]
    return i_features.T @ selected_user_factors
    
# Smoke check: score all items for three arbitrary user columns. The trailing
# expression is the cell's displayed value -- the recorded output is
# (59047, 3), i.e. one row per item and one column per requested user.
ratings = get_items_for_users([10,20,30])
ratings.shape


(200, 162541)
(200, 59047)


(59047, 3)

In [9]:
def select_top_n_idx(score_list, top_n, top='max', sort=True, exclude_idx=None):
    """Return the indices of the ``top_n`` best scores, skipping excluded indices.

    Args:
        score_list: 1-D sequence or array of numeric scores.
        top_n: Number of indices to return. If fewer non-excluded elements
            exist, all of them are returned.
        top: ``'max'`` to pick the largest scores, ``'min'`` for the smallest.
        sort: When True the result is ordered best-first; when False the
            result holds the same indices in unspecified order.
        exclude_idx: Optional iterable of indices that must never be returned
            (list, set, ndarray, ...). ``None`` or empty means no exclusions.

    Returns:
        The selected indices. For backward compatibility this is a NumPy
        array when nothing is excluded and a list of NumPy integers otherwise.

    Raises:
        ValueError: If ``top`` is neither ``'max'`` nor ``'min'``.
    """
    if top not in ('max', 'min'):
        raise ValueError("top must be either 'max' or 'min'")

    scores = np.asarray(score_list)
    # Work on "smaller is better" keys so one code path serves both modes.
    keyed = -scores if top == 'max' else scores
    # A set gives O(1) membership tests and works for lists and arrays alike.
    excluded = set() if exclude_idx is None else set(exclude_idx)

    n_scores = keyed.shape[0]
    # Over-select by the number of exclusions so that at least top_n
    # candidates survive the filtering below.
    pool_size = top_n + len(excluded)
    if pool_size < n_scores:
        pool = np.argpartition(keyed, pool_size)[:pool_size]
    else:
        # argpartition rejects kth >= len(array); when (nearly) everything is
        # requested there is nothing to partition away anyway.
        pool = np.arange(n_scores)

    if sort:
        pool = pool[np.argsort(keyed[pool])]

    if not excluded:
        return pool[:top_n]

    kept = np.array([idx for idx in pool if idx not in excluded], dtype=pool.dtype)
    if not sort and len(kept) > top_n:
        # The pool is unordered here, so its first top_n survivors are not
        # necessarily the best ones; partition again among the survivors.
        kept = kept[np.argpartition(keyed[kept], top_n)[:top_n]]
    return list(kept[:top_n])


a = np.array([2,1,6,7,8,9,3,4,5,10])

# Sorted, nothing excluded: the exact best-first order is checked.
assert all(
    np.array_equal(select_top_n_idx(a, 3, top=mode), expected)
    for mode, expected in (('max', [9, 5, 4]), ('min', [1, 0, 6]))
)
# Unsorted, nothing excluded: only membership is checked.
assert all(
    set(select_top_n_idx(a, 3, top=mode, sort=False)) == expected
    for mode, expected in (('max', {9, 5, 4}), ('min', {0, 1, 6}))
)

# Sorted, with index 1 (the smallest value) excluded.
assert all(
    np.array_equal(select_top_n_idx(a, 3, top=mode, exclude_idx=[1]), expected)
    for mode, expected in (('max', [9, 5, 4]), ('min', [0, 6, 7]))
)
# Unsorted, with index 1 excluded.
assert all(
    set(select_top_n_idx(a, 3, top=mode, sort=False, exclude_idx=[1])) == expected
    for mode, expected in (('max', {9, 5, 4}), ('min', {0, 6, 7}))
)

In [12]:
import os
from time import sleep
from matplotlib.pyplot import axis


def dhondt_do_algorithm(group_items, top_n, n_candidates=1000, member_weight=None):
    """Greedily build a group recommendation list with D'Hondt-style voting.

    Every group member starts with a share of "voting support" proportional
    to their weight. In each round the candidate with the highest
    support-weighted relevance is picked; each member's support is then
    re-derived as their starting share divided by the total relevance they
    have received from the items picked so far, so members who have been
    served well count for less in later rounds.

    Args:
        group_items: 2-D array of shape ``(n_items, group_size)``; entry
            ``[i, m]`` is the relevance of item ``i`` for member ``m``.
        top_n: Number of items to select. If fewer candidates exist, all of
            them are returned.
        n_candidates: Per-member pre-filter: only the union of each member's
            ``n_candidates`` most relevant items is considered. If this is at
            least ``n_items``, every item is a candidate.
        member_weight: Optional per-member weights (list or array). ``None``
            or an empty sequence means equal weights.

    Returns:
        1-D array of row indices into ``group_items``, in selection order.
    """
    group_items = np.asarray(group_items)
    n_items, group_size = group_items.shape

    # `not member_weight` would raise for ndarray weights, so test explicitly.
    if member_weight is None or len(member_weight) == 0:
        # Equal weights; they are normalised to sum to 1 right below.
        member_weight = np.ones(group_size)
    member_weight = np.asarray(member_weight, dtype=float)
    starting_voting_support = member_weight / member_weight.sum()

    if n_candidates < n_items:
        # Column-wise partition: the first n_candidates rows of the result
        # hold, per member, the indices of that member's highest-scored items
        # (in no particular order, which is fine since only the union is used).
        per_member_top = np.argpartition(-group_items, n_candidates, axis=0)[:n_candidates]
        # Sorted, de-duplicated original item ids.
        top_candidates_idx = np.unique(per_member_top)
    else:
        # Pre-filter is at least as large as the catalogue: keep everything.
        top_candidates_idx = np.arange(n_items)

    # First id mapping: row j here is original item top_candidates_idx[j].
    candidate_group_items = group_items[top_candidates_idx, :]
    n_pool = len(top_candidates_idx)

    current_voting_support = starting_voting_support.copy()
    selected_items_relevance = np.zeros(group_size)
    still_available = np.ones(n_pool, dtype=bool)
    selected_items = []
    # One item is added to the final list per round.
    for _ in range(min(top_n, n_pool)):
        candidate_relevance = np.sum(candidate_group_items * current_voting_support, axis=1)
        # Restrict the arg-max to candidates not picked yet, so an item can
        # never be selected twice.
        open_idx = np.flatnonzero(still_available)
        idx_of_top_item = open_idx[np.argmax(candidate_relevance[open_idx])]

        selected_items.append(idx_of_top_item)
        still_available[idx_of_top_item] = False
        selected_items_relevance += candidate_group_items[idx_of_top_item]
        # NOTE(review): if a member's accumulated relevance is zero or
        # negative (possible with unclipped predicted ratings) this yields
        # inf / negative support -- confirm whether inputs should be clipped.
        current_voting_support = starting_voting_support / selected_items_relevance

    # Map candidate-space positions back to the original item ids.
    final_top_candidates = top_candidates_idx[np.array(selected_items, dtype=np.intp)]
    return final_top_candidates

# Experiment settings; both values are interpolated into the output path below.
group_size = 5
group_type = 'top_k'

# Load the group definitions (one group per CSV row).
# NOTE(review): this file lives under 'kgrec' while the factor matrices in the
# first cell come from 'movie_lens' -- confirm the member ids refer to the
# same user indexing.
groups = pd.read_csv('../notebooks/dfs/groups/kgrec/top_k_10.csv')
# Keep only the first `group_size` columns as a plain 2-D array; each row is
# later used as the list of user indices of one group.
groups = groups.iloc[:,:group_size].values
# Collected recommendations, one entry (array of item ids) per group.
rec_it = []

for group_members in tqdm(groups):
    # Score matrix for this group: one row per item, one column per member.
    items = get_items_for_users(group_members)
    # Pick 10 items, considering each member's 1000 best-scored items.
    top_n_items = dhondt_do_algorithm(items, 10, n_candidates=1000)
    # debug: print(items[top_n_items, :])
    rec_it.append(top_n_items)

# Persist the results; np.save converts the list of per-group arrays to an
# array on write.
os.makedirs(f'../results/gs-{group_size}/{group_type}', exist_ok=True)
np.save(f'../results/gs-{group_size}/{group_type}/fuzz_dhondt.npy', rec_it)
    

100%|██████████| 1000/1000 [00:19<00:00, 52.55it/s]
