# Run experiments
For each dataset, each group size, and each group:
get the items scored by the matrix factorization (MF) model for each member of the group.
These items are the input of the group recommendation algorithms.

In [1]:
import numpy as np
import pandas as pd
from typing import List

from collections import defaultdict
import itertools
import math

from tqdm import tqdm

# Matrix-factorization factors of the kgrec dataset. The code below addresses
# users as columns of u_features and items as columns of i_features.
u_features = np.load('../datasets/kgrec/mf/U_features.npy')
i_features = np.load('../datasets/kgrec/mf/I_features.npy')
# Quick sanity check of what was loaded: one shape per line.
print(u_features.shape, i_features.shape, sep='\n')

def get_items_for_user(user_id):
    """
    Returns (item_id, score) pairs for one user, highest score first.

    The score of an item is the dot product of the user's column of
    u_features with that item's column of i_features.
    """
    scores = u_features[:, user_id] @ i_features
    # sorted() is stable, so equally scored items keep their item-id order
    return sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

def get_items_for_users(users_id: List):
    """
    Returns the score matrix of every item for the given users.

    Entry [i, j] is the dot product of item i's factors with the factors of
    users_id[j]. Scores are returned as computed (no clamping to the 0-5 range).
    """
    members_features = u_features[:, users_id]
    return i_features.T @ members_features
    
# ratings = get_items_for_users([10,20,30])
# ratings.shape

def select_top_n_idx(score_list, top_n, top='max', sort=True, exclude_idx=None):
    """
    Returns the indices of the top_n best entries of a 1-D score vector.

    score_list: 1-D array-like of scores.
    top_n: number of indices wanted; fewer are returned when fewer
        non-excluded entries exist.
    top: 'max' to select the largest scores, 'min' to select the smallest.
    sort: when True the indices are ordered best first, otherwise their
        order is unspecified (but they are still exactly the top_n best).
    exclude_idx: iterable of indices that must never be returned
        (default: nothing is excluded).

    Returns a 1-D integer numpy array of indices into score_list.
    Raises ValueError when top is neither 'max' nor 'min'.
    """
    if top not in ('max', 'min'):
        raise ValueError("top must be either 'max' or 'min'")

    scores = np.asarray(score_list)
    if scores.dtype.kind in 'bu':
        # negating bools raises and negating unsigned ints wraps around
        scores = scores.astype(np.float64)
    if top == 'max':
        # argpartition/argsort select the smallest values, so flip the sign
        scores = -scores

    # Remove the excluded entries *before* selecting. Over-selecting and
    # filtering afterwards is only correct when the result is sorted: an
    # unsorted selection could be truncated past one of the true top entries.
    candidates = np.arange(scores.shape[0])
    excluded = [] if exclude_idx is None else list(exclude_idx)
    if excluded:
        candidates = candidates[np.isin(candidates, excluded, invert=True)]
    candidate_scores = scores[candidates]

    n_selected = min(top_n, candidates.shape[0])
    if n_selected <= 0:
        return candidates[:0]

    if n_selected < candidates.shape[0]:
        # kth = n_selected - 1 is always in bounds here
        selected = np.argpartition(candidate_scores, n_selected - 1)[:n_selected]
    else:
        # everything that is left is wanted, nothing to partition
        selected = np.arange(n_selected)

    if sort:
        selected = selected[np.argsort(candidate_scores[selected])]

    # map positions among the candidates back to indices of score_list
    return candidates[selected]


# Smoke checks for select_top_n_idx on a vector of ten distinct scores.
a = np.array([2,1,6,7,8,9,3,4,5,10])

# sorted output: indices must come back best first
assert list(select_top_n_idx(a, 3, top='max')) == [9, 5, 4]
assert list(select_top_n_idx(a, 3, top='min')) == [1, 0, 6]
# unsorted output: only membership matters
assert sorted(select_top_n_idx(a, 3, top='max', sort=False)) == [4, 5, 9]
assert sorted(select_top_n_idx(a, 3, top='min', sort=False)) == [0, 1, 6]

# same checks with index 1 (the lowest score) excluded
assert list(select_top_n_idx(a, 3, top='max', exclude_idx=[1])) == [9, 5, 4]
assert list(select_top_n_idx(a, 3, top='min', exclude_idx=[1])) == [0, 6, 7]
assert sorted(select_top_n_idx(a, 3, top='max', sort=False, exclude_idx=[1])) == [4, 5, 9]
assert sorted(select_top_n_idx(a, 3, top='min', sort=False, exclude_idx=[1])) == [0, 6, 7]


(50, 5199)
(50, 8640)


In [3]:
from collections import defaultdict
import itertools
import math
import os

from tqdm import tqdm

def get_top_n_idx(group_items, n_candidates):
    """
    Returns the sorted union of the n_candidates best items of every column.

    group_items: score array whose first axis indexes the items; every
        column is ranked independently (higher score is better).
    n_candidates: how many of its best items each column contributes; values
        larger than the number of items are treated as "all items".

    Returns a sorted 1-D integer array of unique item indices.
    """
    scores = np.asarray(group_items)
    n_items = scores.shape[0]
    n_selected = min(n_candidates, n_items)

    if n_selected <= 0:
        # integer dtype so the result can still be used as an index
        return np.empty(0, dtype=np.intp)
    if n_selected == n_items:
        # every item is a candidate; argpartition has nothing to split
        return np.arange(n_items)

    # After partitioning along the item axis, the last n_selected rows hold,
    # for each column, the indices of its n_selected largest scores.
    split = n_items - n_selected
    top_per_column = np.argpartition(scores, split, axis=0)[split:]

    # np.unique flattens, de-duplicates and sorts in one step
    return np.unique(top_per_column)

def avg_algorithm(group_items, top_n: int, n_candidates: int):
    """
    Average strategy: returns the top_n items with the highest mean score
    across the group, best first, as indices into group_items.
    """
    # Shortlist of item indices produced by get_top_n_idx.
    shortlist = get_top_n_idx(group_items, n_candidates)

    # Mean over axis 1 of the shortlisted rows; entry k belongs to item shortlist[k].
    mean_scores = group_items[shortlist, :].mean(axis=1)

    # Positions inside the shortlist, translated back to original item indices.
    best_positions = select_top_n_idx(mean_scores, top_n)
    return shortlist[best_positions]

def lm_algorithm(group_items, top_n: int, n_candidates: int):
    """
    Least-misery strategy: scores every item by its lowest value across the
    group and returns the top_n items with the highest such minimum, best
    first, as indices into group_items.
    """
    # Shortlist of item indices produced by get_top_n_idx.
    shortlist = get_top_n_idx(group_items, n_candidates)

    # Minimum over axis 1 of the shortlisted rows; entry k belongs to item shortlist[k].
    worst_scores = group_items[shortlist, :].min(axis=1)

    # Positions inside the shortlist, translated back to original item indices.
    best_positions = select_top_n_idx(worst_scores, top_n)
    return shortlist[best_positions]

def fai_algorithm(group_items, top_n: int, n_candidates: int):
    """
    Fairness strategy: members take turns picking their favourite item.

    The first item is the best item of the first member, the second item the
    best not-yet-picked item of the second member, and so on, cycling through
    the members until top_n items have been picked.

    group_items: 2-D score array, rows are items and columns are group
        members (the same layout avg_algorithm and lm_algorithm reduce over).
    top_n: number of items to return; capped at the number of candidates.
    n_candidates: per-member shortlist size passed to get_top_n_idx.

    Returns a 1-D array of distinct indices into the rows of group_items, in
    picking order.
    """
    top_candidates_idx = get_top_n_idx(group_items, n_candidates)
    candidate_group_items = group_items[top_candidates_idx, :] # this is the first id mapping (to go back to original, index by top_candidates_idx)

    # members are the columns (axis 1), items are the rows (axis 0)
    n_items, group_size = candidate_group_items.shape
    n_picks = min(top_n, n_items)
    if n_picks <= 0 or group_size == 0:
        return top_candidates_idx[:0]

    # Column m lists member m's candidates from best to worst. A member never
    # needs more than n_picks entries: fewer than n_picks items are taken when
    # it is their turn, so one of their first n_picks favourites is still free.
    preference_order = np.argsort(-candidate_group_items, axis=0, kind='stable')[:n_picks]

    picked = []
    already_picked = set()
    next_choice = [0] * group_size  # per member: where to resume in their column
    for turn in range(n_picks):
        member = turn % group_size
        member_order = preference_order[:, member]
        position = next_choice[member]
        # skip items another member (or this one) has already taken
        while int(member_order[position]) in already_picked:
            position += 1
        item = int(member_order[position])
        picked.append(item)
        already_picked.add(item)
        next_choice[member] = position + 1

    # map positions among the candidates back to original item indices
    final_top_n_idx = top_candidates_idx[np.asarray(picked, dtype=np.intp)]
    return final_top_n_idx


def get_rank(score_list_np):
    """Ranks the scores in descending order: the highest score gets rank 1."""
    ascending_order = score_list_np.argsort()
    n_scores = score_list_np.shape[0]

    ranks = np.zeros(score_list_np.shape, dtype=np.int32)
    # the smallest score receives rank n_scores, the largest receives rank 1
    ranks[ascending_order] = np.arange(n_scores, 0, -1)
    return ranks


# Experiment configuration.
group_size = 5
group_type = 'top_k'
top_n = 10           # length of every recommendation list
n_candidates = 1000  # shortlist size handed to each algorithm

# load groups
# NOTE(review): the file name is hard-coded and not derived from group_type - confirm it matches.
groups = pd.read_csv('../notebooks/dfs/groups/kgrec/top_k_10.csv')
# keep only the first `group_size` columns, as a plain array with one row per group
groups = groups.iloc[:,:group_size].values

# one recommendation list per group and per algorithm, in the order of `groups`
rec_it_avg = []
rec_it_lm = []
rec_it_fai = []

for group_members in tqdm(groups):
    # score matrix for this group's members
    items = get_items_for_users(group_members)

    # avg_algorithm
    top_n_items_avg = avg_algorithm(items, top_n, n_candidates)
    rec_it_avg.append(top_n_items_avg)

    # lm_algorithm
    top_n_items_lm = lm_algorithm(items, top_n, n_candidates)
    rec_it_lm.append(top_n_items_lm)

    # fai_algorithm
    top_n_items_fai = fai_algorithm(items, top_n, n_candidates)
    rec_it_fai.append(top_n_items_fai)

# create directory if it doesn't exist
results_dir = f'../results/gs-{group_size}/{group_type}'
os.makedirs(results_dir, exist_ok=True)
np.save(f'{results_dir}/avg.npy', rec_it_avg)
np.save(f'{results_dir}/lm.npy', rec_it_lm)
np.save(f'{results_dir}/fai.npy', rec_it_fai)

  0%|          | 0/1000 [00:00<?, ?it/s]