# Install library

# Import library

In [54]:
import numpy as np
from datasets import Dataset

# Code: score aggregation and synthetic-data demo


In [None]:
def _top_k_unique(scores, refs, top_k):
    """Keep the best-scoring entry for each distinct reference application.

    Entries are visited from highest to lowest score. The first entry seen
    for a given reference application (``ref[0]``) is kept; later entries for
    the same application are skipped. The walk stops once ``top_k`` distinct
    applications have been collected.

    Args:
        scores: Sequence of numeric scores; ``scores[i]`` belongs to ``refs[i]``.
        refs: Sequence of ``(app_nb, pid)`` pairs, indexable by position.
        top_k: Maximum number of distinct applications to keep. Values
            ``<= 0`` yield empty results.

    Returns:
        ``(kept_scores, kept_refs)``: two parallel lists ordered by
        decreasing score, with scores converted to Python ``float``.
    """
    kept_scores, kept_refs = [], []
    if top_k <= 0:
        # The limit check below only runs after an append, so without this
        # guard a non-positive top_k would still let one entry through.
        return kept_scores, kept_refs

    scores = np.asarray(scores)
    seen_apps = set()
    for idx in np.argsort(scores)[::-1]:  # descending score order
        ref = refs[idx]
        ref_app = ref[0]
        if ref_app in seen_apps:
            continue
        seen_apps.add(ref_app)
        kept_scores.append(float(scores[idx]))
        kept_refs.append(ref)
        if len(seen_apps) >= top_k:
            break
    return kept_scores, kept_refs


def aggregate_reciprocal_fusion(scores_dataset, nonciting_dataset_final, top_k=100):
    """Aggregate per-row scores into one ranked reference list per ``app_nb``.

    Despite the name, no reciprocal-rank weighting is applied: each reference
    application is represented by its single highest score.

    The aggregation runs in two stages:

    1. For every row of ``scores_dataset``, position ``i`` of ``row["scores"]``
       is matched with entry ``i`` of ``nonciting_dataset_final``, and the
       best-scoring entry of up to ``top_k`` distinct reference ``app_nb``
       values is kept.
    2. Rows sharing the same ``row["app_nb"]`` are merged, and the same
       selection is applied to the merged candidates.

    Args:
        scores_dataset: Iterable of rows (mappings) providing ``"app_nb"`` and
            ``"scores"``.
        nonciting_dataset_final: Object whose ``to_dict()`` returns columns
            ``"app_nb"`` and ``"pid"``; it must have at least as many entries
            as the longest ``"scores"`` list.
        top_k: Maximum number of distinct reference applications kept per row
            and per merged group. Values ``<= 0`` produce empty lists.

    Returns:
        A ``Dataset`` with one row per distinct ``app_nb`` of
        ``scores_dataset`` (in first-seen order) and columns ``"app_nb"``,
        ``"scores"`` (descending) and ``"app_nb_pids"`` (the matching
        ``(app_nb, pid)`` pairs).
    """
    mapping = nonciting_dataset_final.to_dict()
    # Pair the two columns once so each row can look references up by position.
    all_refs = list(zip(mapping["app_nb"], mapping["pid"]))

    # Stage 1: per-row selection, accumulated per query application.
    grouped = {}
    for row in scores_dataset:
        row_scores, row_refs = _top_k_unique(row["scores"], all_refs, top_k)
        bucket = grouped.setdefault(row["app_nb"], {"scores": [], "app_nb_pids": []})
        bucket["scores"].extend(row_scores)
        bucket["app_nb_pids"].extend(row_refs)

    # Stage 2: the same selection over the merged candidates of each group.
    final_app_nbs = []
    final_scores = []
    final_refs = []
    for app_nb, candidates in grouped.items():
        group_scores, group_refs = _top_k_unique(
            candidates["scores"], candidates["app_nb_pids"], top_k
        )
        final_app_nbs.append(app_nb)
        final_scores.append(group_scores)
        final_refs.append(group_refs)

    return Dataset.from_dict({
        "app_nb": final_app_nbs,
        "scores": final_scores,
        "app_nb_pids": final_refs
    })

In [None]:
import random

# Parameters for the synthetic data
num_app_ids = 10  # number of distinct App_id values
num_c_ids_per_app = 5  # number of C_id per App_id
num_scores_per_c_id = 50  # number of scores per C_id (= number of reference entries)
num_pids_per_ref_app = 5  # reference entries that share one reference app_nb

# Seed the generator so the synthetic data is identical after a kernel restart.
random.seed(0)

# Generate the data
data = {
    'App_id': [],
    'C_id': [],
    'score': [],
    'index_ref': []
}

for app_id in range(1, num_app_ids + 1):
    for c_id in range(1, num_c_ids_per_app + 1):
        # Random scores in [0, 1]
        scores = np.array([random.uniform(0.0, 1.0) for _ in range(num_scores_per_c_id)])
        # Random permutation of the reference indices
        index_ref = np.array(random.sample(range(num_scores_per_c_id), num_scores_per_c_id))

        # Append the row
        data['App_id'].append(app_id)
        data['C_id'].append(c_id)
        data['score'].append(scores)
        data['index_ref'].append(index_ref)

# Convert to a dataset
dataset = Dataset.from_dict(data)

# aggregate_reciprocal_fusion reads "app_nb", "cid" and "scores" from each
# score row and "app_nb" / "pid" from the reference mapping, so passing
# `dataset` directly raises KeyError. Build inputs with the expected columns.
scores_dataset = Dataset.from_dict({
    'app_nb': data['App_id'],
    'cid': data['C_id'],
    'scores': data['score'],
})

# One reference entry per score position; several pids share a reference
# app_nb so the per-application de-duplication is actually exercised.
nonciting_dataset = Dataset.from_dict({
    'app_nb': [ref_idx // num_pids_per_ref_app for ref_idx in range(num_scores_per_c_id)],
    'pid': [ref_idx % num_pids_per_ref_app for ref_idx in range(num_scores_per_c_id)],
})

out = aggregate_reciprocal_fusion(scores_dataset, nonciting_dataset, top_k=100)

{'App_id': 1, 'C_id': 1, 'score': [0.8046175876531323, 0.2897818882716302, 0.6603585133224931, 0.6339006015754431, 0.25650514822618453, 0.6934534622562989, 0.26959062444750237, 0.6352896450193604, 0.09428964944210427, 0.8192085941643029, 0.7564292298740162, 0.8951369504852561, 0.8103358915938108, 0.1615606804524834, 0.3929402316366044, 0.19627159371238834, 0.644846348701757, 0.7183061891457795, 0.9313179686353118, 0.11934739054135857, 0.3137957357864616, 0.012919090273134759, 0.9776939837646355, 0.12256883849994549, 0.3933584320486342, 0.8958751208365785, 0.5158328127033892, 0.9270546883019697, 0.5248901267526327, 0.3626724984589198, 0.9294980881165552, 0.1697552951243585, 0.9696857543231372, 0.9987690894765648, 0.3580324892371274, 0.5447961250207499, 0.5725959723156058, 0.09182497268786805, 0.9390434533982911, 0.5913630378842478, 0.11836373476686812, 0.7982467451231645, 0.8983464722036612, 0.6644076229358331, 0.03185714399132, 0.26711300629633594, 0.3843858081289959, 0.225477510906796