In [None]:
# Imports
import os
import sys
import networkx as nx

# Ensure notebook is running from Tournesol-Stats dir
# Split the resolved working directory into its path components
_pwd = os.path.realpath('.').split(os.sep)
if 'src' in _pwd:
	# Drop trailing components up to and including the deepest 'src',
	# which leaves the path of the directory that contains that src dir
	while _pwd.pop() != 'src':
		pass
	os.chdir(os.sep.join(_pwd))
# Show the directory the rest of the notebook will run from
print(os.path.realpath('.'))

# Local project requirements
sys.path.append('src/py')
from dao.tournesol_api import TournesolCommonAPI, TournesolUserAPI, get, get_individual_score

In [None]:
# PARAMETERS
# API wrappers shared by every cell below; the common one is given the cache location
TOURNESOL_COMMON=TournesolCommonAPI('./data/TournesolAPI_cache')
# NOTE(review): input() echoes the typed token into the notebook; getpass.getpass() would keep it hidden
TOURNESOL_API=TournesolUserAPI(TOURNESOL_COMMON, input('JWT (example: "Bearer xxxxxxxxx")'))
try:
	TOURNESOL_API.loadCache() # Load existing cache file
	print('Common cache records', len(TOURNESOL_COMMON.videos), 'videos')
	print('User cache records', len(TOURNESOL_API.videos), 'videos')
	print('User cache records', len(TOURNESOL_API.comparisons), 'comparisons')
except Exception as err:
	# Only ordinary errors fall back to creating a cache: a bare `except:` would also
	# swallow KeyboardInterrupt/SystemExit and write a cache file while the user is aborting.
	print('Could not load cache:', repr(err))
	f = TOURNESOL_API.saveCache() # Initiate cache file if could not load
	print('Created file', f)

In [None]:
# Utils
BASE = 5
def cntr_to_target_comparisons(contributors):
	"""Return the target number of comparisons for a video with `contributors` contributors.

	Any value of `contributors` up to 1 gives a target of 1. Otherwise the target is
	the sum of min(BASE - 1, contributors // BASE**k) for k = 0, 1, 2, ... as long as
	that quotient stays positive.
	"""
	if contributors <= 1:
		return 1
	per_level_cap = BASE - 1
	target = 0
	level = contributors
	# Each pass handles one power of BASE; the first pass uses the raw contributor count
	while level > 0:
		target += min(per_level_cap, level)
		level //= BASE
	return target

def cmps_to_target_contributors(target: int) -> tuple[int, int]:
	"""Return a (low, high) pair of contributor counts computed from a comparison target.

	With exponent, step = divmod(target - 1, BASE - 1), the pair is
	(BASE**exponent * (step + 1), BASE**exponent * (step + 2) - 1).
	Returns None (despite the annotation) when `target` is below 1.
	"""
	if target < 1:
		return None
	exponent, step = divmod(target - 1, BASE - 1)
	scale = BASE ** exponent
	low = scale * (step + 1)
	high = scale * (step + 2) - 1
	return (low, high)

In [None]:
# Load dataset
# The return value of this call is discarded; presumably it refreshes the user's
# video records held by TOURNESOL_API — TODO confirm against the DAO.
TOURNESOL_API.getMyComparedVideos()
# Module-level collection of the user's comparisons; prepare_graphs() iterates over it
# every time it rebuilds the graphs.
comparisons = TOURNESOL_API.getAllMyComparisons()

# Prepare graphs
def prepare_graphs():
	"""Build the comparison graphs from the module-level `comparisons`.

	Returns a 4-tuple:
	- public_undgraph: undirected graph with one edge per comparison flagged
	  'is_public', plus a node for every compared video for which
	  get(video, False, 'individual_rating', 'is_public') is truthy.
	- private_undgraph: live undirected view of private_digraph (edges added to
	  the digraph later are visible through it).
	- private_digraph: directed graph over every comparison; the edge goes
	  entity_a -> entity_b when the 'largely_recommended' score is >= 0 and
	  entity_b -> entity_a when it is <= 0 (so both directions for a score of 0).
	- videos: dict mapping each node of the private graph to its getVData() result.

	Raises IndexError if a comparison has no 'largely_recommended' criterion.
	"""
	public_undgraph = nx.Graph()
	private_digraph = nx.DiGraph()
	for entry in comparisons:
		if entry.get('is_public', False):
			public_undgraph.add_edge(entry['entity_a'], entry['entity_b'])

		# Scores of all 'largely_recommended' criteria of this comparison; the first one is used
		recommended = [crit['score'] for crit in entry['criteria_scores'] if crit['criteria'] == 'largely_recommended']
		score = recommended[0]
		if score >= 0:
			private_digraph.add_edge(entry['entity_a'], entry['entity_b'])
		if score <= 0:
			private_digraph.add_edge(entry['entity_b'], entry['entity_a'])
	private_undgraph = private_digraph.to_undirected(as_view=True)

	# Fetch the data of every compared video (cache is read but not written here)
	videos = {}
	for vid in private_undgraph.nodes:
		videos[vid] = TOURNESOL_API.getVData(vid, useCache=True, saveCache=False)

	# Publicly rated videos without any public comparison still need a node in the public graph
	for vid, video in videos.items():
		rated_publicly = get(video, False, 'individual_rating', 'is_public')
		if rated_publicly and vid not in public_undgraph:
			public_undgraph.add_node(vid)

	return public_undgraph, private_undgraph, private_digraph, videos

In [None]:
# Suggest comparisons to make
# Upper bound on the number of suggestions printed by the suggest(...) call at the end of this cell
MAX_SUGGESTIONS = 999
# Passed as suggest()'s `private` flag: when True, comparisons already made are counted
# on the graph built from all comparisons; when False, only on the public-comparison graph
ACCOUNT_FOR_PRIVATE_COMPARISONS = True

def suggest(private=False, maxSuggestions=999):
	"""Print suggested pairs of videos to compare, as tournesol.app comparison links.

	For every node of the public graph, the number of missing comparisons is the
	target from cntr_to_target_comparisons() minus the node's current degree.
	Videos missing at least two comparisons are processed from the neediest down;
	each one is paired with another video that still misses comparisons and is not
	yet connected to it by a directed path in either direction.

	private: when truthy, degrees are read from the graph of all comparisons,
		otherwise from the public-comparison graph.
	maxSuggestions: stop after printing this many suggestions.
	"""
	public_undgraph, private_undgraph, private_digraph, videos = prepare_graphs()
	# Graph whose degrees count as "comparisons already made"
	counted_graph = private_undgraph if private else public_undgraph

	# Missing comparisons per video; only strictly positive counts are stored
	need_more_cmps:dict[str,int] = {}
	for vid in public_undgraph.nodes:
		goal = cntr_to_target_comparisons(videos[vid]['collective_rating']['n_contributors'])
		missing = goal - counted_graph.degree[vid]
		if missing > 0:
			need_more_cmps[vid] = missing

	# Work queue: videos missing more than one comparison, largest deficit first
	queue:list[str] = sorted((v for v, n in need_more_cmps.items() if n > 1), key=need_more_cmps.get, reverse=True)

	suggested = 0
	while queue and suggested < maxSuggestions:
		vid = queue.pop(0)

		# Videos that vid can reach, or that can reach vid, along directed edges
		downstream = nx.single_source_shortest_path(private_digraph, vid)
		upstream = nx.single_target_shortest_path(private_digraph, vid)
		linked = set(downstream) | set(upstream)
		# Undirected hop counts from vid (nodes further than the cutoff are absent)
		distances = nx.single_source_shortest_path_length(private_undgraph, vid, cutoff=99)

		# Partner choice among the videos still missing comparisons and not linked to vid:
		# the first one with no known distance wins outright, otherwise the furthest one
		# (the earliest candidate is kept when several are equally far)
		unlinked = [c for c in need_more_cmps if c not in linked]
		best_c = next((c for c in unlinked if c not in distances), None)
		if best_c is None and unlinked:
			best_c = max(unlinked, key=distances.__getitem__)

		if not best_c:
			# Nothing to pair vid with: it is dropped from the queue
			continue

		# Print comparison
		suggested += 1
		print(f"{suggested:4d}: https://tournesol.app/comparison?uidA={vid}&uidB={best_c} (dist={distances.get(best_c,' ~')} - cmps remaining={need_more_cmps[vid]}/{need_more_cmps[best_c]})")

		# Record the suggested comparison in the graphs; its outcome is unknown,
		# so the directed graph gets the edge in both directions
		public_undgraph.add_edge(vid, best_c)
		private_digraph.add_edges_from([(vid, best_c), (best_c, vid)])

		# vid goes back to the end of the queue only while it still misses more than one comparison
		left = need_more_cmps[vid] - 1
		if left > 0:
			need_more_cmps[vid] = left
			if left > 1:
				queue.append(vid)
		else:
			del need_more_cmps[vid]

		# The partner leaves both structures once it has nothing left to do
		left = need_more_cmps[best_c] - 1
		if left > 0:
			need_more_cmps[best_c] = left
		else:
			del need_more_cmps[best_c]
			if best_c in queue:
				queue.remove(best_c)

# Print the suggestions using the two settings declared at the top of this cell
suggest(ACCOUNT_FOR_PRIVATE_COMPARISONS, MAX_SUGGESTIONS)

In [None]:
# Display the list of all videos having missing comparisons remaining, ordered by individual score (best first)
def remain_ranking():
	"""Print the publicly rated videos that still miss comparisons, best individual score first.

	A video is listed when its individual rating is public, its target from
	cntr_to_target_comparisons() exceeds its degree in the public graph, and
	get_individual_score() returns a truthy value (so a missing or zero score is skipped).
	The API cache is saved once the list has been printed.
	"""
	public_undgraph = prepare_graphs()[0]

	# Individual score of every video that qualifies for the ranking
	score_by_vid = {}
	for vid in public_undgraph.nodes:
		vdata = TOURNESOL_API.getVData(vid, useCache=True, saveCache=False)
		cmps_to_do = cntr_to_target_comparisons(vdata['collective_rating']['n_contributors']) - public_undgraph.degree[vid]
		if not vdata['individual_rating']['is_public'] or cmps_to_do <= 0:
			continue
		score = get_individual_score(vdata)
		if score:
			score_by_vid[vid] = score

	# Highest score first; ranks are numbered from 1
	ranked = sorted(score_by_vid, key=lambda v: score_by_vid[v], reverse=True)
	for rnk, vid in enumerate(ranked, 1):
		print(f"{rnk:4d}.", TOURNESOL_API.prettyPrintVData(vid))

	TOURNESOL_API.saveCache()

# Print the ranking; remain_ranking() also saves the API cache as its last step
remain_ranking()