In [None]:
# Imports
import getpass
import math
import os
import random
import sys
from datetime import datetime, timezone

import networkx as nx

# Make sure the notebook runs from the Tournesol-Stats directory: when the working
# directory lies inside a `src` folder, move to the parent of the innermost `src`.
_pwd = os.path.realpath('.').split(os.sep)
if 'src' in _pwd:
	# Keep only the path components located before the last 'src' component
	_pwd = _pwd[:len(_pwd) - 1 - _pwd[::-1].index('src')]
	os.chdir(os.sep.join(_pwd))
print(os.path.realpath('.'))

# Local project requirements
sys.path.append('src/py')
from dao.tournesol_api import TournesolAPI, get, get_individual_score

## Parameters

JWT: Get it from tournesol.app

- Open the [website](https://tournesol.app) and sign in, then open the browser dev tools, select any request sent to the Tournesol API, and in its Request Headers copy the value of the `Authorization` header (`Bearer ...`)
- DO NOT SHARE THIS TOKEN WITH ANYONE. NEVER. UNDER ANY CIRCUMSTANCES. It is as sensitive as your password.
- This token expires after some time of inactivity (about a day or so). If the tool fails, try refreshing the token first.

In [None]:
# PARAMETERS
# The JWT is as sensitive as a password: read it with getpass so that the typed value
# is not echoed into the cell output (and therefore not saved with the notebook).
TOURNESOL_API=TournesolAPI(getpass.getpass('JWT (example: "Bearer xxxxxxxxx")'))
# The cache file is per-user, keyed by the username exposed by the API wrapper
TOURNESOL_API.loadCache(f"./data/Tournesol_API_cache-{TOURNESOL_API.username}.json.gz")

MAX_NUMBER_OF_SUGGESTIONS = 25 # will generate this amount of suggested comparisons

In [None]:
def cntr_to_target_comparisons(contributors):
	"""Convert a number of contributors into a target number of comparisons.

	The target is the sum, over every power of ten p, of min(9, contributors // 10**p).
	It grows by one per contributor up to 9, then by one per ten contributors
	up to 99 (10-19 -> 10, 20-29 -> 11, ... 90-99 -> 18), then by one per hundred
	(100-199 -> 19), and so on.

	Args:
		contributors: number of contributors of a video.

	Returns:
		The target number of comparisons; 1 when `contributors` is 1 or less.
	"""
	if contributors > 1:
		total = 0
		scaled = contributors
		# Each pass handles one decimal scale: units, then tens, then hundreds...
		while scaled > 0:
			total += min(9, scaled)
			scaled //= 10
		return total
	return 1

def cmps_to_target_contributors(target: int) -> tuple[int, int]:
	"""Return the range of contributor counts associated with a target number of comparisons.

	Args:
		target: target number of comparisons.

	Returns:
		A `(lowest, highest)` tuple of contributor counts (both inclusive),
		or None when `target` is lower than 1.
		For instance 10 -> (10, 19), 18 -> (90, 99), 19 -> (100, 199).
	"""
	if target < 1:
		return None
	# Every block of 9 consecutive targets covers one power of ten
	exponent, step = divmod(target - 1, 9)
	scale = 10 ** exponent
	return (scale * (step + 1), scale * (step + 2) - 1)

#def _test(x: int):
#	min,max = cmps_to_target_contributors( x )
#	if not cntr_to_target_comparisons(min) == x: return "MIN"
#	if not cntr_to_target_comparisons(max) == x: return "MAX"
#	return " ok"
#
#for t in range(1,31):
#	print(f"{t:4d} => {_test(t)} {cmps_to_target_contributors(t)}")

In [None]:
# Load dataset
# Index the records returned by getMyComparedVideos by their entity uid
videos = {vdata['entity']['uid']:vdata for vdata in TOURNESOL_API.getMyComparedVideos(saveCache=False)}
comparisons = TOURNESOL_API.getAllMyComparisons(saveCache=False)

# public_undgraph: undirected graph restricted to public videos and public comparisons
# private_digraph: directed graph holding every video and every comparison (public or not)
public_undgraph = nx.Graph()
private_digraph = nx.DiGraph()
for vid,vdata in videos.items():
	if not 'individual_rating' in vdata:
		# NOTE(review): the record returned here is only bound to the loop variable;
		# videos[vid] keeps the original entry unless getVData updates it in place — confirm.
		vdata = TOURNESOL_API.getVData(vdata,saveCache=False)
	if get(vdata, False, 'individual_rating', 'is_public'):
		public_undgraph.add_node(vid)
	private_digraph.add_node(vid)

for cdata in comparisons:
	# presumably entity_a/entity_b are video uids matching the graph nodes — verify against the API wrapper
	ea = cdata['entity_a']
	eb = cdata['entity_b']
	# A comparison becomes a public edge only when the comparison itself AND the individual
	# ratings of both of its videos are flagged as public
	if cdata.get('is_public', False) and get(TOURNESOL_API.getVData(ea,useCache=True),False, 'individual_rating', 'is_public') and get(TOURNESOL_API.getVData(eb,useCache=True),False, 'individual_rating', 'is_public'):
		public_undgraph.add_edge(ea, eb)
	elif cdata.get('is_public', None) is None:
		# Normalise a missing (or None) flag to an explicit False
		cdata['is_public'] = False

	# Score given on the 'largely_recommended' criterion
	# (the [0] raises IndexError if a comparison has no such criterion)
	score = [dta['score'] for dta in cdata['criteria_scores'] if dta['criteria'] == 'largely_recommended'][0]
	# Edge a->b when score >= 0, edge b->a when score <= 0: a score of 0 creates both edges
	if score >= 0:
		private_digraph.add_edge(ea, eb)
	if score <= 0:
		private_digraph.add_edge(eb, ea)
# Live undirected view: later changes made to private_digraph are reflected in it
private_undgraph = private_digraph.to_undirected(as_view=True)

print('Public', public_undgraph)
print('Private', private_undgraph)
print('Directed', private_digraph)
# presumably persists the API cache loaded in the parameters cell — confirm in TournesolAPI
TOURNESOL_API.saveCache()

In [None]:
# Prepare comparisons
# Depth limit of the shortest-path searches run by make_suggestions: int(sqrt(number of public videos) + 1).
# Two videos with no path within that many hops are given exactly this distance,
# which _pubpriv displays as "∞".
MAX_DISTANCE_CUTOFF = int(math.sqrt(public_undgraph.number_of_nodes())+1)
def _pubpriv(pub, priv):
	"""Format a (public, private) pair of integers as a short `public(private)` cell.

	- both values equal: only the public value is printed (padded with spaces)
	- public value at or above MAX_DISTANCE_CUTOFF: printed as "∞"
	- private value lower than the public one: printed as is, between parentheses
	- otherwise: the signed difference `private - public` is printed between parentheses

	The result is 6 characters wide as long as each printed number fits in 2 characters.
	"""
	beyond_cutoff = pub >= MAX_DISTANCE_CUTOFF
	if pub == priv:
		return ' ∞    ' if beyond_cutoff else f"{pub:2d}    "
	if beyond_cutoff:
		return f" ∞({priv:2d})"
	detail = f"{priv:2d}" if priv < pub else f"{priv-pub:+1d}"
	return f"{pub:2d}({detail})"

def _parse_iso_datetime(value):
	"""Parse an ISO-8601 timestamp, accepting a trailing 'Z' (UTC designator).

	`datetime.fromisoformat` only understands the 'Z' suffix from Python 3.11 on,
	so the suffix is rewritten as an explicit '+00:00' offset that every version parses.
	"""
	if value.endswith('Z'):
		value = value[:-1] + '+00:00'
	return datetime.fromisoformat(value)

def make_suggestions():
	"""Generate pairs of videos suggested for comparison, one pair per iteration.

	Reads the module-level `videos`, `public_undgraph`, `private_digraph` and
	`private_undgraph`. It works on copies of the graphs: after each suggestion the
	suggested pair is added to the copies, so later suggestions take earlier ones into
	account. A video is suggested at most once.

	Yields:
		(cmp1, cmp2, pub_dist, prv_dist): the uids of the two videos (left/right order is
		randomised) and their distance in the public graph and in the all-comparisons
		graph when the pair was picked, both capped at MAX_DISTANCE_CUTOFF.
	"""
	# Single reference time for every candidate
	now = datetime.now(timezone.utc)

	contribs = {vid:get(videos[vid], 0, 'collective_rating', 'n_contributors') for vid in public_undgraph.nodes}
	candidates = {vid:cntr_to_target_comparisons(contribs[vid]) for vid in public_undgraph.nodes if
		# a video with x comparisons (public or not) is not suggested again before
		# x^1.5 days have passed since its last comparison
		(now - _parse_iso_datetime(get(videos[vid], '2000-01-01T00:00:00Z', 'individual_rating', 'last_compared_at'))).days >= private_undgraph.degree[vid]**1.5
	}
	candidates = {vid:target for vid,target in candidates.items() if (
		# keep videos having no public comparison at all
		public_undgraph.degree[vid] == 0
		# and videos whose target exceeds their number of comparisons (public or not)
		or target > private_undgraph.degree[vid]
	)}

	# Get all candidates scores
	indiv_score:dict[str,float] = {vid:get_individual_score(videos[vid]) or 0 for vid in candidates}

	# Simulation graphs: suggested pairs are added to these copies, never to the real graphs
	sim_public_undgraph:nx.Graph = public_undgraph.copy()
	sim_private_digraph:nx.DiGraph = private_digraph.copy()
	# Live view: follows every edge added to sim_private_digraph
	sim_private_undgraph:nx.Graph = sim_private_digraph.to_undirected(as_view=True)

	while len(candidates) >= 2:
		# distances[a][b] = (public distance, private distance) for each pair of candidates
		distances:dict[str,dict[str,tuple[int,int]]] = {}
		# Sum of the distances from each candidate to every other candidate
		dist_public:dict[str,int] = {}
		dist_private:dict[str,int] = {}
		# (ancestors, descendants) of each candidate in the directed graph
		ancs_descs:dict[str,tuple[set[str],set[str]]] = {}
		clist = sorted(candidates)
		for i,c in enumerate(clist):
			distances[c] = {}
			dist_public[c] = 0
			dist_private[c] = 0
			ancs_descs[c] = (nx.ancestors(sim_private_digraph, c), nx.descendants(sim_private_digraph, c))
			if i==0: continue

			private_dists_from_c = nx.single_source_shortest_path_length(sim_private_undgraph, source=c, cutoff=MAX_DISTANCE_CUTOFF)
			public_dists_from_c = nx.single_source_shortest_path_length(sim_public_undgraph, source=c, cutoff=MAX_DISTANCE_CUTOFF)
			# Only pair c with the candidates preceding it: every pair is handled exactly once
			for d in clist[:i]:
				# Candidates not reached within the cutoff are considered to be at the cutoff distance
				prv_dist = private_dists_from_c.get(d, MAX_DISTANCE_CUTOFF)
				pub_dist = public_dists_from_c.get(d, MAX_DISTANCE_CUTOFF)
				dist_private[c] += prv_dist
				dist_private[d] += prv_dist
				dist_public[c] += pub_dist
				dist_public[d] += pub_dist
				distances[d][c] = distances[c][d] = (pub_dist, prv_dist)


		# Get as the first video, the one that maximises (by order of priority):
		# NOTE(review): the first criterion here, and the third criterion used for cmp2, favour the
		# videos CLOSEST to their target, whereas the former comments ("largest difference of
		# contributors - public comparisons", "more comparisons needed till target") described
		# the opposite — confirm which one is intended.
		cmp1 = max(distances, key=lambda c1: (
			# the ratio of public comparisons to target
			sim_public_undgraph.degree[c1]/candidates[c1],
			# the excess of overall comparisons over public ones
			sim_private_undgraph.degree[c1]-sim_public_undgraph.degree[c1],
			# the smallness of min(number of ancestors, number of descendants)
			-min(len(ancs_descs[c1][0]), len(ancs_descs[c1][1])),
			# the total distance to the other candidates in the public graph
			dist_public[c1],
			# the total distance to the other candidates in the all-comparisons graph
			dist_private[c1]
		))

		# Second candidate should not be connected to first candidate in directed graph
		candidates_2 = {vid for vid in candidates if vid != cmp1 and vid not in ancs_descs[cmp1][0] and vid not in ancs_descs[cmp1][1]}

		if candidates_2:
			# Default (cmp1 has both or neither ancestors and descendants): criterion is neutral
			sort_ancs_desc = lambda c2: 0
			if len(ancs_descs[cmp1][1]) == 0 and len(ancs_descs[cmp1][0]) > 0:
				# cmp1 only has ancestors: prefer the cmp2 having the fewest ancestors
				sort_ancs_desc = lambda c2: -len(ancs_descs[c2][0])
			elif len(ancs_descs[cmp1][0]) == 0 and len(ancs_descs[cmp1][1]) > 0:
				# cmp1 only has descendants: prefer the cmp2 having the fewest descendants
				sort_ancs_desc = lambda c2: -len(ancs_descs[c2][1])

			# Get as the second video, the one that maximises (by order of priority):
			cmp2 = max(candidates_2, key=lambda c2: (
				# - the ancestors/descendants criterion selected above
				sort_ancs_desc(c2),
				# - the distance to cmp1 in the all-comparisons graph
				distances[cmp1][c2][1],
				# - public comparisons minus target (see NOTE(review) above)
				sim_public_undgraph.degree[c2] - candidates[c2],
				# - the similarity of its individual score with the one of cmp1
				-abs(indiv_score[c2] - indiv_score[cmp1])
			))

			# Shuffle left&right
			(cmp1, cmp2) = random.choice([(cmp1, cmp2), (cmp2, cmp1)])
			# distances is symmetric, so the lookup is valid whichever way the pair was shuffled
			pair_dist_pub, pair_dist_prv = distances[cmp1][cmp2]
			yield cmp1, cmp2, pair_dist_pub, pair_dist_prv

			# Record the suggestion in the simulation graphs; the outcome of the comparison
			# is unknown, hence an edge in both directions
			sim_private_digraph.add_edge(cmp1, cmp2)
			sim_private_digraph.add_edge(cmp2, cmp1)
			sim_public_undgraph.add_edge(cmp1, cmp2)
			candidates.pop(cmp2)
		# cmp1 is discarded even when no partner was found, so the loop always makes progress
		candidates.pop(cmp1)

In [None]:
# Suggest comparisons
def _fmt_counts(pub, priv):
	"""Format numbers of comparisons as a `public(private)` cell.

	Same layout as `_pubpriv`, but without its distance cutoff: a video having
	MAX_DISTANCE_CUTOFF comparisons or more must display its real count, not "∞".

	- both counts equal: only the public count is printed (padded with spaces)
	- private count lower than the public one: printed as is, between parentheses
	- otherwise: the signed difference `private - public` is printed between parentheses
	"""
	if pub == priv:
		return f"{pub:2d}    "
	if priv < pub:
		return f"{pub:2d}({priv:2d})"
	return f"{pub:2d}({priv-pub:+1d})"

print(' num:                        Comparison URL                                    distance    indiv. public(private)/target')
# One line per suggestion: comparison URL, distance between the two videos, then for each
# video its current number of comparisons and its target
for suggested,(cmp1,cmp2,dist_pub,dist_prv) in enumerate(make_suggestions(),1):
	print(f"""{suggested:4d}: https://tournesol.app/comparison?uidA={cmp1}&uidB={cmp2} \
dist:{_pubpriv(dist_pub,dist_prv)}  \
cmps:{_fmt_counts(public_undgraph.degree[cmp1], private_undgraph.degree[cmp1])}/{cntr_to_target_comparisons(get(videos[cmp1], 0, 'collective_rating', 'n_contributors')):2d} \
& {_fmt_counts(public_undgraph.degree[cmp2], private_undgraph.degree[cmp2])}/{cntr_to_target_comparisons(get(videos[cmp2], 0, 'collective_rating', 'n_contributors')):2d}""")
	# Stop pulling from the generator once enough suggestions have been printed
	if suggested >= MAX_NUMBER_OF_SUGGESTIONS:
		break