In [None]:
# Install the third-party requirements into the kernel's environment.
# networkx is imported by this notebook; requests is presumably needed by the
# local model.tournesol_api module (TODO confirm).
%pip install requests networkx

In [None]:
# Imports
import getpass
import os
import random

import networkx as nx

# Make sure the working directory is the src/ directory:
# if any component of the current path is named 'src', move up to the deepest one.
_pwd = os.path.realpath('.').split(os.sep)
if 'src' in _pwd:
	# Number of leading components to keep, i.e. up to and including the last 'src'
	_keep = len(_pwd) - _pwd[::-1].index('src')
	del _pwd[_keep:]
	os.chdir(os.sep.join(_pwd))
# Show where the notebook ended up (unchanged when no 'src' component exists)
print(os.path.realpath('.'))

# Local project requirements
from model.tournesol_api import TournesolAPI, get

# Parameters

JWT: Get it from tournesol.app

- Open the website, open the browser dev tools, pick any request sent to the Tournesol API, look at its Request Headers and copy the value of the `Authorization` header (`Bearer ...`).
- DO NOT SHARE THIS TOKEN WITH ANYONE. NEVER. UNDER ANY CONDITIONS. Even support will never need it.
- This token expires after some time of inactivity. If the tool fails, try updating the token first.

LNGS: Pick all the languages of the videos to keep

In [None]:
# PARAMETERS
# Read the JWT with getpass instead of input(): input() echoes the typed value
# into the cell output, which would then be saved inside the notebook file.
# Surrounding whitespace from copy/paste is stripped before use.
TOURNESOL_API=TournesolAPI(getpass.getpass('JWT (example: "Bearer xxxxxxxxx")').strip())
# Load the API cache file named after the authenticated user
# (the path is relative to the src/ working directory).
TOURNESOL_API.loadCache(f"../data/Tournesol_API_cache-{TOURNESOL_API.username}.json.gz")

-----
# Main part

In [None]:
# Load dataset
# Fetch the user's comparisons and build three graphs over the compared entities:
#   public_graph   - undirected, only comparisons flagged 'is_public'
#   directed_graph - directed, oriented by the sign of the 'largely_recommended' score
#   private_graph  - undirected read-only view of directed_graph
comparisons = TOURNESOL_API.getAllMyComparisons(useCache=True)

public_graph = nx.Graph()
directed_graph = nx.DiGraph()
for comparison in comparisons:
	if comparison.get('is_public', False):
		public_graph.add_edge(comparison['entity_a'], comparison['entity_b'])

	# Score of the first 'largely_recommended' entry of this comparison
	main_scores = [
		criterion['score']
		for criterion in comparison['criteria_scores']
		if criterion['criteria'] == 'largely_recommended'
	]
	score = main_scores[0]
	# A score of exactly 0 passes both tests, producing an edge in each direction
	if score >= 0:
		directed_graph.add_edge(comparison['entity_a'], comparison['entity_b'])
	if score <= 0:
		directed_graph.add_edge(comparison['entity_b'], comparison['entity_a'])
private_graph = directed_graph.to_undirected(as_view=True)

# Video data for every compared entity, read through the API cache
videos = {}
for vid in private_graph.nodes:
	videos[vid] = TOURNESOL_API.getVData(vid, useCache=True, saveCache=False)

# Summary of what was loaded
for label, value in (
	('Videos', len(videos)),
	('Comparisons', len(comparisons)),
	('Public', public_graph),
	('Private', private_graph),
	('Directed', directed_graph),
):
	print(label, value)

In [None]:
# Suggest comparisons
# Repeatedly pick the candidate video that is closest to all other candidates and
# pair it with the farthest candidate it is not yet linked to by a directed path.
# Each suggestion is then simulated on copies of the graphs so the next picks
# take it into account.
MAX_PUBLIC_DEGREE = 3  # candidates have at most this many neighbours in the public graph
MAX_SUGGESTIONS = 30   # stop after printing this many suggestions

# Candidates: public-graph videos whose 'collective_rating' / 'n_contributors'
# value (0 is passed to `get`, presumably as the fallback - TODO confirm) exceeds
# their number of neighbours in the private graph, and with few public neighbours.
candidates = [vid for vid in public_graph.nodes
	if get(videos[vid], 0, 'collective_rating', 'n_contributors') > private_graph.degree[vid]
	and public_graph.degree[vid] <= MAX_PUBLIC_DEGREE
]
print('Candidates', len(candidates))

# Distances substituted when two videos are not connected in a graph
MAXD_PRIVATE = private_graph.number_of_nodes()
MAXD_PUBLIC = public_graph.number_of_nodes()

# Simulated comparisons are added to copies; the loaded graphs stay untouched
simupublic = public_graph.copy()
simulgraph = directed_graph.copy()
undirected = simulgraph.to_undirected(as_view=True)
i=0  # number of suggestions printed so far
while len(candidates) >= 2 and i < MAX_SUGGESTIONS:
	# distances[c][d]: private + public shortest-path length between candidates c and d
	distances:dict[str,dict[str,int]] = {}
	# tot_length[c]: sum of distances[c][d] over every other candidate d
	tot_length:dict[str,int] = {}
	for c in candidates:
		private_dists_from_c = nx.single_source_shortest_path_length(undirected, source=c)
		public_dists_from_c = nx.single_source_shortest_path_length(simupublic, source=c)
		distances[c] = {}
		tot_length[c] = 0
		for d in candidates:
			if c == d: continue
			length = private_dists_from_c.get(d, MAXD_PRIVATE) + public_dists_from_c.get(d, MAXD_PUBLIC)
			distances[c][d] = length
			tot_length[c] += length

	cmp1 = min(tot_length, key=tot_length.get)
	# Videos already linked to cmp1 by a directed path, in either direction.
	# Two traversals replace two has_path() searches per candidate.
	linked = nx.descendants(simulgraph, cmp1) | nx.ancestors(simulgraph, cmp1)
	eligible = [vid for vid in distances[cmp1] if vid not in linked]
	if not eligible:
		# max() on an empty sequence would raise ValueError. Edges are only ever
		# added and candidates only removed, so cmp1 can never gain a partner:
		# drop it and try the next best candidate.
		candidates.remove(cmp1)
		continue
	cmp2 = max(eligible, key=distances[cmp1].get)
	i+=1

	# Randomize which video is shown as A and which as B
	(cmp1, cmp2) = random.choice([(cmp1, cmp2), (cmp2, cmp1)])
	# Current distances for display only; '∞' when the two videos are not connected
	try:
		sp1 = nx.shortest_path_length(undirected, cmp1, cmp2)
	except nx.NetworkXNoPath:
		sp1 = '∞'
	try:
		sp2 = nx.shortest_path_length(simupublic, cmp1, cmp2)
	except nx.NetworkXNoPath:
		sp2 = '∞'
	print(f"{i:4d}: https://tournesol.app/comparison?uidA={cmp1}&uidB={cmp2} distance:{sp2} (private:{sp1})")
	# Both videos are used up; record the simulated comparison in both graphs
	candidates.remove(cmp1)
	candidates.remove(cmp2)
	simulgraph.add_edge(cmp1, cmp2)
	simulgraph.add_edge(cmp2, cmp1)
	simupublic.add_edge(cmp1, cmp2)