In [None]:
%pip install plotly scipy ipywidgets

# Comparisons tool (v2)

This tool gives a Tournesol user a list of suggested comparisons.

- Comparisons between 2 videos marked as "Public" (will not suggest comparisons with "Private" videos)
- Compute the distance between compared videos, defined as the minimum number of comparisons to traverse to get from one to the other
- Find good new comparisons to reduce these distances as much as possible

Requirements:
- Knowing how to get your authentication JWT from the Tournesol website
- Having more than 20 compared videos (With few videos, this tool is not useful and may not suggest any comparisons)

Special rules:

- Will not suggest adding comparisons to videos that have fewer contributors than the number of comparisons you already made with them
	- (Improving popular videos is preferred, to improve global Tournesol connectivity as much as possible)

In [None]:
# Imports
import getpass
import math
import os
import random
import time

import networkx as nx
import plotly.graph_objects as go

# Make sure the notebook runs from the src/ directory:
# when the current path contains a 'src' component, cut the path right after
# its last 'src' component and move there.
_pwd = os.path.realpath('.').split(os.sep)
if 'src' in _pwd:
	_last_src = len(_pwd) - 1 - _pwd[::-1].index('src')
	del _pwd[_last_src + 1:]
	os.chdir(os.sep.join(_pwd))
print(os.path.realpath('.'))

# Local project requirements
from model.tournesol_api import TournesolAPI, get
from scripts.force_directed_graph import ForceLayout

## Parameters

JWT: Get it from tournesol.app

- Open the [website](https://tournesol.app), authenticate, then open the browser dev tools, pick any request sent to the Tournesol API, look at its Request Headers, and copy the value of the `Authorization` header (`Bearer ...`)
- DO NOT SHARE THIS TOKEN WITH ANYONE. NEVER. UNDER ANY CONDITIONS. It is as sensitive as your password.
- This token expires after some time of inactivity (about a day or so). If the tool fails, try updating the token first.

In [None]:
# PARAMETERS
# The JWT is a secret: read it with getpass so the typed value is not echoed
# into the cell output (which would be saved along with the notebook).
TOURNESOL_API=TournesolAPI(getpass.getpass('JWT (example: "Bearer xxxxxxxxx")'))
# Per-user cache file: the file name is derived from the username exposed by the API object
TOURNESOL_API.loadCache(f"../data/Tournesol_API_cache-{TOURNESOL_API.username}.json.gz")

MAX_PUBLIC_COMPARISONS = 10 # will not suggest to compare videos having more than this
MAX_NUMBER_OF_SUGGESTIONS = 100 # will generate this amount of suggested comparisons at every launch

In [None]:
# Load dataset
# Refresh the video data first so that the cached lookups below can find it
TOURNESOL_API.getMyComparedVideos(useCache=False, saveCache=True)
comparisons = TOURNESOL_API.getAllMyComparisons()

# public_graph: undirected, only comparisons flagged 'is_public'
# directed_graph: every comparison, oriented by the sign of the 'largely_recommended' score
#                 (a score of 0 yields an edge in both directions)
public_graph = nx.Graph()
directed_graph = nx.DiGraph()
for comparison in comparisons:
	uid_a, uid_b = comparison['entity_a'], comparison['entity_b']
	if comparison.get('is_public', False):
		public_graph.add_edge(uid_a, uid_b)

	main_scores = [
		criterion['score']
		for criterion in comparison['criteria_scores']
		if criterion['criteria'] == 'largely_recommended'
	]
	score = main_scores[0]
	if score >= 0:
		directed_graph.add_edge(uid_a, uid_b)
	if score <= 0:
		directed_graph.add_edge(uid_b, uid_a)

# Undirected live view over all comparisons (public and private)
private_graph = directed_graph.to_undirected(as_view=True)

videos = {vid: TOURNESOL_API.getVData(vid, useCache=True, saveCache=False) for vid in private_graph.nodes}

# Videos rated as public but without any public comparison still belong to the public graph
for vid,video in videos.items():
	if not get(video, False, 'individual_rating', 'is_public'):
		continue
	if vid not in public_graph:
		public_graph.add_node(vid)

for label, value in (
	('Videos', len(videos)),
	('Comparisons', len(comparisons)),
	('Public', public_graph),
	('Private', private_graph),
	('Directed', directed_graph),
):
	print(label, value)

In [None]:
# Suggest comparisons
# NOTE(review): MAX_PUBLIC_COMPARISONS (parameters cell) is not applied anywhere in this cell —
# confirm whether candidates are supposed to be capped by it.

def _pubpriv(pub, priv):
	"""Format a "public" value next to its "private" counterpart in a fixed-width column.

	- both equal: only the value is printed (or ' ∞' when both are '∞')
	- pub is '∞': '∞' followed by the private value in parentheses
	- otherwise: the public value followed by the signed difference (priv - pub)
	"""
	if pub == priv:
		if pub == '∞':
			return ' ∞    '
		return f"{pub:2d}    "

	if pub == '∞':
		return f" {pub}({priv:2d})"

	return f"{pub:2d}({priv-pub:+1d})"

def _distance_or_inf(graph, source, target):
	"""Return the shortest path length between two nodes, or '∞' when they are not connected."""
	try:
		return nx.shortest_path_length(graph, source, target)
	except nx.NetworkXNoPath:
		# Only "no path" is expected here; any other error should surface
		return '∞'

# Candidates are public videos having more contributors than the number of comparisons already made with them
# (presumably `get` returns the default 0 when the nested keys are missing — verify against model.tournesol_api)
unsorted_candidates = {vid:public_graph.degree[vid] for vid in public_graph.nodes
	if get(videos[vid], 0, 'collective_rating', 'n_contributors') > private_graph.degree[vid]
}

# Group candidates by public degree, lowest degree first, dropping degrees without any candidate
_by_degree:dict[int,list[str]] = {}
for vid,deg in unsorted_candidates.items():
	_by_degree.setdefault(deg, []).append(vid)
candidates_groups:list[list[str]] = [_by_degree[deg] for deg in sorted(_by_degree)]
print('Candidates:', sum(map(len,candidates_groups)), list(map(len,candidates_groups)))

# Distance used for unreachable pairs: no real path can be that long
MAXD_PRIVATE = private_graph.number_of_nodes()
MAXD_PUBLIC = public_graph.number_of_nodes()

# Working copies, updated with every suggestion so that later suggestions take earlier ones into account
simupublic = public_graph.copy()
simulgraph = directed_graph.copy()
undirected = simulgraph.to_undirected(as_view=True)

suggested = 0
# Stop as well when there is no candidate at all (indexing candidates_groups[0] would fail)
while suggested < MAX_NUMBER_OF_SUGGESTIONS and candidates_groups:
	# Merge the lowest-degree group into the next one while the next one is too small (< 4)
	# or bigger than the current one
	while len(candidates_groups) > 1 and (len(candidates_groups[1]) < 4 or len(candidates_groups[0]) < len(candidates_groups[1])):
		candidates_groups[1].extend(candidates_groups[0])
		candidates_groups.pop(0)

	active_candidates = candidates_groups[0]

	# Pairwise distance = private distance + public distance (unreachable pairs count as MAXD_*)
	distances:dict[str,dict[str,int]] = {}
	tot_length:dict[str,int] = {}
	for c in active_candidates:
		private_dists_from_c = nx.single_source_shortest_path_length(undirected, source=c)
		public_dists_from_c = nx.single_source_shortest_path_length(simupublic, source=c)
		distances[c] = {}
		tot_length[c] = 0
		for d in active_candidates:
			if c == d: continue
			length = private_dists_from_c.get(d, MAXD_PRIVATE) + public_dists_from_c.get(d, MAXD_PUBLIC)
			distances[c][d] = length
			tot_length[c] += length

	if not tot_length:
		break

	# First video: the candidate with the smallest total distance to the other candidates
	anchor = min(tot_length, key=tot_length.get)
	# Second video: the farthest candidate having no directed path to or from the first one
	partners = [vid for vid in distances[anchor]
		if not nx.has_path(simulgraph, anchor, vid) and not nx.has_path(simulgraph, vid, anchor)
	]
	if partners:
		partner = max(partners, key=distances[anchor].get)

		# Randomize which video is displayed on which side of the comparison
		(cmp1, cmp2) = random.choice([(anchor, partner), (partner, anchor)])
		sp1 = _distance_or_inf(undirected, cmp1, cmp2)
		sp2 = _distance_or_inf(simupublic, cmp1, cmp2)

		suggested += 1
		print(f"{suggested:4d}: https://tournesol.app/comparison?uidA={cmp1}&uidB={cmp2} dist:{_pubpriv(sp2,sp1)} cmps:{_pubpriv(public_graph.degree[cmp1], private_graph.degree[cmp1])} & {_pubpriv(public_graph.degree[cmp2], private_graph.degree[cmp2])}")
		# Simulate the suggested comparison (in both directions, as its outcome is unknown)
		simulgraph.add_edge(cmp1, cmp2)
		simulgraph.add_edge(cmp2, cmp1)
		simupublic.add_edge(cmp1, cmp2)
		active_candidates.remove(partner)
	# The first video is consumed even when no partner was found for it
	active_candidates.remove(anchor)

-----

# Graph

Draw publicly compared videos (as colored circles) and the comparisons between them.

Positions have no meaning. Videos move around to try to untangle the graph as much as possible (compared videos nearer, and others further away).

Color Legend:
- Dark Blue: Central videos (every other video can be reached with few jumps from comparison to comparison)
- Blue: Well connected videos
- Green~Yellow: Average
- Orange: Distant videos, more comparisons may be needed
- Red: Most distant videos (most of the other videos are a high number of jumps away, from comparison to comparison)

Note: The distance calculation (& color) takes private comparisons into account, even if private videos aren't displayed (for example, a blue or green node may be rendered far away from the center if it is one comparison away from a private video that is connected to a central video)

In [None]:
# Display graph
def pos_to_graphlocs(public_graph:nx.Graph, pos:dict[str,list[int]]):
	"""Convert a 2D layout into coordinate lists usable as plot traces.

	Only nodes having an entry in `pos` (and edges whose two ends have one) are kept.

	Returns a dict with:
	- 'nodes': {'x': [...], 'y': [...], 'l': [...]} where 'l' holds the node ids
	- 'edges': {'x': [...], 'y': [...]} polylines separated by None entries
	"""
	nodes = {'x': [], 'y': [], 'l': []}
	for node in public_graph:
		if node not in pos:
			continue
		node_x, node_y = pos[node]
		nodes['l'].append(node)
		nodes['x'].append(node_x)
		nodes['y'].append(node_y)

	line_x = []
	line_y = []
	for src, dst in public_graph.edges():
		if src not in pos or dst not in pos:
			continue
		src_x, src_y = pos[src]
		dst_x, dst_y = pos[dst]

		# The entry before the trailing None is the last point of the current polyline:
		# when the new edge starts (or ends) there, extend that polyline instead of opening a new one
		can_chain = len(line_x) > 2
		if can_chain and line_x[-2] == src_x and line_y[-2] == src_y:
			line_x.insert(-1, dst_x)
			line_y.insert(-1, dst_y)
		elif can_chain and line_x[-2] == dst_x and line_y[-2] == dst_y:
			line_x.insert(-1, src_x)
			line_y.insert(-1, src_y)
		else:
			line_x.extend((src_x, dst_x, None))
			line_y.extend((src_y, dst_y, None))

	return {
		'nodes': nodes,
		'edges': {'x': line_x, 'y': line_y}
	}


# The layout is only computed for the largest connected component of the public graph
_largest_component = max(nx.connected_components(public_graph), key=len)
MAX_CONNECTED_GRAPH = public_graph.subgraph(_largest_component)

def init_comparisons_graph(public_graph:nx.Graph, private_graph:nx.Graph):
	"""Build the 2D figure widget showing public videos and their comparisons.

	Node positions come from a short spring layout of MAX_CONNECTED_GRAPH, started from a
	circular layout. Node colors reflect how close each video is to all the others in
	`private_graph` (sum of 1/distance over reachable videos, divided by the node count).

	Returns the tuple (fig, pos): the plotly FigureWidget and the computed positions.
	"""
	circle_pos = nx.circular_layout(MAX_CONNECTED_GRAPH)
	pos = nx.spring_layout(MAX_CONNECTED_GRAPH, pos=circle_pos, iterations=10)
	loc = pos_to_graphlocs(public_graph, pos)

	# Public edges
	edge_trace = go.Scatter(
		x=loc['edges']['x'], y=loc['edges']['y'],
		mode='lines',
		hoverinfo='none',
		line=dict(width=0.3, color='#888'),
	)

	# Connectivity score of every node, used as marker color
	node_count = private_graph.number_of_nodes()
	cnct:dict[str,float] = {}
	for source, lengths in nx.all_pairs_shortest_path_length(private_graph, cutoff=16):
		remaining = node_count
		for target, hops in lengths.items():
			if target != source: # distance to itself is 0
				remaining -= 1/hops
		cnct[source] = (node_count - remaining)/node_count

	# Nodes text & colors
	node_text = []
	node_colors = []
	for node in loc['nodes']['l']:
		deg_pub = public_graph.degree[node] if node in public_graph else 0
		deg_prv = private_graph.degree[node]
		if deg_prv > deg_pub:
			private_line = f"{deg_prv - deg_pub} private comparisons<br>"
		else:
			private_line = "<br>"
		node_text.append(f"{node}<br>{deg_pub} public comparisons<br>{private_line}")
		node_colors.append(cnct[node])

	# Public nodes
	node_trace = go.Scatter(
		x=loc['nodes']['x'], y=loc['nodes']['y'],
		mode='markers',
		hoverinfo='text',
		text=node_text,
		marker=dict(
			colorscale='Portland',
			reversescale=True,
			color=node_colors,
			size=3,
			line=dict(width=0),
		),
	)

	hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
	layout = go.Layout(
		showlegend=False,
		hovermode='closest',
		margin=dict(b=0,l=0,r=0,t=0),
		xaxis=dict(hidden_axis),
		yaxis=dict(hidden_axis),
		height=720,
	)
	fig = go.FigureWidget(data=[edge_trace, node_trace], layout=layout)

	# Fix aspect ratio
	fig.update_yaxes(scaleanchor="x", scaleratio=1)

	return fig,pos

fig,pos = init_comparisons_graph(public_graph, private_graph)
fig # Display graph (should be the last line of the notebook cell)

In [None]:
# Untangle graph above (may run multiple time if needed - Takes time)
TIME_TO_RUN = 120 # Seconds, the longer the prettier
STEP_TIME = 5 # Seconds

def improve_graph_pos(time_to_run:int, pos:dict[str,list[float]], callback=None, target_refresh_interval:int=None):
	"""Refine a 2D layout of MAX_CONNECTED_GRAPH with repeated spring-layout runs.

	Parameters:
	- time_to_run: time budget, in seconds
	- pos: starting positions
	- callback: optional function called with the new positions after every run
	- target_refresh_interval: wanted delay between two callbacks, in seconds;
	  when missing or not positive, 10 refreshes are targeted

	The number of iterations of each run is adapted to the measured speed, so that refreshes
	happen about every `target_refresh_interval` seconds.

	Returns the last computed positions (`pos` itself when the time budget is already spent).
	"""
	start = time.time()

	# A missing (None) or non-positive interval falls back to 10 refreshes
	if target_refresh_interval is not None and target_refresh_interval > 0:
		target_total_it_count = math.ceil(time_to_run/target_refresh_interval)
	else:
		target_total_it_count = 10
	iterations_count=10
	total_iterations=0
	loops_count = 0
	timer_a = time.time()
	while timer_a - start < time_to_run:
		loops_count += 1
		# Move nodes towards eachother if connected, move them apart from eachother if not connected
		pos = nx.spring_layout(MAX_CONNECTED_GRAPH, pos=pos, iterations=iterations_count, center=[0,0], weight=None)
		total_iterations += iterations_count
		timer_b = time.time()
		# Guard against a zero elapsed time (coarse clock, tiny graph)
		elapsed = max(timer_b - timer_a, 1e-9)
		speed = iterations_count / elapsed
		expected_remaining_iterations = speed * (time_to_run - timer_b + start)
		if callback:
			callback(pos)
		print(f"Iterations: {total_iterations}/{total_iterations + expected_remaining_iterations:.0f} -- Time: {timer_b-start:.1f}/{time_to_run}s -- Speed: {speed:.1f}/s")
		remaining_loops = target_total_it_count - loops_count if loops_count < target_total_it_count else 1
		next_iteration_count = int(math.ceil(expected_remaining_iterations / remaining_loops))
		if loops_count > target_total_it_count or (next_iteration_count > iterations_count*2 and loops_count > 1):
			# Spring Layout may stop iterating if found an equilibrium. Try to detect this event and stop before max_duration
			break
		# Prepare next iteration
		iterations_count = next_iteration_count
		timer_a = timer_b

	return pos

def onupdate(pos):
	"""Push new 2D positions into the displayed figure (edges are trace 0, nodes are trace 1)."""
	loc = pos_to_graphlocs(public_graph, pos)
	with fig.batch_update():
		for trace_index, kind in enumerate(('edges', 'nodes')):
			for axis in ('x', 'y'):
				fig.data[trace_index][axis] = loc[kind][axis]

# Untangle the graph for TIME_TO_RUN seconds, refreshing the figure about every STEP_TIME seconds
pos = improve_graph_pos(TIME_TO_RUN, pos, callback=onupdate, target_refresh_interval=STEP_TIME)

In [None]:
# 3D Graph Display

def pos_to_graphlocs_3d(public_graph:"nx.Graph", pos3d:dict[str,list[float]]):
	"""Convert a 3D layout into coordinate lists usable as plot traces.

	Only nodes having an entry in `pos3d` (and edges whose two ends have one) are kept.

	Returns a dict with:
	- 'nodes': {'x': [...], 'y': [...], 'z': [...], 'l': [...]} where 'l' holds the node ids
	- 'edges': {'x': [...], 'y': [...], 'z': [...]} polylines separated by None entries
	"""
	nodes = {'x': [], 'y': [], 'z': [], 'l': []}
	for node in public_graph:
		if node in pos3d:
			x, y, z = pos3d[node]
			nodes['l'].append(node)
			nodes['x'].append(x)
			nodes['y'].append(y)
			nodes['z'].append(z)

	edges = {'x': [], 'y': [], 'z': []}
	for edge in public_graph.edges():
		if not (edge[0] in pos3d and edge[1] in pos3d): continue
		x0, y0, z0 = pos3d[edge[0]]
		x1, y1, z1 = pos3d[edge[1]]

		# The entry before the trailing None is the last point of the current polyline
		has_polyline = len(edges['x']) > 2
		ends_at_0 = has_polyline and edges['x'][-2] == x0 and edges['y'][-2] == y0 and edges['z'][-2] == z0
		# All three coordinates of the *second* end must match (z was previously compared to z0)
		ends_at_1 = has_polyline and edges['x'][-2] == x1 and edges['y'][-2] == y1 and edges['z'][-2] == z1

		if ends_at_0:
			# Polyline ends where this edge starts: extend it to the edge's second end
			edges['x'].insert(-1, x1)
			edges['y'].insert(-1, y1)
			edges['z'].insert(-1, z1)
		elif ends_at_1:
			# Polyline ends where this edge ends: extend it to the edge's first end
			edges['x'].insert(-1, x0)
			edges['y'].insert(-1, y0)
			edges['z'].insert(-1, z0)
		else:
			# Start a new polyline, terminated by None so that it is drawn separately
			edges['x'].extend((x0, x1, None))
			edges['y'].extend((y0, y1, None))
			edges['z'].extend((z0, z1, None))

	return {
		'nodes': nodes,
		'edges': edges
	}

# NOTE(review): this definition shadows the 2D `init_comparisons_graph`, and the call below rebinds
# `fig`, which the 2D `onupdate` callback also reads — re-run the 2D display cell before untangling in 2D again.
def init_comparisons_graph(public_graph:nx.Graph, private_graph:nx.Graph):
	"""Build the 3D figure widget showing public videos and their comparisons.

	Node positions come from a short 3D spring layout of MAX_CONNECTED_GRAPH. Node colors reflect
	how close each video is to all the others in `private_graph` (sum of 1/distance over reachable
	videos, divided by the node count).

	Returns the tuple (fig, pos3d): the plotly FigureWidget and the computed 3D positions.
	"""
	pos3d = nx.spring_layout(MAX_CONNECTED_GRAPH, dim=3, iterations=10)
	loc3d = pos_to_graphlocs_3d(public_graph, pos3d)

	scatters = []
	# Public edges
	scatters.append(go.Scatter3d(
		x=loc3d['edges']['x'], y=loc3d['edges']['y'], z=loc3d['edges']['z'],
		line=dict(
			width=0.3,
			color='#888',
		),
		hoverinfo='none',
		mode='lines',
	))

	# Nodes text & colors
	cnct:dict[str,float] = dict()
	mx = private_graph.number_of_nodes()
	for n1,tgt in nx.all_pairs_shortest_path_length(private_graph, cutoff=16):
		ttl = mx
		for n2,ln in tgt.items():
			if n2 == n1: continue # distance to itself is 0
			ttl -= 1/ln
		cnct[n1] = (mx - ttl)/mx

	node_colors = []
	node_text = []
	for node in loc3d['nodes']['l']:
		deg_pub = public_graph.degree[node] if node in public_graph else 0
		deg_prv = private_graph.degree[node]
		node_text.append(
			f"{node}<br>"
			+ f"{deg_pub} public comparisons<br>"
			+ (f"{deg_prv - deg_pub} private comparisons<br>" if deg_prv > deg_pub else "<br>")
		)
		node_colors.append(cnct[node])

	# Public nodes
	scatters.append(go.Scatter3d(
		x=loc3d['nodes']['x'], y=loc3d['nodes']['y'], z=loc3d['nodes']['z'],
		mode='markers',
		hoverinfo='text',
		marker=dict(
			colorscale='Portland',
			reversescale=True,
			color=node_colors,
			size=3,
			line=dict(width=0),
		),
		text=node_text,
	))

	fig = go.FigureWidget(data=scatters,
		layout=go.Layout(
			showlegend=False,
			hovermode='closest',
			margin=dict(b=0,l=0,r=0,t=0),
			height=720,
		),
	)

	fig.update_layout(scene=dict(
		xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, visible=False),
		yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, visible=False),
		zaxis=dict(showgrid=False, zeroline=False, showticklabels=False, visible=False),
		# Fix aspect ratio: 3D scenes ignore the cartesian `yaxis.scaleanchor` setting,
		# the scene's own aspect mode has to be used instead
		aspectmode='data',
	))

	return fig,pos3d

fig,pos3d = init_comparisons_graph(public_graph, private_graph)
fig # Display graph (should be the last line of the notebook cell)

In [None]:
# Untangle 3D Graph above (may run multiple time if needed - Takes time)
TIME_TO_RUN = 120 # Seconds, the longer the prettier
STEP_TIME = 5 # Seconds

def improve_graph_pos3d(time_to_run:int, pos3d:dict[str,list[float]], callback=None, target_refresh_interval:int=None):
	"""Refine a 3D layout of MAX_CONNECTED_GRAPH with repeated spring-layout runs.

	Parameters:
	- time_to_run: time budget, in seconds
	- pos3d: starting 3D positions
	- callback: optional function called with the new positions after every run
	- target_refresh_interval: wanted delay between two callbacks, in seconds;
	  when missing or not positive, 10 refreshes are targeted

	The number of iterations of each run is adapted to the measured speed, so that refreshes
	happen about every `target_refresh_interval` seconds.

	Returns the last computed positions (`pos3d` itself when the time budget is already spent).
	"""
	start = time.time()

	# A missing (None) or non-positive interval falls back to 10 refreshes
	if target_refresh_interval is not None and target_refresh_interval > 0:
		target_total_it_count = math.ceil(time_to_run/target_refresh_interval)
	else:
		target_total_it_count = 10
	iterations_count=10
	total_iterations=0
	loops_count = 0
	timer_a = time.time()
	while timer_a - start < time_to_run:
		loops_count += 1
		# Move nodes towards eachother if connected, move them apart from eachother if not connected
		pos3d = nx.spring_layout(MAX_CONNECTED_GRAPH, dim=3, pos=pos3d, iterations=iterations_count, center=[0,0,0], weight=None)
		total_iterations += iterations_count
		timer_b = time.time()
		# Guard against a zero elapsed time (coarse clock, tiny graph)
		elapsed = max(timer_b - timer_a, 1e-9)
		speed = iterations_count / elapsed
		expected_remaining_iterations = speed * (time_to_run - timer_b + start)
		if callback:
			callback(pos3d)
		print(f"Iterations: {total_iterations}/{total_iterations + expected_remaining_iterations:.0f} -- Time: {timer_b-start:.1f}/{time_to_run}s -- Speed: {speed:.1f}/s")
		remaining_loops = target_total_it_count - loops_count if loops_count < target_total_it_count else 1
		next_iteration_count = int(math.ceil(expected_remaining_iterations / remaining_loops))
		if loops_count > target_total_it_count or (next_iteration_count > iterations_count*2 and loops_count > 1):
			# Spring Layout may stop iterating if found an equilibrium. Try to detect this event and stop before max_duration
			break
		# Prepare next iteration
		iterations_count = next_iteration_count
		timer_a = timer_b

	return pos3d

def onupdate3d(pos3d):
	"""Push new 3D positions into the displayed figure (edges are trace 0, nodes are trace 1)."""
	loc = pos_to_graphlocs_3d(public_graph, pos3d)
	with fig.batch_update():
		for trace_index, kind in enumerate(('edges', 'nodes')):
			for axis in ('x', 'y', 'z'):
				fig.data[trace_index][axis] = loc[kind][axis]

# Untangle the 3D graph for TIME_TO_RUN seconds, refreshing the figure about every STEP_TIME seconds
pos3d = improve_graph_pos3d(TIME_TO_RUN, pos3d, callback=onupdate3d, target_refresh_interval=STEP_TIME)