In [None]:
# Requirements Installations --- PYTHON 3.9.* REQUIRED
%pip install requests networkx
%pip install "git+https://github.com/tournesol-app/tournesol.git@solidago-pipeline#egg=solidago&subdirectory=solidago"

In [None]:
# Imports
import os
import sys
import time
import math
import random
import datetime
import itertools
import networkx as nx
import dateutil.parser as dateparse
from solidago.pipeline.inputs import TournesolInputFromPublicDataset

# Ensure notebook is running from Tournesol-Stats dir
# Resolved working directory, split into its path components
_pwd = os.path.realpath('.').split(os.sep)
if 'src' in _pwd:
	# Drop trailing components until the deepest 'src' directory is the last one
	while _pwd[-1] != 'src':
		_pwd.pop()
	_pwd.pop() # Go up from src dir to Tournesol-Stats
	os.chdir(os.sep.join(_pwd))
# Show the directory every relative path below ('src/py', './data/...') is resolved from
print(os.path.realpath('.'))

# Local project requirements
sys.path.append('src/py')
from model.tournesol_api import TournesolAPI, get, get_individual_score, VData
from utils.save import load_json_gz, save_json_gz

# Parameters

JWT: Get it from tournesol.app

- Open the website, open the dev tools, pick any request sent to the Tournesol API, look at its Request Headers and copy the value of the `Authorization` header (`Bearer ...`)
- DO NOT SHARE THIS TOKEN WITH ANYONE. NEVER. UNDER ANY CIRCUMSTANCES. Even support will never need it.
- This token expires after some time of inactivity. If the tool fails, try updating the token first.

LNGS: Pick all the languages of the videos to keep

In [None]:
# PARAMETERS
# The JWT gives full access to the account: read it with getpass so that the typed
# value is not echoed in the cell (input() would display it in clear text).
import getpass
TOURNESOL_API=TournesolAPI(getpass.getpass('JWT (example: "Bearer xxxxxxxxx")'))
# One API cache file per user (the file name embeds the username)
TOURNESOL_API.loadCache(f"./data/Tournesol_API_cache-{TOURNESOL_API.username}.json.gz")

LNGS=['fr', 'en'] # Videos in any other language are excluded from every suggestion list (see get_lists)
DCC_CACHE='./data/DCC_cache.json.gz' # Cache file of the comparisons graph (DistComparisonChecker)


# Technical

Below is some technical stuff used for the suggestion mechanism.

Run all the following cells one by one; you do not need to change anything.

In [None]:
# Download the Tournesol public dataset through solidago
PUBLIC_DATASET = TournesolInputFromPublicDataset.download()
# Public comparisons of the current user on the 'largely_recommended' criteria, as
# [entity_a, entity_b] rows where entity ids have been replaced by video ids:
# each merge joins entity_id_to_video_id on one side of the pair, then the id column
# is dropped and 'video_id' takes its name. Merges use pandas' default (inner) join,
# so rows whose entity id has no video id are dropped.
# NOTE(review): `.values` yields a 2-D array rather than a list of lists; it is only
# iterated row by row afterwards, so the annotation is approximate.
USER_DATASET:list[list[str]] = (
	PUBLIC_DATASET.comparisons[PUBLIC_DATASET.comparisons.public_username == TOURNESOL_API.username]
	              .loc[PUBLIC_DATASET.comparisons.criteria == 'largely_recommended']
	              [['entity_a', 'entity_b']]
	              .merge(PUBLIC_DATASET.entity_id_to_video_id, left_on='entity_a', right_index=True)
	              .drop('entity_a', axis=1).rename(columns={'video_id': 'entity_a'})
	              .merge(PUBLIC_DATASET.entity_id_to_video_id, left_on='entity_b', right_index=True)
	              .drop('entity_b', axis=1).rename(columns={'video_id': 'entity_b'})
).values
print(f"Loaded {len(USER_DATASET)} public comparisons")

In [None]:
# Functions utils
def rndAB():
	"""Return ('A', 'B') or ('B', 'A'), decided by a single random draw."""
	if random.random() > 0.5:
		return ('A', 'B')
	return ('B', 'A')

class IgnoreList:
	"""
	Registry of the videos that must not be suggested.

	A video is ignored when:
		- its uid has been registered with ignoreVideo
		- its uploader has been registered with ignoreChannel
		- its last comparison is older than the number of days configured with
		  ignoreOlder for its number of comparisons
	"""
	def __init__(self):
		self.vids:set[str] = set() # uids registered with ignoreVideo
		self.channels:set[str] = set() # uploaders registered with ignoreChannel
		self.cmp_age:dict[int,int] = dict() # n_comparisons -> max age (in days) of the last comparison

	def isIgnored(self, vdata:VData):
		"""Return True when vdata matches one of the ignore rules, False otherwise."""
		if vdata['entity']['uid'] in self.vids:
			return True
		if vdata['entity']['metadata']['uploader'] in self.channels:
			return True

		vcmps = get(vdata, 0, 'individual_rating', 'n_comparisons')
		vdate = get(vdata, None, 'individual_rating', 'last_compared_at')
		max_age = self.cmp_age.get(vcmps, None)
		if vcmps > 0 and vdate and max_age:
			# vdate[:19] keeps 'YYYY-MM-DDTHH:MM:SS' only (no fraction, no offset).
			# DistComparisonChecker.addAsCompared writes this field in UTC, so the limit
			# is compared with the current UTC time rather than the local time.
			# NOTE(review): timestamps coming from the API are assumed to be UTC too — confirm
			last_compared = datetime.datetime.fromisoformat(vdate[:19])
			now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
			if last_compared + datetime.timedelta(days=max_age) < now_utc:
				return True
		return False

	def ignoreOlder(self, cmps_daysold:dict[int,int]):
		"""Set the rule {n_comparisons: days}; replaces any previous rule."""
		self.cmp_age = cmps_daysold
	def ignoreVideo(self, vid):
		"""Ignore the video having this uid."""
		self.vids.add(vid)
	def ignoreChannel(self, channel):
		"""Ignore every video whose uploader is this channel."""
		self.channels.add(channel)

	def removeIgnored(self, vids:list[VData], ignored:list[VData]):
		"""
		Move (in place) every ignored video from vids to the end of ignored.

		Both lists are only modified once every video has been checked, and the
		relative order of the videos is kept in both.
		"""
		kept:list[VData] = []
		dropped:list[VData] = []
		for vdata in vids:
			(dropped if self.isIgnored(vdata) else kept).append(vdata)
		ignored.extend(dropped)
		vids[:] = kept

IGNORE_LIST=IgnoreList()

In [None]:
# Import video lists

def crossmerge_lists(l1, l2):
	"""
	Interleave two lists, starting with l2: [l2[0], l1[0], l2[1], l1[1], ...].

	Once the shortest list is exhausted, the remaining items are appended
	(what is left of l2 first, then what is left of l1).
	"""
	shared = min(len(l1), len(l2))
	merged = []
	for from_l2, from_l1 in zip(l2[:shared], l1[:shared]):
		merged.append(from_l2)
		merged.append(from_l1)
	merged += l2[shared:] + l1[shared:]
	return merged

def get_lists() -> tuple[list[VData], list[VData], list[VData], list[VData]]:
	"""
	Fetch the user's compared & rate-later videos and dispatch them in 4 ordered lists.

	Returns (rate_later, low_comparisons, well_compared, excluded):
		- rate_later: videos having 0 or 1 comparison, interleaved
		- low_comparisons: videos having 2 or 3 comparisons, interleaved
		- well_compared: videos having 4 comparisons or more
		- excluded: videos whose language is not in LNGS, or compared within the last 24h
	"""
	# Get
	allRes:list[VData] = list(TOURNESOL_API.getMyComparedVideos())
	allRes.extend(TOURNESOL_API.getMyRateLater())

	# Exclude videos when language not in LNGS & if has been compared within last 24h
	# Timestamps are parsed with their offset dropped, so they are compared with the UTC clock.
	# NOTE(review): assumes 'last_compared_at' is expressed in UTC (addAsCompared writes it in UTC) — confirm for API values
	now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
	def is_eligible(v):
		if get(v, LNGS[0], 'entity', 'metadata', 'language') not in LNGS:
			return False
		last_compared = dateparse.parse(get(v, '2000-01-01T00:00:00', 'individual_rating', 'last_compared_at'), ignoretz=True)
		return (now - last_compared).total_seconds() > (60*60*24)

	kept:list[VData] = []
	excluded:list[VData] = []
	for v in allRes:
		(kept if is_eligible(v) else excluded).append(v)

	# Collect: one {uid: video} dict per number of comparisons
	cmp_vids:list[dict[str,VData]] = [dict(), dict(), dict(), dict()] # min 4 to not explode
	for v in kept:
		cmps = get(v, 0, 'individual_rating', 'n_comparisons')
		while cmps >= len(cmp_vids):
			cmp_vids.append(dict())
		cmp_vids[cmps][v['entity']['uid']] = v

	for i in range(len(cmp_vids)):
		print(f"\t- {i} cmps:", len(cmp_vids[i]))

	# Sort keys
	def score_per_contributor(v):
		# Collective score divided by the number of contributors (divided by 2 when there is 0 or 1 contributor)
		contributors = get(v, 0, 'collective_rating', 'n_contributors')
		return get(v, 0, 'collective_rating', 'tournesol_score') / (contributors if contributors > 1 else 2)

	def oldest_first(v):
		return (
			#	oldest last comparison first
			get(v, '2000-01-01T00:00:00', 'individual_rating', 'last_compared_at'),
			#	Publication date: older first
			get(v, '2000-01-01T00:00:00', 'entity', 'metadata', 'publication_date'),
		)

	# rate_later
	v_0 = sorted(cmp_vids[0].values(), key=lambda v:(
		#	Well noted from few contributors => Go first
		score_per_contributor(v),
		#	When was added to rate later: recent first
		get(v, '9999-12-31T23:59:59', 'rate_later_metadata', 'created_at'),
		#	Publication date: recent first
		get(v, '2000-01-01T00:00:00', 'entity', 'metadata', 'publication_date'),
	), reverse=True)
	v_1 = sorted(cmp_vids[1].values(), key=lambda v:(
		#	Well noted from few contributors => Go first
		-score_per_contributor(v),
	) + oldest_first(v))
	# crossmerge_lists starts with its 2nd argument: v_0[0], v_1[0], v_0[1], v_1[1], ...
	# until one of them is exhausted, then the rest is pushed
	rate_later = crossmerge_lists(v_1, v_0)

	# low_comparisons (same interleaving: v_3[0], v_2[0], v_3[1], ...)
	v_2 = sorted(cmp_vids[2].values(), key=oldest_first)
	v_3 = sorted(cmp_vids[3].values(), key=oldest_first)
	low_comparisons = crossmerge_lists(v_2, v_3)

	# well_compared
	v_4_p = itertools.chain.from_iterable(cmp_vids[i].values() for i in range(4, len(cmp_vids)))
	well_compared = sorted(v_4_p, key=oldest_first)

	return (rate_later, low_comparisons, well_compared, excluded)

In [None]:
# Cached graph of the user's comparisons (DistComparisonChecker)
class DistComparisonChecker:
	"""
	Graph of the comparisons made by the user: nodes are video uids ('yt:<video id>'),
	edges are comparisons. Nodes may carry a 'public' attribute.

	The graph is seeded from the given comparisons, completed with the cache file, then
	refreshed on demand from the Tournesol API (TOURNESOL_API.getMyComparisonsWith).
	Nodes listed in `toupdate` are re-fetched the next time they are needed.
	"""
	def __init__(self, comparisons:list[list[str]], cache_path:str):
		"""
		comparisons: pairs of video ids (without the 'yt:' prefix) that have been compared
		cache_path: path of the json.gz cache file; a falsy value disables cache load & save
		"""
		self.graph = nx.Graph()
		self.pos = None

		for pair in comparisons:
			self.graph.add_edge('yt:' + pair[0], 'yt:' + pair[1])
		n1 = self.graph.number_of_nodes()
		e1 = self.graph.number_of_edges()
		print(f"Loaded {n1} nodes & {e1} comparisons to DCC from public user data")

		# Load cache
		self.cache_path = cache_path
		if self.cache_path:
			cachdata:dict[str,list[str]] = load_json_gz(self.cache_path)
			if 'pos' in cachdata:
				self.pos = cachdata['pos']
			if 'edges' in cachdata:
				for _n1,dests in cachdata['edges'].items():
					for _n2 in dests:
						self.graph.add_edge(_n1, _n2)
			if 'nodes' in cachdata:
				# Node attributes are only restored on nodes that are part of the graph
				for n,dta in cachdata['nodes'].items():
					if n in self.graph and dta:
						for prop,val in dta.items():
							self.graph.nodes[n][prop] = val
		n2 = self.graph.number_of_nodes()
		e2 = self.graph.number_of_edges()
		print(f"     + {n2-n1} nodes & {e2-e1} comparisons        from cache")

		# Every node loaded here has to be refreshed from the API before being trusted
		self.toupdate: set[str] = set(self.graph.nodes)
		print('Total loaded:', self.graph)
		self.save_cache()
		# Prepare visualisation
		# NOTE(review): this replaces the positions loaded from the cache (they have just been written back by save_cache)
		self.pos=nx.circular_layout(self.graph)

	def save_cache(self):
		"""Write edges, node attributes and layout positions to the cache file (no-op without cache_path)."""
		if not self.cache_path:
			return
		cachdata:dict = {'edges':{}, 'nodes':{}}
		for (u,v) in self.graph.edges:
			cachdata['edges'].setdefault(u, list()).append(v)
		for n,dta in self.graph.nodes(data=True):
			if dta:
				cachdata['nodes'][n] = dta
		if self.pos:
			# Coordinates are stored as plain floats so that they can always be serialised
			cachdata['pos'] = {p:[float(e) for e in arr] for p, arr in self.pos.items()}

		save_json_gz(self.cache_path, cachdata)

	def _update_cache(self, vid:str, ispublic:bool=None):
		"""
		Refresh the comparisons of vid from the API, unless the node is known and not flagged for update.

		ispublic: value to store in the 'public' attribute of the node.
		          None keeps the value already stored on the node, if any.

		When the API returns nothing, what is already known about vid is left untouched.
		"""
		if vid in self.graph and vid not in self.toupdate:
			return # No update needed

		allRes = TOURNESOL_API.getMyComparisonsWith(vid)
		if not allRes:
			return

		# Clear all known information about the node before update
		if vid in self.graph:
			# Removing the node drops its attributes: remember the stored flag when the caller gives none
			if ispublic is None and 'public' in self.graph.nodes[vid]:
				ispublic = self.graph.nodes[vid]['public']
			self.graph.remove_node(vid)

		# Add new information
		for g in allRes:
			vid2 = g['entity_b' if g['entity_a'] == vid else 'entity_a']
			if vid2 not in self.graph:
				self.toupdate.add(vid2) # Newly discovered video: its own comparisons are unknown yet
			self.graph.add_edge(vid, vid2)

		# Remove toupdate flag
		self.toupdate.discard(vid)

		if vid in self.graph and ispublic is not None:
			self.graph.nodes[vid]['public'] = ispublic

	def update_cache_neighboors(self, vdata:VData, depth:int=0, recurse=True):
		"""
		Refresh the video described by vdata, then its neighbours up to `depth` edges away.

		depth: 0 refreshes the video only
		recurse: when False, only the nodes found at exactly `depth` edges are refreshed
		         (intermediate rings are walked through using what the graph already knows)

		Only the video itself gets its 'public' attribute from vdata; neighbours keep theirs.
		"""
		root = vdata['entity']['uid']
		self._update_cache(root, ispublic=get(vdata, None, 'individual_rating', 'is_public'))

		updtd = {root}
		for d in range(1, depth+1):
			toupd = set()
			# Find neighboors (a node the API knows nothing about may be absent from the graph)
			for known in updtd:
				if known in self.graph:
					toupd.update(self.graph.neighbors(known))
			toupd.difference_update(updtd) # Remove already updated

			# Update them
			if recurse or d == depth:
				for neighbor in toupd:
					self._update_cache(neighbor)
			updtd.update(toupd)

		# Direct neighbours may have changed during the updates above: make sure all of them are fresh
		if depth > 0 and root in self.graph:
			for neighbor in list(self.graph.neighbors(root)):
				self._update_cache(neighbor)

	def are_sharing_comparisons(self, vdata1:VData, vdata2:VData, cached:bool = False) -> bool:
		"""
		Tell whether both videos are compared together or have a compared video in common.

		cached: when True, answer from the graph as it is, without refreshing from the API.
		Returns False when one of the videos is not in the graph.
		"""
		vid1 = vdata1['entity']['uid']
		vid2 = vdata2['entity']['uid']
		if not cached:
			self._update_cache(vid1, ispublic=(get(vdata1, None, 'individual_rating', 'is_public')))
			self._update_cache(vid2, ispublic=(get(vdata2, None, 'individual_rating', 'is_public')))

		if vid1 not in self.graph or vid2 not in self.graph:
			return False
		c1 = set(self.graph[vid1])
		c2 = set(self.graph[vid2])
		return (vid1 in c2) or (vid2 in c1) or (not c1.isdisjoint(c2))

	def addAsCompared(self, vdata1:VData, vdata2:VData):
		"""
		Register a comparison between both videos (no-op when the edge already exists).

		Both nodes are flagged for update, and both vdata get their 'individual_rating'
		updated in place: 'last_compared_at' is set to now (UTC) and 'n_comparisons' is incremented.
		"""
		vid1 = vdata1['entity']['uid']
		vid2 = vdata2['entity']['uid']

		if self.graph.has_edge(vid1, vid2):
			return

		self.graph.add_edge(vid1, vid2)
		self.toupdate.add(vid1)
		self.toupdate.add(vid2)

		for vdata in (vdata1, vdata2):
			if not vdata.get('individual_rating', None):
				vdata['individual_rating'] = dict()
			rating = vdata['individual_rating']
			rating['last_compared_at'] = datetime.datetime.now(datetime.timezone.utc).isoformat()
			rating['n_comparisons'] = rating.get('n_comparisons',0) + 1

DCC = DistComparisonChecker(USER_DATASET, DCC_CACHE)

In [None]:
# Phase Init (Create first comparison from rate_later & low_comparisons)
def phase_init(rate_later:list[VData], low_comparisons:list[VData], DCC: DistComparisonChecker) -> tuple[VData, str]:
	"""
	Build the first comparison: a rate_later video against a low_comparisons video.

	The rate_later video is the first one being public (or the very first one if none is).
	The low_comparisons video is a public one sharing no comparison with it, having the
	fewest comparisons, then the most recent last comparison.
	Both videos are removed from their list and registered in DCC as compared.

	Returns (the rate_later video, the suggestion line).
	"""
	# First public video of rate_later (a missing flag counts as public)
	public_index = next(
		(idx for idx, candidate in enumerate(rate_later) if get(candidate, True, 'individual_rating', 'is_public')),
		None
	)
	vid_new = None if public_index is None else rate_later.pop(public_index)
	if not vid_new:
		vid_new = rate_later.pop(0)

	def preference(vdata):
		return (
			-get(vdata, 0, 'individual_rating', 'n_comparisons'), # Fewer comparisons first
			get(vdata, '2000-01-01T00:00:00', 'individual_rating', 'last_compared_at') # Compared the more recently
			# Publication date older first (low_comparisons list order)
		)

	vid_old:VData = None
	while True:
		# Candidates are filtered with the cached graph...
		candidates = [
			vdata for vdata in low_comparisons
			if not DCC.are_sharing_comparisons(vid_new, vdata, cached=True) and get(vdata, True, 'individual_rating', 'is_public')
		]
		vid_old = max(candidates, key=preference)
		# ...then the pick is confirmed with fresh data; pick again if it does not hold
		if not DCC.are_sharing_comparisons(vid_new, vid_old):
			break
	low_comparisons.remove(vid_old)

	DCC.addAsCompared(vid_new, vid_old)
	ab = rndAB()
	return (vid_new, f'[1] https://tournesol.app/comparison?uid{ab[0]}=' + vid_new['entity']['uid'] + f"&uid{ab[1]}=" + vid_old['entity']['uid'])


In [None]:
# Phase Expand (Create next comparison from rate_later)
def phase_expand(vid_old:VData, rate_later:list[VData], DCC: DistComparisonChecker) -> tuple[VData, str]:
	"""
	Compare vid_old with the first rate_later video sharing no comparison with it.

	When every rate_later video shares comparisons with vid_old, the first one is picked anyway.
	The picked video is removed from rate_later and the pair is registered in DCC as compared.

	Returns (the picked video, the suggestion line), or (None, None) when rate_later is empty.
	"""
	if not rate_later:
		return (None, None)

	i = 0 # Pick the first one if all have shared comparisons
	for idx, candidate in enumerate(rate_later):
		if not DCC.are_sharing_comparisons(vid_old, candidate):
			i = idx
			break
		print('\t', vid_old, 'is sharing comparisons with', candidate)

	vid_new = rate_later.pop(i)

	DCC.addAsCompared(vid_new, vid_old)
	ab = rndAB()
	return (vid_new, f'[+] https://tournesol.app/comparison?uid{ab[0]}=' + vid_new['entity']['uid'] + f"&uid{ab[1]}=" + vid_old['entity']['uid'])


In [None]:
# Phase Intricate (Create next comparison from low_comparisons)
def phase_intricate(vid_new:VData, compared:list[VData], DCC: DistComparisonChecker) -> tuple[VData, str]:
	"""
	Compare vid_new with the first video of `compared` sharing no comparison with it.

	Two videos share comparisons when they are compared together or have a compared video
	in common (see DistComparisonChecker.are_sharing_comparisons).
	When every video of `compared` shares comparisons with vid_new, the first one is picked anyway.
	The picked video is removed from `compared` and the pair is registered in DCC as compared.

	Returns (vid_new, the suggestion line), or (None, None) when `compared` is empty.
	"""
	if not compared:
		return (None, None)

	i = 0 # Pick the first one if all have shared comparisons
	for idx, candidate in enumerate(compared):
		if not DCC.are_sharing_comparisons(vid_new, candidate):
			i = idx
			break

	vid_old = compared.pop(i)
	DCC.addAsCompared(vid_new, vid_old)
	ab = rndAB()
	return (vid_new, f'[x] https://tournesol.app/comparison?uid{ab[0]}=' + vid_old['entity']['uid'] + f"&uid{ab[1]}=" + vid_new['entity']['uid'])


In [None]:
# Phase Consolidate (Create comparison for 2 high_comparisons videos)
def phase_consolidate(high_comparisons:list[VData], DCC: DistComparisonChecker) -> str:
	"""
	Suggest a comparison between two videos of high_comparisons.

	Candidates are the videos having:
		- an individual score and a language
		- a publication date and a last comparison date
		- at least as many contributors as comparisons made by the user
		- not been compared in the last 6 days
	A pair of candidates is eligible when both videos have:
		- Same language
		- Exact same number of (individual) comparison made
		- No comparison in common (DCC), and a comparison path longer than 4 edges (or no path at all)
	From all eligible pairs, take the one having the highest fitness: a weighted sum (see coefs)
	of normalised criteria, where a small difference of individual score, of sqrt(duration) and of
	sqrt(age), a small number of comparisons, a long comparison path and public videos are preferred.

	The picked pair is checked again with fresh API data (another pair is searched if it does not
	hold), then registered in DCC as compared. Videos are not removed from the list.

	Returns the suggestion line, or None if no pair is eligible.
	"""
	# Compute minmax & fast access to some data in vdata
	mins:dict[str,float] = dict()
	maxs:dict[str,float] = dict()
	vdata:list[dict] = []
	# Dates are parsed with their offset dropped, so they are compared with the UTC clock.
	# NOTE(review): assumes the dates are expressed in UTC (addAsCompared writes 'last_compared_at' in UTC) — confirm for API values
	today = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
	def days_since(stamp:str) -> float:
		return (today - dateparse.parse(stamp, ignoretz=True)).total_seconds() / (60*60*24)

	for v in high_comparisons:
		lng:str = get(v, None, 'entity', 'metadata', 'language')
		cmp:float = get(v, 0, 'individual_rating', 'n_comparisons')
		cnt:float = get(v, 0, 'collective_rating', 'n_contributors')
		indiv_score = get_individual_score(v)
		coll_score:float = get(v, 0, 'collective_rating', 'tournesol_score')
		published = get(v, None, 'entity', 'metadata', 'publication_date')
		compared = get(v, None, 'individual_rating', 'last_compared_at')

		# Videos lacking a date cannot be evaluated: skip them
		if indiv_score is None or lng is None or cnt < cmp or not published or not compared:
			continue
		if days_since(compared) <= 6:
			continue

		duration:float = math.sqrt(get(v, 0, 'entity', 'metadata', 'duration'))
		aired:float = math.sqrt(max(0.0, days_since(published))) # Never negative, even if the date is slightly in the future

		vdata.append({
			'#id': v['entity']['uid'],
			'lng': lng,
			'cmp': cmp,
			'ind': indiv_score,
			'col': coll_score,
			'dur': duration,
			'air': aired,
			'pub': get(v, True, 'individual_rating', 'is_public'),
			'full': v
		})
		for (key,val) in (('ind', indiv_score), ('col', coll_score), ('dur', duration), ('air', aired), ('cmp', cmp)):
			mins[key] = min(mins.get(key, val), val)
			maxs[key] = max(maxs.get(key, val), val)

	if len(vdata) < 2:
		return None # No pair can be made

	# Distance between nodes (square root of the number of edges of the shortest path)
	longest_path = max(
		(d for _, lengths in nx.all_pairs_shortest_path_length(DCC.graph) for d in lengths.values()),
		default=0
	)
	mins['pth'] = math.sqrt(4) # Pairs whose shortest path has 4 edges or less are rejected
	maxs['pth'] = math.sqrt(longest_path) + 1 # If nodes are not connected, distance is "longuest path" +1

	mins['pub'] = 0
	maxs['pub'] = 2 # 2 if both are public, 1 if none are public, 0 if one is public but not the other

	coefs = { # + => Higher value is better, - => Lower value is better
		'ind': -2.0, # Prefer when there is not a lot of difference in individual score
		'pth': +1.0, # Prefer longer comparison chain length
		'dur': -0.8, # Prefer when there is not a lot of difference in video duration
		'cmp': -0.5, # Prefer when there is not a lot of comparisons
		'pub': +0.5, # Prefer when both video are public
		'air': -0.2, # Prefer when there is not a long time between both video aired
	}

	def normalised(key:str, value:float) -> float:
		# When all candidates have the same value, the criteria cannot tell pairs apart
		span = maxs[key] - mins[key]
		return value / span if span else 0.0

	# Find best pair
	bestpair = None # (v1, v2, details to print, distance)
	while bestpair is None:
		bestfitness:float = -999999
		for i1 in range(1,len(vdata)):
			v1 = vdata[i1]

			for v2 in vdata[0:i1]:
				if (v1['lng'] != v2['lng']
					or v1['cmp'] != v2['cmp']
					or DCC.are_sharing_comparisons(v1['full'], v2['full'], cached=True)
				):
					continue

				# shortest path
				distance = maxs['pth']
				try:
					distance = math.sqrt(nx.shortest_path_length(DCC.graph, v1['#id'], v2['#id']))
				except (nx.NetworkXNoPath, nx.NodeNotFound):
					pass # Not connected (or not in the graph): keep the "not connected" distance

				if distance <= mins['pth']:
					continue

				# Get pair score
				diffs:dict[str,float] = {
					'ind': abs(v1['ind']-v2['ind']),
					'dur': abs(v1['dur']-v2['dur']),
					'air': abs(v1['air']-v2['air']),
					'pth': abs(distance-mins['pth']),
					'cmp': v1['cmp']-mins['cmp'],
					'pub': 2 if v1['pub'] and v2['pub'] else 0 if v1['pub'] or v2['pub'] else 1,
				}
				vals = {k: normalised(k, diffs[k]) for k in coefs}
				pcts = {k: v if coefs[k] > 0 else (1-v) for k,v in vals.items()}
				fitness = sum( abs(coefs[k]) * pcts[k] for k in coefs )
				if fitness > bestfitness:
					toprint = (
						  f"ind:{pcts['ind']:.0%}({v1['ind']:.0f}/{v2['ind']:.0f}), "
						+ f"dur:{pcts['dur']:.0%}, "
						+ f"air:{pcts['air']:.0%}, "
						+ f"pth:{pcts['pth']:.0%}({int(distance**2) if distance < maxs['pth'] else '∞'}), "
						+ f"cmp:{pcts['cmp']:.0%}({v1['cmp']:.0f}), "
						+ f"pub:{pcts['pub']:.0%}({'Public' if v1['pub'] else 'Private'}/{'Public' if v2['pub'] else 'Private'})"
					)
					bestpair = (v1, v2, toprint, distance)
					bestfitness = fitness

		if bestpair is None:
			return None

		# The pair has been picked from cached data: check it again with fresh data.
		# Each failed check has updated the graph, so the next search will not pick the same pair for the same reason.
		DCC.update_cache_neighboors(bestpair[0]['full'], 0)
		DCC.update_cache_neighboors(bestpair[1]['full'], 0)
		if DCC.are_sharing_comparisons(bestpair[0]['full'], bestpair[1]['full']):
			print('! Direct shared comparisons found')
			bestpair = None
			continue

		DCC.update_cache_neighboors(bestpair[0]['full'], 1, recurse=False)
		DCC.update_cache_neighboors(bestpair[1]['full'], 1, recurse=False)
		if DCC.are_sharing_comparisons(bestpair[0]['full'], bestpair[1]['full']):
			print('! Neighboors shared comparisons found')
			bestpair = None
			continue

		try:
			# Same unit as bestpair[3]: square root of the number of edges
			refreshed = math.sqrt(nx.shortest_path_length(DCC.graph, bestpair[0]['#id'], bestpair[1]['#id']))
		except (nx.NetworkXNoPath, nx.NodeNotFound):
			refreshed = None
		if refreshed is not None and refreshed < bestpair[3]:
			print('! Shorter path found')
			bestpair = None
			continue

	# print path between a and b
	try:
		print(' > '.join(nx.shortest_path(DCC.graph, bestpair[0]['#id'], bestpair[1]['#id'])))
	except (nx.NetworkXNoPath, nx.NodeNotFound):
		print('No path between', bestpair[0]['#id'], 'and', bestpair[1]['#id'])

	DCC.addAsCompared(bestpair[0]['full'], bestpair[1]['full'])
	maxfitness:float = sum(abs(c) for c in coefs.values())

	ab = rndAB()
	return (f'[*] https://tournesol.app/comparison?uid{ab[0]}=' + bestpair[0]['#id'] + f"&uid{ab[1]}=" + bestpair[1]['#id'] 
		 + f" [f={bestfitness/maxfitness:.2%}] ({bestpair[2]})"
	)


In [None]:
# Comparisons Generator
def getComparisons(rate_later: list[VData], low_comparisons: list[VData], high_comparisons:list[VData], DCC: DistComparisonChecker):
	"""
	Generator yielding the suggestion lines, one comparison at a time.

	It starts with phase_init (repeated while the video it returns is ignored), then loops on:
		- phase_consolidate (yielded only when it returns a suggestion)
		- phase_expand when the current video has 2 comparisons or more, phase_intricate otherwise
	The video returned by the phase becomes the current video, unless it is ignored.
	The generator ends when the phase returns no video.
	"""
	ignored_notice = '##### Current vid is ignored #####'

	# Phase 0
	current = None
	while True:
		current, suggestion = phase_init(rate_later, low_comparisons, DCC)
		yield suggestion
		if IGNORE_LIST.isIgnored(current):
			print(ignored_notice)
			continue
		break

	while True:
		suggestion = phase_consolidate(high_comparisons, DCC)
		if suggestion:
			yield suggestion

		has_enough_comparisons = get(current, 0, 'individual_rating', 'n_comparisons') >= 2
		if has_enough_comparisons:
			candidate, suggestion = phase_expand(current, rate_later, DCC)
		else:
			candidate, suggestion = phase_intricate(current, low_comparisons, DCC)

		if not candidate:
			break
		yield suggestion
		if IGNORE_LIST.isIgnored(candidate):
			print(ignored_notice)
		else:
			current = candidate

	return 'NO MORE'


# Main part

There are 3 cells below:

- "INIT/RESET": To run once to initialize the tool
- "IGNORE": To add videos to not be compared
- "CONTINUE": To run as many times as you want, to get more comparison links generated

If at any time you make comparisons not suggested by the tool, please run INIT/RESET again to synchronize the tool with your current Tournesol account

In [None]:
# INIT/RESET ORDO (Replay this cell everytime any comparison other than suggested by this notebook has been made)
# Rebuild the 4 video lists from the API
(RATE_LATER, LOW_CMPS, HIGH_CMPS, EXCLUDED) = get_lists()
# NOTE(review): getComparisons is a generator function, so `comparison` holds a generator
# (the CONTINUE cell consumes it with next()), not a str as annotated
comparison:str = getComparisons(RATE_LATER, LOW_CMPS, HIGH_CMPS, DCC)
# Suggestions already given, oldest first
comparisons_histo:list[str] = []
print('Initialized !\n')
# Size of each list, prefixed with the marker used in the matching suggestion lines
print('[+]:', len(RATE_LATER))
print('[x]:', len(LOW_CMPS))
print('[*]:', len(HIGH_CMPS))
print('[-]:', len(EXCLUDED))

In [None]:
# IGNORE: Add here videos to ignore if a suggestion told to compare a video you don't want to compare for now.
# You can add more videos between ORDO steps, then continue ordo without RESET

# IGNORE_LIST.ignoreChannel('channelName')
# IGNORE_LIST.ignoreVideo('yt:abcdefghijk')

# {number of comparisons: days}: a video having this number of comparisons is ignored
# once its last comparison is older than this number of days (see IgnoreList.isIgnored)
IGNORE_LIST.ignoreOlder({4:367, 5:307, 6:247, 7:187, 8:127, 9:67, 10:14, **{n:7 for n in range(11,20)}})

# Move the ignored videos of each list to EXCLUDED (in place)
IGNORE_LIST.removeIgnored(RATE_LATER, EXCLUDED)
IGNORE_LIST.removeIgnored(LOW_CMPS, EXCLUDED)
IGNORE_LIST.removeIgnored(HIGH_CMPS, EXCLUDED)

print('[+]:', len(RATE_LATER))
print('[x]:', len(LOW_CMPS))
print('[*]:', len(HIGH_CMPS))
print('[-]:', len(EXCLUDED))

In [None]:
# Prefetch (Improves comparisons recommendation speed, but may take a long time (1s per video to fetch))
def prefetch(max=0):
	"""
	Refresh in DCC the compared videos whose cached comparisons look outdated, then save the cache.

	A video is refreshed when it is not ignored and either is missing from the DCC graph
	or has a number of comparisons different from its degree in the graph.

	max: maximum number of videos to refresh (0: no limit)
	"""
	res:list[VData] = TOURNESOL_API.getMyComparedVideos()

	def needs_prefetch(vdata):
		uid = vdata['entity']['uid']
		if uid in DCC.graph and get(vdata, 0, 'individual_rating', 'n_comparisons') == DCC.graph.degree[uid]:
			return False # The graph already knows all the comparisons of this video
		return not IGNORE_LIST.isIgnored(vdata)

	vids_to_prefetch = [vdata for vdata in res if needs_prefetch(vdata)]
	print(len(vids_to_prefetch), 'videos to Prefetch', '' if not max or max > len(vids_to_prefetch) else f"(Will prefetch first {max} only)")
	if not vids_to_prefetch:
		return

	if 0 < max < len(vids_to_prefetch):
		vids_to_prefetch = vids_to_prefetch[:max]
	for vdata in vids_to_prefetch:
		DCC.update_cache_neighboors(vdata, 0, False)
		DCC._update_cache(vdata['entity']['uid'], ispublic=vdata['individual_rating']['is_public'])
	print('Prefetch finished.')
	DCC.save_cache()

prefetch(100)

In [None]:
# CONTINUE ORDO (Replay this cell everytime to get next comparison)
# Print previous
if comparisons_histo:
	print('Previous:')
	# Last 5 suggestions, numbered with their 1-based position in the history
	for (i,cmp) in enumerate(comparisons_histo[-5:], start=max(1, len(comparisons_histo)-4)):
		print(f'{i:4d}.', cmp)

# Print new
print()
# next() raises StopIteration once the generator created in the INIT/RESET cell is exhausted
comparisons_histo.append(next(comparison))
print('\nNew comparison:')
print(f'{len(comparisons_histo):4d}.', comparisons_histo[-1])
# Persist what has been learned from the API while computing the suggestion
DCC.save_cache()

In [None]:
# Draw user comparisons graph
import colorsys
import warnings
import matplotlib.pyplot as plt

def makegroupedgraph(graph:nx.Graph, nodes):
	"""
	Return a copy of the edges of `graph` restricted to `nodes`, where each clique of
	3 nodes or more is merged into a single node.

	Cliques are processed from the largest to the smallest; a clique is skipped when one
	of its members has already been merged in a previous group.
	A group node is named 'g:' followed by its smallest member (without its 3 characters
	prefix), holds its members in its 'group' attribute, and receives the edges its members
	had with the outside of the group.
	"""
	sorted_graph = nx.Graph()
	sorted_graph.add_edges_from(graph.edges.data())
	outsiders = [n for n in list(graph.nodes) if n not in nodes]
	sorted_graph.remove_nodes_from(outsiders)

	# Find fully connex subgraphs
	cliques = sorted(nx.find_cliques(sorted_graph), key=len, reverse=True)
	for members in cliques:
		if len(members) <= 2:
			continue
		if any(member not in sorted_graph for member in members):
			continue

		groupid = f"g:{min(members)[3:]}"
		sorted_graph.add_node(groupid, group=members)

		if len(members) >= 4:
			print(groupid, members)

		# Copy edges from inside to outside group to the group itself
		for src, dst, attrs in list(sorted_graph.edges.data()):
			if src in members and dst not in members:
				sorted_graph.add_edge(groupid, dst, **attrs)
			elif dst in members and src not in members:
				sorted_graph.add_edge(src, groupid, **attrs)

		# Remove group members from graph
		sorted_graph.remove_nodes_from(members)

	return sorted_graph

def draw_user_graph(prepduration:int):
	"""
	Draw the largest connected component of the user's comparisons graph (DCC.graph),
	with cliques merged in single nodes (see makegroupedgraph).

	prepduration: time (in seconds) given to graphUserComparisons to compute the layout

	Node colour: hue tells public (green) / private (red) / group (blue), unknown status is
	grey; brightness grows with the number of neighbours per member of the node.
	Node size grows with the number of members of the node.
	Labels are printed on public videos having at most 1 public neighbour.

	The computed positions are stored in DCC.pos and saved with the DCC cache.
	"""
	if DCC.graph.number_of_nodes() == 0:
		print('No comparison to draw')
		return
	grouped_graph = makegroupedgraph(DCC.graph, max(nx.connected_components(DCC.graph), key=len))
	nodes = list(grouped_graph.nodes)
	if not nodes:
		print('No comparison to draw')
		return

	# Prepare positions
	DCC.pos = graphUserComparisons(grouped_graph, prepduration, pos=DCC.pos)
	DCC.save_cache()

	# node color
	ggroups = grouped_graph.nodes.data('group', default=[1])
	colors_map = {n: len(grouped_graph[n])/len(ggroups[n]) for n in nodes}
	min_c = min(colors_map.values())
	mm_c = max(colors_map.values()) - min_c
	print('min & max colors:', min_c, min_c + mm_c)

	def is_public(n):
		return bool(DCC.graph.nodes[n].get('public', False))

	def node_color(n):
		known = n in DCC.graph # Group nodes are not part of DCC.graph
		flagged = known and 'public' in DCC.graph.nodes[n]
		if not known:
			hue = .625
		elif not flagged or not DCC.graph.nodes[n]['public']:
			hue = 0
		else:
			hue = .25
		saturation = .9 if (not known or flagged) else 0
		# All nodes get the same (medium) brightness when they all have the same ratio
		brightness = .1 + .8*(colors_map[n]-min_c)/mm_c if mm_c else .5
		return colorsys.hsv_to_rgb(hue, saturation, brightness)

	colors = [node_color(n) for n in nodes]

	# Prepare image
	# NOTE(review): these 3 calls apply to pyplot's current figure, not to the one created below — confirm they are needed
	plt.box(False)
	plt.clf()
	plt.tight_layout()
	plt.rcParams['svg.fonttype'] = 'none'
	plt.rc('axes', unicode_minus=False)

	# Output svg dimensions
	size = (grouped_graph.number_of_nodes()+1)**0.3
	print(f"Image size: {size*1.4+1:.1f}x{size+1:.1f}")
	fig = plt.figure(figsize=(size*1.4+1, size+1), frameon=False)

	# Axis
	fig.clear()
	ax = fig.add_axes([0, 0, 1, 1])
	ax.axis('off')
	ax.set_facecolor('#FFF') # Background color

	nx.draw_networkx_nodes(grouped_graph,
		pos=DCC.pos,
		nodelist=nodes,
		node_size=[len(ggroups[n])**2 for n in nodes],
		node_color=colors
	)

	nx.draw_networkx_edges(grouped_graph,
		pos=DCC.pos,
		edge_color='#8888',
		width=0.5,
	)

	# Print labels on public nodes having 0 or 1 comparison with other public nodes (graph[n]['public'] == True)
	nx.draw_networkx_labels(grouped_graph,
		pos=DCC.pos,
		labels={n1: n1[3:] for n1 in nodes
			if n1[:3] == 'yt:' # Is not a group
			and is_public(n1) # Node itself is public
			and sum(1 for n2 in DCC.graph[n1] if is_public(n2)) <= 1 # 0 or 1 public neighbors
		},
		font_size=8,
		horizontalalignment='right',
	)

	# Installed for the rest of the session (including the rendering of the figure by the notebook)
	warnings.filterwarnings("ignore", category=UserWarning)

def graphUserComparisons(graph:nx.Graph, minduration:int, pos=None):
	"""
	Refine a spring layout of `graph` by successive batches of iterations, for about `minduration` seconds.

	graph: graph to lay out
	minduration: time budget (in seconds), checked against the wall clock between batches
	pos: initial positions given to the first nx.spring_layout call (optional)

	Returns the positions given by the last nx.spring_layout call (`pos` itself if no batch has been run).
	"""
	## Preparing Graph Layout
	start = time.time()

	iterations_count=10 # Size of the first batch
	total_iterations=0
	timer_a = time.time() # Start time of the current batch
	loops_count = 0
	while timer_a - start < minduration:
		loops_count += 1
		# Move nodes towards eachother if connected, move them apart from eachother if not connected
		pos = nx.spring_layout(graph, pos=pos, weight='spring', iterations=iterations_count)
		total_iterations += iterations_count
		timer_b = time.time()
		# Iterations per second measured on this batch, used to estimate what fits in the remaining time
		speed = iterations_count / (timer_b-timer_a)
		expected_remaining_iterations = speed * (minduration - timer_b + start)
		print(f"Iterations: {total_iterations}/{total_iterations + expected_remaining_iterations:.0f} -- Progress: {timer_b-start:.1f}/{minduration}s -- Speed: {speed:.1f} it/s")
		# Spread the remaining iterations over the batches left (10 batches are planned in total)
		next_iteration_count = int(math.ceil(expected_remaining_iterations / (10 - loops_count if loops_count < 10 else 1)))
		# Reads as: loops_count > 10 or (next batch more than twice the current one, and not the first batch)
		if loops_count > 10 or next_iteration_count > iterations_count*2 and loops_count > 1:
			# Spring Layout may stop iterating if found an equilibrium. Try to detect this event and stop before max_duration
			break
		# Prepare next iteration
		iterations_count = next_iteration_count
		timer_a = timer_b
	return pos

draw_user_graph(30)