In [37]:
# Imports
import datetime
import math
import os
import random
import time

import dateutil.parser as dateparse
import requests

# Parameters

JWT: Get it from tournesol.app

- Open the website, open the browser dev tools, pick any request sent to the Tournesol API, look at its Request Headers and copy the value of the `Authorization` header (`Bearer ...`)
- DO NOT SHARE THIS TOKEN WITH ANYONE. NEVER. UNDER ANY CIRCUMSTANCES. Even support will never need it.
- This token expires after some time of inactivity. If the tool fails, try updating the token first.

LNGS: Pick all the languages of the videos to keep

In [38]:
# PARAMETERS
# The token is a credential: it must never be written (and saved) in the notebook itself.
# Export it before starting Jupyter, with the full header value, e.g. TOURNESOL_JWT="Bearer xxxxx"
# (the value is sent as-is in the Authorization header of every API call).
JWT=os.environ.get('TOURNESOL_JWT', '')
if not JWT:
	print('WARNING: TOURNESOL_JWT is not set, calls to the Tournesol API will be rejected')
LNGS=['fr', 'en'] # Consolidation phase will only suggest videos in these languages


# Technical

Below is some technical stuff used for the suggestion mechanism.

Play all the next cells one by one, you do not need to change anything.

In [65]:
# Functions utils
# Time of the last request sent to Tournesol (naive UTC), used to throttle the calls
LAST_TNSL_CALL=datetime.datetime.utcnow()
def callTournesol(path: str):
	"""
	Send an authenticated GET request to the Tournesol API and return the decoded JSON body.

	path: path (and query string) appended to the API base URL

	Calls are throttled: at least 1 second is left between the end of a call and the start of the next one.
	The request is aborted (requests raises a Timeout exception) if the server does not answer within TIMEOUT seconds,
	instead of blocking the notebook forever.
	"""
	global LAST_TNSL_CALL
	BASE_URL='https://api.tournesol.app/'
	TIMEOUT=30 # seconds
	wait=1-(datetime.datetime.utcnow()-LAST_TNSL_CALL).total_seconds()
	if wait > 0:
		time.sleep(wait)
	try:
		response = requests.get(BASE_URL + path, headers={
			'Authorization': JWT
		}, timeout=TIMEOUT)
	finally:
		# A failed request still reached (or tried to reach) the server: keep throttling from now
		LAST_TNSL_CALL = datetime.datetime.utcnow()
	return response.json()

def callTournesolMulti(path: str, args:str=None, start:int=0, end:int=0):
	"""
	Download a paginated Tournesol listing, page after page, and return all the results in one list.

	path: API path of the listing, without query string
	args: optional extra query string parameters (like "a=1&b=2")
	start: offset of the first result to download
	end: no new page is requested once the current offset reached this value (0 or less: no bound)

	The number of results downloaded so far is printed after each page.
	Raises RuntimeError when a response is not a listing (no 'count' or no 'results' field),
	which typically happens when the token is invalid or expired.
	"""
	LIMIT=1000
	URL=f'{path}?limit={LIMIT}' + (('&' + args) if args else '')

	def fetch(offset:int):
		"""Download one page and check that it looks like a listing."""
		rs = callTournesol(URL + f'&offset={offset}')
		if not isinstance(rs, dict) or 'count' not in rs or 'results' not in rs:
			print('##### ERROR #####')
			print(URL)
			print(rs)
			# Raise instead of exit(): exiting would shut the whole kernel down
			raise RuntimeError(f'Unexpected response from Tournesol API for {URL}')
		return rs

	offset=start
	rs = fetch(offset)
	total=rs['count']
	allRes = list(rs['results'])
	print(f'{len(allRes)}/{total}', end=' ')

	while len(allRes) < total and (end <= 0 or offset < end):
		offset += LIMIT
		rs = fetch(offset)
		if not rs['results']:
			# Nothing left after this offset (always the case at some point when start > 0,
			# as len(allRes) can then never reach total): stop instead of looping forever
			break
		total=rs['count']
		allRes += rs['results']
		print(f'{len(allRes)}', end=' ')
	print()

	return allRes

def get(json, default, *fields):
	"""
	Follow `fields` one after the other inside nested containers and return the value reached.

	`default` is returned as soon as a field is missing at the current level,
	or when the value stored under a field is falsy (None, 0, '', empty container...).
	Without any field, `json` itself is returned unchanged.
	"""
	current = json
	for name in fields:
		if name not in current:
			return default
		current = current[name]
		if not current:
			return default
	return current

def rndAB():
	"""Return the two labels 'A' and 'B' as a pair, in a random order."""
	if random.random() > 0.5:
		return ('A', 'B')
	return ('B', 'A')


def get_individual_score(vdata):
	"""
	Return the score stored for the 'largely_recommended' criteria in the individual rating of `vdata`,
	or None when there is no such criteria score.
	"""
	criteria_scores = get(vdata, [], 'individual_rating', 'criteria_scores')
	matching = list(filter(lambda entry: entry['criteria'] == 'largely_recommended', criteria_scores))
	if not matching:
		return None
	return matching[0]['score']


In [40]:
# Get Already Compared list (COMPARED)
def get_already_compared():
	"""
	Download all the videos rated by the user and split the eligible ones in two groups.

	A video is eligible when:
		- it has been compared at least 3 times by me
		- its language is one of LNGS (videos without language are kept)
		- its last comparison is old enough: number of whole days elapsed > 1
		  (videos without last comparison date are considered compared in 2000)

	Returns a tuple (highcomps, lowcomps):
		- highcomps: eligible videos compared 4 times or more by me
		- lowcomps: eligible videos compared less than 4 times by me, and rated by 2 to 9 contributors
	"""
	print('Extracting compared videos...', end=' ')
	ratings = callTournesolMulti('users/me/contributor_ratings/videos', 'order_by=last_compared_at')

	now = datetime.datetime.utcnow()

	def is_eligible(v):
		if not get(v, 0, 'individual_rating', 'n_comparisons') >= 3:
			return False
		if get(v, LNGS[0], 'entity', 'metadata', 'language') not in LNGS:
			return False
		last_compared = dateparse.parse(get(v, '2000-01-01T00:00:00', 'individual_rating', 'last_compared_at'), ignoretz=True)
		return (now - last_compared).days > 1

	highcomps = []
	lowcomps = []
	for v in filter(is_eligible, ratings):
		contributors = get(v, 0, 'collective_rating', 'n_contributors')
		if 2 <= contributors < 10 and get(v, 0, 'individual_rating', 'n_comparisons') < 4:
			lowcomps.append(v)
		if v['individual_rating']['n_comparisons'] >= 4:
			highcomps.append(v)

	return (highcomps, lowcomps)

In [41]:
# Get Rate Later list (RATE_LATER)
def get_rate_later(exclude):
	"""
	Download the rate_later list of the user, without the videos listed in `exclude`.

	The result is sorted by ascending "magic score", then by publication date:
		- 0 for videos without contributor
		- half of the collective score for videos with a single contributor
		- collective score divided by the number of contributors otherwise
	"""
	print('Extracting rate_later list...', end=' ')
	fetched = callTournesolMulti('users/me/rate_later/videos')

	excluded_uids = set()
	for v in exclude:
		excluded_uids.add(get(v, '?', 'entity', 'uid'))

	def magicscore(v):
		score = get(v, 0, 'collective_rating', 'tournesol_score')
		contrib = get(v, 0, 'collective_rating', 'n_contributors')
		publi = get(v, 'unknown', 'entity', 'metadata', 'publication_date')
		if contrib <= 0:
			ratio = 0
		elif contrib == 1:
			ratio = score/2
		else:
			ratio = score / contrib
		return (ratio, publi)

	kept = (v for v in fetched if get(v, '?', 'entity', 'uid') not in excluded_uids)
	return sorted(kept, key=magicscore)


In [42]:
# Cache DistComparisonChecker
class DistComparisonChecker:
	"""
	Cache of the comparisons made by the user: for each video uid, the uids of the videos it was compared with.

	cache: uid => set of uids compared with it (may be partial for uids listed in toupdate)
	toupdate: uids whose cached set must be downloaded again before being trusted
	"""
	def __init__(self):
		self.cache: dict[str,set[str]] = {}
		self.toupdate: set[str] = set()

	@staticmethod
	def _uids(vdata1, vdata2):
		"""Extract the uids of both videos."""
		return (vdata1['entity']['uid'], vdata2['entity']['uid'])

	@staticmethod
	def _linked(vid1, c1, vid2, c2):
		"""True if both videos are compared together, or are both compared with a same third video."""
		return (vid1 in c2) or (vid2 in c1) or (not c1.isdisjoint(c2))

	@staticmethod
	def _mark_compared_now(vdata):
		"""Update the individual rating of the video: compared right now, one more comparison."""
		if not vdata.get('individual_rating', None):
			vdata['individual_rating'] = dict()
		rating = vdata['individual_rating']
		rating['last_compared_at'] = datetime.datetime.utcnow().isoformat()
		rating['n_comparisons'] = rating.get('n_comparisons',0) + 1

	def _get_data_cached(self, vid):
		"""
		Return the set of uids compared with `vid`.
		The set is downloaded when unknown or flagged in toupdate; the videos found in the
		downloaded comparisons get their own (partial) set completed with `vid` at the same time.
		"""
		up_to_date = vid in self.cache and vid not in self.toupdate
		if not up_to_date:
			print(f'Obtaining comparisons with {vid}...', end=' ')
			comparisons = callTournesolMulti(f"users/me/comparisons/videos/{vid}/")
			self.cache[vid] = set()
			for comparison in comparisons:
				# The other video is on the side where `vid` is not
				side = 'entity_b' if comparison['entity_a']['uid'] == vid else 'entity_a'
				other = comparison[side]['uid']
				self.cache[vid].add(other)
				if other not in self.cache:
					# Only partially known for now: to be downloaded when needed
					self.cache[other] = set()
					self.toupdate.add(other)
				self.cache[other].add(vid)
			self.toupdate.discard(vid)
		return self.cache[vid]
	
	def are_sharing_comparisons(self, vdata1, vdata2):
		"""Same check as have_cached_comparisons_shared, after making sure the data of both videos is up to date."""
		(vid1, vid2) = self._uids(vdata1, vdata2)
		c1 = self._get_data_cached(vid1)
		c2 = self._get_data_cached(vid2)
		return self._linked(vid1, c1, vid2, c2)
	
	def have_cached_comparisons_shared(self, vdata1, vdata2):
		"""
		Check, using only what is currently in cache (nothing is downloaded), if both videos
		are compared together or compared with a same third video.
		"""
		(vid1, vid2) = self._uids(vdata1, vdata2)
		c1:set[str] = self.cache.get(vid1, set())
		c2:set[str] = self.cache.get(vid2, set())
		return self._linked(vid1, c1, vid2, c2)

	def addAsCompared(self, vdata1, vdata2):
		"""
		Register a comparison between both videos: the cache and the individual ratings
		(last_compared_at, n_comparisons) of both videos are updated, and both videos are flagged in toupdate.
		"""
		(vid1, vid2) = self._uids(vdata1, vdata2)
		c1 = self._get_data_cached(vid1)
		c2 = self._get_data_cached(vid2)
		c1.add(vid2)
		c2.add(vid1)

		for (vid, vdata) in ((vid1, vdata1), (vid2, vdata2)):
			self._mark_compared_now(vdata)
			self.toupdate.add(vid)

	def clear(self):
		"""Forget all the cached comparisons."""
		self.cache.clear()

In [43]:
# Phase Consolidate
def phase_consolidate(all_compared:list, DCC: DistComparisonChecker, ignore:set[str]):
	"""
	Suggest a comparison between two already compared videos that look alike.

	From all compared videos (not listed in `ignore`), take only the ones that has:
		- an individual score
		- been compared at least 3 time by me
		- been compared by at least 2 different contributors
		- not been compared in the last 6 days (no known date: considered compared in 2000, same fallback as get_already_compared)
		- a language listed in LNGS
		- a publication date (pairs cannot be scored without it)
	From these videos, get pairs such as videos in a pair has:
		- Same language
		- Exact same number of (individual) comparison made
		- No comparison in common (DCC)
	From all pairs, take the one having the smallest fitness, fitness being calculated by adding
	the weighted squares of the differences (each normalized by the range observed among candidates) of:
		+ Individual score (weight 2)
		+ Collective score (weight 1)
		+ Sqrt(duration) (weight 1)
		+ Sqrt(days since the video aired) (weight 0.5)
	plus a hundredth of the number of comparisons (pairs with few comparisons first)

	Compare them (Do not remove them from the list)

	Returns the URL of the comparison to do, or None if no pair has been found
	"""
	today = datetime.datetime.utcnow()

	def days_since(isodate:str):
		"""Number of whole days elapsed since the given date."""
		return (today - dateparse.parse(isodate, ignoretz=True)).days

	# Select candidates & keep fast access to the data used to score pairs
	vdata = []
	for v in all_compared:
		if v['entity']['uid'] in ignore:
			continue
		lng:str = get(v, None, 'entity', 'metadata', 'language')
		cmp:int = get(v, 0, 'individual_rating', 'n_comparisons')
		cnt:int = get(v, 0, 'collective_rating', 'n_contributors')
		indiv_score:float = get_individual_score(v)

		# Cheap checks first: dates are only parsed for videos passing them
		if indiv_score is None or cnt < 2 or cmp < 3 or lng not in LNGS:
			continue
		if days_since(get(v, '2000-01-01T00:00:00', 'individual_rating', 'last_compared_at')) <= 6:
			continue
		published = get(v, None, 'entity', 'metadata', 'publication_date')
		if published is None:
			continue

		vdata.append({
			'lng': lng,
			'cmp': cmp,
			'ind': indiv_score,
			'col': get(v, 0, 'collective_rating', 'tournesol_score'),
			'dur': math.sqrt(get(v, 0, 'entity', 'metadata', 'duration')),
			# max(0, ...): a publication date slightly in the future must not break sqrt
			'air': math.sqrt(max(0, days_since(published))),
			'full': v
		})

	# Range of each scored value among candidates, used to normalize differences
	spans = dict()
	if vdata:
		for key in ('ind', 'col', 'dur', 'air'):
			values = [d[key] for d in vdata]
			spans[key] = max(values) - min(values)

	def normdiff(key:str, v1, v2):
		"""Difference normalized by the range. A null range means all values are equal: no difference."""
		span = spans[key]
		return (v1[key] - v2[key]) / span if span else 0.0

	# Find best pair
	while True:
		bestpair = None
		bestfitness:float = math.inf

		for i1 in range(1,len(vdata)):
			v1 = vdata[i1]

			for v2 in vdata[0:i1]:
				if (v1['lng'] != v2['lng']
					or v1['cmp'] != v2['cmp']
					or DCC.have_cached_comparisons_shared(v1['full'], v2['full'])
				):
					continue

				# Get pair score
				fitness = (
					normdiff('ind', v1, v2)**2 * 2
					+ normdiff('col', v1, v2)**2 * 1
					+ normdiff('dur', v1, v2)**2 * 1
					+ normdiff('air', v1, v2)**2 * 0.5
					+ v1['cmp']/100
				)

				if fitness < bestfitness:
					bestfitness = fitness
					bestpair = (v1['full'], v2['full'])

		if not bestpair:
			return None

		# Full check (may download data). When the pair is rejected, the cache now knows it:
		# the next search skips this pair and looks for the next best one
		if not DCC.are_sharing_comparisons(bestpair[0], bestpair[1]):
			break

	DCC.addAsCompared(bestpair[0], bestpair[1])
	ab = rndAB()
	return f'[*] https://tournesol.app/comparison?uid{ab[0]}=' + bestpair[0]['entity']['uid'] + f"&uid{ab[1]}=" + bestpair[1]['entity']['uid']


In [44]:
# Comparisons Generator

def phase_init(compared:list, all_compared:list, rate_later:list, DCC: DistComparisonChecker, ignore: set[str]):
	"""
	Start a session: pair the first not ignored video of `rate_later` (removed from the list,
	as well as the ignored videos found before it) with the best video of `all_compared`.

	The best video of `all_compared` (ignored ones excluded) is chosen with this priority:
		- videos compared 2 times or less first, then the ones with the fewest comparisons
		- among them, the most recently compared one

	The comparison is registered in DCC.
	Returns a tuple (video taken from rate_later, comparison URL)
	"""
	while True:
		vid_new = rate_later.pop(0)
		if vid_new['entity']['uid'] not in ignore:
			break

	def priority(vdata):
		n_comparisons = get(vdata, 0, 'individual_rating', 'n_comparisons')
		# Under 2 comparisons => 1 || over 2 comparisons => -cmps
		rank = 1 if n_comparisons <= 2 else -n_comparisons
		return (rank, get(vdata, '0000-00-00 00:00', 'individual_rating', 'last_compared_at'))

	candidates = [v for v in all_compared if v['entity']['uid'] not in ignore]
	vid_old = max(candidates, key=priority)

	DCC.addAsCompared(vid_new, vid_old)
	(first, second) = rndAB()
	url = f'[+] https://tournesol.app/comparison?uid{first}=' + vid_new['entity']['uid'] + f"&uid{second}=" + vid_old['entity']['uid']
	return (vid_new, url)


def phase_intricate(vid_new, compared:list, DCC: DistComparisonChecker, ignore: set[str]):
	"""
	Pair `vid_new` with the first video of `compared` that is not ignored and that shares no
	comparison with it (see DCC.are_sharing_comparisons, based on
	https://api.tournesol.app/users/me/comparisons/videos/yt:<vid>/ ).

	The chosen video is removed from `compared` and the comparison is registered in DCC.

	Returns a tuple (vid_new, comparison URL), or (None, None) when no video of `compared` is suitable.
	A pair is returned in every case, as the caller unpacks the result as a tuple.
	"""
	for (i, candidate) in enumerate(compared):
		if candidate['entity']['uid'] in ignore:
			continue
		if DCC.are_sharing_comparisons(vid_new, candidate):
			continue

		# Safe to pop while iterating: the loop is left right after
		vid_old = compared.pop(i)
		DCC.addAsCompared(vid_new, vid_old)
		ab = rndAB()
		return (vid_new, f'[x] https://tournesol.app/comparison?uid{ab[0]}=' + vid_old['entity']['uid'] + f"&uid{ab[1]}=" + vid_new['entity']['uid'])

	return (None, None)


def phase_expand(vid_old, all_compared:list, compared:list, rate_later:list, DCC: DistComparisonChecker, ignore: set[str]):
	"""
	Pair `vid_old` with the first not ignored video of `rate_later`.

	The chosen video is removed from `rate_later` (as well as the ignored videos found before it)
	and the comparison is registered in DCC.
	`all_compared` and `compared` are not used.

	Returns a tuple (video taken from rate_later, comparison URL),
	or (None, None) when there is no more usable video in `rate_later`.
	"""
	vid_new = None
	while rate_later:
		candidate = rate_later.pop(0)
		if candidate['entity']['uid'] not in ignore:
			vid_new = candidate
			break

	if vid_new is None:
		# List exhausted: tell the caller to stop instead of failing on an empty list
		return (None, None)

	DCC.addAsCompared(vid_new, vid_old)
	ab = rndAB()
	return (vid_new, f'[+] https://tournesol.app/comparison?uid{ab[0]}=' + vid_new['entity']['uid'] + f"&uid{ab[1]}=" + vid_old['entity']['uid'])


def getComparisons(all_compared:list, low_compared: list, rate_later: list, DCC: DistComparisonChecker, ignore: set[str]):
	"""
	Generator of comparison suggestions (one URL per iteration).

	- Phase 0 (phase_init): first comparison with a video from rate_later, repeated while the video picked gets ignored
	- Then, in loop: a consolidation comparison when one is found (phase_consolidate), followed by
	  a comparison of the current video: phase_expand once it has 2 comparisons or more, phase_intricate otherwise

	`low_compared` and `rate_later` are copied: input lists are not modified.
	`ignore` is read again at every step, so that it can be completed between two iterations.
	When nothing more can be suggested, 'NO MORE' is yielded and the generator ends.
	"""
	def has_usable(videos:list):
		"""True if at least one video of the list is not ignored."""
		return any(v['entity']['uid'] not in ignore for v in videos)

	# Copy input lists
	low_compared = list(low_compared)
	rate_later = list(rate_later)

	# Phase 0
	while True:
		# phase_init needs one usable video in each list, it fails otherwise
		if not (has_usable(rate_later) and has_usable(low_compared)):
			yield 'NO MORE'
			return
		(vid, cmp) = phase_init(all_compared, low_compared, rate_later, DCC, ignore)
		yield cmp
		if vid['entity']['uid'] not in ignore:
			break

	while True:
		consolidated = phase_consolidate(all_compared, DCC, ignore)
		if consolidated:
			yield consolidated

		if get(vid, 0, 'individual_rating', 'n_comparisons') >= 2:
			if not has_usable(rate_later):
				break
			step = phase_expand(vid, all_compared, low_compared, rate_later, DCC, ignore)
		else:
			step = phase_intricate(vid, low_compared, DCC, ignore)

		# Phases report exhaustion with None or with a pair having no video
		if not step or not step[0]:
			break
		(newvid, cmp) = step
		yield cmp
		if newvid['entity']['uid'] not in ignore:
			vid = newvid

	yield 'NO MORE'

# Main part

There are 3 cells below:

- "INIT/RESET": To run once to initialize the tool
- "IGNORE": To add videos to not be compared
- "CONTINUE": To run as many times as you want, to get more comparison links generated

If at any time you make comparisons not suggested by the tool, please run INIT/RESET again to synchronize the tool with your current Tournesol account

In [58]:
# INIT/RESET ORDO (Replay this cell every time a comparison other than the ones suggested by this notebook has been made)
# Download the current state of the account
ALL_COMPARED, LOW_CMPS = get_already_compared()
RATE_LATER = get_rate_later(ALL_COMPARED)

# Fresh session state: comparisons cache, videos to ignore, suggestions history & generator
DCC = DistComparisonChecker()
ignore:set[str] = set()
comparisons = []
comparison = getComparisons(ALL_COMPARED, LOW_CMPS, RATE_LATER, DCC, ignore)

# Summary: number of videos available for each kind of suggestion
print('Initialized !\n')
print(f'[+]: {len(RATE_LATER)}')
print(f'[x]: {len(LOW_CMPS)}')
print(f'[*]: {len(ALL_COMPARED)}')

Extracting compared videos... 849/849 
Extracting rate_later list... 1000/1310 1310 
Initialized !

[+]: 1308
[x]: 74
[*]: 435


In [46]:
# IGNORE: Add here the videos to ignore, when a suggestion asks to compare a video you do not want to compare for now.
# You can add more videos between ORDO steps, then continue ORDO without running INIT/RESET again.

# ignore.add('yt:abcdefghijk')
print('Ignored videos:', len(ignore))

Ignored videos: 0


In [57]:
# CONTINUE ORDO (Replay this cell every time to get the next comparison)
# Print previous: the 5 last suggestions, numbered with their position in the session
if comparisons:
	print('Previous:')
	for (i,cmp) in enumerate(comparisons[-5:], start=max(1, len(comparisons)-4)):
		print(f'{i:4d}.', cmp)

# Print new
print()
# Once the generator is exhausted, keep answering 'NO MORE' instead of failing with StopIteration
comparisons.append(next(comparison, 'NO MORE'))
print('\nNew comparison:')
print(f'{len(comparisons):4d}.', comparisons[-1])

Previous:
   6. [*] https://tournesol.app/comparison?uidB=yt:aZK3aFuhVdg&uidA=yt:15lziZ2ch6M
   7. [+] https://tournesol.app/comparison?uidA=yt:aOYbR-Q_4Hs&uidB=yt:anSjZS63T7s
   8. [*] https://tournesol.app/comparison?uidA=yt:6YzrVUVO9M0&uidB=yt:cFslUSyfZPc
   9. [x] https://tournesol.app/comparison?uidB=yt:bIAF7kBbGKk&uidA=yt:aOYbR-Q_4Hs
  10. [*] https://tournesol.app/comparison?uidB=yt:EU3Ysuqv4sI&uidA=yt:hJe5MDMWOaU

Obtaining comparisons with yt:B1HCOiq5izk... 1/1 
Obtaining comparisons with yt:aOYbR-Q_4Hs... 2/2 

New comparison:
  11. [+] https://tournesol.app/comparison?uidA=yt:B1HCOiq5izk&uidB=yt:aOYbR-Q_4Hs


In [70]:
# Playground
def somestats(user):
	"""
	Print the recommendations of `user` rated by exactly 2 contributors and having a positive
	collective score, best score first (one line per video: score, uid, uploader and name).
	"""
	print(f"Extracting {user} recommendations...", end=' ')
	recommendations:list = callTournesolMulti(f"users/{user}/recommendations/videos", 'exclude_compared_entities=false&unsafe=false')

	def score_of(vdata):
		return get(vdata, 0, 'collective_rating', 'tournesol_score')

	selected = []
	for vdata in recommendations:
		if get(vdata, 0, 'collective_rating', 'n_contributors') == 2 and score_of(vdata) > 0:
			selected.append(vdata)

	# Stable sort: videos with the same score stay in the order they were received
	for vdata in sorted(selected, key=score_of, reverse=True):
		uploader = get(vdata, '???', 'entity', 'metadata', 'uploader')
		title = get(vdata, '???', 'entity', 'metadata', 'name')
		print(f"{score_of(vdata):3.0f}🌻 [{vdata['entity']['uid']}] {uploader}: {title}")


def public2contrib():
	"""
	Print, as a markdown document, the public recommendations (offsets 2000 to 19999) that have:
		- 1 or 2 contributors
		- a collective score of at least 5 per contributor
		- more than 2 comparisons per contributor
	ordered by descending rounded score, number of comparisons, number of contributors and publication date.
	"""
	print('Extracting recommendations...', end=' ')
	fetched:list = callTournesolMulti('polls/videos/recommendations/', 'exclude_compared_entities=false&unsafe=true', start=2000, end=19999)

	# Filter
	def matches(vdata):
		if not (1 <= get(vdata, 0, 'collective_rating', 'n_contributors') <= 2):
			return False
		rating = vdata['collective_rating']
		if not (get(vdata, 0, 'collective_rating', 'tournesol_score') / rating['n_contributors'] >= 5):
			return False
		return rating['n_comparisons'] / rating['n_contributors'] > 2

	# Order
	def rank(vdata):
		rating = vdata['collective_rating']
		return (
			round(rating['tournesol_score']),
			rating['n_comparisons'],
			rating['n_contributors'],
			get(vdata, '0000-00-00', 'entity', 'metadata', 'publication_date')
		)

	allRes = sorted(filter(matches, fetched), key=rank, reverse=True)

	# Group by channel (only used to count the channels)
	authors:dict[str,list] = dict()
	for vdata in allRes:
		uploader = get(vdata, '???', 'entity', 'metadata', 'uploader')
		if uploader not in authors:
			authors[uploader] = list()
		authors[uploader].append(vdata)
	print()

	print(f'''# Videos under 2 contributors

## Rules

{len(allRes)} videos by {len(authors)} different channels found matching these criteria:

- Total number of contributors = 1 or 2
- Global score: minimum 5🌻 for a single contributor, 10🌻 if 2 contributors
- Number of comparisons: minimum 3 for a single contributor, 5 if 2 contributors

## The list
''')

	def escapeMd(s:str):
		"""Escape the characters having a meaning in markdown (backslash first, not to escape the escapes)."""
		for special in ('\\', '[', ']', '_', '*'):
			s = s.replace(special, '\\' + special)
		return s
	
	for vdata in allRes:
		rating = vdata['collective_rating']
		uploader = get(vdata, '???', 'entity', 'metadata', 'uploader')
		title = escapeMd(get(vdata, '???', 'entity', 'metadata', 'name'))
		print(f"- {rating['tournesol_score']:2.0f}🌻 ({rating['n_comparisons']}c/{rating['n_contributors']}u) {uploader}: [{title}](https://tournesol.app/entities/{vdata['entity']['uid']})")

# Playground entry point: only public2contrib() is run; uncomment the line below to run somestats for a given user instead/as well
# somestats('NatNgs')
public2contrib()

Extracting recommendations... 1000/34524 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 

# Videos under 2 contributors

9581 videos by 2541 different channels found matching these criteria:

- Total number of contributors = 1 or 2
- Average user score >= 5🌻
- Average number of comparisons per user > 2

-----

- 20🌻 (33c/2u) Livres et Science: [ON A DÉCOUVERT 200 MOLÉCULES EXTRATERRESTRES !](https://tournesol.app/entities/yt:rofeHjDNE20)
- 20🌻 (17c/2u) BLAST, Le souffle de l'info: [ÉVASION FISCALE : LE FILM QUI EXPLIQUE TOUT](https://tournesol.app/entities/yt:c9eH-cEzM3I)
- 20🌻 (17c/2u) Antoine Goya: [Mes films préférés sortis en 2018](https://tournesol.app/entities/yt:33iifA563hQ)
- 20🌻 (14c/2u) Mathador: [MAJORANA :  LE GENIE DISPARU DE LA PHYSIQUE](https://tournesol.app/entities/yt:OMAz8DhLPmA)
- 20🌻 (14c/2u) Canard Réfractaire: [Notre PLAN pour CONQUÉRIR INTERNET ! 🤯](https://tournesol.app/entities/yt:vMpsnYKqpZY)
- 20🌻 (14c/2u) 