In [None]:
import os
import sys
import datetime
from IPython.display import display_markdown

# Make sure the notebook runs from the Tournesol-Stats root directory:
# when the current path contains a 'src' component, move to the parent
# of the last (deepest) 'src' directory.
_pwd = os.path.realpath('.').split(os.sep)
if 'src' in _pwd:
	# Index of the last 'src' component of the path
	_lastSrc = len(_pwd) - 1 - _pwd[::-1].index('src')
	# Drop 'src' and everything below it
	_pwd = _pwd[:_lastSrc]
	os.chdir(os.sep.join(_pwd))
# Show the directory the notebook is now running from
print(os.path.realpath('.'))

# Local project requirements
sys.path.append('src/py')
from dao.tournesol_api import TournesolAPI, get

In [None]:
# Module-level API client; _getContribs() below queries the recommendations through it.
TOURNESOL = TournesolAPI()

In [None]:
# All Tournesol videos having 1 or 2 contributors and a positive score

def sanitizeMd(s:str):
	"""Escape the Markdown special characters of a string.

	Every occurrence of one of the characters in MD_SPECIAL_CHARS (backslash,
	backtick, `*`, `_`, braces, brackets, parentheses, `#`, `+`, `-`, `.`, `!`)
	is prefixed with a backslash so that it is rendered literally.

	Args:
		s: Raw text to embed in a Markdown document.

	Returns:
		The text with each special character backslash-escaped.
	"""
	# The backslash is written as an explicit '\\': a lone backslash followed by a
	# backtick is an invalid escape sequence (SyntaxWarning on recent Python versions).
	MD_SPECIAL_CHARS = '\\`*_{}[]()#+-.!'
	# A single translate pass escapes each input character exactly once, so the
	# backslashes added here can never be escaped a second time.
	escapes = str.maketrans({char: '\\' + char for char in MD_SPECIAL_CHARS})
	return s.translate(escapes)

def _formatDuration(tdur:int) -> str:
	"""Format a duration in seconds as 'MM:SS', or 'H:MM:SS' from one hour up.

	Expects a non-negative integer number of seconds.
	"""
	(tmin, tsec) = divmod(tdur, 60)
	if tdur < 3600:
		# Minutes are space-padded to 2 characters, seconds zero-padded
		return f"{tmin:2d}:{tsec:02d}"
	(thour, tmin) = divmod(tmin, 60)
	return f"{thour}:{tmin:02d}:{tsec:02d}"

def _getContribs(lang:str, contributors:tuple[int,int], minscore:int):
	"""Extract the recommended videos of a language rated by few contributors.

	Queries the 'polls/videos/recommendations/' endpoint through the global
	TOURNESOL client (with `unsafe=true` and a language filter), then keeps the
	videos whose number of contributors lies within `contributors` (both bounds
	included) and whose Tournesol score is at least `minscore`.

	Args:
		lang: Language code inserted in the `metadata[language]` query filter.
		contributors: (min, max) accepted number of contributors, inclusive.
		minscore: Minimum Tournesol score of the kept videos. The same threshold
			is used by `fn_continue`, which checks the score of the last entry
			of a result list.

	Returns:
		A list of tuples
		`(score, contributors, comparisons, publication_date, duration, channel, name, url)`
		sorted in descending tuple order. The score is rounded, the duration is
		already formatted as text, and the channel and video names are
		Markdown-escaped.
	"""
	print('Extracting...')
	allRes:list = TOURNESOL.callTournesolMulti(
		'polls/videos/recommendations/',
		f'unsafe=true&metadata%5Blanguage%5D={lang}',
		# NOTE(review): presumably lets the client stop paging once scores drop
		# below minscore (results assumed sorted by decreasing score) — confirm
		# against TournesolAPI.callTournesolMulti
		fn_continue=lambda res: res[-1]['collective_rating']['tournesol_score'] >= minscore)

	allData = []
	for vdata in allRes:
		# Look each rating up once; it is used both for filtering and for the output
		tscore = get(vdata, 0, 'collective_rating', 'tournesol_score')
		tctrs = get(vdata, 0, 'collective_rating', 'n_contributors')
		if not (contributors[0] <= tctrs <= contributors[-1] and tscore >= minscore):
			continue
		allData.append((
			round(tscore),
			tctrs,
			get(vdata, 0, 'collective_rating', 'n_comparisons'),
			get(vdata, '???', 'entity', 'metadata', 'publication_date'),
			_formatDuration(get(vdata, 0, 'entity', 'metadata', 'duration')),
			sanitizeMd(get(vdata, '???', 'entity', 'metadata', 'uploader')),
			sanitizeMd(get(vdata, '???', 'entity', 'metadata', 'name')),
			f"https://tournesol.app/entities/{vdata['entity']['uid']}",
		))

	# The tuple ordering (score first, then contributors, comparisons, ...) fully
	# determines the result, so no preliminary sort of the raw data is needed.
	return sorted(allData, reverse=True)

def getContribByYear(lang:str, contributors:tuple[int,int], minscore:int):
	"""Extract the videos with _getContribs() and group them by publication year.

	The year is the first four characters of the publication date stored at
	index 3 of each video tuple.

	Returns:
		A tuple `(number of videos, number of distinct channels, videos by year)`
		where the last item maps each year to its list of video tuples.
	"""
	rows = _getContribs(lang=lang, contributors=contributors, minscore=minscore)

	# Index 5 of a video tuple is its channel name
	channels:set[str] = {row[5] for row in rows}

	byYear:dict[str,list[tuple]] = {}
	for row in rows:
		year = row[3][:4]
		if year not in byYear:
			byYear[year] = []
		byYear[year].append(row)

	return (len(rows), len(channels), byYear)

def getContribByAuthor(lang:str, contributors:tuple[int,int], minscore:int):
	"""Extract the videos with _getContribs() and group them by channel.

	The channel is the name stored at index 5 of each video tuple.

	Returns:
		A tuple `(number of videos, number of channels, videos by channel)`
		where the last item maps each channel to its list of video tuples.
	"""
	rows = _getContribs(lang=lang, contributors=contributors, minscore=minscore)

	byChannel:dict[str,list[tuple]] = {}
	for row in rows:
		channel = row[5]
		if channel not in byChannel:
			byChannel[channel] = []
		byChannel[channel].append(row)

	return (len(rows), len(byChannel), byChannel)



In [None]:
# 2 Contrib EN
dt = datetime.datetime.now()
(cnt_vids,cnt_authors,allRes) = getContribByAuthor('en', contributors=(2,2), minscore=15)

# Report header
markdown = [
	'# Tournesol missing contributors list\n',
	'Here are extracted all the videos in English that may go publically recommended with only one more contributor on [Tournesol](https://tournesol.app).\n',
	f"Extraction date: {str(dt)[:16]}\n",
	f"Found {cnt_vids} videos by {cnt_authors} channels\n",
]

# Channels having several listed videos get their own section.
# Sections are ordered by decreasing sum of the video scores (index 0 of each tuple).
# NOTE(review): an earlier comment announced "most to least compared"; confirm the score sum is the intended key.
for (channel, videos) in sorted(allRes.items(), key=lambda item: sum(v[0] for v in item[1]), reverse=True):
	if len(videos) <= 1:
		continue
	markdown.append(f'\n## {channel}')
	for (tscore, tctrs, tcmps, tdate, tminutes, tchannel, tname, turl) in sorted(videos, reverse=True):
		markdown.append(f"- {tscore:3.0f}🌻 ({tcmps} cmps / {tctrs} cntrs) \\[{tminutes}\\] [{tname}]({turl})")

# Channels having a single listed video are gathered in one last section,
# each line being prefixed with the channel name.
markdown.append('\n## _Others_')
others = [videos[0] for videos in allRes.values() if len(videos) == 1]
for (tscore, tctrs, tcmps, tdate, tminutes, tchannel, tname, turl) in sorted(others, reverse=True):
	markdown.append(f"- {tscore:3.0f}🌻 ({tcmps} cmps / {tctrs} cntrs) \\[{tminutes}\\] {tchannel}: [{tname}]({turl})")

display_markdown('\n'.join(markdown), raw=True)

In [None]:
# 1 Contrib EN
dt = datetime.datetime.now()
(cnt_vids,cnt_authors,allRes) = getContribByAuthor('en', contributors=(1,1), minscore=5)

# Report header
markdown = [
	'# Not enough comparisons on Tournesol\n',
	'Here are extracted all the videos in English that need multiple more contributors to be recommended on [Tournesol](https://tournesol.app).\n',
	f"Extraction date: {str(dt)[:16]}\n",
	f"Found {cnt_vids} videos by {cnt_authors} channels\n",
]

# One section per channel, starting with the channels having the most listed videos
for (channel, videos) in sorted(allRes.items(), key=lambda item: len(item[1]), reverse=True):
	markdown.append(f'\n## Channel {channel}')
	for (tscore, tctrs, tcmps, tdate, tminutes, tchannel, tname, turl) in sorted(videos, reverse=True):
		plural = 's' if tctrs > 1 else ''
		markdown.append(f"- {tscore:3.0f}🌻 ({tcmps} cmps / {tctrs} user{plural}) \\[{tminutes}\\] {tchannel}: [{tname}]({turl})")

display_markdown('\n'.join(markdown), raw=True)

In [None]:
# 2 Contrib FR
dt = datetime.datetime.now()
(cnt_vids,cnt_authors,allRes) = getContribByAuthor('fr', contributors=(2,2), minscore=15)

# Report header
markdown = [
	'# Liste de contributeurs manquants Tournesol\n',
	"Ci-dessous sont extraites l'ensemble des vidéos Francophones qui pourraient être publiquement recommandées avec un seul nouveau contributeur sur [Tournesol](https://tournesol.app).\n",
	f"Date de l'extraction: {str(dt)[:16]}\n",
	f"{cnt_vids} vidéos trouvées par {cnt_authors} chaines différentes",
]

# Channels having several listed videos get their own section.
# Sections are ordered by decreasing sum of the video scores (index 0 of each tuple).
# NOTE(review): an earlier comment announced "most to least compared"; confirm the score sum is the intended key.
for (channel, videos) in sorted(allRes.items(), key=lambda item: sum(v[0] for v in item[1]), reverse=True):
	if len(videos) <= 1:
		continue
	markdown.append(f'\n## {channel}')
	for (tscore, tctrs, tcmps, tdate, tminutes, tchannel, tname, turl) in sorted(videos, reverse=True):
		markdown.append(f"- {tscore:3.0f}🌻 ({tcmps} cmps / {tctrs} cntrs) \\[{tminutes}\\] [{tname}]({turl})")

# Channels having a single listed video are gathered in one last section,
# each line being prefixed with the channel name.
markdown.append('\n## _Autres_')
others = [videos[0] for videos in allRes.values() if len(videos) == 1]
for (tscore, tctrs, tcmps, tdate, tminutes, tchannel, tname, turl) in sorted(others, reverse=True):
	markdown.append(f"- {tscore:3.0f}🌻 ({tcmps} cmps / {tctrs} cntrs) \\[{tminutes}\\] {tchannel}: [{tname}]({turl})")

# NOTE(review): this cell prints the raw Markdown source whereas the other report
# cells render it with display_markdown — confirm this is intentional.
print('\n'.join(markdown))

In [None]:
# 1 Contrib FR
dt = datetime.datetime.now()
(cnt_vids,cnt_authors,allRes) = getContribByAuthor('fr', contributors=(1,1), minscore=5)

# Report header
markdown = [
	'# Vidéos faiblement comparées\n',
	"Ci-dessous sont extraites l'ensemble des vidéos Francophones auquelles il manque plusieurs contributeurs pour pouvoir être recommandées sur [Tournesol](https://tournesol.app).\n",
	f"Date de l'extraction: {str(dt)[:16]}\n",
	f"{cnt_vids} vidéos trouvées par {cnt_authors} chaines différentes",
]

# One section per channel, ordered by decreasing sum of the video scores (index 0 of each tuple)
for (channel, videos) in sorted(allRes.items(), key=lambda item: sum(v[0] for v in item[1]), reverse=True):
	markdown.append(f'\n## {channel}')
	for (tscore, tctrs, tcmps, tdate, tminutes, tchannel, tname, turl) in sorted(videos, reverse=True):
		cmpPlural = 's' if tcmps > 1 else ''
		ctrPlural = 's' if tctrs > 1 else ''
		markdown.append(f"- {tscore:3.0f}🌻 ({tcmps} comparaison{cmpPlural} / {tctrs} contributeur{ctrPlural}) \\[{tminutes}\\] {tchannel}: [{tname}]({turl})")

display_markdown('\n'.join(markdown), raw=True)