In [None]:
# Python dependencies -- REQUIREMENT: Python >=3.9, <3.12
%pip install --upgrade pip
%pip install --upgrade setuptools wheel networkx matplotlib scipy ipywidgets
%pip install "git+https://github.com/tournesol-app/tournesol.git@solidago-pipeline#egg=solidago&subdirectory=solidago"

# If anything was installed, restart the notebook kernel

In [57]:
# Imports
import math
import time

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from matplotlib.axes import Axes
from solidago.pipeline.inputs import TournesolInputFromPublicDataset

# Shared dataset object: later cells read `.comparisons` and `.entity_id_to_video_id` from it.
# NOTE(review): `download()` presumably fetches the public dataset over the network -- confirm.
PUBLIC_DATASET = TournesolInputFromPublicDataset.download()

In [58]:
# Public username whose comparisons are selected (matched against `public_username`) in the cells below
USER_TO_PLOT = 'NatNgs'

-----

# User comparisons Graph

In [None]:
# Load comparisons
# Select the chosen user's 'largely_recommended' rows with a single combined mask
all_comparisons = PUBLIC_DATASET.comparisons
is_selected = (
    (all_comparisons.public_username == USER_TO_PLOT)
    & (all_comparisons.criteria == 'largely_recommended')
)
# Swap each entity id for its video id (looked up through the entity id index)
user_comparisons = (
    all_comparisons.loc[is_selected, ['entity_a', 'entity_b']]
    .merge(PUBLIC_DATASET.entity_id_to_video_id, left_on='entity_a', right_index=True)
    .drop('entity_a', axis=1).rename(columns={'video_id': 'vid_a'})
    .merge(PUBLIC_DATASET.entity_id_to_video_id, left_on='entity_b', right_index=True)
    .drop('entity_b', axis=1).rename(columns={'video_id': 'vid_b'})
)
if user_comparisons.empty:
	# Fail with an explicit message instead of the cryptic "max() arg is an empty sequence" below
	raise ValueError(f"No 'largely_recommended' comparisons found for user {USER_TO_PLOT!r}")
# A list (not a one-shot zip iterator) so the pairs can be inspected or reused after building the graph
user_comparisons_list = list(zip(user_comparisons['vid_a'], user_comparisons['vid_b']))

graph = nx.Graph()
graph.add_edges_from(user_comparisons_list)
# Keep only the largest connected component; every other node is removed from the graph
largest_group = max(nx.connected_components(graph), key=len)
graph.remove_nodes_from([n for n in graph.nodes if n not in largest_group])
print('Loaded', graph)

In [61]:
# Prepare graph layout
# Start from an equidistant spiral, then run a short spring-layout pass from those positions
spiral_pos = nx.spiral_layout(graph, equidistant=True)
pos = nx.spring_layout(graph, pos=spiral_pos, iterations=10)

def improve_graph_pos(time_to_run:int, pos:dict[str,list[float]], callback=None, target_refresh_interval:int=None):
	"""Refine node positions of the module-level `graph` with repeated spring-layout passes.

	Each pass calls `nx.spring_layout` starting from the previous positions. The
	size of the next pass is derived from the measured speed so that the remaining
	time budget is spread over the remaining planned passes.

	Args:
		time_to_run: Time budget in seconds. A value <= 0 returns `pos` untouched.
		pos: Initial positions, forwarded as the `pos` argument of `nx.spring_layout`.
		callback: Optional callable invoked with the updated positions after every pass.
		target_refresh_interval: Desired number of seconds between passes. When None
			or not positive, 10 passes are planned instead.

	Returns:
		The positions produced by the last pass, or `pos` itself if no pass ran.
	"""
	# perf_counter is monotonic and high resolution, unlike the wall clock
	start = time.perf_counter()

	if target_refresh_interval is not None and target_refresh_interval > 0:
		target_total_it_count = math.ceil(time_to_run / target_refresh_interval)
	else:
		target_total_it_count = 10
	iterations_count = 10
	total_iterations = 0
	timer_a = time.perf_counter()
	loops_count = 0
	while timer_a - start < time_to_run:
		loops_count += 1
		# Move nodes towards each other if connected, move them apart from each other if not connected
		pos = nx.spring_layout(graph, pos=pos, iterations=iterations_count)
		total_iterations += iterations_count
		timer_b = time.perf_counter()
		# Clamp the measured duration so a (near-)zero reading cannot raise ZeroDivisionError
		speed = iterations_count / max(timer_b - timer_a, 1e-6)
		expected_remaining_iterations = speed * (time_to_run - timer_b + start)
		if callback:
			callback(pos)
		print(f"Iterations: {total_iterations}/{total_iterations + expected_remaining_iterations:.0f} -- Time: {timer_b-start:.1f}/{time_to_run}s -- Speed: {speed:.1f}/s")
		# Spread the remaining iterations over the passes still planned (at least one)
		remaining_loops = target_total_it_count - loops_count if loops_count < target_total_it_count else 1
		next_iteration_count = int(math.ceil(expected_remaining_iterations / remaining_loops))
		if loops_count > target_total_it_count or (next_iteration_count > iterations_count*2 and loops_count > 1):
			# Spring Layout may stop iterating if found an equilibrium. Try to detect this event and stop before max_duration
			break
		# Prepare next iteration
		iterations_count = next_iteration_count
		timer_a = timer_b

	return pos

In [None]:
# Display graph
def user_comparisons_graph(G: nx.Graph):
	"""Build a plotly FigureWidget of `G`, positioned with the module-level `pos`.

	Trace 0 holds all edges as one grey line trace, trace 1 holds the nodes as
	markers coloured and sized by their number of neighbours in `G`. Edges and
	nodes without an entry in `pos` are left out.

	Args:
		G: Graph to draw.

	Returns:
		A `go.FigureWidget` with a 1:1 aspect ratio and hidden axes decorations.
	"""
	# Each edge contributes its two endpoints followed by a None separator
	edge_x = []
	edge_y = []
	for node_a, node_b in G.edges():
		if node_a in pos and node_b in pos:
			x0, y0 = pos[node_a]
			x1, y1 = pos[node_b]
			edge_x.extend((x0, x1, None))
			edge_y.extend((y0, y1, None))

	scatter_edges = go.Scatter(
		x=edge_x, y=edge_y,
		line=dict(
			width=0.5,
			color='#888',
		),
		hoverinfo='none',
		mode='lines',
	)

	# Coordinates, neighbour counts and hover texts are collected in the same pass
	# over G (not the global graph) so the per-node arrays always stay aligned.
	node_x = []
	node_y = []
	node_adjacencies = []
	node_text = []
	for node, adjacencies in G.adjacency():
		if node not in pos:
			continue
		x, y = pos[node]
		node_x.append(x)
		node_y.append(y)
		node_adjacencies.append(len(adjacencies))
		node_text.append(f"{node}<br>{len(adjacencies)} public comparisons")

	scatter_nodes = go.Scatter(
		x=node_x, y=node_y,
		mode='markers',
		hoverinfo='text',
		marker=dict(
			colorscale='Portland',
			reversescale=True,
			color=node_adjacencies,
			size=[2+math.sqrt(adj)*2 for adj in node_adjacencies],
			line=dict(width=0),
		),
		text=node_text,
	)

	fig = go.FigureWidget(data=[scatter_edges, scatter_nodes],
		layout=go.Layout(
			showlegend=False,
			hovermode='closest',
			margin=dict(b=0,l=0,r=0,t=0),
			xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
			yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
			height=720,
		),
	)

	# Fix aspect ratio
	fig.update_yaxes(scaleanchor="x", scaleratio=1)

	return fig

# Keep the widget in `fig`: the `onupdate` callback defined further down writes new coordinates into it
fig = user_comparisons_graph(graph)
# Last expression of the cell, so the widget is displayed as the cell output
fig

In [None]:
# Improve graph above
# Time budget passed to improve_graph_pos as its `time_to_run` argument
TIME_TO_RUN = 60 # Seconds, the longer the prettier

def onupdate(pos):
	"""Write the coordinates from `pos` into the two traces of the module-level `fig`.

	Trace 0 receives the edge coordinates of the module-level `graph`, trace 1 the
	node coordinates; both are assigned inside a single `fig.batch_update()` block.
	"""
	# Edges: endpoint pair then a None separator; edges with an unplaced endpoint are skipped
	edge_x, edge_y = [], []
	for src, dst in graph.edges():
		if not (src in pos and dst in pos):
			continue
		x0, y0 = pos[src]
		x1, y1 = pos[dst]
		edge_x += [x0, x1, None]
		edge_y += [y0, y1, None]

	# Nodes: one (x, y) pair per node, in graph.nodes order
	node_x, node_y = [], []
	for x, y in (pos[node] for node in graph.nodes):
		node_x.append(x)
		node_y.append(y)

	with fig.batch_update():
		edge_trace = fig.data[0]
		edge_trace['x'] = edge_x
		edge_trace['y'] = edge_y
		node_trace = fig.data[1]
		node_trace['x'] = node_x
		node_trace['y'] = node_y

# `pos` is both input and output, so re-running this cell resumes from the current layout;
# `onupdate` is called with the new positions after every pass
pos = improve_graph_pos(TIME_TO_RUN, pos, callback=onupdate, target_refresh_interval=1)

-----

# User criteria statistics

In [None]:
# User cursors position
def user_histogram(ax: Axes, username: str, CRITERION: str, title: bool=False):
	"""Plot how `username` distributed their comparison scores for one criterion.

	Draws one bar per unit-wide bin between -10.5 and 10.5 (every fifth bar in a
	stronger blue) plus an orange zero-centred bell curve scaled to the vote count.

	Args:
		ax: Matplotlib axes to draw on.
		username: Value matched against the `public_username` column of the comparisons.
		CRITERION: Value matched against the `criteria` column of the comparisons.
		title: If True, label both axes and tick every score; otherwise use the
			criterion as the axes title and tick every 5 scores.
	"""
	comparisons = PUBLIC_DATASET.comparisons
	selected = (comparisons.public_username == username) & (comparisons.criteria == CRITERION)
	votes = comparisons.loc[selected, 'score']

	ax.set_axisbelow(True)

	ax.set_xlim(xmin=-10.5, xmax=10.5)
	ax.xaxis.set_ticks(range(-10,11,1 if title else 5))
	ax.tick_params(axis='x', length=0)

	# Count with the same fixed edges that are drawn, so each bar sits on its own score
	# even when the user's scores do not span the full axis.
	# NOTE(review): scores outside [-10.5, 10.5] are not counted -- confirm the score range.
	bins = [i/10.0 for i in range(-105,106,10)]
	counts, _ = np.histogram(votes, bins=bins)
	counts_highlights = [(b if i%5 == 0 else 0) for i,b in enumerate(counts, -10)]
	counts_others = [(b if i%5 != 0 else 0) for i,b in enumerate(counts, -10)]

	ax.hist(bins[:-1], bins, weights=counts_others, align='mid', color='#0088AA')
	ax.hist(bins[:-1], bins, weights=counts_highlights, align='mid', color='#0022FF')
	for i in bins: # Plot white lines to separate columns
		ax.axvline(i, color='white')

	if title:
		ax.set_xlabel(CRITERION + ' by "' + username + '"')
		ax.set_ylabel('Number of comparisons')
	else:
		ax.set_title(CRITERION)

	# Plot orange bell curve: zero-centred, 1.25x the votes' standard deviation, scaled by vote count
	stdv = np.std(votes)*1.25 if len(votes) else 0.0
	if stdv > 0: # Skipped when there is no spread (no votes, or all equal): the curve would be NaN/inf
		reg_x = np.arange(-10.5, 10.5, 0.1)
		reg_y = np.exp(-np.square(reg_x/stdv)/2)/(stdv*np.sqrt(2*np.pi)) * len(votes)
		ax.plot(reg_x, reg_y, color='orange', alpha=.5)

	ax.set_ylim(bottom=0)


# Largely recommended
fig, ax = plt.subplots(figsize=(8, 6))
user_histogram(ax, USER_TO_PLOT, 'largely_recommended', True)

# Sub criteria, listed row by row of the 3x3 grid
SUB_CRITERIA = [
	'reliability', 'pedagogy', 'importance',
	'layman_friendly', 'entertaining_relaxing', 'engaging',
	'diversity_inclusion', 'better_habits', 'backfire_risk',
]
# The size is given at creation so that tight_layout computes margins for the final size
fig, ax = plt.subplots(3, 3, figsize=(8, 6))
for sub_ax, criterion in zip(ax.flat, SUB_CRITERIA):
	user_histogram(sub_ax, USER_TO_PLOT, criterion)
fig.tight_layout()