# Install requirements

`pip install -r requirements.txt`

# Data set up 

## Load existing dataset

In [1]:
import pandas as pd

# Reuse a previously exported snapshot instead of rebuilding the dataset.
# NOTE(review): if the snapshot was written by the export cell with the
# default to_csv index, this frame carries an extra unnamed index column — confirm.
df = pd.read_csv(
    'tournesol_scores_above_20_2024-03-15.csv',
)

## Create dataset

Download video scores

In [2]:
import datetime
import io
import zipfile

import requests
import pandas as pd

from dataset import build_dataset

# Download the Tournesol public export (a zip archive of CSV files).
response = requests.get("https://api.tournesol.app/exports/all")
# Fail loudly on an HTTP error instead of handing an error page to ZipFile.
response.raise_for_status()

# Context managers close both the archive and the member stream once parsed.
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
    with zip_file.open("collective_criteria_scores.csv") as scores_file:
        collective_scores = pd.read_csv(scores_file)

# Request YouTube metadata: publication date, title and channel.
df = build_dataset(collective_scores)  # only keeps videos with a tournesol score above 20

# Save the dataframe under a file name stamped with today's date.
df.to_csv('tournesol_scores_above_20_' + datetime.date.today().isoformat() + '.csv')

# Tests set up

In [47]:
import datetime

from utils import get_age_in_days, construct_L_Ensemble

# Test parameters
bundle_size = 4
recent_videos_proportion = 0.75

# Round the recent share and give the remainder to old videos, so the two
# counts always add up to bundle_size (two independent int() truncations
# could drop a slot for some size/proportion combinations).
recent_videos_to_sample = round(bundle_size * recent_videos_proportion)
old_videos_to_sample = bundle_size - recent_videos_to_sample

# Videos at most this many days old count as "recent".
recent_videos_max_age_in_days = 21

# Compute the age of each video relative to now.
today = datetime.datetime.today()
df['age_in_days'] = df.apply(lambda x: get_age_in_days(x, today), axis="columns")

# Construct the L-Ensemble; the three values below are the model parameters
# handed to construct_L_Ensemble.
power = 2.8
discount = 7.3
caracteristic_time = 31

dpp = construct_L_Ensemble(df, power, discount, caracteristic_time)

# Load the preference tallies saved by a previous session, if any.
try:
    # The file is written with Series.to_csv, so the labels ('dpp', 'uniform')
    # are in the first column. Read them back as the index and squeeze the
    # single remaining column into a Series; a plain read_csv would return a
    # DataFrame on which `series['dpp'] += 1` raises a KeyError.
    preferences_results_series = pd.read_csv('preferences_results.csv', index_col=0).squeeze("columns")
except FileNotFoundError:
    # First run: start both counters at zero.
    preferences_results_series = pd.Series(data={'dpp':0, 'uniform':0})

L = Phi.T Phi was computed: Phi (dxN) with d>=N


# Sample and display bundles

In [26]:
pd.set_option('display.max_colwidth', 999) #ensures the display of whole video titles

# Uniform sampling: recent videos come from the pool at or below the age
# threshold, old videos from the pool strictly above it. Both pools previously
# used the same `<= 21` filter, so the "old" sample was drawn from recent videos.
recent_videos_sample = df.loc[df['age_in_days'] <= recent_videos_max_age_in_days].sample(n=recent_videos_to_sample, replace=False)
old_videos_sample = df.loc[df['age_in_days'] > recent_videos_max_age_in_days].sample(n=old_videos_to_sample, replace=False)

uniform_sample = pd.concat([recent_videos_sample, old_videos_sample])

# DPP sampling: the sampler's result is used as positional row indices into df.
dpp_sample = df.iloc[dpp.sample_exact_k_dpp(size=bundle_size)]



In [82]:
import random

import requests
import ipywidgets as widgets

from IPython.display import display
from utils import make_box_for_grid

def increment_preferences_results(button, preferences_results_series, bundle_type, df, dpp):
    """Record one vote for ``bundle_type`` and show a new pair of bundles.

    Parameters
    ----------
    button : the clicked widget; not used by this function.
    preferences_results_series : tally indexed by bundle type, updated in place.
    bundle_type : label of the bundle that was preferred.
    df, dpp : forwarded unchanged to ``construct_bundles_widget``.
    """
    current_count = preferences_results_series[bundle_type]
    preferences_results_series[bundle_type] = current_count + 1
    construct_bundles_widget(df, dpp, preferences_results_series)
    

def download_thumbnails(id_series, path):
    """Download the ``mqdefault.jpg`` thumbnail of every video id.

    Parameters
    ----------
    id_series : iterable of str
        YouTube video ids.
    path : str
        Prefix prepended to ``<video_id>.jpg``; callers in this notebook pass
        a directory name with a trailing slash (``'thumbnails/'``).

    Each response body is written as-is; the HTTP status is not checked.
    """
    import os

    # Create the target directory if needed so a fresh checkout does not fail
    # with FileNotFoundError on the first write.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    for video_id in id_series:
        thumbnail_url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
        response = requests.get(thumbnail_url)
        # The context manager closes (and flushes) the file deterministically.
        with open(path + video_id + '.jpg', 'wb') as thumbnail_file:
            thumbnail_file.write(response.content)

def bundle_hbox(sample_df, bundle_type):
    """Build a horizontal box showing one bundle and its vote button.

    Parameters
    ----------
    sample_df : DataFrame
        Sampled videos; the 'video', 'title' and 'channel' columns are read.
    bundle_type : str
        Label passed to ``increment_preferences_results`` when the button is
        clicked.

    Returns
    -------
    widgets.HBox
        One box per video (built by ``make_box_for_grid``) followed by the
        "Preferred bundle" button.

    Notes
    -----
    Thumbnails are read from ``thumbnails/<video_id>.jpg`` and must already
    exist. The click handler uses the notebook-level ``df``, ``dpp`` and
    ``preferences_results_series``, not arguments of this function.
    """
    boxes = []
    for video_id in sample_df['video']:
        # Look the row(s) up once and reuse them for both text fields.
        video_rows = sample_df.loc[sample_df['video']==video_id]
        video_title = video_rows['title'].to_string(index=False)
        video_channel = video_rows['channel'].to_string(index=False)

        # Close the thumbnail file as soon as its bytes are read.
        with open('thumbnails/'+video_id+'.jpg', 'rb') as file:
            image = widgets.Image(value=file.read())
        image.layout.object_fit = 'contain'

        boxes.append(make_box_for_grid(image, video_title, video_channel))

    button = widgets.Button(description="Preferred bundle")
    button.on_click(lambda button: increment_preferences_results(button, preferences_results_series, bundle_type, df, dpp))
    boxes.append(button)

    hbox_layout = widgets.Layout()
    hbox_layout.width = '100%'
    hbox_layout.justify_content = 'space-around'

    hb = widgets.HBox()
    hb.layout = hbox_layout
    hb.children = boxes
    return hb
    
def construct_bundles_widget(df, dpp, preferences_results_series):
    """Sample one uniform and one DPP bundle and display them in random order.

    Parameters
    ----------
    df : DataFrame
        Video dataset; the 'age_in_days' and 'video' columns are read here.
    dpp : sampler exposing ``sample_exact_k_dpp(size=...)``, whose result is
        used as positional row indices into ``df``.
    preferences_results_series : vote tally; not read in this function body
        (the button handler built in ``bundle_hbox`` uses the notebook-level one).

    Also relies on the notebook-level ``bundle_size``,
    ``recent_videos_to_sample``, ``old_videos_to_sample`` and
    ``recent_videos_max_age_in_days``.
    """
    # Uniform sampling: recent videos are at or below the age threshold, old
    # videos strictly above it. Both pools previously used `<= 21`, so the
    # "old" part of the bundle was drawn from recent videos as well.
    recent_pool = df.loc[df['age_in_days'] <= recent_videos_max_age_in_days]
    old_pool = df.loc[df['age_in_days'] > recent_videos_max_age_in_days]
    recent_videos_sample = recent_pool.sample(n=recent_videos_to_sample, replace=False)
    old_videos_sample = old_pool.sample(n=old_videos_to_sample, replace=False)

    uniform_sample = pd.concat([recent_videos_sample, old_videos_sample])

    #DPP sampling
    dpp_sample = df.iloc[dpp.sample_exact_k_dpp(size=bundle_size)]

    #Download thumbnails in the thumbnails directory
    download_thumbnails(uniform_sample['video'], 'thumbnails/')
    download_thumbnails(dpp_sample['video'], 'thumbnails/')

    #Widget layout
    uniform_hb = bundle_hbox(uniform_sample, 'uniform')
    dpp_hb = bundle_hbox(dpp_sample, 'dpp')

    # Randomly compose into a vertical box so that position does not reveal
    # which sampler produced which bundle.
    vb = widgets.VBox()
    vb.layout.align_items = "center"
    if bool(random.getrandbits(1)):
        vb.children = [dpp_hb, uniform_hb]
    else:
        vb.children = [uniform_hb, dpp_hb]
    display(vb)



In [83]:
#Display bundles
# Shows the first pair of bundles; each click on a "Preferred bundle" button
# records a vote and calls construct_bundles_widget again for a new pair.

construct_bundles_widget(df, dpp, preferences_results_series)


VBox(children=(HBox(children=(VBox(children=(Box(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\…

In [85]:
# Current vote tally per bundle type.
preferences_results_series

dpp        18
uniform    11
dtype: int64

In [None]:
#Save preferences
# Writes the tally to the file that the test set-up cell tries to load.
preferences_results_series.to_csv('preferences_results.csv')