# Install requirements

`pip install -r requirements.txt`

# Data setup

## Load existing dataset

In [1]:
import pandas as pd

# Read a previously saved dataset snapshot into `df`.
snapshot_csv = 'tournesol_scores_above_20_2024-03-15.csv'
df = pd.read_csv(snapshot_csv)

## Create dataset

Download the video scores and build the dataset

In [2]:
import datetime
import io
import zipfile

import pandas as pd
import requests

from dataset import build_dataset

# Download the Tournesol scores export (a zip archive held in memory).
response = requests.get("https://api.tournesol.app/exports/all")
# Fail right away with a clear HTTP error instead of an opaque BadZipFile
# further down when the server answers with an error body.
response.raise_for_status()

# Parse the collective scores CSV straight from the archive; the context
# managers close both the archive and the member once the CSV is loaded.
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
    with zip_file.open("collective_criteria_scores.csv") as scores_file:
        collective_scores = pd.read_csv(scores_file)

# Request YouTube metadata: publication date, title and channel
df = build_dataset(collective_scores)  # only keeps videos with a tournesol score above 20

# Save the dataframe under a file name stamped with today's date
df.to_csv('tournesol_scores_above_20_' + datetime.date.today().isoformat() + '.csv')

# Test setup

In [2]:
import datetime

from utils import get_age_in_days, construct_L_Ensemble

# Test parameters
bundle_size = 4
recent_videos_proportion = 0.75

# Split the bundle between recent and old videos. The old count is the
# remainder of the bundle rather than a second truncated product, so the two
# counts always add up to bundle_size (truncating both sides independently can
# lose a video, e.g. bundle_size=10 with proportion 0.9 yields 9 + 0).
recent_videos_to_sample = int(bundle_size * recent_videos_proportion)
old_videos_to_sample = bundle_size - recent_videos_to_sample

# Videos at most this many days old count as "recent"
recent_videos_max_age_in_days = 21

# Compute the age of each video, relative to the moment this cell runs
today = datetime.datetime.today()
df['age_in_days'] = df.apply(lambda row: get_age_in_days(row, today), axis="columns")

# Construct the L-ensemble
power = 2.8  # Model parameters
discount = 7.3
caracteristic_time = 31

dpp = construct_L_Ensemble(df, power, discount, caracteristic_time)

L = Phi.T Phi was computed: Phi (dxN) with d>=N


# Sample and display bundles

In [9]:
import requests
import ipywidgets as widgets

from utils import make_box_for_grid

def _download_thumbnail(video_id):
    """Download the YouTube thumbnail of `video_id` into the thumbnails directory."""
    thumbnail_url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    response = requests.get(thumbnail_url)
    # `with` guarantees the file is flushed and closed before it is read back.
    with open('thumbnails/' + video_id + '.jpg', 'wb') as thumbnail_file:
        thumbnail_file.write(response.content)


def _build_bundle_row(sample_df, layout):
    """Build one HBox holding a box per video of `sample_df` plus a button."""
    boxes = []
    for video_id in sample_df['video']:
        same_video = sample_df['video'] == video_id
        video_title = sample_df.loc[same_video, 'title'].to_string(index=False)
        video_channel = sample_df.loc[same_video, 'channel'].to_string(index=False)

        with open('thumbnails/' + video_id + '.jpg', 'rb') as thumbnail_file:
            image = widgets.Image(value=thumbnail_file.read())
        image.layout.object_fit = 'contain'

        boxes.append(make_box_for_grid(image, video_title, video_channel))

    # NOTE: no click handler is attached to this button yet.
    boxes.append(widgets.Button(description="Preferred bundle"))

    row = widgets.HBox()
    row.layout = layout
    row.children = boxes
    return row


def construct_bundles_widget(uniform_sample_df, dpp_sample_df):
    """Build a widget showing the DPP bundle above the uniform bundle.

    Thumbnails of every sampled video are downloaded into the local
    'thumbnails' directory (created if missing), then each bundle is laid
    out as a horizontal row of video boxes followed by a "Preferred bundle"
    button.

    Parameters
    ----------
    uniform_sample_df : DataFrame with 'video', 'title' and 'channel' columns
        Videos of the uniformly sampled bundle.
    dpp_sample_df : DataFrame with 'video', 'title' and 'channel' columns
        Videos of the DPP sampled bundle.

    Returns
    -------
    ipywidgets.VBox
        Vertical box whose children are the DPP row, then the uniform row.
    """
    import os

    # The downloads below write into this directory; make sure it exists.
    os.makedirs('thumbnails', exist_ok=True)

    # Download thumbnails in the thumbnails directory
    for sample_df in (uniform_sample_df, dpp_sample_df):
        for video_id in sample_df['video']:
            _download_thumbnail(video_id)

    # Widget layout, shared by both rows
    hbox_layout = widgets.Layout()
    hbox_layout.width = '100%'
    hbox_layout.justify_content = 'space-around'

    uniform_hb = _build_bundle_row(uniform_sample_df, hbox_layout)
    dpp_hb = _build_bundle_row(dpp_sample_df, hbox_layout)

    # Compose into a vertical box
    vb = widgets.VBox()
    vb.layout.align_items = "center"
    vb.children = [dpp_hb, uniform_hb]
    return vb


In [11]:
pd.set_option('display.max_colwidth', 999)  # ensures the display of whole video titles

# Uniform sampling: recent and old videos are drawn from disjoint pools, split
# at recent_videos_max_age_in_days. Old videos must be strictly older than the
# threshold (they were previously drawn from the recent pool by mistake).
recent_videos = df.loc[df['age_in_days'] <= recent_videos_max_age_in_days]
old_videos = df.loc[df['age_in_days'] > recent_videos_max_age_in_days]

recent_videos_sample = recent_videos.sample(n=recent_videos_to_sample, replace=False)
old_videos_sample = old_videos.sample(n=old_videos_to_sample, replace=False)

uniform_sample = pd.concat([recent_videos_sample, old_videos_sample])

# DPP sampling: the sampler's output is used as positional row indices into df
dpp_sample = df.iloc[dpp.sample_exact_k_dpp(size=bundle_size)]

# Display bundles
vb = construct_bundles_widget(uniform_sample, dpp_sample)
vb

VBox(children=(HBox(children=(VBox(children=(Box(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\…