In [1]:
from bd import *
from rs import *
from  scrapper import *
import requests

 ········


In [2]:
# Fairly fast for many datapoints, less fast for many costs, somewhat readable
def is_pareto_efficient_simple(costs):
    """
    Flag the Pareto-efficient rows of a cost matrix (lower cost is better).

    Rows are visited in order; each row that is still alive acts as a pivot and
    eliminates every other alive row that is not strictly cheaper than it in at
    least one cost.

    :param costs: An (n_points, n_costs) array
    :return: A (n_points, ) boolean array, True where the point is kept as Pareto efficient
    """
    n_points = costs.shape[0]
    keep = np.ones(n_points, dtype=bool)
    for idx in range(n_points):
        if not keep[idx]:
            continue  # already eliminated by an earlier pivot
        pivot = costs[idx]
        alive = keep.copy()
        # A surviving point stays only if it beats the pivot on some cost.
        keep[alive] = (costs[alive] < pivot).any(axis=1)
        keep[idx] = True  # the pivot cannot beat itself, so re-admit it explicitly
    return keep

**Obtener películas de una lista**

In [3]:
def get_movies(idList):
    """
    Return the movies stored in a given list.

    Runs a parameterized SELECT on ``list_tittlelist`` through the module-level
    ``conn`` connection.

    :param idList: identifier of the list (matched against ``id_list``)
    :return: list with the ``id_tittlelist`` value of every matching row, in
             the order returned by the database, or ``None`` when no row matches
    """
    sql = "SELECT id_tittlelist FROM list_tittlelist where id_list=%s "
    with conn.cursor() as cursor:
        # Parameters must be a real 1-tuple: ``(idList)`` is just ``idList``.
        cursor.execute(sql, (idList,))
        rows = cursor.fetchall()

    if not rows:
        return None
    return [row[0] for row in rows]

**Obtener listas que contienen una película**

In [4]:
def get_lists(idMovie):
    """
    Return the lists that contain a given movie.

    Runs a parameterized SELECT on ``list_tittlelist`` through the module-level
    ``conn`` connection.

    :param idMovie: identifier of the movie (matched against ``id_tittlelist``)
    :return: list with the ``id_list`` value of every matching row, in the
             order returned by the database, or ``None`` when no row matches
    """
    sql = "SELECT id_list FROM list_tittlelist where id_tittlelist=%s "
    with conn.cursor() as cursor:
        # Parameters must be a real 1-tuple: ``(idMovie)`` is just ``idMovie``.
        cursor.execute(sql, (idMovie,))
        rows = cursor.fetchall()

    if not rows:
        return None
    return [row[0] for row in rows]

Projecting colors in the movie graph
--

Sea $F$ el conjunto de películas fuente. 
El grafo es bipartito: se compone de nodos de tipo *Película* y nodos de tipo *Lista de pelis*. Los nodos de tipo película `m` cuentan con dos atributos importantes:
* `color[m]`    Vector con valores en $[0,1]$, que indican la presencia de cada uno de los colores de las películas fuente.
* `c_propag[m]` Vector con el valor que se debe propagar en la siguiente iteración o *timestep* del algoritmo 

In [5]:
import numpy as np

#recommend(source_movies, max_listsize=npred, block_list=list_name, t=t) 
def recommend(F, max_listsize=10, block_list=None, t=0.1, steps=1):
    """
    Recommend movies related to the source movies by propagating "colors".

    Every source movie starts with a one-hot color vector whose 1 sits at the
    movie's index in ``F``. Colors are spread with ``propagate`` for ``steps``
    iterations. Recommendations are then collected front by front: the movies
    that are Pareto-efficient when maximizing every color component are taken
    first and removed, and the process repeats on the remaining movies.

    :param F: list of source movies
    :param max_listsize: stop as soon as this many recommendations are collected
    :param block_list: list identifier forwarded to ``propagate``, which skips it
    :param t: forwarded to ``propagate`` as ``t_factor``
    :param steps: number of ``propagate`` iterations to run
    :return: ``(recommendations, color)``. ``recommendations`` is a set of
             movies that never includes a source movie. ``color`` is the
             working dict movie -> color vector; fronts that were fully
             processed have been popped from it, so it only holds the movies
             still present when the function returned.
    """
    # Initialize the sources with one-hot colors.
    n_sources = len(F)
    sources = set(F)  # movies are dict keys below, so they are hashable
    color = dict()
    c_propag = dict()
    for i, m in enumerate(F):
        one_hot = np.zeros(n_sources)
        one_hot[i] = 1.0
        color[m] = one_hot
        c_propag[m] = one_hot.copy()

    # Propagate for a few iterations.
    for _ in range(steps):
        c_propag = propagate(c_propag, color, block_list=block_list, t_factor=t)

    # Peel Pareto fronts until enough movies are collected or none are left.
    recommendations = set()
    while True:
        # Colors are negated because the helper treats lower values as better.
        is_efficient = is_pareto_efficient_simple(-(np.array(list(color.values()))))
        to_remove = []
        for m, efficient in zip(color, is_efficient):
            if not efficient:
                continue
            to_remove.append(m)
            if m not in sources:
                recommendations.add(m)
                if len(recommendations) == max_listsize:
                    return recommendations, color
        if len(to_remove) == 0:
            return recommendations, color
        for m in to_remove:
            color.pop(m)
    
        


Notar que `F` es una lista con las películas fuente. Por lo que al colocar: `color[m][i]=1.0`, estamos creando un vector: $[0,...,1,...,0]$ donde el $1$ se encuentra en la posición correspondiente a la película fuente en la lista `F`. Lo mismo ocurre con `c_propag`. `M` es la colección con todas las películas.

`P` (`c_propag` en el código) es un diccionario que guarda en cada iteración las películas que debieran propagar sus valores. Cada película se asocia al vector de colores que se debe propagar.

La función `propagate` propaga los cambios a partir del diccionario de películas `P` y retorna un nuevo diccionario con las películas modificadas para seguir propagando.

In [6]:
def propagate(c_propag, color, block_list=None, t_factor=0.1):
    """
    Run one propagation step over the movie/list graph.

    For every movie ``m`` in ``c_propag`` and every list that contains it
    (except ``block_list``), each movie of that list receives
    ``c_propag[m] * t_factor / len(list)``. When a movie is reached more than
    once, the element-wise maximum of the received values is kept. The
    received values are finally added to ``color``, which is updated in place.

    :param c_propag: dict movie -> vector to propagate in this step
    :param color: dict movie -> accumulated color vector (mutated in place)
    :param block_list: list identifier to ignore while propagating
    :param t_factor: multiplier applied to every propagated vector
    :return: dict movie -> vector received in this step, i.e. the values to
             propagate in the next step
    """
    c_propag2 = dict()  # movies that will propagate in the next iteration
    for m, outgoing in c_propag.items():
        lists = get_lists(m)
        if not lists:
            continue  # get_lists returns None when the movie is in no list
        for l in lists:
            if l == block_list:
                continue

            mov_list = get_movies(l)
            if not mov_list:
                continue  # get_movies returns None for a list with no movies

            # Identical for every movie of this list, so compute it once.
            pvalue = (outgoing * t_factor) / len(mov_list)
            for adj_m in mov_list:
                if adj_m not in c_propag2:
                    # Copy so entries never share storage with each other.
                    c_propag2[adj_m] = np.copy(pvalue)
                else:
                    c_propag2[adj_m] = np.maximum(pvalue, c_propag2[adj_m])

    # Update the color of the reached movies.
    for m, incoming in c_propag2.items():
        if m not in color:
            # Copy: color[m] is later mutated with +=, which must not leak
            # into the returned propagation vectors.
            color[m] = np.copy(incoming)
        else:
            color[m] += incoming

    return c_propag2

In [7]:
# Demo: ask for up to 10 recommendations from four source titles, propagating
# for two steps and skipping the list "ms2019_ls1006" during propagation.
rec, color = recommend(
    ["Beanpole", "Give Me Liberty", "High Flying Bird", "Honeyland"],
    max_listsize=10,
    block_list="ms2019_ls1006",
    t=0.1,
    steps=2,
)
rec

#validate("ms2019_ls1002", t=0.1, nsample=3, npred=5)

{'A Beautiful Day in the Neighb',
 'Apollo 11',
 'Jojo Rabbit',
 'Knives Out',
 'Little Women',
 'Marriage Story',
 'Once Upon a Time in Hollywood',
 'Parasite',
 'The Irishman',
 'Uncut Gems'}

In [8]:
# Pool with the movies of every consecutively numbered list "ms2019_ls1001",
# "ms2019_ls1002", ...; a movie shared by several lists is repeated.
# NOTE(review): `id` shadows the builtin of the same name; the name is kept
# because other cells may read it.
all_movies = []
id = 1001
while existeEnListTittleList("ms2019_ls{}".format(id)):
    all_movies.extend(get_movies("ms2019_ls{}".format(id)))
    id += 1

In [9]:
import random
random.seed(1)
def validate(list_name, nsample=3, npred=5, t=0.2, ntrials=10):
    """
    Evaluate the recommender on one list and print a tab-separated result row.

    One occurrence of each movie of the list is taken out of the global
    ``all_movies`` pool, and only the list's movies that still appear in the
    pool are kept. If at least ``nsample + 2`` movies remain, ``ntrials``
    trials are run: ``nsample`` movies are drawn as sources, ``recommend`` is
    asked for ``npred`` movies with this list blocked, and the hits against
    the list are compared with those of ``npred`` movies drawn at random from
    the pool. The pool is always restored before returning, even on error;
    the removed movies are re-appended at its end.

    Reads the module-level ``verbose`` flag to enable debug prints.

    :param list_name: identifier of the list to evaluate
    :param nsample: number of source movies drawn per trial
    :param npred: number of movies requested per trial
    :param t: forwarded to ``recommend`` as ``t``
    :param ntrials: number of trials to average over (must be > 0)
    :return: None; the row is printed as
             list name, random success, recommender success, mean number of
             recommendations, number of usable movies in the list
    """
    M = get_movies(list_name)  # movies of the list under evaluation
    if not M:
        return  # get_movies returns None for an unknown/empty list

    # Remove one occurrence per movie, remembering what was actually removed
    # so the pool can be restored exactly.
    removed = []
    for m in M:
        if m in all_movies:
            all_movies.remove(m)
            removed.append(m)

    try:
        if verbose: print("movies:", M)

        # Keep only the movies that still appear in the pool after the removal.
        M = [x for x in M if x in all_movies]

        if len(M) >= nsample + 2:
            rw_tot_success = 0
            rand_tot_success = 0
            tot_size = 0
            num = min(npred, len(M) - nsample)  # best achievable number of hits

            for _ in range(ntrials):
                # Recommendation using the propagation-based recommender.
                source_movies = random.sample(M, nsample)
                if verbose: print("source_movies:", source_movies)

                recommendations, _color = recommend(
                    source_movies, max_listsize=npred, block_list=list_name, t=t)
                rw_tot_success += len(recommendations.intersection(M)) / float(num)
                tot_size += len(recommendations)
                if verbose: print("rw_recommendations:", recommendations)

                # Random baseline.
                recommendations = set(random.sample(all_movies, npred))
                MM = [x for x in M if x not in source_movies]
                rand_tot_success += len(recommendations.intersection(MM)) / float(num)
                if verbose: print("r_recommendations", recommendations)

            trials = float(ntrials)
            print("\t".join([
                list_name,
                str(rand_tot_success / trials),
                str(rw_tot_success / trials),
                str(tot_size / trials),
                str(len(M)),
            ]))
    finally:
        # Put the removed movies back so the global pool survives any failure.
        all_movies.extend(removed)

verbose = False  # read by validate(); False disables its per-trial debug prints
print("lista \tr_success \trw_success \t#recom \t lenM")
# Evaluate the lists "ms2019_ls1001" through "ms2019_ls1245", one row each.
for i in range(1, 246):
    validate("ms2019_ls{}".format(1000 + i), t=1.0, nsample=5, npred=10)

lista 	r_success 	rw_success 	#recom 	 lenM
ms2019_ls1001	0.13999999999999999	0.3	10.0	10
ms2019_ls1002	0.275	0.65	10.0	9
ms2019_ls1003	0.1	0.0	10.0	7
ms2019_ls1006	0.05	0.125	10.0	9
ms2019_ls1007	0.19999999999999998	0.2333333333333333	10.0	8
ms2019_ls1008	0.15	0.35	10.0	9
ms2019_ls1009	0.16666666666666666	0.23333333333333334	10.0	8
ms2019_ls1010	0.225	0.525	10.0	9
ms2019_ls1011	0.18333333333333335	0.5333333333333332	10.0	11
ms2019_ls1012	0.35	0.725	10.0	9
ms2019_ls1013	0.225	0.225	10.0	9
ms2019_ls1014	0.2333333333333333	0.41666666666666663	10.0	11
ms2019_ls1015	0.125	0.35	10.0	9
ms2019_ls1016	0.25	0.4	10.0	9
ms2019_ls1017	0.225	0.375	10.0	9
ms2019_ls1018	0.25	0.525	10.0	9
ms2019_ls1019	0.1	0.18	10.0	10
ms2019_ls1020	0.16666666666666666	0.19999999999999998	10.0	8
ms2019_ls1021	0.225	0.225	10.0	9
ms2019_ls1022	0.27999999999999997	0.48	10.0	10
ms2019_ls1023	0.2333333333333333	0.19999999999999998	10.0	8
ms2019_ls1024	0.325	0.375	10.0	9
ms2019_ls1025	0.13333333333333333	0.16666666666666666