## Un código sencillo para entender el funcionamiento del SAR

In [8]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, lil_matrix

In [204]:
# Identity mapping (user id -> row index) for the ten demo users.
userIndexes = {user_id: user_id for user_id in range(10)}

# Demo catalogue; the matrix builders use an item's position in this list
# as its row/column index.
items = [
    "Bob Esponja",
    "Harry Potter",
    "Naruto",
    "Batman",
    "The Office",
    "Mafiosos asesinos",
    "Crimen en LA",
    "Suspenso importante",
    "Titanic",
    "Interstellar",
]

# Titles watched by each user id; ids 6-9 have no viewing history.
vistas_por_usuario = {
    0: ["Bob Esponja", "Naruto"],
    1: ["Mafiosos asesinos", "Crimen en LA"],
    2: ["Bob Esponja", "Harry Potter"],
    3: ["Titanic", "Interstellar", "The Office"],
    4: ["Bob Esponja", "The Office"],
    5: ["Bob Esponja", "Naruto"],
}

In [205]:
def build_items_viewers(vistas_por_usuario:dict) -> dict:
    """Invert a user -> watched-titles mapping into title -> viewers.

    Args:
        vistas_por_usuario: Maps each user to the list of titles they watched.

    Returns:
        A dict mapping each title to the list of users that watched it.
        Titles appear in first-seen order and users in iteration order; a
        title repeated in one user's list repeats that user in the result.
    """
    viewers_by_title = {}
    for user, watched in vistas_por_usuario.items():
        for title in watched:
            # setdefault creates the list the first time a title shows up.
            viewers_by_title.setdefault(title, []).append(user)
    return viewers_by_title

In [206]:
items_viewers = build_items_viewers(vistas_por_usuario)
items_viewers

{'Bob Esponja': [0, 2, 4, 5],
 'Naruto': [0, 5],
 'Mafiosos asesinos': [1],
 'Crimen en LA': [1],
 'Harry Potter': [2],
 'Titanic': [3],
 'Interstellar': [3],
 'The Office': [3, 4]}

In [207]:
def build_coocurrence_matrix(items:list, items_viewers:dict) -> lil_matrix:
    """Build the item-item co-occurrence matrix.

    Cell ``(i, j)`` holds the number of distinct users that watched both
    item ``i`` and item ``j``; the diagonal therefore holds the number of
    distinct viewers of each item. Items without viewers keep all-zero
    rows and columns.

    Args:
        items: Ordered catalogue of item names; an item's position in this
            list is its row/column index in the matrix.
        items_viewers: Maps an item name to the users that watched it.

    Returns:
        A ``len(items) x len(items)`` matrix in LIL format.

    Raises:
        ValueError: If ``items_viewers`` names an item missing from ``items``.
    """
    M = len(items)
    C = lil_matrix((M, M))

    print("* Armando matriz C...")

    # First position of every name, matching what list.index would return.
    position = {}
    for idx, name in enumerate(items):
        position.setdefault(name, idx)

    # Resolve indices and build the viewer sets once, instead of repeating
    # both inside the quadratic loop below.
    viewers = []
    for name, users in items_viewers.items():
        if name not in position:
            raise ValueError(f"{name!r} is not in list")
        viewers.append((position[name], set(users)))

    for index_i, users_i in viewers:
        for index_j, users_j in viewers:
            shared = len(users_i & users_j)
            if shared:  # the matrix starts empty, so zeros need no write
                C[index_i, index_j] = shared

    print("* Matriz creada ✔")
    return C

In [208]:
len(set(items_viewers["Naruto"]).intersection(set(items_viewers["Bob Esponja"])))

2

In [209]:
C = build_coocurrence_matrix(items, items_viewers)

* Armando matriz C...
* Matriz creada ✔


In [210]:
C.toarray()

array([[4., 1., 2., 0., 1., 0., 0., 0., 0., 0.],
       [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [2., 0., 2., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 2., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 1., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 1., 1.]])

In [211]:
# S is an alias of C: the raw co-occurrence counts are used as-is, with no
# rescaling applied.
# NOTE(review): presumably S plays the role of SAR's item-item similarity
# matrix — confirm.
S = C

In [212]:
items[0], items[2]

('Bob Esponja', 'Naruto')

In [213]:
def to_ls_dataset(items_viewers:dict) -> list:
    """Flatten a title -> viewers mapping into (user, title) pairs.

    Args:
        items_viewers: Maps each title to the list of users that watched it.

    Returns:
        A list of ``(user, title)`` tuples, grouped by title in the
        mapping's iteration order.
    """
    return [
        (viewer, title)
        for title, viewers in items_viewers.items()
        for viewer in viewers
    ]

In [214]:
ds = to_ls_dataset(items_viewers)

In [215]:
def compute_affinity_scores(dataset:list,
                            verb=False,
                            catalog=None,
                            n_users=None) -> lil_matrix:
    """Build the user-item affinity matrix from (user, item) interactions.

    Every interaction adds 1 to the cell at its user row and item column, so
    repeated interactions accumulate.

    Args:
        dataset: Iterable of ``(user_id, item_name)`` pairs. ``user_id`` is
            used directly as the row index.
        verb: Accepted for backward compatibility; currently unused.
        catalog: Ordered item names; an item's position is its column index.
            Defaults to the module-level ``items`` list.
        n_users: Number of rows of the matrix. Defaults to
            ``len(userIndexes)``.

    Returns:
        A ``(n_users, len(catalog))`` matrix of interaction counts in LIL
        format.

    Raises:
        ValueError: If an interaction names an item missing from the catalog.
    """
    if catalog is None:
        catalog = items
    if n_users is None:
        n_users = len(userIndexes)

    # Columns are addressed by catalogue position, so the matrix width is
    # the catalogue length. Resolve each name's first position once rather
    # than scanning the list for every interaction.
    column_of = {}
    for column, name in enumerate(catalog):
        column_of.setdefault(name, column)

    A = lil_matrix((n_users, len(catalog)))

    for user_id, peli in dataset:
        if peli not in column_of:
            raise ValueError(f"{peli!r} is not in list")
        # NOTE(review): every interaction weighs 1; the original
        # "score(time)" remark suggests a time-based weight was planned.
        A[user_id, column_of[peli]] += 1

    return A

In [216]:
A = compute_affinity_scores(ds)

In [217]:
A.toarray()

array([[1., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
       [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 1., 1.],
       [1., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [218]:
# Sparse matrix product of A (users x items) and S (items x items):
# Y[u, j] is the sum over items i of A[u, i] * S[i, j].
Y = A @ S

In [219]:
def maximo_score(lss:list) -> tuple:
    """Return the ``(index, score)`` pair with the highest score.

    Ties are resolved in favour of the pair that appears first. The maximum
    is taken over the actual scores, so lists whose scores are all negative
    are handled correctly.

    Args:
        lss: Iterable of ``(index, score)`` pairs.

    Returns:
        The winning ``(index, score)`` tuple, or ``(-1, -1)`` when ``lss``
        is empty.
    """
    # max() keeps the first of several equal maxima, like a strict ">" scan.
    index, mm = max(lss, key=lambda pair: pair[1], default=(-1, -1))
    return (index, mm)

In [220]:
maximo_score([(1,2),(3,5),(1,4)])

(3, 5)

In [221]:
def sort_2nd(ls:list) -> list:
    """Return the ``(index, score)`` pairs ordered by score, highest first.

    The sort is stable: pairs with equal scores keep their relative input
    order. The input list is not modified. Empty lists, negative scores and
    long lists are all supported.

    Args:
        ls: List of ``(index, score)`` pairs.

    Returns:
        A new list with the same pairs in descending score order.
    """
    # sorted() stays stable with reverse=True, so ties keep input order.
    return sorted(ls, key=lambda pair: pair[1], reverse=True)

In [222]:
item = "The Office"
i = items.index(item)

# Row i of S as (item index, score) pairs, ordered by score, highest first.
fila_i = list(S[i].toarray()[0])
fila_i_index = list(enumerate(fila_i))
sorted_fila_i_index = sort_2nd(fila_i_index)

# Swap item indices for titles. The comprehension uses its own loop names so
# that `i` keeps pointing at the position of `item`.
des = [(items[index], score) for index, score in sorted_fila_i_index]

p = pd.DataFrame(des)
p.head(10)

Unnamed: 0,0,1
0,The Office,2.0
1,Bob Esponja,1.0
2,Titanic,1.0
3,Interstellar,1.0
4,Harry Potter,0.0
5,Naruto,0.0
6,Batman,0.0
7,Mafiosos asesinos,0.0
8,Crimen en LA,0.0
9,Suspenso importante,0.0


In [223]:
items_viewers

{'Bob Esponja': [0, 2, 4, 5],
 'Naruto': [0, 5],
 'Mafiosos asesinos': [1],
 'Crimen en LA': [1],
 'Harry Potter': [2],
 'Titanic': [3],
 'Interstellar': [3],
 'The Office': [3, 4]}