# Modelo 1 - Personalized PageRank

In [1]:
import pickle
import json
import numpy as np
import os
from scipy.sparse import dok_matrix

from tqdm.notebook import tqdm

## Preparação de Dados

### Calcula rede
- Se o arquivo `dados-processados/network.pickle` já existir, a rede não é calculada novamente.

In [2]:
if os.path.isfile("../dados-processados/network.pickle"):
    # Cache hit: reuse the transition network written by a previous run.
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open("../dados-processados/network.pickle", 'rb') as _file:
        network = pickle.load(_file)
else:
    # Cache miss: build the transition-count network from the processed data.

    # Processed dataset; each item unpacks into (playlist name, tracklist).
    with open("../dados-processados/dataset.pickle", 'rb') as _file:
        dataset = pickle.load(_file)

    # Track encoder dictionary; only its size is used here.
    with open("../dados-processados/encoding_tracks.json", 'r') as _file:
        tracks_encoder = json.load(_file)

    # Number of unique tracks = side length of the square matrix.
    track_count = len(tracks_encoder)

    # Sparse integer matrix: network[a, b] counts transitions a -> b.
    network = dok_matrix((track_count, track_count), dtype=int)

    # Walk every pair of consecutive entries in each tracklist; element [1]
    # of an entry is used as the matrix index of the track.
    for _playlist_name, tracklist in tqdm(dataset):
        for current_entry, next_entry in zip(tracklist, tracklist[1:]):
            network[current_entry[1], next_entry[1]] += 1

    # Persist the network so later runs take the cache-hit branch.
    with open("../dados-processados/network.pickle", 'wb') as _file:
        pickle.dump(network, _file)

### Normalizando Pesos de Arestas

Calcula a soma total de transições por música

In [3]:
# Sum of each row of the transition-count matrix (one total per source row);
# the normalization cell indexes this by row to get its denominator.
sums = network.sum(axis=1)

Gera cópia da rede com arestas normalizadas

In [4]:
if not os.path.isfile("../dados-processados/normalized_network.pickle"):

    # Flatten the row sums once into a 1-D array so each lookup yields a true
    # scalar. Calling float() on a 1x1 matrix slice (ndim > 0) is deprecated
    # since NumPy 1.25 and is slated to become an error.
    row_totals = np.asarray(sums).ravel()

    # Float copy of the network where every stored entry is divided by the
    # total of its row.
    normalized_network = dok_matrix(network.shape, dtype=float)

    # Only stored (key, value) pairs are visited, so every row touched here
    # has at least one stored entry contributing to its total.
    for key, count in tqdm(network.items()):
        row_idx = key[0]
        normalized_network[key] = count / float(row_totals[row_idx])

    # Cache the result so later runs can skip the normalization loop.
    with open("../dados-processados/normalized_network.pickle", 'wb') as _file:
        pickle.dump(normalized_network, _file)
else:
    # Load the previously normalized network.
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open("../dados-processados/normalized_network.pickle", 'rb') as _file:
        normalized_network = pickle.load(_file)

## Implementação Algoritmo

In [97]:
from models.personalized_pagerank import PersonalizedPageRank

# Build the recommender on top of the row-normalized transition network.
# NOTE(review): walk_depth and n_walks presumably control the length and the
# number of random walks — confirm against models/personalized_pagerank.
model = PersonalizedPageRank(
    network=normalized_network,
    walk_depth=5,
    n_walks=5,
)

In [98]:
# Ask the model for predictions on a batch holding a single seed list.
# NOTE(review): the values are presumably encoded track ids — confirm.
model.predict([[43, 328232, 22]])

[[27114,
  2170,
  65472,
  207,
  1782,
  308,
  603,
  1133,
  1275,
  3688,
  11904,
  3689,
  1952,
  4035,
  43,
  25808,
  8293,
  213,
  339,
  2586,
  671,
  2337,
  1271,
  1054,
  8480,
  883,
  2684,
  829,
  195,
  212,
  4171,
  1035,
  824,
  578,
  4150,
  2011,
  27312,
  3158,
  2194]]