In [36]:
import streamlit as st
import numpy as np
import pandas as pd
import lightfm as lf
import nmslib
import pickle
import scipy.sparse as sparse

def nearest_books_nms(book_id, index, n=10):
    """Query a kNN index for the nearest neighbours of one book.

    The query vector is ``item_embeddings[book_id]``, taken from the
    module-level ``item_embeddings``, which must be loaded before calling.

    Args:
        book_id: Key used to select the query vector from ``item_embeddings``.
        index: Object exposing ``knnQuery(vector, k=...)``.
        n: Number of neighbours to request.

    Returns:
        The result of ``index.knnQuery`` for that vector, unchanged.
    """
    query_vector = item_embeddings[book_id]
    return index.knnQuery(query_vector, k=n)

def get_names(index):
    """Build a display string for every book id in ``index``.

    Titles and authors are looked up in the module-level ``name_mapper``
    and ``author_mapper``.

    Args:
        index: Iterable of book ids; each id is used as a key in both mappers.

    Returns:
        List of strings, one per id, in input order.
    """
    return [
        'Book name:  {} '.format(name_mapper[book_idx])
        + '  Book Author: {}'.format(author_mapper[book_idx])
        for book_idx in index
    ]

def read_files(folder_name='data'):
    """Load the ratings and books tables from CSV and lower-case book titles.

    Args:
        folder_name: Directory containing ``ratings.csv`` and ``books.csv``.

    Returns:
        Tuple ``(ratings, books)`` of DataFrames; the ``title`` column of
        ``books`` is converted to lower case.
    """
    ratings_path = folder_name + '/ratings.csv'
    books_path = folder_name + '/books.csv'

    ratings_df = pd.read_csv(ratings_path)
    books_df = pd.read_csv(books_path)

    # Normalise titles so later lookups/display use one consistent case.
    books_df['title'] = books_df['title'].str.lower()
    return ratings_df, books_df

def make_mappers():
    """Create lookup dictionaries keyed by book id.

    Reads the module-level ``books`` table and uses its ``book_id``,
    ``title`` and ``authors`` columns.

    Returns:
        Tuple ``(name_mapper, author_mapper)``: book id -> title and
        book id -> authors.
    """
    book_ids = books.book_id
    id_to_title = {
        book_id: title for book_id, title in zip(book_ids, books.title)
    }
    id_to_authors = {
        book_id: authors for book_id, authors in zip(book_ids, books.authors)
    }
    return id_to_title, id_to_authors

def load_embeddings(path='item_embeddings.pickle'):
    """Load pickled item embeddings and build an nmslib HNSW index over them.

    Args:
        path: Location of the pickle file with the item embeddings.
            Defaults to ``'item_embeddings.pickle'`` (the previously
            hard-coded file name), so existing zero-argument calls behave
            as before.

    Returns:
        Tuple ``(item_embeddings, nms_idx)``: the unpickled embeddings and
        the nmslib index created from them.
    """
    # SECURITY: pickle.load can execute arbitrary code while unpickling;
    # only point ``path`` at files from a trusted source.
    with open(path, 'rb') as f:
        item_embeddings = pickle.load(f)

    # Use nmslib to build a fast approximate kNN index
    # (HNSW method, 'cosinesimil' space).
    nms_idx = nmslib.init(method='hnsw', space='cosinesimil')
    nms_idx.addDataPointBatch(item_embeddings)
    nms_idx.createIndex(print_progress=True)
    return item_embeddings, nms_idx

# Load the data: CSV tables, id lookup dictionaries, embeddings and kNN index.
# The functions defined above read these module-level names as globals
# (make_mappers reads `books`; get_names reads the two mappers;
# nearest_books_nms reads `item_embeddings`), so the order below matters.
ratings, books  = read_files(folder_name='data') 
name_mapper, author_mapper = make_mappers()
item_embeddings,nms_idx = load_embeddings()

2021-06-29 20:45:56.117 INFO    nmslib: M                   = 16
2021-06-29 20:45:56.120 INFO    nmslib: indexThreadQty      = 8
2021-06-29 20:45:56.121 INFO    nmslib: efConstruction      = 200
2021-06-29 20:45:56.123 INFO    nmslib: maxM			          = 16
2021-06-29 20:45:56.124 INFO    nmslib: maxM0			          = 32
2021-06-29 20:45:56.124 INFO    nmslib: mult                = 0.360674
2021-06-29 20:45:56.127 INFO    nmslib: skip_optimized_index= 0
2021-06-29 20:45:56.129 INFO    nmslib: delaunay_type       = 2
2021-06-29 20:45:56.130 INFO    nmslib: Set HNSW query-time parameters:
2021-06-29 20:45:56.132 INFO    nmslib: ef(Search)         =20
2021-06-29 20:45:56.135 INFO    nmslib: algoType           =2
2021-06-29 20:45:56.553 INFO    nmslib: 
The vector space is CosineSimilarity
2021-06-29 20:45:56.554 INFO    nmslib: Vector length=30
2021-06-29 20:45:56.555 INFO    nmslib: searchMethod			  = 3
2021-06-29 20:45:56.557 INFO    nmslib: Making optimized index
2021-06-29 20:45:56.567 I

In [33]:
# ! pip install streamlit

In [None]:
# Shell escape: run the Streamlit CLI on first.py.
# NOTE(review): first.py is not created in the cells visible here — confirm it
# exists next to this notebook before running this cell.
! streamlit run first.py