In [8]:
# imports
import numpy as np
import pandas as pd
import scipy
import os
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

In [3]:
# recommendations function 
def make_book_recs(book_title, books, indices, weights, similarities,
                   n_recs=20, exclude_input=False):
    """Return the titles of the books most similar to ``book_title``.

    For the selected book, the matching row of every similarity matrix is
    multiplied by the weight at the same position in ``weights`` and the
    scaled rows are added together, giving one combined score per book.
    This is a weighted sum; it is not divided by the total weight.

    Parameters
    ----------
    book_title : str
        Title to look up in ``indices``.
    books : pandas.DataFrame
        Needs a ``'title'`` column. Rows are selected by position, so row
        ``k`` must describe the same book as row/column ``k`` of each
        similarity matrix.
    indices : pandas.Series or mapping
        Maps a title to its book id. Ids are treated as 1-based, so
        ``id - 1`` is used as the row position. When a title maps to
        several ids (duplicate titles) the first one is used.
    weights : sequence of numbers
        ``weights[j]`` scales ``similarities[j]``.
    similarities : sequence of 2-D array-likes
        Pairwise similarity matrices; the first one defines the number of
        books.
    n_recs : int, default 20
        Maximum number of titles to return.
    exclude_input : bool, default False
        When true, the queried book is removed from the result. By default
        it is kept, so it normally appears first (its self-similarity is
        the highest score).

    Returns
    -------
    pandas.Series
        Titles (taken from ``books['title']``) ordered from the highest to
        the lowest combined score. Books with equal scores keep their
        original row order.

    Raises
    ------
    KeyError
        If ``book_title`` is not present in ``indices`` (raised by the lookup).
    ValueError
        If ``n_recs`` is negative.
    """
    if n_recs < 0:
        raise ValueError("n_recs must be non-negative")

    # A title that occurs more than once in a Series index yields a Series
    # instead of a scalar; fall back to the first matching id so the row
    # lookup below stays one-dimensional.
    book_id = indices[book_title]
    if isinstance(book_id, pd.Series):
        book_id = book_id.iloc[0]
    # ids are 1-based, matrix rows are 0-based
    idx = int(book_id) - 1

    # Weighted sum of the selected row across all similarity matrices.
    n_books = len(similarities[0])
    combined = np.zeros(n_books, dtype=float)
    for j, weight in enumerate(weights):
        combined += np.asarray(similarities[j][idx], dtype=float) * weight

    # Stable sort on the negated scores == descending order where ties keep
    # their original position (same ordering as sorted(..., reverse=True)).
    order = np.argsort(-combined, kind='stable')
    if exclude_input:
        order = order[order != idx]
    book_indices = order[:n_recs]

    return books['title'].iloc[book_indices]

In [4]:
# Load the book metadata table from a local pickle and print a summary of its
# columns, dtypes and non-null counts.
# NOTE(review): unpickling can execute arbitrary code -- only read pickle files
# that come from a trusted source.
books = pd.read_pickle('pickled/books.pkl')
books.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10000 entries, 1 to 10000
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   title            10000 non-null  object
 1   image_url        10000 non-null  object
 2   url              10000 non-null  object
 3   author           10000 non-null  object
 4   description      10000 non-null  object
 5   popular_shelves  10000 non-null  object
 6   tags             10000 non-null  object
dtypes: object(7)
memory usage: 625.0+ KB


In [6]:
# Load the precomputed feature matrix (described by the author as the SVD output
# combined with other features) from a .npy file, then display its shape.
# Presumably one row per book, in the same order as `books` -- TODO confirm.
readme_recs_m = np.load('models/readme_m.npy')
readme_recs_m.shape

(10000, 1300)

In [9]:
# Cosine similarity between every pair of rows of the feature matrix.
# The result is a square matrix with one row and one column per input row, so
# a 10000-row input produces 10000 x 10000 entries (roughly 800 MB if the
# dtype is float64 -- TODO confirm dtype).
cosine_sim = cosine_similarity(readme_recs_m)
cosine_sim

array([[1.        , 0.34619745, 0.46108439, ..., 0.08783531, 0.06104606,
        0.01922178],
       [0.34619745, 1.        , 0.46494621, ..., 0.04130631, 0.02947556,
        0.0121831 ],
       [0.46108439, 0.46494621, 1.        , ..., 0.0417777 , 0.01729379,
        0.01312398],
       ...,
       [0.08783531, 0.04130631, 0.0417777 , ..., 1.        , 0.08188677,
        0.09385856],
       [0.06104606, 0.02947556, 0.01729379, ..., 0.08188677, 1.        ,
        0.13150175],
       [0.01922178, 0.0121831 , 0.01312398, ..., 0.09385856, 0.13150175,
        1.        ]])

In [11]:
# Reverse mapping: book title -> book id (the index of `books`).
# Series.drop_duplicates() compares the *values* (the ids), not the index
# labels, so it does not remove repeated titles. Filter on the index instead:
# every title then maps to exactly one id (its first occurrence) and
# indices[title] is always a scalar.
indices = pd.Series(books.index, index=books['title'])
indices = indices[~indices.index.duplicated(keep='first')]
indices[:3]

title
The Hunger Games (The Hunger Games, #1)                     1
Harry Potter and the Sorcerer's Stone (Harry Potter, #1)    2
Twilight (Twilight, #1)                                     3
Name: id, dtype: int64

In [18]:
# One weight per similarity matrix: make_book_recs scales each matrix's scores
# by the weight at the same position and adds them up. With a single matrix
# and weight 1 the combined score is just the cosine similarity.
weights = [1]
similarities = [cosine_sim]

# Title to get recommendations for; it must be a key of `indices`.
NEW_BOOK = "Sapiens: A Brief History of Humankind"

# The queried book itself is part of the result and ranks first, because its
# similarity to itself (1.0) is the maximum.
book_recs = make_book_recs(NEW_BOOK, books, indices, weights, similarities)
book_recs


id
2205                Sapiens: A Brief History of Humankind
900                                      The Selfish Gene
4096    Bringing Down the House: The Inside Story of S...
8347                                  A Night to Remember
3867                       So You've Been Publicly Shamed
7372                                     Stupid White Men
8930                     Them: Adventures with Extremists
9949                           The Men Who Stare at Goats
7929    Very Good Lives: The Fringe Benefits of Failur...
7517    Six Easy Pieces: Essentials of Physics By Its ...
1048    The Perfect Storm: A True Story of Men Against...
1344                What the Dog Saw and Other Adventures
4119                            A Briefer History of Time
758            Stiff: The Curious Lives of Human Cadavers
892     David and Goliath: Underdogs, Misfits, and the...
1046                                       Modern Romance
8639                         Detroit: An American Autopsy
2661       