In [80]:
import os
import sys
import numpy as np
import pandas as pd
import scipy
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.decomposition import TruncatedSVD
from surprise import Reader, Dataset, SVD, evaluate, dump, accuracy
from collections import defaultdict

# Custom libraries
sys.path.append('../Util')
from loader import get_books, get_book_dataframe, get_book_features
from joiner import get_ratings, get_joint, load_amazon, load_goodreads
from reduction import reduce_matrix, get_sparse

In [91]:
def get_top_n_recs(result, books, n, q):
    """Return the titles of the ``n`` best-scoring books the user has not rated.

    Args:
        result: Per-book scores, indexable by position (``result[i]`` is the
            score of the book at row ``i`` of ``books``).
        books: Table of books; must support ``books.iloc[i]['title']``.
        n: Maximum number of titles to return.
        q: The user's rating vector; ``q[i] == 0`` means book ``i`` is unrated.

    Returns:
        A list of at most ``n`` titles ordered from highest to lowest score.
        Books the user has already rated are never returned, so the list is
        shorter than ``n`` when fewer than ``n`` unrated books exist.
    """
    # Keep only unrated books: rated ones must never be recommended, even
    # when n exceeds the number of unrated candidates.
    candidates = [(idx, result[idx]) for idx in range(len(result)) if q[idx] == 0]
    # Stable descending sort: books with equal scores keep ascending index order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Slicing (instead of indexing over range(n)) tolerates n > len(candidates).
    limit = max(n, 0)
    return [books.iloc[idx]['title'] for idx, _ in candidates[:limit]]

In [82]:
def map_user(q, V):
    """Project a user's rating vector through ``V`` and back to item space.

    Computes ``(q . V) . V^T``: the ratings ``q`` are first mapped into the
    concept space spanned by the columns of ``V``, then mapped back so that
    every item receives a score.

    Args:
        q: The user's rating vector (one entry per item).
        V: Item-by-concept matrix; must expose ``.T``.

    Returns:
        The per-item scores produced by the two matrix products.
    """
    # Inner product: items -> concepts; outer product: concepts -> items.
    return np.matmul(np.matmul(q, V), V.T)

In [83]:
def map_user_sparse(q, V):
    """Sparse variant of the user projection ``(q . V) . V^T``.

    ``q`` is wrapped in a CSR matrix and both products are carried out with
    the sparse ``dot`` method; only the final result is densified.

    Args:
        q: The user's rating vector (one entry per item).
        V: Item-by-concept matrix. The second product must yield an object
            with ``.todense()``, so V is expected to be a scipy sparse matrix.

    Returns:
        The dense, transposed result of the two products. For a 1-D ``q`` of
        length N this has shape (N, 1).
    """
    ratings_row = scipy.sparse.csr_matrix(q)
    # Items -> concepts.
    concept_row = ratings_row.dot(V)
    # Concepts -> items, still sparse at this point.
    item_scores = concept_row.dot(V.T)
    return item_scores.todense().T

In [84]:
# Directory holding the goodbooks-10k data (a relative path).
# Set this to where you save and load all data.
data_path = '../../goodbooks-10k/'

In [85]:
# Get the books dataframe through the project `loader` helper.
# Later cells use it to look up titles by row position (books.iloc[i]['title']),
# so its row order presumably matches the item order of the matrices loaded
# below — TODO confirm.
books = get_book_dataframe(data_path)

found books_dataframe in file...


In [86]:
# Load a precomputed matrix from the local .tmp cache; no visible cell in this
# notebook creates the file, so it must already exist on disk.
# NOTE(review): the file name suggests an SVD result and the numbers are
# presumably training settings — confirm with the code that wrote it.
filename = '../.tmp/svd_20_1000.npy'
qi = np.load(filename)

In [8]:
'''
Users Ratings need to be in a -2 - 3 scale. Bad ratings should count 'against' recs
'''

"\nUsers Ratings need to be in a -2 - 3 scale. Bad ratings should count 'against' recs\n"

In [96]:
# Load a cached rating vector for a user from goodreads.
# The sparse matrix is densified and its first row is turned into a flat
# 1-D array, which is the form map_user / get_top_n_recs index into.
# NOTE: the next cell reassigns `q`, so when the notebook runs top to bottom
# this user's vector is not the one used by the recommendation cells.
sparse_q = scipy.sparse.load_npz('../.tmp/cached_users/user_nickgreenquist.npz')
q = sparse_q.toarray()
q = np.array(q[0].tolist())
q.shape

(10000,)

In [97]:
# Load the cached rating vector of the "likes fantasy" test user.
# This overwrites `q` from the previous cell; every recommendation cell below
# uses this vector when the notebook is run in order.
sparse_q = scipy.sparse.load_npz('../.tmp/cached_users/user_likes_fantasy.npz')
# Densify, take the first row and flatten it into a 1-D array.
q = sparse_q.toarray()
q = np.array(q[0].tolist())
q.shape

(10000,)

In [89]:
# SVD rating prediction: r_hat(u, i) = mu + b_u + b_i + q_i^T . p_u
# `qi` presumably holds the item factors q_i, one row per book — TODO confirm.
qi.shape

(10000, 1000)

In [98]:
# Score every book for the current user with the loaded `qi` matrix, then
# list the 25 best-scoring titles the user has not rated.
item_scores = map_user(q, qi)
recs = get_top_n_recs(item_scores, books, 25, q)
for title in recs:
    print(title)

City of Glass (The Mortal Instruments, #3)
The Scarlet Letter
The Other Boleyn Girl (The Plantagenet and Tudor Novels, #9)
The Lucky One
The Shining (The Shining #1)
A Breath of Snow and Ashes (Outlander, #6)
Veronika Decides to Die
Last Chance Saloon
Treasure Island
The White Queen (The Plantagenet and Tudor Novels, #2)
Year of Yes: How to Dance It Out, Stand In the Sun and Be Your Own Person
One Flew Over the Cuckoo's Nest
Outlander (Outlander, #1)
Four to Score (Stephanie Plum, #4)
Today Will Be Different
The Return of Sherlock Holmes
Heaven is for Real: A Little Boy's Astounding Story of His Trip to Heaven and Back
The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics
Scrappy Little Nobody
The Light Between Oceans
The Son of Neptune (The Heroes of Olympus, #2)
Ultimate Spider-Man, Volume 1: Power and Responsibility
Eat, Pray, Love
The League of Extraordinary Gentlemen, Vol. 1
The Story of Edgar Sawtelle


In [99]:
'''

Use Item Matrix to get recs for new user

'''

'\n\nUse Item Matrix to get recs for new user\n\n'

In [100]:
# Load in item_matrix (concepts and features) and test recs.
# The file is read from the local .tmp cache; no visible cell creates it, so
# it must already exist on disk.
filename = '../.tmp/item_matrix.npy'
item_matrix = np.load(filename)
item_matrix.shape

(10000, 2000)

In [101]:
# Same recommendation pipeline, this time projecting the user through the
# full item_matrix; show the 25 best-scoring unrated titles.
item_scores = map_user(q, item_matrix)
recs = get_top_n_recs(item_scores, books, 25, q)
for title in recs:
    print(title)

The Name of the Wind (The Kingkiller Chronicle, #1)
The Belgariad Boxed Set: Pawn of Prophecy / Queen of Sorcery / Magician's Gambit / Castle of Wizardry / Enchanters' End Game (The Belgariad, #1-5)
The Farthest Shore (Earthsea Cycle, #3)
The Wise Man's Fear (The Kingkiller Chronicle, #2)
The Chronicles of Thomas Covenant, the Unbeliever (The Chronicles of Thomas Covenant the Unbeliever, #1-3)
Lord Foul's Bane (The Chronicles of Thomas Covenant the Unbeliever, #1)
The Blade Itself (The First Law, #1)
The Belgariad, Vol. 1: Pawn of Prophecy / Queen of Sorcery / Magician's Gambit (The Belgariad, #1-3)
The Lies of Locke Lamora (Gentleman Bastard, #1)
A Song of Ice and Fire (A Song of Ice and Fire, #1-4)
Rhapsody: Child of Blood (Symphony of Ages, #1)
The Dragon's Path (The Dagger and the Coin, #1)
The Crown Conspiracy (The Riyria Revelations, #1)
The Great Book of Amber (The Chronicles of Amber, #1-10)
The Tombs of Atuan (Earthsea Cycle, #2)
The Hundred Thousand Kingdoms (Inheritance Tril

In [12]:
'''

Use Just part of Item Matrix for Recs

'''

'\n\nUse Just part of Item Matrix for Recs\n\n'

In [12]:
# Keep only columns 10..109 of item_matrix (100 columns) and show the
# smallest and largest value in that slice.
# NOTE(review): the slice bounds are magic numbers — document which group of
# columns they are meant to select.
part = item_matrix[:,10:110]
(np.amin(part), np.amax(part))

(-0.77808343373846678, 1.4040590953389824)

In [13]:
# Recommend from the column slice `part` only; show the 10 best-scoring
# unrated titles.
item_scores = map_user(q, part)
recs = get_top_n_recs(item_scores, books, 10, q)
for title in recs:
    print(title)

The Lord of the Rings (The Lord of the Rings, #1-3)
The Fellowship of the Ring (The Lord of the Rings, #1)
The Hobbit
The Return of the King (The Lord of the Rings, #3)
The Two Towers (The Lord of the Rings, #2)
The Belgariad, Vol. 1: Pawn of Prophecy / Queen of Sorcery / Magician's Gambit (The Belgariad, #1-3)
The Name of the Wind (The Kingkiller Chronicle, #1)
The Farthest Shore (Earthsea Cycle, #3)
The Tombs of Atuan (Earthsea Cycle, #2)
The Crown Conspiracy (The Riyria Revelations, #1)


In [14]:
'''

Use Feature Matrix for Recs

'''

'\n\nUse Feature Matrix for Recs\n\n'

In [14]:
# Produce the feature matrix for the books via the project `loader` helper.
# It is passed to map_user_sparse below, which calls .todense() on the
# product, so it is presumably a scipy sparse matrix — TODO confirm.
feature_matrix = get_book_features(books)
feature_matrix.shape

feature_matrix exists in file...


(10000, 82203)

In [15]:
# Recommend using the feature matrix through the sparse projection; show the
# 25 best-scoring unrated titles.
item_scores = map_user_sparse(q, feature_matrix)
recs = get_top_n_recs(item_scores, books, 25, q)
for title in recs:
    print(title)

A Wrinkle in Time (A Wrinkle in Time Quintet, #1)
Grendel
A Wizard of Earthsea (Earthsea Cycle, #1)
Dandelion Wine (Green Town, #1)
A Wind in the Door (A Wrinkle in Time Quintet, #2)
Many Waters (A Wrinkle in Time Quintet, #4)
An Acceptable Time (A Wrinkle in Time Quintet, #5)
The Earthsea Trilogy
A Swiftly Tilting Planet (A Wrinkle in Time Quintet, #3)
Slaughterhouse-Five
Flowers for Algernon
Tuck Everlasting
The Bone Clocks
Alice in Wonderland
The Neverending Story
Alice's Adventures in Wonderland & Through the Looking-Glass
Howl's Moving Castle (Howl's Moving Castle, #1)
The Lost World (Professor Challenger, #1)
Brave New World Revisited 
Alice's Adventures in Wonderland
Through the Looking-Glass, and What Alice Found There
Brave New World / Brave New World Revisited
The Horse and His Boy (Chronicles of Narnia, #5)
The Buried Giant
Journey to the Center of the Earth (Extraordinary Voyages, #3)
