In [1]:
import os
import sys
import numpy as np
import pandas as pd
import scipy
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.decomposition import TruncatedSVD
from surprise import Reader, Dataset, SVD, evaluate, dump, accuracy
from collections import defaultdict

# Custom libraries
sys.path.append('../Util')
from loader import get_books, get_book_dataframe, get_book_features
from joiner import get_ratings, get_joint, load_amazon, load_goodreads
from reduction import reduce_matrix, get_sparse

In [2]:
def get_top_n_recs(result, books, n, q, exclude_rated=False):
    """Return the titles of the ``n`` highest-scoring books.

    Parameters
    ----------
    result : sequence
        One score per book; ``result[i]`` belongs to the book at positional
        row ``i`` of ``books``.
    books : pandas.DataFrame
        Book table with a ``'title'`` column, addressed with ``.iloc``.
    n : int
        Maximum number of titles to return. If fewer than ``n`` candidates
        exist, all of them are returned (no ``IndexError``); ``n <= 0``
        yields an empty list.
    q : sequence
        The user's rating vector, where ``q[i] == 0`` means the book is
        unrated. Only consulted when ``exclude_rated`` is true.
    exclude_rated : bool, default False
        When true, books the user has already rated are dropped from the
        candidates. The default keeps them, so the user's own liked books
        showing up at the top can be used as a sanity check.

    Returns
    -------
    list
        Titles ordered by descending score.
    """
    candidates = [
        (i, result[i])
        for i in range(len(result))
        if not (exclude_rated and q[i] != 0)
    ]
    # sorted() is stable, so equal scores keep their original row order.
    ranked = sorted(candidates, key=lambda tup: tup[1], reverse=True)

    # Slice instead of indexing range(n) so an oversized n cannot overrun.
    top = ranked[:max(n, 0)]
    return [books.iloc[book_id]['title'] for book_id, _ in top]

In [3]:
def map_user(q, V):
    """Project a user vector through concept space and back to item space.

    Evaluates ``(q . V) . V^T`` with ``np.matmul``: the inner product maps
    the user's ratings ``q`` onto the columns of ``V`` (concept space), and
    the outer product with ``V.T`` maps that back to one score per item.
    """
    return np.matmul(
        np.matmul(q, V),  # item space -> concept space
        V.T,              # concept space -> item space
    )

In [4]:
def map_user_sparse(q, V):
    """Sparse counterpart of the ``(q . V) . V^T`` user projection.

    ``q`` is wrapped in a ``scipy.sparse.csr_matrix`` and multiplied by
    ``V`` and then ``V.T`` using ``.dot``; the product is densified with
    ``.todense()`` and returned transposed.
    """
    user_row = scipy.sparse.csr_matrix(q)
    # item space -> concept space -> item space, kept sparse until the end
    item_scores = user_row.dot(V).dot(V.T)
    return item_scores.todense().T

In [5]:
# Set this to where you save and load all data.
# This directory is handed to get_book_dataframe() when the books table is loaded.
data_path = '../../goodbooks-10k/'

In [6]:
# Get the books dataframe from data_path.
# get_top_n_recs() reads titles from it positionally: books.iloc[i]['title'].
books = get_book_dataframe(data_path)

found books_dataframe in file...


In [7]:
# Load a precomputed matrix from disk; it is used as V in map_user(q, qi).
# NOTE(review): judging by the file name this is presumably the SVD
# item-factor matrix -- confirm against whatever wrote the file.
filename = '../.tmp/svd_100_300.npy'
qi = np.load(filename)

In [8]:
# Bare string used as a section note (it is echoed as this cell's output).
# The Goodreads-user cell applies this rescaling through ratings_mapper
# (1..5 stars -> -2, -1, 1, 2, 3; 0 stays 0).
'''
Users Ratings need to be in a -2 - 3 scale. Bad ratings should count 'against' recs
'''

"\nUsers Ratings need to be in a -2 - 3 scale. Bad ratings should count 'against' recs\n"

In [10]:
# New user who likes fantasy.
# q is a rating vector with one slot per book (0 = unrated); each index is
# written as <number> - 1 to get a 0-based position.
# The builtin int replaces the np.int alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
q = np.zeros((10000), dtype=int)
q[19-1] = 2.0 # LOTR 1
# Uncomment any of the following to add more fantasy ratings:
# q[155-1] = 2.0 # LOTR 2
# q[161-1] = 2.0 # LOTR 3
# q[7-1] = 2.0 # Hobbit
# q[611-1] = 2.0 #Silmarillion
# q[189-1] = 2.0 #LOTR boxed set
# q[135-1] = 2.0 #GOT
# q[188-1] = 2.0 #GOT
# q[330-1] = 2.0 #WOT
# q[510-1] = 2.0 #WOT

In [15]:
# New user who likes sci-fi and mystery and dislikes fantasy.
# q is a rating vector with one slot per book (0 = unrated); each index is
# written as <number> - 1 to get a 0-based position.
# The builtin int replaces the np.int alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
q = np.zeros((10000), dtype=int)

# Liked: sci-fi
q[126-1] = 2.0 # Dune
q[70-1] = 2.0 # Enders Game
q[503-1] = 2.0 # Space Odyssey

# Liked: mystery
q[514-1] = 2.0 # Sherlock Holmes
q[672-1] = 2.0 # Orient Express
q[200-1] = 2.0 # And then there were none

# Disliked: fantasy (negative ratings count against similar books)
q[19-1] = -2.0 # LOTR 1
q[155-1] = -2.0 # LOTR 2
q[161-1] = -2.0 # LOTR 3
q[7-1] = -2.0 # Hobbit

In [12]:
# User from Goodreads: load a saved rating vector from disk.
# This rebinds q, replacing whichever synthetic user was defined earlier.
q = np.load('../.tmp/user_nickgreenquist.npy')

# Turn the 1-5 rating scale into a negative-to-positive scale (0 stays 0),
# so that low ratings pull scores down instead of up.
ratings_mapper = {0:0, 1:-2, 2:-1, 3:1, 4:2, 5:3}
# Values are rewritten in place, one element at a time; any value that is not
# a key of ratings_mapper raises KeyError.
# NOTE(review): the mapped values are stored back into q, so q's dtype must
# be able to hold negatives -- confirm the dtype of the saved array.
for i in range(len(q)):
    q[i] = ratings_mapper[q[i]]

# for i in range(len(q)):
#     if q[i] != 0:
#         title = books.iloc[i]['title']
#         print("%s --> %s" % (q[i], title))

In [13]:
# Prediction rule noted by the author: r^_ui = mu + b_u + b_i + q_i^T p_u
# NOTE(review): presumably mu is the global mean, b_u / b_i the user and item
# biases, and q_i / p_u the item and user factor vectors -- confirm.
# Show the dimensions of the loaded qi matrix.
qi.shape

(10000, 300)

In [16]:
# Score every book for the current user vector q using qi, then print the
# 25 top-ranked titles.
# NOTE(review): q is whichever user cell was executed last, and the execution
# counts in this notebook are out of order -- re-check this output after
# Restart & Run All.
recs = get_top_n_recs(map_user(q, qi), books, 25, q)
for r in recs:
    print(r)

And Then There Were None
Ender's Game (Ender's Saga, #1)
The Adventures of Sherlock Holmes
Murder on the Orient Express (Hercule Poirot, #10)
2001: A Space Odyssey (Space Odyssey, #1)
Batman: Year One
A Christmas Carol
The Corrections
Servant of the Shard (Forgotten Realms: Paths of Darkness, #3; The Sellswords, #1)
Shatter Me (Shatter Me, #1)
Maid-sama! Vol. 01 (Maid-sama!, #1)
The Shining (The Shining #1)
Frog and Toad All Year (Frog and Toad, #3)
One Day at Horrorland (Goosebumps, #16)
The Rise of Nine (Lorien Legacies, #3)
The Heir (The Selection, #4)
The Matarese Circle (Matarese #1)
Magician: Apprentice (The Riftwar Saga, #1)
Babe: The Gallant Pig
The Hundred-Foot Journey
The Elementary Particles
Faust: First Part
The Creative License: Giving Yourself Permission to Be The Artist You Truly Are
Born on a Blue Day: Inside the Extraordinary Mind of an Autistic Savant
The One Thing: The Surprisingly Simple Truth Behind Extraordinary Results


In [9]:
# Bare string used as a section header (it is echoed as this cell's output).
# The next cells repeat the recommendation with item_matrix in place of qi.
'''

Use Item Matrix to get recs for new user

'''

'\n\nUse Item Matrix to get recs for new user\n\n'

In [17]:
# Load in item_matrix (concepts and features) and test recs.
# Note that `filename` is rebound here; it previously pointed at the qi file.
filename = '../.tmp/item_matrix.npy'
item_matrix = np.load(filename)
# Show the dimensions of the loaded matrix.
item_matrix.shape

(10000, 110)

In [18]:
# Same recommendation call as for qi, but using item_matrix as V;
# prints the 25 top-ranked titles for the current q.
recs = get_top_n_recs(map_user(q, item_matrix), books, 25, q)
for r in recs:
    print(r)

And Then There Were None
The Invisible Man
The Girl on the Train
The Mysterious Affair at Styles (Hercule Poirot, #1)
Seveneves
Shutter Island
Starship Troopers
Revelation Space
Fearless (The Lost Fleet, #2)
The Neutronium Alchemist (Night's Dawn, #2)
4:50 from Paddington (Miss Marple, #8)
The Sunday Philosophy Club (Isabel Dalhousie, #1)
Armada
Gone Girl
The Further Adventures of Sherlock Holmes: After Sir Arthur Conan Doyle (Classic Crime)
Ubik
Faithful Place (Dublin Murder Squad, #3)
A Great Reckoning (Chief Inspector Armand Gamache, #12)
Beacon 23: The Complete Novel (Beacon 23, #1-5)
The Time Machine
Murder on the Orient Express (Hercule Poirot, #10)
Departure
Death on the Nile (Hercule Poirot, #17)
Dawn (Xenogenesis, #1)
The Secret Adversary (Tommy and Tuppence #1)


In [12]:
# Bare string used as a section header (it is echoed as this cell's output).
# The next cells use only a column slice of item_matrix.
'''

Use Just part of Item Matrix for Recs

'''

'\n\nUse Just part of Item Matrix for Recs\n\n'

In [12]:
# Keep columns 10..109 of item_matrix and drop the first ten.
# NOTE(review): which columns are "concepts" and which are "features" is not
# visible in this notebook -- confirm what the first ten columns hold.
part = item_matrix[:,10:110]
# Show the value range of the slice as (min, max).
(np.amin(part), np.amax(part))

(-0.77808343373846678, 1.4040590953389824)

In [13]:
# Recommendation using only the column slice `part`; prints the top 10 titles.
# NOTE(review): the recorded output is fantasy-heavy, which suggests this cell
# was last run with a different q than the two 25-title lists above (the
# execution counts are out of order) -- confirm with Restart & Run All.
recs = get_top_n_recs(map_user(q, part), books, 10, q)
for r in recs:
    print(r)

The Lord of the Rings (The Lord of the Rings, #1-3)
The Fellowship of the Ring (The Lord of the Rings, #1)
The Hobbit
The Return of the King (The Lord of the Rings, #3)
The Two Towers (The Lord of the Rings, #2)
The Belgariad, Vol. 1: Pawn of Prophecy / Queen of Sorcery / Magician's Gambit (The Belgariad, #1-3)
The Name of the Wind (The Kingkiller Chronicle, #1)
The Farthest Shore (Earthsea Cycle, #3)
The Tombs of Atuan (Earthsea Cycle, #2)
The Crown Conspiracy (The Riyria Revelations, #1)


In [14]:
# Bare string used as a section header (it is echoed as this cell's output).
# The next cells build feature_matrix and recommend through map_user_sparse.
'''

Use Feature Matrix for Recs

'''

'\n\nUse Feature Matrix for Recs\n\n'

In [14]:
# Produce the feature matrix from the books dataframe using the project
# helper get_book_features().
# NOTE(review): it is passed to map_user_sparse() below, so it is presumably a
# scipy sparse matrix -- confirm against the loader.
feature_matrix = get_book_features(books)
# Show the dimensions of the feature matrix.
feature_matrix.shape

feature_matrix exists in file...


(10000, 82203)

In [15]:
# Recommendation using feature_matrix through the sparse code path; prints
# the 25 top-ranked titles.
# map_user_sparse() returns the densified product transposed, and
# get_top_n_recs() indexes that result one row at a time.
recs = get_top_n_recs(map_user_sparse(q, feature_matrix), books, 25, q)
for r in recs:
    print(r)

A Wrinkle in Time (A Wrinkle in Time Quintet, #1)
Grendel
A Wizard of Earthsea (Earthsea Cycle, #1)
Dandelion Wine (Green Town, #1)
A Wind in the Door (A Wrinkle in Time Quintet, #2)
Many Waters (A Wrinkle in Time Quintet, #4)
An Acceptable Time (A Wrinkle in Time Quintet, #5)
The Earthsea Trilogy
A Swiftly Tilting Planet (A Wrinkle in Time Quintet, #3)
Slaughterhouse-Five
Flowers for Algernon
Tuck Everlasting
The Bone Clocks
Alice in Wonderland
The Neverending Story
Alice's Adventures in Wonderland & Through the Looking-Glass
Howl's Moving Castle (Howl's Moving Castle, #1)
The Lost World (Professor Challenger, #1)
Brave New World Revisited 
Alice's Adventures in Wonderland
Through the Looking-Glass, and What Alice Found There
Brave New World / Brave New World Revisited
The Horse and His Boy (Chronicles of Narnia, #5)
The Buried Giant
Journey to the Center of the Earth (Extraordinary Voyages, #3)
