In [4]:
import sys
import numpy as np
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn import neighbors

NUM_REC = 5

df = pd.read_csv("final.csv")

df2 = df.copy()
df2.loc[ (df2['rating'] >= 0) & (df2['rating'] <= 1), 'rating_between'] = "0 to 1"
df2.loc[ (df2['rating'] > 1) & (df2['rating'] <= 2), 'rating_between'] = "1 to 2"
df2.loc[ (df2['rating'] > 2) & (df2['rating'] <= 3), 'rating_between'] = "2 to 3"
df2.loc[ (df2['rating'] > 3) & (df2['rating'] <= 4), 'rating_between'] = "3 to 4"
df2.loc[ (df2['rating'] > 4) & (df2['rating'] <= 5), 'rating_between'] = "4 to 5"

rating_df = pd.get_dummies(df2['rating_between'])

g_types = ['fiction', 'nonfiction', 'horror', 'thriller', 'fantasy', 'mystery', 'history', 'romance', 'classic', 'crime', 'comedy']

genres = df2[g_types]
features = pd.concat([rating_df, df2['rating'], df2['rating_count'], df2['pages'], df2[g_types]], axis=1)

min_max_scaler = MinMaxScaler()
features = min_max_scaler.fit_transform(features)

model = neighbors.NearestNeighbors(n_neighbors=10, algorithm='auto')
model.fit(features)


# Query for indices of and distances to the neighbors of every book. 
# Each entry represents the list of nearest neighbours and their distances for each book,
#     sharing the same index of the queried book as in the dataframe. 
dist, idlist = model.kneighbors(features)


'''
Returns a list of other books by the author
    returns a numpy array
'''

def recommend_by_author(_df, title):
    entry = _df[_df['title'] == title]
    author = entry['author'].to_numpy()
#     display(author)
    books = _df[(_df['author'].to_numpy() == author) &
                (_df['rating'] >= 3) &
                (_df['title'] != title)]
    out = list()
    for isbn in books['isbn']:
        out.append(isbn)
#     out = books.to_numpy()
    return (out)



'''
    kNN algorithm
    returns array of isbns of recommended books based on given book
'''
def kNN_recommender(_df, knn_idlist, title):
    rec_titles = []
    # get index of read book in dataframe
    idx = df2[df2['title'] == title].index
    bookID = idx[0]
    # For all entries in the nearest neighbours matrix, 
    # add nearest neighbours for the book as specified by index. 
    for newid in idlist[bookID]:
        if (str(df2.loc[newid].title) == title):
            continue
        else:
            rec_titles.append(df2.loc[newid].title) 
    # Only return books whose average rating >= 3
    books = _df[(_df['rating'] >= 3) &
                (_df['title'].isin(rec_titles))]
    out = list()
    for isbn in books['isbn']:
        out.append(isbn)
    return (out)




# '''
#     Get a random set of books
# '''
# def getRandom(_df):
#     randoms = _df.sample(n = NUM_REC)
#     out = randoms['isbn'].to_numpy()
#     return out

'''
    Get a random set of books
    Returns: list of ISBN
'''
def getRandom(_df):
    randoms = _df.sample(n = NUM_REC)
    out = list()
    for isbn in randoms['isbn']:
        out.append(isbn)
    return out


def getRecommendations(_df, knn_idlist, readBooks):
    books = set()
    if (len(readBooks) == 0):
        # if user nas no books in main collection, recommend books at random
        for rec in getRandom(_df):
            books.add(rec)
    else:
        # otherwise, for each book generate a list of recommendations
        for title in readBooks:
            print(title)
            # generate recommendations by author
            for rec in recommend_by_author(_df, title):
                books.add(rec)
                print(f"author rec: {rec}")
            # generate recommendations by features
            for rec in kNN_recommender(_df, knn_idlist, title):
                books.add(rec)
                print(f"kNN rec: {rec}")
    
    # if not enough books to recommend, generate some extras
    if (len(books) < NUM_REC):
        extras = getRandom(_df)
        for rec in extras:
            books.add(rec)
            if (len(books) == NUM_REC):
                break
    
    
    result = random.choices(tuple(books), k=NUM_REC)
    print(type(result))
    
    return result
        

input = sys.argv[1]

FileNotFoundError: [Errno 2] No such file or directory: 'final.csv'

In [29]:
read_titles = []
books = getRecommendations(df, idlist, read_titles)

print(books)

<class 'list'>
['0553287893', '1556434715', '0553287893', '0375704051', '0375704051']


In [2]:
read_titles = ["The Tao of Pooh"]
books = getRecommendations(df, idlist, read_titles)

print(books)

NameError: name 'getRecommendations' is not defined

In [3]:
read_titles = ["Airframe"]
books = getRecommendations(df, idlist, read_titles)

print(books)

Airframe
author rec: 0345417623
author rec: 0345378490
author rec: 0345370775
author rec: 0345380371
author rec: 034540288X
author rec: 0345378482
author rec: 0061015725
author rec: 0345353145
author rec: 0345391055
author rec: 0345354621
author rec: 0345354613
author rec: 034539092X
kNN rec: 0385497466
kNN rec: 0061032077
kNN rec: 0440241073
kNN rec: 0446604089
kNN rec: 0743448642
kNN rec: 0446354678
kNN rec: 0241125421
kNN rec: 0312956614
kNN rec: 0312266863
['0345378490', '0241125421', '0446354678', '0345378490', '0743448642']


In [4]:
read_titles = ["Sarah, Plain and Tall"]
books = getRecommendations(df, idlist, read_titles)

print(books)

Sarah, Plain and Tall
author rec: 0060210990
kNN rec: 0452264510
kNN rec: 0394744209
kNN rec: 0380010038
kNN rec: 0440479002
kNN rec: 0571069770
kNN rec: 0590406116
kNN rec: 055327838X
kNN rec: 082031661X
kNN rec: 8401427762
['0060210990', '0440479002', '0452264510', '055327838X', '0380010038']


In [26]:
read_titles = ["Sarah, Plain and Tall", "Airframe"]
books = getRecommendations(df, idlist, read_titles)

print(books)

Sarah, Plain and Tall
author rec: 0060210990
kNN rec: 0452264510
kNN rec: 0394744209
kNN rec: 0380010038
kNN rec: 0440479002
kNN rec: 0571069770
kNN rec: 0590406116
kNN rec: 055327838X
kNN rec: 082031661X
kNN rec: 8401427762
Airframe
author rec: 0345417623
author rec: 0345378490
author rec: 0345370775
author rec: 0345380371
author rec: 034540288X
author rec: 0345378482
author rec: 0061015725
author rec: 0345353145
author rec: 0345391055
author rec: 0345354621
author rec: 0345354613
author rec: 034539092X
kNN rec: 0385497466
kNN rec: 0061032077
kNN rec: 0440241073
kNN rec: 0446604089
kNN rec: 0743448642
kNN rec: 0446354678
kNN rec: 0241125421
kNN rec: 0312956614
kNN rec: 0312266863
['0312956614', '0446604089', '0345380371', '0345417623', '034540288X']
