In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Load the three raw CSV tables (books, ratings, users).
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. The warning left in this cell's saved output looks
# like the mixed-type DtypeWarning that chunked inference emits -- TODO confirm
# it disappears on re-run.
books_raw = pd.read_csv('data/books.csv', low_memory=False)
ratings_raw = pd.read_csv('data/ratings.csv')
users_raw = pd.read_csv('data/users.csv')

  books_raw = pd.read_csv('data/books.csv')


In [3]:
# Print column names, non-null counts, dtypes and memory usage of each raw table.
for raw_frame in (books_raw, ratings_raw, users_raw):
    raw_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271360 entries, 0 to 271359
Data columns (total 8 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   ISBN                 271360 non-null  object
 1   Book-Title           271360 non-null  object
 2   Book-Author          271358 non-null  object
 3   Year-Of-Publication  271360 non-null  object
 4   Publisher            271358 non-null  object
 5   Image-URL-S          271360 non-null  object
 6   Image-URL-M          271360 non-null  object
 7   Image-URL-L          271357 non-null  object
dtypes: object(8)
memory usage: 16.6+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1149780 entries, 0 to 1149779
Data columns (total 3 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   User-ID      1149780 non-null  int64 
 1   ISBN         1149780 non-null  object
 2   Book-Rating  1149780 non-null  int64 
dtypes: int64(2), obje

In [4]:
# Book columns that are dropped before any further processing.
unused_book_columns = ['Image-URL-S', 'Image-URL-M', 'Image-URL-L', 'Year-Of-Publication']

# For every table: remove rows with any missing value, then exact duplicate rows.
books_cleaned = (
    books_raw
    .drop(columns=unused_book_columns)
    .dropna(axis=0)
    .drop_duplicates()
)
ratings_cleaned = ratings_raw.dropna(axis=0).drop_duplicates()
users_cleaned = users_raw.dropna(axis=0).drop_duplicates()

In [5]:
# Number of book rows left after cleaning.
book_count = books_cleaned.shape[0]
book_count

271356

In [6]:
# Preview the first five cleaned book rows.
books_cleaned.head(n=5)

Unnamed: 0,ISBN,Book-Title,Book-Author,Publisher
0,195153448,Classical Mythology,Mark P. O. Morford,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,HarperFlamingo Canada
2,60973129,Decision in Normandy,Carlo D'Este,HarperPerennial
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,Farrar Straus Giroux
4,393045218,The Mummies of Urumchi,E. J. W. Barber,W. W. Norton &amp; Company


In [7]:
# Count how many ratings each ISBN received.
book_ratings_count = (
    ratings_cleaned
    .groupby('ISBN')
    .size()
    .reset_index(name='num_ratings')
)

# Attach book metadata. ISBNs with no match in books_cleaned get NaN from the
# left join and are removed by dropna(). Most-rated books come first.
book_ratings_df = (
    book_ratings_count
    .merge(books_cleaned, on='ISBN', how='left')
    .dropna()
    .sort_values('num_ratings', ascending=False)
    .reset_index(drop=True)
)

book_ratings_df.head(10)

Unnamed: 0,ISBN,num_ratings,Book-Title,Book-Author,Publisher
0,0971880107,2502,Wild Animus,Rich Shapero,Too Far
1,0316666343,1295,The Lovely Bones: A Novel,Alice Sebold,"Little, Brown"
2,0385504209,883,The Da Vinci Code,Dan Brown,Doubleday
3,0060928336,732,Divine Secrets of the Ya-Ya Sisterhood: A Novel,Rebecca Wells,Perennial
4,0312195516,723,The Red Tent (Bestselling Backlist),Anita Diamant,Picador USA
5,044023722X,647,A Painted House,John Grisham,Dell Publishing Company
6,0142001740,615,The Secret Life of Bees,Sue Monk Kidd,Penguin Books
7,067976402X,614,Snow Falling on Cedars,David Guterson,Vintage Books USA
8,0671027360,586,Angels &amp; Demons,Dan Brown,Pocket Star
9,0446672211,585,Where the Heart Is (Oprah's Book Club (Paperba...,Billie Letts,Warner Books


In [8]:
# Summary statistics of the ratings-per-book distribution.
book_rating_stats = book_ratings_df['num_ratings'].describe()
print(book_rating_stats)

count    270147.000000
mean          3.816929
std          13.811788
min           1.000000
25%           1.000000
50%           1.000000
75%           3.000000
max        2502.000000
Name: num_ratings, dtype: float64


In [9]:
# Share of books that reach each candidate minimum-ratings cut-off.
thresholds = [5, 10, 20, 50, 100, 200]

for n in thresholds:
    count = (book_ratings_df['num_ratings'] >= n).sum()
    percentage = (count / len(book_ratings_df)) * 100
    # The comparison is inclusive (>=), so the label reads "At least", not "More than".
    print(f"At least {n} ratings: {count} books ({percentage:.2f}%)")

More than 5 ratings: 40917 books (15.15%)
More than 10 ratings: 17479 books (6.47%)
More than 20 ratings: 7269 books (2.69%)
More than 50 ratings: 2161 books (0.80%)
More than 100 ratings: 727 books (0.27%)
More than 200 ratings: 194 books (0.07%)


In [10]:
# Count how many ratings each user submitted, most active users first.
user_ratings_df = (
    ratings_cleaned
    .groupby('User-ID')
    .size()
    .reset_index(name='num_ratings')
    .sort_values('num_ratings', ascending=False)
    .reset_index(drop=True)
)

user_ratings_df.head(10)

Unnamed: 0,User-ID,num_ratings
0,11676,13602
1,198711,7550
2,153662,6109
3,98391,5891
4,35859,5850
5,212898,4785
6,278418,4533
7,76352,3367
8,110973,3100
9,235105,3067


In [11]:
# Summary statistics of the ratings-per-user distribution.
user_rating_stats = user_ratings_df['num_ratings'].describe()
print(user_rating_stats)

count    105283.000000
mean         10.920851
std          90.562825
min           1.000000
25%           1.000000
50%           1.000000
75%           4.000000
max       13602.000000
Name: num_ratings, dtype: float64


In [12]:
# Share of users that reach each candidate minimum-ratings cut-off.
thresholds = [5, 10, 20, 50, 100, 200]

for n in thresholds:
    count = (user_ratings_df['num_ratings'] >= n).sum()
    percentage = (count / len(user_ratings_df)) * 100
    # The comparison is inclusive (>=), so the label reads "At least", not "More than".
    print(f"At least {n} ratings: {count} users ({percentage:.2f}%)")

More than 5 ratings: 22816 users (21.67%)
More than 10 ratings: 13097 users (12.44%)
More than 20 ratings: 7369 users (7.00%)
More than 50 ratings: 3427 users (3.26%)
More than 100 ratings: 1847 users (1.75%)
More than 200 ratings: 905 users (0.86%)


In [13]:
# Keep only books that received at least this many ratings.
min_num_book_ratings = 50
has_enough_ratings = book_ratings_df['num_ratings'].ge(min_num_book_ratings)
filtered_books = book_ratings_df[has_enough_ratings]
print(len(filtered_books))
print(filtered_books.head(5))

2161
         ISBN  num_ratings                                       Book-Title  \
0  0971880107         2502                                      Wild Animus   
1  0316666343         1295                        The Lovely Bones: A Novel   
2  0385504209          883                                The Da Vinci Code   
3  0060928336          732  Divine Secrets of the Ya-Ya Sisterhood: A Novel   
4  0312195516          723              The Red Tent (Bestselling Backlist)   

     Book-Author      Publisher  
0   Rich Shapero        Too Far  
1   Alice Sebold  Little, Brown  
2      Dan Brown      Doubleday  
3  Rebecca Wells      Perennial  
4  Anita Diamant    Picador USA  


In [14]:
# Keep only users who submitted at least this many ratings.
min_num_user_ratings = 10
is_active_user = user_ratings_df['num_ratings'].ge(min_num_user_ratings)
filtered_users = user_ratings_df[is_active_user]
print(len(filtered_users))
print(filtered_users.head(5))

13097
   User-ID  num_ratings
0    11676        13602
1   198711         7550
2   153662         6109
3    98391         5891
4    35859         5850


In [15]:
# Keep a rating only when both its book and its user passed the activity filters.
book_is_kept = ratings_cleaned['ISBN'].isin(filtered_books['ISBN'])
user_is_kept = ratings_cleaned['User-ID'].isin(filtered_users['User-ID'])
ratings_filtered = ratings_cleaned[book_is_kept & user_is_kept].reset_index(drop=True)
ratings_filtered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188063 entries, 0 to 188062
Data columns (total 3 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   User-ID      188063 non-null  int64 
 1   ISBN         188063 non-null  object
 2   Book-Rating  188063 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 4.3+ MB


In [16]:
# Build ISBN -> {'Book-Title': ..., 'Book-Author': ...} for quick metadata lookups.
book_metadata = filtered_books[['ISBN', 'Book-Title', 'Book-Author']].drop_duplicates()
isbn_lookup = book_metadata.set_index('ISBN').T.to_dict()
len(isbn_lookup)

2161

In [17]:
# Spot-check one entry of the lookup table.
print(isbn_lookup['002542730X'])

{'Book-Title': 'Politically Correct Bedtime Stories: Modern Tales for Our Life and Times', 'Book-Author': 'James Finn Garner'}


In [18]:
def search_books(title=None, author=None, df=None):
    """Find books whose title and/or author contain the given text.

    Matching is case-insensitive and literal: the query is not interpreted as
    a regular expression, so characters such as "." or "(" match themselves.

    Args:
        title: Text to look for in 'Book-Title'; None or "" skips this filter.
        author: Text to look for in 'Book-Author'; None or "" skips this filter.
        df: DataFrame with 'ISBN', 'Book-Title', 'Book-Author' and
            'num_ratings' columns. Defaults to the notebook-level
            ``filtered_books``, looked up at call time so that re-running the
            filtering cell is picked up without redefining this function.

    Returns:
        DataFrame with columns ISBN, Book-Title, Book-Author, num_ratings,
        sorted by num_ratings descending, with a fresh 0..k-1 index.
    """
    if df is None:
        df = filtered_books

    # Build the mask on df's own index; a default RangeIndex mask would be
    # misaligned whenever df does not carry a 0..n-1 index.
    mask = pd.Series(True, index=df.index)

    if title:
        mask &= df['Book-Title'].str.contains(title, case=False, na=False, regex=False)
    if author:
        mask &= df['Book-Author'].str.contains(author, case=False, na=False, regex=False)

    columns = ['ISBN', 'Book-Title', 'Book-Author', 'num_ratings']
    return df.loc[mask, columns].sort_values('num_ratings', ascending=False).reset_index(drop=True)

# Example query. An empty string disables the corresponding filter.
# Other values to try: title = "lord of the rings", title = "feast", author = "Tolkien"
title = ""
author = "George R.R. Martin"
print(search_books(title=title, author=author).head(100))

         ISBN                                         Book-Title  \
0  0553573403  A Game of Thrones (A Song of Ice and Fire, Boo...   

          Book-Author  num_ratings  
0  George R.R. Martin           87  


In [19]:
# Book x user matrix: one row per ISBN, one column per User-ID, cells hold the
# rating (NaN where that user has no rating for that book).
pivot = ratings_filtered.pivot_table(values='Book-Rating', index='ISBN', columns='User-ID')

# Subtract each user's mean rating from that user's column, then fill the
# missing cells with 0.
user_means = pivot.mean(axis=0)
pivot_centered = pivot.sub(user_means, axis='columns').fillna(0)

pivot_centered

User-ID,99,242,243,254,383,384,388,408,424,446,...,278522,278535,278536,278554,278563,278582,278633,278771,278843,278851
ISBN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000649840X,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0007110928,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
002026478X,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0020442203,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
002542730X,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3257229534,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3404148665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3423202327,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3442541751,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Pairwise cosine similarity between the rows (books) of the centred matrix.
similarity = cosine_similarity(X=pivot_centered)

In [21]:
def recommend(title=None, author=None, n=10):
    """Print the books most similar to the best match for a title/author query.

    The query is resolved with ``search_books``; its first row (the match with
    the most ratings) becomes the target book. Candidates are ranked by the
    target's row of the notebook-level ``similarity`` matrix.

    NOTE(review): a second ``recommend`` is defined later in this notebook and
    replaces this one once its cell has run.

    Args:
        title: Text to look for in book titles; None or "" skips the filter.
        author: Text to look for in author names; None or "" skips the filter.
        n: Maximum number of recommendations to print.

    Returns:
        None. Results and "not found" messages are printed.
    """
    search_results = search_books(title=title, author=author)

    if search_results.empty:
        print("Book not found by title search.")
        return

    target_book = search_results.iloc[0]
    target_book_isbn = target_book['ISBN']
    target_book_title = target_book['Book-Title']
    target_book_author = target_book['Book-Author']
    target_num_ratings = target_book['num_ratings']

    if target_book_isbn not in pivot_centered.index:
        print(f"Book found but not in recommendation system: {target_book_title}")
        return

    print(f"Finding recommendations for: {target_book_title} by {target_book_author} with {target_num_ratings} ratings\n")

    target_pos = pivot_centered.index.get_loc(target_book_isbn)

    # Exclude the target by position instead of assuming it sorts first:
    # another book can tie with (or outrank) the target's self-similarity,
    # in which case dropping element 0 would discard a real recommendation
    # and list the target itself.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(similarity[target_pos])
        if pos != target_pos
    ]
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[:max(n, 0)]

    print(f"Top {n} recommendations:\n")
    for i, (pos, score) in enumerate(sim_scores, 1):
        rec_isbn = pivot_centered.index[pos]
        book_info = isbn_lookup[rec_isbn]
        rec_num_ratings = filtered_books[filtered_books['ISBN'] == rec_isbn]['num_ratings'].values[0]
        print(f"Recommendation {i}")
        print(f"Title: {book_info['Book-Title']}")
        print(f"Author: {book_info['Book-Author']}")
        print(f"ISBN: {rec_isbn}")
        print(f"Number of ratings: {rec_num_ratings}")
        print(f"Similarity Score: {score:.4f}\n")


In [22]:
# Query to run; set `author` to a string to also filter by author name.
# Other titles to try: "Caribbean", "harry", "free"
title = "lord of the rings"
author = None
recommend(title=title, author=author, n=10)

Finding recommendations for: The Hobbit : The Enchanting Prelude to The Lord of the Rings by J.R.R. TOLKIEN with 281 ratings

Top 10 recommendations:

Recommendation 1
Title: The Return of the King (The Lord of the Rings, Part 3)
Author: J.R.R. TOLKIEN
ISBN: 0345339738
Number of ratings: 173
Similarity Score: 0.1537

Recommendation 2
Title: The Two Towers (The Lord of the Rings, Part 2)
Author: J.R.R. TOLKIEN
ISBN: 0345339711
Number of ratings: 177
Similarity Score: 0.1078

Recommendation 3
Title: Chapterhouse Dune (Dune Chronicles, Book 6)
Author: Frank Herbert
ISBN: 0441102670
Number of ratings: 53
Similarity Score: 0.0982

Recommendation 4
Title: The Fellowship of the Ring (The Lord of the Rings, Part 1)
Author: J.R.R. TOLKIEN
ISBN: 0345339703
Number of ratings: 257
Similarity Score: 0.0981

Recommendation 5
Title: The Lord of the Rings (Movie Art Cover)
Author: J.R.R. Tolkien
ISBN: 0618129022
Number of ratings: 62
Similarity Score: 0.0950

Recommendation 6
Title: Where the Red Fern

# Updated version

#### search functions

In [23]:
def search_books_string(title=None, author=None, df=None):
    """Find books whose title and/or author contain the given text.

    Matching is case-insensitive and literal: the query is not interpreted as
    a regular expression, so characters such as "." or "(" match themselves.

    Args:
        title: Text to look for in 'Book-Title'; None or "" skips this filter.
        author: Text to look for in 'Book-Author'; None or "" skips this filter.
        df: DataFrame with 'ISBN', 'Book-Title', 'Book-Author' and
            'num_ratings' columns. Defaults to the notebook-level
            ``filtered_books``, looked up at call time so that re-running the
            filtering cell is picked up without redefining this function.

    Returns:
        DataFrame with columns ISBN, Book-Title, Book-Author, num_ratings,
        sorted by num_ratings descending, with a fresh 0..k-1 index.
    """
    if df is None:
        df = filtered_books

    # Build the mask on df's own index; a default RangeIndex mask would be
    # misaligned whenever df does not carry a 0..n-1 index.
    mask = pd.Series(True, index=df.index)

    if title:
        mask &= df['Book-Title'].str.contains(title, case=False, na=False, regex=False)
    if author:
        mask &= df['Book-Author'].str.contains(author, case=False, na=False, regex=False)

    columns = ['ISBN', 'Book-Title', 'Book-Author', 'num_ratings']
    return df.loc[mask, columns].sort_values('num_ratings', ascending=False).reset_index(drop=True)

In [24]:
def search_books_similarity(title=None, author=None, df=None, top_n=20):
    """Rank books by TF-IDF cosine similarity between a query and "title author" text.

    A TF-IDF vectorizer (English stop words removed) is fitted on every book's
    title and author joined by a space; the query (title and/or author joined
    the same way) is projected into that space and compared with each book.

    Args:
        title: Title part of the query; None or "" is ignored.
        author: Author part of the query; None or "" is ignored.
        df: DataFrame with 'ISBN', 'Book-Title', 'Book-Author' and
            'num_ratings' columns. Defaults to the notebook-level
            ``filtered_books``, looked up at call time.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ISBN, Book-Title, Book-Author, num_ratings and
        similarity_score, best match first. Books whose score is 0 are left
        out, so a query that matches nothing yields an empty frame (callers
        test ``.empty`` to detect "not found").
    """
    if df is None:
        df = filtered_books

    base_columns = ['ISBN', 'Book-Title', 'Book-Author', 'num_ratings']
    search_string = " ".join(part for part in (title, author) if part)

    # Nothing to rank. Note that a slice of [-0:] would select every row, so
    # top_n <= 0 has to be handled here as well.
    if not search_string.strip() or df.empty or top_n <= 0:
        no_match = df.iloc[:0][base_columns].copy()
        no_match['similarity_score'] = pd.Series(dtype=float)
        return no_match.reset_index(drop=True)

    # Missing titles/authors become empty strings so the vectorizer never sees NaN.
    combined = df['Book-Title'].fillna('') + " " + df['Book-Author'].fillna('')

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(combined)
    search_string_vec = vectorizer.transform([search_string])

    similarities = cosine_similarity(search_string_vec, tfidf_matrix).flatten()

    # Highest scores first; drop zero-score rows, which share no term with the query.
    top_indices = similarities.argsort()[-top_n:][::-1]
    top_indices = top_indices[similarities[top_indices] > 0]

    results = df.iloc[top_indices][base_columns].copy()
    results['similarity_score'] = similarities[top_indices]

    return results.reset_index(drop=True)

#### similarity check functions

In [25]:
def is_similar_title(search_title, rec_title, threshold=None, verbose=False):
    """Return True when one title contains the other, ignoring case.

    Args:
        search_title: Title text the user searched for.
        rec_title: Title of the candidate recommendation.
        threshold: Unused; accepted so this function is call-compatible with
            ``is_similar_title_cosine``.
        verbose: Unused; accepted for the same reason.

    Returns:
        bool. False when either title is missing or blank: an empty string is
        a substring of every string, so without this guard a blank title would
        count as similar to every book.
    """
    if not search_title or not rec_title:
        return False
    if not search_title.strip() or not rec_title.strip():
        return False

    search_lower = search_title.lower()
    rec_lower = rec_title.lower()
    return search_lower in rec_lower or rec_lower in search_lower

In [26]:
def is_similar_title_cosine(search_title, rec_title, threshold=0.5, verbose=False):
    """Decide whether two titles are similar using TF-IDF cosine similarity.

    A TF-IDF vectorizer (lower-casing, English stop words removed) is fitted on
    just the two titles and the cosine similarity of the two vectors is
    compared with ``threshold``.

    Args:
        search_title: Title text the user searched for.
        rec_title: Title of the candidate recommendation.
        threshold: Minimum cosine similarity (inclusive) to count as similar.
            When None, the substring check ``is_similar_title`` is used.
        verbose: When True, print both titles, the score and the decision.

    Returns:
        bool: True when the titles are considered similar.
    """
    # No numeric threshold to compare against: use the substring check.
    if threshold is None:
        return is_similar_title(search_title, rec_title)

    vectorizer = TfidfVectorizer(stop_words='english', lowercase=True)

    try:
        tfidf_matrix = vectorizer.fit_transform([search_title, rec_title])
    except ValueError:
        # The titles could not be vectorised (scikit-learn raises ValueError
        # when, for example, no term survives stop-word removal). Fall back to
        # substring matching. Only this error is caught, so genuine bugs and
        # KeyboardInterrupt are no longer swallowed.
        return is_similar_title(search_title, rec_title)

    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Convert numpy's bool to a plain bool so both code paths return the same type.
    is_similar = bool(similarity_score >= threshold)

    if verbose:
        print(f"Comparing '{search_title}' to '{rec_title}'")
        print(f"similarity: {similarity_score:.4f}, threshold: {threshold}, is_similar: {is_similar}")

    return is_similar

In [27]:
# Query vs. the Hobbit title.
is_similar_title_cosine(
    search_title="lord of the rings",
    rec_title="The Hobbit : The Enchanting Prelude to The Lord of the Rings",
    verbose=True,
)

Comparing 'lord of the rings' to 'The Hobbit : The Enchanting Prelude to The Lord of the Rings'
similarity: 0.5023, threshold: 0.5, is_similar: True


np.True_

In [28]:
# Query vs. the Two Towers title.
is_similar_title_cosine(
    search_title="lord of the rings",
    rec_title="The Two Towers (The Lord of the Rings, Part 2)",
    verbose=True,
)

Comparing 'lord of the rings' to 'The Two Towers (The Lord of the Rings, Part 2)'
similarity: 0.7093, threshold: 0.5, is_similar: True


np.True_

In [29]:
# Query vs. the Fellowship of the Ring title.
is_similar_title_cosine(
    search_title="lord of the rings",
    rec_title="The Fellowship of the Ring (The Lord of the Rings, Part 1)",
    verbose=True,
)

Comparing 'lord of the rings' to 'The Fellowship of the Ring (The Lord of the Rings, Part 1)'
similarity: 0.5797, threshold: 0.5, is_similar: True


np.True_

In [30]:
def get_recommendations(target_isbn, n, exclude_similar_titles, search_title, similarity_function=None, similarity_threshold=0.5):
    """Collect up to ``n`` books most similar to ``target_isbn``.

    Candidates are ranked by the target's row of the notebook-level
    ``similarity`` matrix, best first.

    Args:
        target_isbn: ISBN of the target book; must be in ``pivot_centered.index``.
        n: Maximum number of recommendations; values <= 0 give an empty list.
        exclude_similar_titles: When True (and ``search_title`` is non-empty),
            skip candidates whose title ``similarity_function`` judges similar
            to ``search_title``.
        search_title: Title text that candidate titles are compared against.
        similarity_function: Callable ``(search_title, rec_title, threshold=...)``
            returning a truthy value for similar titles. Defaults to
            ``is_similar_title``.
        similarity_threshold: Passed to ``similarity_function`` as ``threshold``.

    Returns:
        List of dicts with keys 'isbn', 'title', 'author', 'num_ratings' and
        'similarity_score', in descending score order.
    """
    if n <= 0:
        return []

    target_pos = pivot_centered.index.get_loc(target_isbn)

    # Exclude the target by position instead of assuming it sorts first:
    # another book can tie with (or outrank) the target's self-similarity,
    # in which case dropping element 0 would discard a real recommendation
    # and leave the target itself in the list.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(similarity[target_pos])
        if pos != target_pos
    ]
    sim_scores.sort(key=lambda x: x[1], reverse=True)

    if similarity_function is None:
        similarity_function = is_similar_title

    recommendations = []
    for pos, score in sim_scores:
        rec_isbn = pivot_centered.index[pos]
        book_info = isbn_lookup[rec_isbn]
        rec_title = book_info['Book-Title']
        rec_author = book_info['Book-Author']

        if exclude_similar_titles and search_title:
            if similarity_function(search_title, rec_title, threshold=similarity_threshold):
                continue

        rec_num_ratings = filtered_books[filtered_books['ISBN'] == rec_isbn]['num_ratings'].values[0]

        recommendations.append({
            'isbn': rec_isbn,
            'title': rec_title,
            'author': rec_author,
            'num_ratings': rec_num_ratings,
            'similarity_score': score
        })

        # Stop as soon as enough candidates survived the title filter.
        if len(recommendations) >= n:
            break

    return recommendations

In [31]:
def recommend(title=None, author=None, n=10, exclude_similar_titles=False, search_function=None, similarity_function=None, similarity_threshold=0.5):
    """Print recommendations for the best match of a title/author query.

    NOTE(review): this redefines the ``recommend`` declared earlier in the
    notebook; whichever cell ran last wins.

    Args:
        title: Title text of the query; may be None.
        author: Author text of the query; may be None.
        n: Maximum number of recommendations to print.
        exclude_similar_titles: When True, leave out recommendations whose
            title is judged similar to the reference title. The reference is
            ``title`` when given, otherwise the matched book's own title.
        search_function: Callable ``(title=, author=, df=)`` returning a frame
            of matches, best first. Defaults to ``search_books_string``.
        similarity_function: Title-similarity predicate forwarded to
            ``get_recommendations``; None selects its default (substring check).
        similarity_threshold: Threshold forwarded to ``similarity_function``.

    Returns:
        None. Everything is printed.
    """
    if search_function is None:
        search_function = search_books_string

    search_results = search_function(title=title, author=author, df=filtered_books)

    if search_results.empty:
        print("Book not found by title search.")
        return

    target_book = search_results.iloc[0]
    target_book_isbn = target_book['ISBN']
    target_book_title = target_book['Book-Title']
    target_book_author = target_book['Book-Author']
    target_num_ratings = target_book['num_ratings']

    if target_book_isbn not in pivot_centered.index:
        print(f"Book found but not in recommendation system: {target_book_title}")
        return

    print(f"Finding recommendations for: {target_book_title} by {target_book_author}")
    print(f"Number of ratings: {target_num_ratings}")

    if exclude_similar_titles:
        print("Excluding books with similar titles")
        if similarity_function == is_similar_title_cosine:
            print(f"Using cosine similarity with threshold: {similarity_threshold}\n")
        elif similarity_function is None or similarity_function == is_similar_title:
            print("Using substring matching\n")
        else:
            # Report custom predicates by name instead of mislabelling them.
            function_name = getattr(similarity_function, '__name__', repr(similarity_function))
            print(f"Using {function_name}\n")

    # The title filter needs a reference title. With an author-only query there
    # is no search title, and the filter would be skipped even though the
    # message above says titles are excluded; use the matched book's title then.
    reference_title = title if title else target_book_title

    recommendations = get_recommendations(
        target_isbn=target_book_isbn,
        n=n,
        exclude_similar_titles=exclude_similar_titles,
        search_title=reference_title,
        similarity_function=similarity_function,
        similarity_threshold=similarity_threshold
    )

    print(f"Top {len(recommendations)} recommendations:\n")
    for i, rec in enumerate(recommendations, 1):
        print(f"Recommendation {i}")
        print(f"Title: {rec['title']}")
        print(f"Author: {rec['author']}")
        print(f"ISBN: {rec['isbn']}")
        print(f"Number of ratings: {rec['num_ratings']}")
        print(f"Similarity Score: {rec['similarity_score']:.4f}\n")

In [32]:
# Query preset: no title and no author filter.
title = author = None

In [33]:
# Query preset: title only.
title, author = 'lord of the rings', None

In [34]:
# Query preset: title and author.
title, author = 'lord of the rings', 'tolkien'

In [35]:
# Number of recommendations requested in the recommend(...) calls that pass n=n.
n = 10

In [36]:
# Substring search, no title-based exclusion.
recommend(
    title=title,
    author=author,
    n=n,
    search_function=search_books_string,
)

Finding recommendations for: The Hobbit : The Enchanting Prelude to The Lord of the Rings by J.R.R. TOLKIEN
Number of ratings: 281
Top 10 recommendations:

Recommendation 1
Title: The Return of the King (The Lord of the Rings, Part 3)
Author: J.R.R. TOLKIEN
ISBN: 0345339738
Number of ratings: 173
Similarity Score: 0.1537

Recommendation 2
Title: The Two Towers (The Lord of the Rings, Part 2)
Author: J.R.R. TOLKIEN
ISBN: 0345339711
Number of ratings: 177
Similarity Score: 0.1078

Recommendation 3
Title: Chapterhouse Dune (Dune Chronicles, Book 6)
Author: Frank Herbert
ISBN: 0441102670
Number of ratings: 53
Similarity Score: 0.0982

Recommendation 4
Title: The Fellowship of the Ring (The Lord of the Rings, Part 1)
Author: J.R.R. TOLKIEN
ISBN: 0345339703
Number of ratings: 257
Similarity Score: 0.0981

Recommendation 5
Title: The Lord of the Rings (Movie Art Cover)
Author: J.R.R. Tolkien
ISBN: 0618129022
Number of ratings: 62
Similarity Score: 0.0950

Recommendation 6
Title: Where the Red

In [37]:
# Substring search; recommendations with similar titles removed by substring matching.
recommend(
    title=title,
    author=author,
    n=n,
    search_function=search_books_string,
    exclude_similar_titles=True,
    similarity_function=is_similar_title,
)

Finding recommendations for: The Hobbit : The Enchanting Prelude to The Lord of the Rings by J.R.R. TOLKIEN
Number of ratings: 281
Excluding books with similar titles
Using substring matching

Top 10 recommendations:

Recommendation 1
Title: Chapterhouse Dune (Dune Chronicles, Book 6)
Author: Frank Herbert
ISBN: 0441102670
Number of ratings: 53
Similarity Score: 0.0982

Recommendation 2
Title: Where the Red Fern Grows
Author: Wilson Rawls
ISBN: 0553274295
Number of ratings: 112
Similarity Score: 0.0946

Recommendation 3
Title: Caribbean
Author: James A. Michener
ISBN: 0449217493
Number of ratings: 51
Similarity Score: 0.0921

Recommendation 4
Title: Bridget Jones's Diary
Author: Helen Fielding
ISBN: 0670880728
Number of ratings: 123
Similarity Score: 0.0892

Recommendation 5
Title: The Cat Who Knew Shakespeare
Author: Lilian Jackson Braun
ISBN: 0515095826
Number of ratings: 71
Similarity Score: 0.0882

Recommendation 6
Title: Rich Dad, Poor Dad: What the Rich Teach Their Kids About Mon

In [38]:
# Substring search; recommendations with similar titles removed by cosine similarity (threshold 0.5).
recommend(
    title=title,
    author=author,
    n=n,
    search_function=search_books_string,
    exclude_similar_titles=True,
    similarity_function=is_similar_title_cosine,
    similarity_threshold=0.5,
)

Finding recommendations for: The Hobbit : The Enchanting Prelude to The Lord of the Rings by J.R.R. TOLKIEN
Number of ratings: 281
Excluding books with similar titles
Using cosine similarity with threshold: 0.5

Top 10 recommendations:

Recommendation 1
Title: Chapterhouse Dune (Dune Chronicles, Book 6)
Author: Frank Herbert
ISBN: 0441102670
Number of ratings: 53
Similarity Score: 0.0982

Recommendation 2
Title: Where the Red Fern Grows
Author: Wilson Rawls
ISBN: 0553274295
Number of ratings: 112
Similarity Score: 0.0946

Recommendation 3
Title: Caribbean
Author: James A. Michener
ISBN: 0449217493
Number of ratings: 51
Similarity Score: 0.0921

Recommendation 4
Title: Bridget Jones's Diary
Author: Helen Fielding
ISBN: 0670880728
Number of ratings: 123
Similarity Score: 0.0892

Recommendation 5
Title: The Cat Who Knew Shakespeare
Author: Lilian Jackson Braun
ISBN: 0515095826
Number of ratings: 71
Similarity Score: 0.0882

Recommendation 6
Title: Rich Dad, Poor Dad: What the Rich Teach T

In [39]:
# TF-IDF similarity search, no title-based exclusion.
recommend(
    title=title,
    author=author,
    n=n,
    search_function=search_books_similarity,
)

Finding recommendations for: The Two Towers (The Lord of the Rings, Part 2) by J.R.R. TOLKIEN
Number of ratings: 177
Top 10 recommendations:

Recommendation 1
Title: The Return of the King (The Lord of the Rings, Part 3)
Author: J.R.R. TOLKIEN
ISBN: 0345339738
Number of ratings: 173
Similarity Score: 0.3484

Recommendation 2
Title: The Fellowship of the Ring (The Lord of the Rings, Part 1)
Author: J.R.R. TOLKIEN
ISBN: 0345339703
Number of ratings: 257
Similarity Score: 0.2149

Recommendation 3
Title: Fatherhood
Author: Bill Cosby
ISBN: 0385234104
Number of ratings: 62
Similarity Score: 0.1462

Recommendation 4
Title: The Hobbit : The Enchanting Prelude to The Lord of the Rings
Author: J.R.R. TOLKIEN
ISBN: 0345339681
Number of ratings: 281
Similarity Score: 0.1078

Recommendation 5
Title: Whispers
Author: Dean R. Koontz
ISBN: 0425097609
Number of ratings: 60
Similarity Score: 0.0935

Recommendation 6
Title: Up Country
Author: Nelson DeMille
ISBN: 0446611913
Number of ratings: 67
Similar

In [40]:
# TF-IDF similarity search; recommendations with similar titles removed by cosine similarity (threshold 0.55).
recommend(
    title=title,
    author=author,
    n=n,
    search_function=search_books_similarity,
    exclude_similar_titles=True,
    similarity_function=is_similar_title_cosine,
    similarity_threshold=0.55,
)

Finding recommendations for: The Two Towers (The Lord of the Rings, Part 2) by J.R.R. TOLKIEN
Number of ratings: 177
Excluding books with similar titles
Using cosine similarity with threshold: 0.55

Top 10 recommendations:

Recommendation 1
Title: Fatherhood
Author: Bill Cosby
ISBN: 0385234104
Number of ratings: 62
Similarity Score: 0.1462

Recommendation 2
Title: The Hobbit : The Enchanting Prelude to The Lord of the Rings
Author: J.R.R. TOLKIEN
ISBN: 0345339681
Number of ratings: 281
Similarity Score: 0.1078

Recommendation 3
Title: Whispers
Author: Dean R. Koontz
ISBN: 0425097609
Number of ratings: 60
Similarity Score: 0.0935

Recommendation 4
Title: Up Country
Author: Nelson DeMille
ISBN: 0446611913
Number of ratings: 67
Similarity Score: 0.0916

Recommendation 5
Title: Lost in a Good Book: A Thursday Next Novel
Author: Jasper Fforde
ISBN: 0670031909
Number of ratings: 50
Similarity Score: 0.0889

Recommendation 6
Title: False Pretenses
Author: Catherine Coulter
ISBN: 0451199685
Nu

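The sweet spot for the title-similarity (cosine) threshold seems to be 0.55.  
The last result (TF-IDF search combined with cosine title filtering at that threshold) makes the most sense.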