In [2]:
import pandas as pd

# Load the Goodreads export; the path is relative to the notebook's working directory.
DATA_PATH = 'goodreads_data.csv'
df = pd.read_csv(DATA_PATH)

# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0.1,Unnamed: 0,Book,Author,Description,Genres,Avg_Rating,Num_Ratings,URL
0,0,To Kill a Mockingbird,Harper Lee,The unforgettable novel of a childhood in a sl...,"['Classics', 'Fiction', 'Historical Fiction', ...",4.27,5691311,https://www.goodreads.com/book/show/2657.To_Ki...
1,1,Harry Potter and the Philosopher’s Stone (Harr...,J.K. Rowling,Harry Potter thinks he is an ordinary boy - un...,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',...",4.47,9278135,https://www.goodreads.com/book/show/72193.Harr...
2,2,Pride and Prejudice,Jane Austen,"Since its immediate success in 1813, Pride and...","['Classics', 'Fiction', 'Romance', 'Historical...",4.28,3944155,https://www.goodreads.com/book/show/1885.Pride...
3,3,The Diary of a Young Girl,Anne Frank,Discovered in the attic in which she spent the...,"['Classics', 'Nonfiction', 'History', 'Biograp...",4.18,3488438,https://www.goodreads.com/book/show/48855.The_...
4,4,Animal Farm,George Orwell,Librarian's note: There is an Alternate Cover ...,"['Classics', 'Fiction', 'Dystopia', 'Fantasy',...",3.98,3575172,https://www.goodreads.com/book/show/170448.Ani...


In [3]:
# Keep only the columns used downstream.
text_columns = ['Book', 'Author', 'Description', 'Genres']
df = df[text_columns + ['Avg_Rating']]

# Blank out missing values in the text columns only. Filling every column with ''
# would also put strings into Avg_Rating, and a mixed str/float column cannot be
# sorted by rating later on.
df = df.fillna({column: '' for column in text_columns})

# One row per title: the first occurrence of each Book is kept.
df = df.drop_duplicates(subset=["Book"])
df.head()

Unnamed: 0,Book,Author,Description,Genres,Avg_Rating
0,To Kill a Mockingbird,Harper Lee,The unforgettable novel of a childhood in a sl...,"['Classics', 'Fiction', 'Historical Fiction', ...",4.27
1,Harry Potter and the Philosopher’s Stone (Harr...,J.K. Rowling,Harry Potter thinks he is an ordinary boy - un...,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',...",4.47
2,Pride and Prejudice,Jane Austen,"Since its immediate success in 1813, Pride and...","['Classics', 'Fiction', 'Romance', 'Historical...",4.28
3,The Diary of a Young Girl,Anne Frank,Discovered in the attic in which she spent the...,"['Classics', 'Nonfiction', 'History', 'Biograp...",4.18
4,Animal Farm,George Orwell,Librarian's note: There is an Alternate Cover ...,"['Classics', 'Fiction', 'Dystopia', 'Fantasy',...",3.98


In [4]:
# One text field per book: the author, a space, then the raw Genres string.
author_text = df['Author']
genres_text = df['Genres']
df['features'] = author_text.add(' ').add(genres_text)

In [5]:
from sklearn.feature_extraction.text import CountVectorizer

# Tokens that occur in fewer than this many rows are left out of the vocabulary.
MIN_DOCUMENT_FREQUENCY = 20

# Bag-of-words counts over the combined author/genres text, with English stop words removed.
vectorizer = CountVectorizer(min_df=MIN_DOCUMENT_FREQUENCY, stop_words='english')
word_matrix = vectorizer.fit_transform(df['features'])

# (number of books, vocabulary size)
word_matrix.shape

(9871, 357)

In [6]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

def get_recommendations(title, author, genres, count=10):
    """Recommend books whose author/genres text is most similar to the given book.

    The query text is built as ``author + ' ' + genres`` (the same layout as
    ``df['features']``), vectorized with the notebook-level ``vectorizer`` and
    compared against every row of ``word_matrix`` using cosine similarity.

    Relies on the notebook globals ``df``, ``vectorizer`` and ``word_matrix``.
    Row ``i`` of ``word_matrix`` must correspond to ``df.iloc[i]``; a length
    mismatch between the two raises ``ValueError`` when the scores are attached.

    Parameters
    ----------
    title : str
        Title of the query book. Rows whose ``Book`` equals it are excluded
        from the results.
    author : str
        Author of the query book.
    genres : str
        Genres text of the query book, e.g. ``"['Fiction', 'Thriller']"``.
    count : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    list
        Titles of the ``count`` most similar books, ordered by ``Avg_Rating``
        from highest to lowest (not by similarity).
    """
    # Only the query text is needed; the vectorizer accepts any iterable of strings.
    query_text = author + ' ' + genres
    query_vector = vectorizer.transform([query_text])

    similarity = cosine_similarity(query_vector, word_matrix).flatten()

    # Attach the scores positionally, then drop the query book itself with a single
    # vectorized comparison instead of one df.iloc lookup per row.
    scored = df[["Book", "Avg_Rating"]].assign(similarity=similarity)
    candidates = scored[scored["Book"] != title]

    # A stable sort keeps the original row order among equal scores, matching
    # what Python's sorted() did on the enumerated score list.
    most_similar = candidates.sort_values(
        by="similarity", ascending=False, kind="stable"
    ).iloc[:count]

    # Present the selected books best-rated first.
    by_rating = most_similar.sort_values(by="Avg_Rating", ascending=False)
    return by_rating["Book"].tolist()

In [11]:
# Example query: the genres string uses the same list-like layout as the Genres column.
get_recommendations(
    title='Yellowface',
    author='R.F. Kuang',
    genres="['Fiction', 'Contemporary', 'Thriller', 'Adult', 'Mystery']",
)

['If We Were Villains',
 'The Push',
 'You (You, #1)',
 'The Weight of Silence',
 'Miracle Creek',
 'Tell Me Everything',
 'False Step',
 'You Will Know Me',
 'The Dinner',
 'The Birthday Girl']