In [14]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load your dataset
df = pd.read_csv('./Dataset/Books_merged.csv')

In [4]:
# Fill missing author values with a placeholder
df['Author'] = df['Author'].fillna('Unknown Author')

In [5]:
df1 = df.copy()

In [6]:
# Encode the author's name
label_encoder = LabelEncoder()
df1['Author_encoded'] = label_encoder.fit_transform(df1['Author'].astype(str))

In [8]:
# Reshape author_encoded to be a 2D array (required for cosine_similarity)
author_encoded_reshaped = df1['Author_encoded'].values.reshape(-1, 1)

In [9]:
# Compute cosine similarity between all books based on the encoded authors
cosine_sim = cosine_similarity(author_encoded_reshaped, author_encoded_reshaped)

In [10]:
# Function to get book recommendations based on the author
def get_author_recommendations(title, cosine_sim=cosine_sim):
    # Check if the title exists in the dataset
    if title not in df['Title'].values:
        print(f"The book titled '{title}' was not found in the dataset.")
        return pd.DataFrame()

    # Get the index of the book that matches the title
    idx = df1[df1['Title'] == title].index[0]

    # Get the pairwise similarity scores of all books with that book
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the books based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the 10 most similar books
    sim_indices = [i[0] for i in sim_scores[1:6]]

    # Return the top 10 most similar books
    return df1.iloc[sim_indices]

In [12]:
# Example: Get recommendations for a specific book title
book_title = 'Data Smart'
recommendations = get_author_recommendations(book_title)

# Display the recommended books if found
if not recommendations.empty:
    print(recommendations[['Title', 'Author', 'GenreCombined', 'Publisher', 'Height']])

                                        Title            Author  \
1                                  Data Smart     Foreman, John   
2                    God Created the Integers  Hawking, Stephen   
3                           Superfreakonomics   Dubner, Stephen   
4                                 Orientalism      Said, Edward   
5  Nature of Statistical Learning Theory, The  Vapnik, Vladimir   

        GenreCombined      Publisher  Height  
1   tech,data_science          Wiley     235  
2    tech,mathematics        Penguin     197  
3   science,economics  HarperCollins     179  
4  nonfiction,history        Penguin     197  
5   tech,data_science       Springer     230  


In [18]:
df1[df1['Author'].str.contains('Foreman', case=False)]


Unnamed: 0.1,Unnamed: 0,Title,Author,Height,Publisher,GenreCombined,Author_encoded
1,1,Data Smart,"Foreman, John",235,Wiley,"tech,data_science",40
212,1,Data Smart,"Foreman, John",235,Wiley,"data_science,objectivism",40
