In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the catalogue and keep only the two columns this script uses,
# dropping rows where either one is missing.
df = pd.read_csv("Books.csv", engine="python")
df = df[["Book-Title", "Book-Author"]].dropna().reset_index(drop=True)

# Keep the first 5500 rows; the similarity matrix built below is rows x rows.
# .copy() detaches the slice from the parent frame so the column assignment
# below writes to an independent frame (avoids SettingWithCopyWarning).
df = df[:5500].copy()

# Normalise the author text in a single pass: drop '[', ']' and "'",
# turn ',' into a space, then lowercase.
_author_cleanup = str.maketrans({"[": None, "]": None, "'": None, ",": " "})
df['Book-Author'] = df['Book-Author'].str.translate(_author_cleanup).str.lower()

# Vectorise the author strings with TF-IDF, ignoring English stop words.
# NOTE(review): scikit-learn's default token pattern presumably drops
# one-character tokens, so initials such as "j." may not contribute - confirm.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['Book-Author'])

# Pairwise cosine similarity between every pair of rows; row/column i
# corresponds to row i of df (df has a fresh 0..n-1 index at this point).
similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

def recommend_by_genre(genre, num_recommendations=10, *, books=None, similarity=None):
    """Recommend book titles similar to the books whose author matches ``genre``.

    Despite the parameter name, ``genre`` is matched against the
    ``Book-Author`` column. Every row whose author contains the query is a
    "seed". Each row is then scored by its highest similarity to any seed
    other than itself, and the best-scoring unique titles are returned.
    A seed can therefore still be recommended through a different seed
    (for example another book by the same author). Rows with zero
    similarity may appear when there are not enough similar rows.

    Args:
        genre: Text to look for in ``Book-Author``. Matched as a literal,
            case-insensitive substring (not as a regular expression).
        num_recommendations: Maximum number of titles to return.
        books: DataFrame with ``Book-Title`` and ``Book-Author`` columns.
            Defaults to the module-level ``df``.
        similarity: Square array where ``similarity[i][j]`` is the similarity
            between the i-th and j-th rows of ``books`` (by position).
            Defaults to the module-level ``similarity_matrix``.

    Returns:
        A list of unique titles ordered from most to least similar (ties keep
        row order); an empty list when ``num_recommendations`` is not
        positive; or, when no author matches, a message string (kept as a
        string for backward compatibility with existing callers).
    """
    # Local import: the file's import block does not bring numpy into scope.
    import numpy as np

    if books is None:
        books = df
    if similarity is None:
        similarity = similarity_matrix

    # regex=False: author names often contain '.', '(' or '+', which must not
    # be interpreted as regex syntax. na=False: a missing author never matches.
    is_seed = books['Book-Author'].str.contains(
        genre.lower(), case=False, regex=False, na=False
    )
    # Work with row positions, since the similarity matrix is positional.
    seed_positions = np.flatnonzero(is_seed.to_numpy(dtype=bool))

    if seed_positions.size == 0:
        return f"No books found for author '{genre}'."
    if num_recommendations <= 0:
        return []

    # best_score[j] = highest similarity between row j and any seed other
    # than j itself. Rows are copied so the caller's matrix is never modified.
    best_score = np.full(len(books), -np.inf)
    for pos in seed_positions:
        row = np.array(similarity[pos], dtype=float)
        row[pos] = -np.inf  # exclude the seed itself explicitly, by position
        np.maximum(best_score, row, out=best_score)

    # Stable sort keeps row order among equal scores, so output is deterministic.
    ranking = np.argsort(-best_score, kind="stable")
    titles = books['Book-Title'].tolist()

    picked = []
    seen = set()
    for pos in ranking:
        if best_score[pos] == -np.inf:
            break  # only rows that were never scored remain
        title = titles[pos]
        if title in seen:
            continue
        seen.add(title)
        picked.append(title)
        if len(picked) == num_recommendations:
            break
    return picked


# Demo: request up to five recommendations for one author and show the result.
recommended_books = recommend_by_genre(
    "J. R. R. Tolkien",
    num_recommendations=5,
)
print("Recommended books:", recommended_books)

Recommended books: ['El Senor De Los Anillos: Las DOS Torres (Lord of the Rings (Paperback))', 'The Book of Lost Tales 1 (The History of Middle-Earth - Volume 1)', 'El Senor De Los Anillos: El Retorno Del Rey (Tolkien, J. R. R. Lord of the Rings. 3.)', 'The Book of Lost Tales 2 (The History of Middle-Earth - Volume 2)', 'The Two Towers (The Lord of the Rings, Part 2)']
