In [1]:
import ast

import pandas as pd

In [2]:
# Read the translated books table and discard the leftover index column
# ('Unnamed: 0') when the CSV contains one.

df = (
    pd.read_csv("../../data/final/books_translated.csv")
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
df

Unnamed: 0,title,series,author,rating,description,language,genres,bookFormat,pages,publisher,publishDate,firstPublishDate,awards,coverImg,series_number,desc_length
0,The Hunger Games,The Hunger Games #1,Suzanne Collins,4.33,WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...,en,"['adventure', 'dystopia', 'fantasy']",Hardcover,374,Scholastic Press,2008-09-14 00:00:00,False,True,https://i.gr-assets.com/images/S/compressed.ph...,1.0,147
1,Harry Potter and the Order of the Phoenix,Harry Potter #5,"J.K. Rowling, Mary Grandpré (Illustrator)",4.50,There is a door at the end of a silent corrido...,en,"['adventure', 'childrens', 'classics']",Paperback,870,Scholastic Inc.,2004-09-28 00:00:00,True,True,https://i.gr-assets.com/images/S/compressed.ph...,5.0,136
2,To Kill a Mockingbird,To Kill a Mockingbird,Harper Lee,4.28,The unforgettable novel of a childhood in a sl...,en,"['classics', 'fiction', 'historical']",Paperback,324,Harper Perennial Modern Classics,2006-05-23 00:00:00,True,True,https://i.gr-assets.com/images/S/compressed.ph...,,137
3,Pride and Prejudice,Standalone Novel,"Jane Austen, Anna Quindlen (Introduction)",4.26,Alternate cover edition of ISBN 9780679783268S...,en,"['classics', 'fiction', 'historical']",Paperback,279,Modern Library,2000-10-10 00:00:00,True,False,https://i.gr-assets.com/images/S/compressed.ph...,,100
4,Twilight,The Twilight Saga #1,Stephenie Meyer,3.60,About three things I was absolutely positive.\...,en,"['fantasy', 'fiction', 'paranormal']",Paperback,501,"Little, Brown and Company",2006-09-06 00:00:00,True,True,https://i.gr-assets.com/images/S/compressed.ph...,1.0,55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57388,Beasts & Behemoths (Dungeons & Dragons),Standalone Novel,"Jim Zub, Stacy King, Andrew Wheeler, Official ...",3.60,Study this guide and keep it close at hand--th...,en,['fiction'],Paperback,114,Ten Speed Press,2020-10-20 00:00:00,True,False,http://books.google.com/books/content?id=1toui...,,170
57389,Faculty of Dragon Riders,Standalone Novel,Dmitry Nazarov,3.60,I tamed the Black Dragon!So I thought until I ...,en,['fiction'],Paperback,401,Litres,2022-08-24 00:00:00,True,False,http://books.google.com/books/content?id=QSGFE...,,52
57390,Midnight Delivery Sex,Standalone Novel,Neneko Narazaki,4.00,With SNS card for collecting in the first edit...,de,['comics'],Paperback,29,Hayabusa,2021-05-04 00:00:00,True,False,http://books.google.com/books/content?id=s_8_E...,,78
57391,Monster Girl: 2,Standalone Novel,Kazuki Funatsu,3.50,"Dopo il loro incontro, Yatsuki si ritrova ad a...",it,"['comics', 'isekai']",Paperback,216,Edizioni BD,2020-05-01 00:00:00,True,False,http://books.google.com/books/content?id=_yjnE...,,48


In [3]:
# Ensure the necessary columns are properly formatted:
# 'genres' and 'author' must each hold one list of strings per row.

def _parse_genres(value):
    """Turn one 'genres' cell into a list.

    Strings are parsed with ast.literal_eval, which only accepts Python
    literals, so a corrupted or malicious CSV cell cannot execute code the
    way eval() would. Values that are already lists are returned unchanged,
    and missing values become an empty list.
    """
    if isinstance(value, str):
        return ast.literal_eval(value)
    if isinstance(value, (list, tuple)):
        return value
    return []


def _split_authors(value):
    """Turn one 'author' cell into a list of stripped author names.

    Strings are split on commas. Values that are already lists are returned
    unchanged, so re-running this cell is harmless; missing values become an
    empty list instead of raising AttributeError.
    """
    if isinstance(value, str):
        return [name.strip() for name in value.split(',')]
    if isinstance(value, (list, tuple)):
        return value
    return []


# Decide per value rather than from the first row only, so mixed or
# already-converted columns are handled correctly.
df['genres'] = df['genres'].apply(_parse_genres)
df['author'] = df['author'].apply(_split_authors)

In [4]:
# Model configuration

def multi_filter_recommendation(
    df,
    liked_genres=None,
    exclude_authors=None,
    selected_language=None,
    min_rating=0.0,
    top_n=10
):
    """Return the highest-rated books that pass every supplied filter.

    Parameters
    ----------
    df : pandas.DataFrame
        Book table with the columns 'title', 'author', 'genres', 'rating'
        and 'language'. 'author' and 'genres' are expected to hold one list
        of strings per row. The frame is not modified.
    liked_genres : str or iterable of str, optional
        Keep books that have at least one of these genres (exact match).
        A single string is treated as a one-element list.
    exclude_authors : str or iterable of str, optional
        Drop books where any listed author matches one of these names.
        Matching ignores case, surrounding whitespace and a trailing role
        tag such as "(Goodreads Author)" or "(Illustrator)".
    selected_language : str, optional
        Keep only books whose 'language' equals this value.
    min_rating : float, optional
        Keep only books rated at least this value. 0 or None disables the
        filter.
    top_n : int, optional
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns 'title', 'author', 'genres', 'rating', 'language', sorted by
        rating (highest first), one row per title, with a fresh 0..n-1 index.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(liked_genres, str):
        liked_genres = [liked_genres]
    if isinstance(exclude_authors, str):
        exclude_authors = [exclude_authors]

    def _author_key(name):
        """Lower-cased name without a trailing role tag like '(Illustrator)'."""
        base = name.partition('(')[0].strip().lower()
        # Fall back to the full text if the name consists only of a tag.
        return base or name.strip().lower()

    # No defensive copy needed: every step below returns a new frame.
    filtered_df = df

    # Filter by liked genres
    if liked_genres:
        has_liked_genre = filtered_df['genres'].apply(
            lambda genres: any(g in genres for g in liked_genres)
        )
        # astype(bool): apply() on an empty Series yields an object-dtype
        # result, which pandas would not treat as a boolean mask.
        filtered_df = filtered_df[has_liked_genre.astype(bool)]

    # Exclude books written (or co-written) by any of the selected authors
    if exclude_authors:
        # Normalise the exclusion list once instead of once per author.
        excluded = {_author_key(e) for e in exclude_authors}
        no_excluded_author = filtered_df['author'].apply(
            lambda authors: all(_author_key(a) not in excluded for a in authors)
        )
        filtered_df = filtered_df[no_excluded_author.astype(bool)]

    # Filter by language
    if selected_language:
        filtered_df = filtered_df[filtered_df['language'] == selected_language]

    # Filter by rating
    if min_rating is not None and min_rating > 0:
        filtered_df = filtered_df[filtered_df['rating'] >= min_rating]

    # Sort by rating and keep the best-rated row of each title. mergesort is
    # stable, so equally rated books keep their input order and the result
    # is reproducible between runs.
    filtered_df = filtered_df.sort_values(by='rating', ascending=False, kind='mergesort')
    filtered_df = filtered_df.drop_duplicates(subset='title')

    # Selection of cleaned columns and limit to top_n results
    output_columns = ['title', 'author', 'genres', 'rating', 'language']
    return filtered_df[output_columns].head(top_n).reset_index(drop=True)



In [5]:
# Demonstrate the recommender with one sample set of reader preferences

# Lift pandas' column-width limit so long values are not abbreviated with '...'
pd.set_option('display.max_colwidth', None)

min_rating = 4.0
language = 'en'
exclude_authors = ['J.K. Rowling', 'Suzanne Collins']
liked_genres = ['fantasy', 'adventure']

recommendations = multi_filter_recommendation(
    df,
    liked_genres=liked_genres, exclude_authors=exclude_authors,
    selected_language=language, min_rating=min_rating,
    top_n=10,
)

print(recommendations.to_string(index=False))

                                                 title                                          author                          genres  rating language
                                     Maze of Existence           [Tina M. Randolph (Goodreads Author)]                       [fantasy]    5.00       en
                          Origins of The Wheel of Time                            [Michael Livingston]              [fantasy, fiction]    5.00       en
                       Bertie's Book of Spooky Wonders                                [Ocelot Emerson]                       [fantasy]    5.00       en
                              Orion: The Fight for Vox         [Ruth Watson-Morris (Goodreads Author)]                       [fantasy]    4.93       en
          Elfquest: The Original Quest Gallery Edition      [Wendy Pini (Illustrations), Richard Pini]               [comics, fantasy]    4.90       en
                                  Jellybean the Dragon               [Elias Zapple (Good