In [1]:
# hybrid_book_recommender.py

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from difflib import get_close_matches

# Load datasets
df = pd.read_csv("C:/Users/riyai/combined_books.csv")
ratings_df = pd.read_csv("C:/Users/riyai/OneDrive/Desktop/book_rec_system/ratings.csv")

In [2]:

# Preprocessing
df = df.drop_duplicates(subset='original_title', keep='first').reset_index(drop=True)
df['Genres'] = df.groupby('authors_ratings')['Genres'].transform(lambda x: x.fillna(x.mode().iloc[0]) if not x.mode().empty else x)
df['Description'] = df.apply(lambda row: f"{row['original_title']} by {row['authors_ratings']}" if pd.isna(row['Description']) or not row['Description'].strip() else row['Description'], axis=1)
df['Genres'].fillna('Unknown', inplace=True)
df['original_publication_year'] = df.groupby('authors_ratings')['original_publication_year'].transform(lambda x: x.fillna(round(x.mean())) if not x.dropna().empty else x)
df['original_publication_year'] = df['original_publication_year'].fillna(0)
df['Description'] = df['Description'].fillna('').astype(str)
df['Genres'] = df['Genres'].fillna('').astype(str)
df['combined_content'] = df['Description'] + " " + df['Genres']


In [3]:

# Ratings matrix for collaborative filtering
ratings_matrix = ratings_df.pivot_table(index='user_id', columns='book_id', values='rating')
ratings_matrix_filled = ratings_matrix.fillna(0)
book_id_to_idx = {book_id: idx for idx, book_id in enumerate(ratings_matrix.columns)}
collaborative_similarity_matrix = cosine_similarity(ratings_matrix_filled.T)


In [4]:

# Content-based filtering
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined_content'])
content_similarity_matrix = cosine_similarity(tfidf_matrix)


In [5]:

# Hybrid score function
def compute_hybrid_score(book_ids, base_idx, top_n=10):
    content_scores = content_similarity_matrix[base_idx, book_ids]
    collaborative_scores = collaborative_similarity_matrix[base_idx, book_ids] if base_idx < collaborative_similarity_matrix.shape[0] else np.zeros(len(book_ids))
    hybrid_scores = 0.3 * content_scores + 0.8 * collaborative_scores
    current_year = df['original_publication_year'].max()
    year_scores = 1 - ((current_year - df.iloc[book_ids]['original_publication_year']) / 100)
    hybrid_scores += 0.1 * year_scores.clip(lower=0).fillna(0).to_numpy()
    results = df.iloc[book_ids].copy()
    results['hybrid_score'] = hybrid_scores
    return results[['book_id', 'original_title', 'Genres', 'original_publication_year', 'hybrid_score']].sort_values(by='hybrid_score', ascending=False).head(top_n)


In [6]:

# Title-based recommendations with fuzzy fallback
def get_recommendations_by_title(title, top_n=10):
    if title.lower() not in df['original_title'].str.lower().values:
        close_titles = get_close_matches(title, df['original_title'].dropna().tolist(), n=3, cutoff=0.6)
        return f"Book not found. Did you mean: {', '.join(close_titles)}?"
    base_idx = df[df['original_title'].str.lower() == title.lower()].index[0]
    sim_scores = content_similarity_matrix[base_idx]
    top_ids = sim_scores.argsort()[-top_n-20:][::-1]
    return compute_hybrid_score(top_ids, base_idx, top_n)

# Genre-based recommendations
def get_recommendations_by_genre(genre, top_n=10):
    filtered_books = df[df['Genres'].str.contains(genre, case=False, na=False)]
    if filtered_books.empty:
        return pd.DataFrame(columns=['original_title', 'Genres', 'original_publication_year', 'hybrid_score'])
    base_idx = filtered_books.index[0]
    return compute_hybrid_score(filtered_books.index.tolist(), base_idx, top_n)

# Keyword-based recommendations
def get_recommendations_by_keywords(keywords, top_n=10):
    query = ' '.join(keywords) if isinstance(keywords, list) else keywords
    query_vec = tfidf.transform([query])
    sim_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()
    top_ids = sim_scores.argsort()[-top_n-20:][::-1]
    base_idx = top_ids[0]
    return compute_hybrid_score(top_ids, base_idx, top_n)

# Filter-based recommendations
def get_recommendations_by_filters(genre=None, year=None, mood_keywords=None, top_n=10):
    df_filtered = df.copy()
    if genre:
        df_filtered = df_filtered[df_filtered['Genres'].str.lower().str.contains(genre.lower(), na=False)]
    if year:
        df_filtered = df_filtered[df_filtered['original_publication_year'] >= float(year)]
    if mood_keywords:
        query = ' '.join(mood_keywords) if isinstance(mood_keywords, list) else mood_keywords
        query_vec = tfidf.transform([query])
        sim_scores = cosine_similarity(query_vec, tfidf.transform(df_filtered['combined_content'])).flatten()
        df_filtered['keyword_score'] = sim_scores
        df_filtered = df_filtered[df_filtered['keyword_score'] > 0]
    if df_filtered.empty:
        return pd.DataFrame(columns=['original_title', 'Genres', 'original_publication_year', 'hybrid_score'])
    base_idx = df_filtered.index[0]
    return compute_hybrid_score(df_filtered.index.tolist(), base_idx, top_n)

# Google Books API fallback (pseudo-interface)
def fallback_google_books_api(query):
    return {"title": query, "source": "Google Books API", "message": "No match found locally. Showing external results."}


In [8]:
get_recommendations_by_title("The Hunger Games")

Unnamed: 0,book_id,original_title,Genres,original_publication_year,hybrid_score
0,1,The Hunger Games,"['Young Adult', 'Fiction', 'Dystopia', 'Fantas...",2008.0,1.191
16,17,Catching Fire,"['Young Adult', 'Dystopia', 'Fiction', 'Fantas...",2009.0,0.780642
19,20,Mockingjay,"['Young Adult', 'Dystopia', 'Fiction', 'Fantas...",2010.0,0.742226
320,327,Legend,"['Young Adult', 'Dystopia', 'Fantasy', 'Scienc...",2011.0,0.278473
217,221,"A Child Called ""It"": One Child's Courage to Su...","['Nonfiction', 'Memoir', 'Biography', 'Psychol...",1995.0,0.23056
497,507,The Hunger Games Box Set,"['Young Adult', 'Fiction', 'Fantasy', 'Dystopi...",2010.0,0.206871
929,947,Angelfall,"['Fantasy', 'Young Adult', 'Angels', 'Paranorm...",2011.0,0.205277
778,794,Doctor Sleep,"['Horror', 'Fiction', 'Thriller', 'Fantasy', '...",2013.0,0.197736
616,628,Patriot Games,"['Fiction', 'Thriller', 'Espionage', 'Military...",1987.0,0.197277
926,944,Desperation,"['Horror', 'Fiction', 'Thriller', 'Fantasy', '...",1996.0,0.188324


In [9]:
get_recommendations_by_genre("Fantasy")

Unnamed: 0,book_id,original_title,Genres,original_publication_year,hybrid_score
0,1,The Hunger Games,"['Young Adult', 'Fiction', 'Dystopia', 'Fantas...",2008.0,1.191
16,17,Catching Fire,"['Young Adult', 'Dystopia', 'Fiction', 'Fantas...",2009.0,0.780642
19,20,Mockingjay,"['Young Adult', 'Dystopia', 'Fiction', 'Fantas...",2010.0,0.742226
1,2,Harry Potter and the Philosopher's Stone,"['Fantasy', 'Young Adult', 'Fiction', 'Magic',...",1997.0,0.560239
2,3,Twilight,"['Fiction', 'Classics', 'Fantasy', 'Philosophy...",2005.0,0.540125
11,12,Divergent,"['Young Adult', 'Dystopia', 'Fantasy', 'Fictio...",2011.0,0.533389
20,21,Harry Potter and the Order of the Phoenix,"['Fantasy', 'Young Adult', 'Fiction', 'Magic',...",2003.0,0.457892
24,25,Harry Potter and the Deathly Hallows,"['Fantasy', 'Young Adult', 'Fiction', 'Magic',...",2007.0,0.451882
26,27,Harry Potter and the Half-Blood Prince,"['Fantasy', 'Young Adult', 'Fiction', 'Magic',...",2005.0,0.45003
17,18,Harry Potter and the Prisoner of Azkaban,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',...",1999.0,0.444633


In [18]:
get_recommendations_by_keywords("sarah j. maas")

Unnamed: 0,book_id,original_title,Genres,original_publication_year,hybrid_score
7839,8363,The Assassin and the Empire,"['Fantasy', 'Romance', 'Young Adult', 'New Adu...",2012.0,1.195
8117,8668,The Assassin and the Underworld,"['Fantasy', 'Romance', 'Young Adult', 'New Adu...",2012.0,0.358874
7183,7621,The Assassin and the Desert,"['Fantasy', 'Romance', 'Young Adult', 'New Adu...",2012.0,0.3556
6974,7391,The Assassin and the Pirate Lord,"['Fantasy', 'Romance', 'Young Adult', 'New Adu...",2012.0,0.337007
3878,4033,The Moon and More,"['Young Adult', 'Romance', 'Contemporary', 'Fi...",2013.0,0.168936
1629,1667,What Happened to Goodbye,"['Young Adult', 'Romance', 'Contemporary', 'Fi...",2011.0,0.168894
3248,3372,Saint Anything,"['Young Adult', 'Romance', 'Contemporary', 'Fi...",2015.0,0.168065
4899,5124,First Frost,"['Fiction', 'Magical Realism', 'Fantasy', 'Rom...",2015.0,0.163008
2387,2456,Keeping the Moon,"['Young Adult', 'Romance', 'Contemporary', 'Fi...",1999.0,0.160936
815,831,Lock and Key,"['Young Adult', 'Romance', 'Contemporary', 'Fi...",2008.0,0.160579


In [15]:
get_recommendations_by_filters(
    genre="Fantasy", 
    year=2010, 
    mood_keywords=["strong female lead"]
)

Unnamed: 0,book_id,original_title,Genres,original_publication_year,hybrid_score
186,188,A Dance with Dragons,"['Fantasy', 'Fiction', 'Epic Fantasy', 'Scienc...",2011.0,1.194
1094,1116,The Immortals of Meluha,"['Fiction', 'Fantasy', 'Mythology', 'Indian Li...",2010.0,0.28037
737,752,Crown of Midnight,"['Fantasy', 'Young Adult', 'Romance', 'Magic',...",2013.0,0.165004
4457,4644,Eona: The Last Dragoneye,"['Fantasy', 'Young Adult', 'Dragons', 'Romance...",2011.0,0.163337
6755,7148,Rebel of the Sands,"['Fantasy', 'Young Adult', 'Romance', 'Magic',...",2016.0,0.157663
635,648,Inheritance,"['Fantasy', 'Young Adult', 'Fiction', 'Dragons...",2011.0,0.14004
2323,2390,The Queen of the Tearling,"['Fantasy', 'Young Adult', 'Fiction', 'Dystopi...",2014.0,0.137036
5831,6118,Magic Breaks,"['Urban Fantasy', 'Fantasy', 'Paranormal', 'Ma...",2014.0,0.135744
1100,1122,Finale,"['Fantasy', 'Young Adult', 'Romance', 'Angels'...",2012.0,0.135681
2725,2814,Dreams of Gods & Monsters,"['Fantasy', 'Young Adult', 'Romance', 'Angels'...",2014.0,0.135626
