# Search and Recommendation System as Proof of Concept

The search function finds a book and its book_id.

The recommend_books function uses the book_id for generating recommendations.

## Search Function

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

# Load the book metadata table. Code further down reads its "modified_title",
# "ratings" and "book_id" columns.
titles = pd.read_json("books_titles.json")

# Fit TF-IDF on the "modified_title" column. `vectorizer` and `tfidf` are kept
# at module level so search() can reuse them for every query instead of
# refitting.
vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(titles["modified_title"])

#search function
def search(query, vectorizer):
    """
    Find the books whose titles best match a free-text query.

    Parameters:
    - query (str): Title text to look for. It is lower-cased and stripped of
      every character other than ASCII letters, digits and spaces.
    - vectorizer: Fitted TF-IDF vectorizer used to embed the query; it must be
      the one that produced the module-level `tfidf` matrix.

    Returns:
    - pd.DataFrame: Up to 10 rows of the module-level `titles` table with a
      non-zero cosine similarity to the query, ordered by "ratings"
      (descending). Empty when nothing in the corpus shares a term with
      the query.
    """
    processed = re.sub(r"[^a-zA-Z0-9 ]", "", query.lower())
    query_vec = vectorizer.transform([processed])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # argpartition raises if asked for more elements than exist, so cap the
    # candidate count at the corpus size.
    top_k = min(10, similarity.size)
    if top_k == 0:
        return titles.iloc[[]]
    indices = np.argpartition(similarity, -top_k)[-top_k:]

    # A similarity of zero means the title shares no term with the query;
    # such rows are padding, not matches, so callers must not see them.
    indices = indices[similarity[indices] > 0]

    results = titles.iloc[indices]
    return results.sort_values("ratings", ascending=False)

Generate a list of book IDs from the book names:

In [2]:
def generate_liked_books_from_names(book_names, vectorizer):
    """
    Resolve book titles to book IDs via the title search.

    Parameters:
    - book_names (list of str): Titles to look up, one search per title.
    - vectorizer: TF-IDF vectorizer, passed through unchanged to search().

    Returns:
    - list: One book ID (as a string) per title that produced a result,
      taken from the "book_id" of the first row search() returned. Titles
      without a result are reported on stdout and skipped.
    """
    book_ids = []
    for book_name in book_names:
        matches = search(book_name, vectorizer)
        if matches.empty:
            print(f"No matches found for '{book_name}'")
            continue
        # search() decides the row order; the first row is taken as the hit.
        top_match = matches.iloc[0]
        book_ids.append(str(top_match["book_id"]))
    return book_ids

# Example usage: resolve a handful of titles to book IDs.
book_names = ["Everything I never told you", "The Great Gatsby", "1984", "Brave New World", "The Catcher in the Rye"]

# Look each title up with search() and keep the first result's book_id.
# `liked_books` is reused later as the input to recommend_books().
liked_books = generate_liked_books_from_names(book_names, vectorizer)

# Display the resolved IDs so they can be checked by eye.
print("Liked Books (book_ids):", liked_books)


Liked Books (book_ids): ['18693763', '71135', '518844', '3180338', '7933650']


## Recommendation Function

In [3]:
import pandas as pd

def recommend_books(liked_books, csv_book_mapping_path, interactions_path, books_titles_path):
    """
    Recommend books based on user interactions and a list of liked books.

    Parameters:
    - liked_books (list of str): List of book IDs the user likes.
    - csv_book_mapping_path (str): Path to the book ID mapping CSV file.
    - interactions_path (str): Path to the Goodreads interactions CSV file.
    - books_titles_path (str): Path to the books titles JSON file.

    Returns:
    - pd.DataFrame: Top recommended books with scores, links, and images.
    """
    #book ID mapping
    csv_book_mapping = {}
    with open(csv_book_mapping_path, "r") as f:
        for line in f:
            csv_id, book_id = line.strip().split(",")
            csv_book_mapping[csv_id] = book_id

    #overlap users who rated liked books highly
    overlap_users = set()
    with open(interactions_path, 'r') as f:
        for line in f:
            user_id, csv_id, _, rating, _ = line.split(",")
            try:
                rating = int(rating)
            except ValueError:
                continue

            book_id = csv_book_mapping.get(csv_id)
            if book_id in liked_books and rating >= 4:
                overlap_users.add(user_id)

    #recommendations based on overlap users
    rec_lines = []
    with open(interactions_path, 'r') as f:
        for line in f:
            user_id, csv_id, _, rating, _ = line.split(",")
            if user_id in overlap_users:
                book_id = csv_book_mapping.get(csv_id)
                rec_lines.append([user_id, book_id, int(rating)])

    #DataFrame for recommendations
    recs_df = pd.DataFrame(rec_lines, columns=["user_id", "book_id", "rating"])
    recs_df["book_id"] = recs_df["book_id"].astype(str)

    #calculate top recommendations
    top_recs = recs_df["book_id"].value_counts()
    books_titles = pd.read_json(books_titles_path)
    books_titles["book_id"] = books_titles["book_id"].astype(str)
    all_recs = top_recs.to_frame().reset_index()
    all_recs.columns = ["book_id", "book_count"]

    #book details
    all_recs = all_recs.merge(books_titles, how="inner", on="book_id")
    all_recs["score"] = all_recs["book_count"] * (all_recs["book_count"] / all_recs["ratings"])

    #recommendations
    popular_recs = all_recs[all_recs["book_count"] > 75].sort_values("score", ascending=False)

    #books already liked by the user
    popular_recs = popular_recs[~popular_recs["book_id"].isin(liked_books)]

    #links and images for better presentation
    def make_clickable(val):
        return f'<a target="_blank" href="{val}">Goodreads</a>'

    def show_image(val):
        return f'<img src="{val}" width=50></img>'

    return popular_recs.head(10).style.format({'url': make_clickable, 'cover_image': show_image})


## Integrated Search and Recommendation

Call `recommend_books` with the book IDs produced by the search step:

In [4]:
# Recommend books for the IDs in `liked_books` (built by the example cell
# above). The call's value is left as the last expression so the notebook
# renders it as the cell output.
recommend_books(
    liked_books, 
    "data/book_id_map.csv", 
    "data/goodreads_interactions.csv", 
    "books_titles.json")

Unnamed: 0,book_id,book_count,title,ratings,url,cover_image,modified_title,score
4744,28170940,201,"Lethal White (Cormoran Strike, #4)",106,Goodreads,,lethal white cormoran strike 4,381.141509
8746,26856502,114,"Vengeful (Villains, #2)",35,Goodreads,,vengeful villains 2,371.314286
105,34273236,2682,Little Fires Everywhere,21135,Goodreads,,little fires everywhere,340.341803
6724,34927828,145,The Great Alone,70,Goodreads,,the great alone,300.357143
6235,24909347,156,"Obsidio (The Illuminae Files, #3)",82,Goodreads,,obsidio the illuminae files 3,296.780488
3173,24493732,294,Solutions and Other Problems,334,Goodreads,,solutions and other problems,258.790419
3756,34217599,249,Future Home of the Living God,263,Goodreads,,future home of the living god,235.745247
619,32920226,1038,"Sing, Unburied, Sing",4592,Goodreads,,sing unburied sing,234.635017
556,25810500,1114,What is Not Yours is Not Yours,5470,Goodreads,,what is not yours is not yours,226.873126
197,28815371,2039,The Mothers,22346,Goodreads,,the mothers,186.052135


This notebook is a proof of concept for a bookworm recommender system: a title search resolves book names to book IDs, and those IDs drive the recommendations.