<a href="https://colab.research.google.com/github/ShivEla/LibriQuery/blob/main/Unified_RAG_Book_Q%26A_System_with_Chat_History.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import requests
import json
import os
import time # For adding a small delay between API calls

# --- Configuration ---
OPEN_LIBRARY_API_URL_SEARCH = "https://openlibrary.org/search.json"
OPEN_LIBRARY_API_URL_WORKS = "https://openlibrary.org/works/"  # For detailed work data
OPEN_LIBRARY_API_URL_AUTHORS = "https://openlibrary.org/authors/"  # For author data
OPEN_LIBRARY_API_URL_BOOKS_BY_BIBKEYS = "https://openlibrary.org/api/books"  # For ISBN lookup
OPEN_LIBRARY_API_URL_COVERS = "https://covers.openlibrary.org/b/"  # For cover images

GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

# Read the key from the GEMINI_API_KEY environment variable so it never has to be
# committed with the notebook. The fallback is exactly the placeholder that
# call_gemini_api() tests for, so an unconfigured key is reported instead of being
# sent to the API.
# <--- FOR LOCAL USE WITHOUT SECRETS, REPLACE THE FALLBACK STRING WITH YOUR ACTUAL API KEY
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY")


In [None]:
def make_api_request(url: str, params: dict = None, headers: dict = None, timeout: float = 10.0) -> dict:
    """
    Helper function to make HTTP GET requests and handle common errors.

    Args:
        url (str): Endpoint to request.
        params (dict): Optional query-string parameters.
        headers (dict): Optional HTTP headers.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        dict: The decoded JSON object, or an empty dict when the request fails,
        the body is not valid JSON, or the JSON is not an object.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url, params=params, headers=headers or {}, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making API request to {url}: {e}")
        return {}
    except ValueError as e:
        # Older requests releases raise a plain ValueError subclass for invalid JSON.
        print(f"Error decoding JSON response from {url}: {e}")
        return {}

    # Callers use dict methods on the result, so never hand back a list or scalar.
    if not isinstance(payload, dict):
        print(f"Error making API request to {url}: unexpected JSON payload type {type(payload).__name__}")
        return {}
    return payload


In [None]:
def fetch_open_library_data_for_context(query: str, limit: int = 5) -> list:
    """
    Run a query against the Open Library Search API and return the matching documents.

    Args:
        query (str): Search text sent as the ``q`` parameter.
        limit (int): Maximum number of results requested.

    Returns:
        list: The ``docs`` entries of the search response, or an empty list
        when the response has none.
    """
    print(f"  Searching Open Library for '{query}' as initial context...")
    search_response = make_api_request(
        OPEN_LIBRARY_API_URL_SEARCH,
        params={"q": query, "limit": limit},
    )
    matching_docs = search_response.get("docs", [])
    return matching_docs

In [5]:
def fetch_detailed_work_data(work_olid: str) -> dict:
    """
    Look up a work through the /works/{OLID}.json API and extract the fields used for context.

    Args:
        work_olid (str): Open Library identifier of the work.

    Returns:
        dict: ``description`` (text or a placeholder), ``subjects`` (list) and
        ``authors_ol_ids`` (list of author OLIDs).
    """
    work_record = make_api_request(f"{OPEN_LIBRARY_API_URL_WORKS}{work_olid}.json")

    # The description arrives either as a plain string or as a {"value": ...} object.
    raw_description = work_record.get('description')
    if isinstance(raw_description, dict) and 'value' in raw_description:
        description_text = raw_description['value']
    elif isinstance(raw_description, str):
        description_text = raw_description
    else:
        description_text = 'No detailed description available.'

    raw_subjects = work_record.get('subjects')
    subject_list = raw_subjects if raw_subjects and isinstance(raw_subjects, list) else []

    # Author keys look like "/authors/<OLID>"; keep only the trailing identifier.
    author_ids = []
    raw_authors = work_record.get('authors')
    if raw_authors and isinstance(raw_authors, list):
        author_ids = [
            entry['author']['key'].split('/')[-1]
            for entry in raw_authors
            if 'author' in entry and 'key' in entry['author']
        ]

    return {
        'description': description_text,
        'subjects': subject_list,
        'authors_ol_ids': author_ids,
    }

In [6]:
def fetch_author_details(author_olid: str) -> dict:
    """
    Look up an author through the /authors/{OLID}.json API.

    Args:
        author_olid (str): Open Library identifier of the author.

    Returns:
        dict: ``name``, ``bio``, ``birth_date``, ``death_date`` and
        ``author_subjects``, with placeholder values for missing fields.
    """
    author_record = make_api_request(f"{OPEN_LIBRARY_API_URL_AUTHORS}{author_olid}.json")

    # The bio arrives either as a plain string or as a {"value": ...} object.
    raw_bio = author_record.get('bio')
    if isinstance(raw_bio, str):
        biography = raw_bio
    elif isinstance(raw_bio, dict) and 'value' in raw_bio:
        biography = raw_bio['value']
    else:
        biography = 'No biography available.'

    return {
        'name': author_record.get('name', 'N/A'),
        'bio': biography,
        'birth_date': author_record.get('birth_date', 'N/A'),
        'death_date': author_record.get('death_date', 'N/A'),
        'author_subjects': author_record.get('subjects', []),  # Subjects associated with the author
    }

In [7]:
def fetch_book_by_isbn(isbn: str) -> dict:
    """
    Look up an edition through the /api/books?bibkeys=ISBN:{ISBN} API.

    Args:
        isbn (str): ISBN of the edition.

    Returns:
        dict: ``isbn_title``, ``publish_date``, ``number_of_pages``,
        ``publishers`` and ``isbn_authors``; an empty dict when the response
        has no entry for this ISBN.
    """
    bibkey = f"ISBN:{isbn}"
    lookup = make_api_request(
        OPEN_LIBRARY_API_URL_BOOKS_BY_BIBKEYS,
        params={"bibkeys": bibkey, "format": "json", "jscmd": "data"},
    )

    if not lookup or bibkey not in lookup:
        return {}

    edition = lookup[bibkey]

    # Keep only the entries that actually carry a non-empty name.
    publisher_names = []
    for publisher in edition.get('publishers', []):
        publisher_name = publisher.get('name')
        if publisher_name:
            publisher_names.append(publisher_name)

    author_names = []
    for author in edition.get('authors', []):
        author_name = author.get('name')
        if author_name:
            author_names.append(author_name)

    return {
        'isbn_title': edition.get('title', 'N/A'),
        'publish_date': edition.get('publish_date', 'N/A'),
        'number_of_pages': edition.get('number_of_pages', 'N/A'),
        'publishers': publisher_names,
        'isbn_authors': author_names,
    }


In [8]:
def get_cover_url(olid: str = None, isbn: str = None, size: str = 'M') -> str:
    """
    Build the URL of a cover image.

    Args:
        olid (str): Open Library edition identifier; used in preference to ``isbn``.
        isbn (str): ISBN, used only when ``olid`` is empty.
        size (str): 'S' (small), 'M' (medium) or 'L' (large).

    Returns:
        str: The cover URL, or "No cover available." when neither identifier is given.
    """
    if olid:
        id_kind, identifier = "olid", olid
    elif isbn:
        id_kind, identifier = "isbn", isbn
    else:
        return "No cover available."
    return f"{OPEN_LIBRARY_API_URL_COVERS}{id_kind}/{identifier}-{size}.jpg"


In [9]:
def call_gemini_api(prompt: str, chat_history: list = None, response_schema: dict = None, timeout: float = 60.0) -> dict:
    """
    Generic function to call the Gemini API with a given prompt, chat history, and an optional response schema.

    Args:
        prompt (str): Text sent as the final user turn.
        chat_history (list): Earlier turns, each shaped like
            ``{"role": ..., "parts": [{"text": ...}]}``; sent before the prompt.
        response_schema (dict): When given, the model is asked for JSON matching
            this schema and the decoded JSON is returned.
        timeout (float): Seconds to wait for the API before giving up.

    Returns:
        dict: ``{"text": ...}`` for plain-text calls, the decoded JSON for
        schema calls, or an empty dict on any configuration, network, or
        response-format error (a message is printed in each case).
    """
    # Reject both the placeholder shipped in the configuration cell ("YOUR Key")
    # and the conventional one, so an unconfigured key never reaches the API.
    if not GEMINI_API_KEY or GEMINI_API_KEY in ("YOUR_GEMINI_API_KEY", "YOUR Key"):
        print("Configuration Error: GEMINI_API_KEY is not set. Please provide your API key.")
        return {}

    # Copy the history so the caller's list is never mutated by the request.
    contents = list(chat_history) if chat_history else []
    contents.append({"role": "user", "parts": [{"text": prompt}]})

    generation_config = {}
    if response_schema:
        generation_config["responseMimeType"] = "application/json"
        generation_config["responseSchema"] = response_schema
    payload = {"contents": contents, "generationConfig": generation_config}

    # The key goes in a header rather than the query string: requests includes the
    # full URL in HTTP error messages, which would print the key below.
    headers = {
        'Content-Type': 'application/json',
        'x-goog-api-key': GEMINI_API_KEY,
    }

    try:
        response = requests.post(GEMINI_API_URL, headers=headers, data=json.dumps(payload), timeout=timeout)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error calling Gemini API: {e}")
        return {}
    except ValueError as e:
        print(f"Error decoding JSON from LLM response: {e}")
        return {}

    # Any missing level (no candidates, no content, no parts, no text) is "malformed".
    try:
        text = result["candidates"][0]["content"]["parts"][0]["text"]
    except (KeyError, IndexError, TypeError):
        text = None
    if not isinstance(text, str):
        print("Warning: LLM response was empty or malformed.")
        return {}

    if not response_schema:
        # For plain text responses
        return {"text": text}

    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from LLM response: {e}")
        return {}


In [10]:
def extract_search_terms_from_question(question: str, chat_history: list = None) -> str:
    """
    Uses LLM to extract relevant search terms from a user's question, considering chat history.
    This helps in dynamically searching Open Library for context.
    Includes a basic fallback if LLM extraction fails.

    Args:
        question (str): The new user question.
        chat_history (list): Earlier turns, each shaped like
            ``{"role": ..., "parts": [{"text": ...}]}``.

    Returns:
        str: A non-empty search term; "book" when nothing better can be derived.
    """
    filler_prefixes = (
        "plot of", "storyline of", "summary of", "rating of",
        "review of", "what is the", "tell me about",
    )

    # Render the history as "Role: text" lines, tolerating incomplete entries.
    history_lines = []
    for msg in chat_history or []:
        parts = msg.get('parts') or [{}]
        role = str(msg.get('role', '')).capitalize()
        history_lines.append(f"{role}: {parts[0].get('text', '')}\n")
    history_str = "".join(history_lines)

    prompt = f"""Given the following conversation history and the new user question, identify the primary book title, author name, or series name that would be most effective for searching Open Library. If the question is about a general topic or genre, extract that.
    Return only the most relevant single keyword or a short phrase for Open Library search. Do NOT include words like 'plot', 'storyline', 'summary', 'rating', 'review', 'what is the', 'tell me about' in the extracted terms.
    If no specific book/author/series is mentioned, return a relevant genre or 'book'.

    Chat History:
    {history_str}

    User Question: "{question}"

    Search Term:"""

    response = call_gemini_api(prompt, chat_history=chat_history)  # Pass history to LLM call
    extracted_term = (response.get('text') or '').strip()

    # Basic cleanup for common phrases that LLM might still include. Repeat until
    # nothing more can be stripped so stacked fillers ("what is the plot of ...")
    # are fully removed, and slice the original string so its casing is kept.
    stripped_something = True
    while stripped_something:
        stripped_something = False
        for phrase in filler_prefixes:
            head, rest = extracted_term[:len(phrase)], extracted_term[len(phrase):]
            # Require a word boundary so "plot offensive" is not cut to "fensive".
            if head.lower() == phrase and (not rest or rest[0].isspace()):
                extracted_term = rest.strip()
                stripped_something = True
                break

    if extracted_term:
        return extracted_term

    # Fallback when the LLM gave nothing usable: take whatever follows a known marker.
    question_lower = question.lower()
    for marker in ("plot of", "storyline of"):
        if marker in question_lower:
            candidate = question_lower.split(marker, 1)[1].replace("?", "").replace(".", "").strip()
            # Never return an empty search term.
            return candidate or "book"
    return "book"


In [2]:




def _as_text_list(value) -> list:
    """Normalize a scalar-or-list field to a list of non-empty strings."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value if item is not None and item != ""]
    return [str(value)] if value != "" else []


def _join_or_default(value, default: str = 'N/A') -> str:
    """Join a scalar-or-list field with ", ", or return ``default`` when it is empty."""
    items = _as_text_list(value)
    return ", ".join(items) if items else default


def answer_question_about_books(user_question: str, books_context: list, chat_history: list = None) -> str:
    """
    Uses an LLM to answer a user's question based on the provided book summaries, genres, and author details,
    considering chat history.

    Args:
        user_question (str): The question asked by the user.
        books_context (list): A list of dictionaries, each containing enriched book data.
        chat_history (list): The history of the conversation.

    Returns:
        str: The answer generated by the LLM, or a fixed message when there is
        no book context or the LLM call produced no text.
    """
    if not books_context:
        return "I couldn't find any relevant book information to answer your question. Please try a different query."

    # Placeholder written by fetch_detailed_work_data() when a work has no description;
    # treat it as "missing" so the search result's first sentence can still be used.
    missing_description = 'No detailed description available.'

    context_text = []
    for book in books_context:
        title = book.get('title', 'N/A')
        author = _join_or_default(book.get('author_name'))

        summary = book.get('description')
        if not summary or summary == missing_description:
            first_sentences = _as_text_list(book.get('first_sentence'))
            if first_sentences:
                summary = first_sentences[0]
            elif not summary:
                summary = 'No description available.'

        # An empty 'subjects' list from the work lookup must not hide the search
        # result's own 'subject' list.
        genres = _join_or_default(book.get('subjects') or book.get('subject'))
        # Depending on which lookup supplied it, this may be a list or a single string.
        publish_date = _join_or_default(book.get('publish_date'))
        num_pages = book.get('number_of_pages', 'N/A')
        publishers = _join_or_default(book.get('publishers'))
        cover_url = book.get('cover_url', 'N/A')

        author_bio = book.get('author_bio', 'No author biography available.')
        author_birth_date = book.get('author_birth_date', 'N/A')
        author_death_date = book.get('author_death_date', 'N/A')

        context_text.append(f"Book Title: \"{title}\"\nAuthor: \"{author}\"\nAuthor Bio: \"{author_bio}\"\nAuthor Birth Date: {author_birth_date}\nAuthor Death Date: {author_death_date}\nSummary: \"{summary}\"\nGenres: \"{genres}\"\nPublish Date: {publish_date}\nNumber of Pages: {num_pages}\nPublishers: {publishers}\nCover URL: {cover_url}\n---")

    full_context = "\n\n".join(context_text)

    prompt = f"""Based on the following detailed book and author information, and the conversation history, answer the user's question concisely. Use all provided details including summaries, genres, author bios, publish dates, pages, and publishers. If the exact information (e.g., specific plot details not in summary, or precise numerical ratings if not provided in the context) is not present in the provided context, state that you cannot answer based on the given books.

Book and Author Information:
{full_context}

User Question: "{user_question}"

Answer:"""

    response = call_gemini_api(prompt, chat_history=chat_history)  # Pass history to LLM call
    return response.get('text', 'Could not generate an answer.')





                        LIBRIQUERY                        

Ask any question about books, and I'll try to answer based on Open Library data.

Type 'exit' to quit.


KeyboardInterrupt: Interrupted by user

In [None]:
def _enrich_book(book: dict) -> dict:
    """Add work, author, ISBN and cover details to one search result (in place) and return it."""
    # Fetch detailed work data
    work_olid = (book.get('key') or '').replace('/works/', '')
    if work_olid:
        book.update(fetch_detailed_work_data(work_olid))
        time.sleep(0.1)  # Small delay between API calls

    # Fetch author details if OLID is available from work data
    author_ol_ids = book.get('authors_ol_ids', [])
    if author_ol_ids:
        # For simplicity, just fetch details for the first author found
        author_details = fetch_author_details(author_ol_ids[0])
        book['author_bio'] = author_details.get('bio')
        book['author_birth_date'] = author_details.get('birth_date')
        book['author_death_date'] = author_details.get('death_date')
        book['author_subjects'] = author_details.get('author_subjects')  # Add author's subjects
        time.sleep(0.1)  # Small delay between API calls

    # Fetch ISBN-specific data if ISBNs are available, using the first ISBN found
    isbns = book.get('isbn', [])
    first_isbn = isbns[0] if isbns else None
    if first_isbn:
        book.update(fetch_book_by_isbn(first_isbn))  # Merge ISBN-specific data
        time.sleep(0.1)  # Small delay between API calls

    # Construct cover URL, preferring the cover edition key over any 'olid' field
    cover_olid = (book.get('cover_edition_key') or '').replace('/books/', '') or book.get('olid')
    book['cover_url'] = get_cover_url(olid=cover_olid, isbn=first_isbn)
    return book


def main():
    """
    Run the interactive question-and-answer loop.

    For each question: extract search terms with the LLM, search Open Library,
    enrich the results, ask the LLM to answer from that context, and record the
    exchange in the chat history. The loop ends on 'exit', end of input, or Ctrl-C.
    """
    print("                        LIBRIQUERY                        ")
    print("\nAsk any question about books, and I'll try to answer based on Open Library data.")
    print("\nType 'exit' to quit.")

    # Completed turns only; the current question is added once it has been answered.
    chat_history = []

    while True:
        try:
            user_question = input("\nYour Question (or 'exit' to quit): ")
        except (EOFError, KeyboardInterrupt):
            # Leave cleanly instead of surfacing a traceback when input is interrupted.
            print("\nExiting Q&A session. Goodbye!")
            break

        user_question = user_question.strip()
        if user_question.lower() == 'exit':
            print("Exiting Q&A session. Goodbye!")
            break
        if not user_question:
            print("Please enter a question.")
            continue

        # Step 1: Extract search terms from the user's question using LLM, considering chat history.
        # The history passed here holds earlier turns only: the LLM helpers add the
        # current question to the request themselves, so appending it first would
        # send it twice as back-to-back user turns.
        search_terms = extract_search_terms_from_question(user_question, chat_history=chat_history)
        print(f"Identified search terms: '{search_terms}'")

        # Fetch initial book data from Open Library based on extracted terms
        initial_search_results = fetch_open_library_data_for_context(search_terms)

        if initial_search_results:
            # Enrich book data with more details from various Open Library APIs
            print("Enriching book data with more details from multiple Open Library APIs...")
            books_for_qa_context = [_enrich_book(book) for book in initial_search_results]
            print("Generating answer based on retrieved and enriched book information...")
        else:
            print("No books/authors found matching your query for context. Please try a different question.")
            # With no context, answer_question_about_books() returns its fixed
            # "couldn't find" message.
            books_for_qa_context = []

        answer = answer_question_about_books(user_question, books_for_qa_context, chat_history=chat_history)
        print(f"\nAnswer: {answer}")

        # Record the finished exchange: the user's question, then the AI's answer
        chat_history.append({"role": "user", "parts": [{"text": user_question}]})
        chat_history.append({"role": "model", "parts": [{"text": answer}]})


if __name__ == "__main__":
    main()