# A full business solution


### BUSINESS CHALLENGE: Book recommender

Build a product that recommends books to users by selecting the best options from Amazon.com.

The system will take user input (e.g., favorite genres, authors, or topics) and return top-rated and trending books directly sourced from Amazon.

This project demonstrates a real-world application of recommender systems in e-commerce and digital publishing.

In [1]:
# imports

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [3]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
book_system_prompt = "You are provided with a list of books retrieved from Amazon.com. \
You are able to decide which of the books would be most relevant to recommend to a user, \
such as top-rated books, trending titles, or books that match the user’s preferred genres, authors, or topics.\n"
book_system_prompt += "You should respond in JSON as in this example:"
book_system_prompt += """
{
    "books": [
        {"title": "Atomic Habits", "author": "James Clear", "url": "https://amazon.com/dp/0735211299", "reason": "Top-rated self-help book"},
        {"title": "Project Hail Mary", "author": "Andy Weir", "url": "https://amazon.com/dp/0593135202", "reason": "Popular sci-fi novel with high reviews"}
    ]
}
"""


In [5]:
print(book_system_prompt)

You are provided with a list of books retrieved from Amazon.com. You are able to decide which of the books would be most relevant to recommend to a user, such as top-rated books, trending titles, or books that match the user’s preferred genres, authors, or topics.
You should respond in JSON as in this example:
{
    "books": [
        {"title": "Atomic Habits", "author": "James Clear", "url": "https://amazon.com/dp/0735211299", "reason": "Top-rated self-help book"},
        {"title": "Project Hail Mary", "author": "Andy Weir", "url": "https://amazon.com/dp/0593135202", "reason": "Popular sci-fi novel with high reviews"}
    ]
}



In [6]:
import asyncio
from playwright.async_api import async_playwright

async def scrape_amazon_books(query="science fiction", max_books=5):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()

        # 🚀 Bloquear imágenes, CSS y fuentes para que cargue más rápido
        await page.route("**/*", lambda route: route.abort() 
                         if route.request.resource_type in ["image", "stylesheet", "font"] 
                         else route.continue_())

        url = f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=stripbooks"
        await page.goto(url)
        await page.wait_for_selector("div.s-main-slot div[data-asin]", timeout=8000)

        items = await page.query_selector_all("div.s-main-slot div[data-asin]")
        books = []

        for item in items[:max_books]:
            asin = await item.get_attribute("data-asin")
            if not asin:
                continue

            # --- Title (varios intentos) ---
            title = "Unknown"
            for selector in [
                "h2 > a > span",
                "span.a-size-medium.a-color-base.a-text-normal",
                ".a-link-normal.a-text-normal"
            ]:
                el = await item.query_selector(selector)
                if el:
                    text = (await el.inner_text()).strip()
                    if text and len(text) > 2 and not text.lower().startswith("visit the"):
                        title = text
                        break

            # --- Author (varios intentos) ---
            author = "Unknown Author"
            author_candidates = await item.query_selector_all(
                ".a-row.a-size-base.a-color-secondary .a-size-base, "
                ".a-row.a-size-base.a-color-secondary .a-link-normal, "
                ".a-color-secondary .a-size-base"
            )
            for candidate in author_candidates:
                text = (await candidate.inner_text()).strip()
                if not text:
                    continue
                # filtros para limpiar basura
                if text.lower() in ["by", "|", "audible audiobook", "paperback", "hardcover", "kindle edition"]:
                    continue
                if len(text) > 2:
                    author = text
                    break

            # --- Rating ---
            rating_el = await item.query_selector(".a-icon-alt")
            rating = (await rating_el.inner_text()).strip() if rating_el else None

            # --- Guardar ---
            if title != "Unknown":  # guardamos solo libros válidos
                books.append({
                    "title": title,
                    "author": author,
                    "url": f"https://www.amazon.com/dp/{asin}",
                    "rating": rating
                })

        await browser.close()
        return books


In [7]:
books = await scrape_amazon_books("science fiction", max_books=5)

for b in books:
    print(b)


{'title': 'Project Hail Mary', 'author': 'Andy Weir', 'url': 'https://www.amazon.com/dp/B08GB58KD5', 'rating': '4.7 out of 5 stars'}
{'title': 'The Science Fiction Hall of Fame, Vol. 1, 1929-1964: The Greatest Science Fiction Stories of All Time Chosen by the Members of the Science Fiction Writers of America', 'author': 'Book 1 of 1: SF Hall of Fame', 'url': 'https://www.amazon.com/dp/B077YMK615', 'rating': '4.6 out of 5 stars'}
{'title': 'Red Rising', 'author': 'Book 1 of 6: Red Rising', 'url': 'https://www.amazon.com/dp/B00I3PUCIY', 'rating': '4.6 out of 5 stars'}
{'title': 'Masterpieces: The Best Science Fiction of the 20th Century', 'author': 'Orson Scott Card', 'url': 'https://www.amazon.com/dp/0441011330', 'rating': '4.3 out of 5 stars'}


In [8]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI

# 🔑 Config
load_dotenv(override=True)
openai = OpenAI()
MODEL = "gpt-4o-mini"

# 📌 Paso 1: Filtrar libros
def step_1_filter(books, preferences):
    prompt = {
        "role": "system",
        "content": f"""
        You are given a list of books: {books}.
        Task: Select only the books that match the user preferences: {preferences}.
        Respond ONLY in JSON format:
        {{
            "books": [
                {{"title": "...", "author": "..."}}
            ]
        }}
        """
    }
    resp = openai.chat.completions.create(
        model=MODEL,
        messages=[prompt],
        response_format={"type": "json_object"}  # 🔑 fuerza JSON válido
    )
    return json.loads(resp.choices[0].message.content)

# 📌 Paso 2: Rankear
def step_2_rank(filtered_books, preferences):
    prompt = {
        "role": "system",
        "content": f"""
        Rank these books by relevance to: {preferences}.
        Books: {filtered_books}

        Respond ONLY in JSON:
        {{
            "ranked_books": [
                {{"title": "...", "author": "...", "rank": 1}}
            ]
        }}
        """
    }
    resp = openai.chat.completions.create(
        model=MODEL,
        messages=[prompt],
        response_format={"type": "json_object"}
    )
    return json.loads(resp.choices[0].message.content)

# 📌 Paso 3: Explicaciones
def step_3_explain(ranked_books):
    prompt = {
        "role": "system",
        "content": f"""
        Generate a friendly explanation for each book in {ranked_books}.
        Respond ONLY in JSON:
        {{
            "recommendations": [
                {{"title": "...", "reason": "..."}}
            ]
        }}
        """
    }
    resp = openai.chat.completions.create(
        model=MODEL,
        messages=[prompt],
        response_format={"type": "json_object"}
    )
    return json.loads(resp.choices[0].message.content)

# 🔍 Ejemplo
books = [
    {"title": "Project Hail Mary", "author": "Andy Weir"},
    {"title": "Red Rising", "author": "Pierce Brown"},
    {"title": "Atomic Habits", "author": "James Clear"}
]
preferences = "science fiction"

filtered = step_1_filter(books, preferences)
ranked = step_2_rank(filtered, preferences)
explained = step_3_explain(ranked)

# 📊 Mostrar bonito
print("\n📚 Recommended Books:\n")
for r in explained["recommendations"]:
    print(f"✅ {r['title']}\n   👉 {r['reason']}\n")




📚 Recommended Books:

✅ Project Hail Mary
   👉 This thrilling space adventure by Andy Weir is filled with clever science, unexpected twists, and a touching story of survival and friendship. Follow Ryland Grace as he embarks on a mission to save humanity, encountering fascinating alien life and challenges along the way. It's perfect for anyone who loves a blend of hard science fiction and emotional depth!

✅ Red Rising
   👉 In this captivating tale by Pierce Brown, you'll be immersed in a complex society divided by color-coded class stratification. Follow Darrow, a lowly Red, as he infiltrates the ruling Gold class to revolutionize his world. This book is packed with action, betrayal, and powerful themes of freedom and rebellion, making it a must-read for fans of epic dystopian dramas!



In [9]:
def show_recommendations(explained):
    for i, book in enumerate(explained, 1):
        title = book.get("title", "Unknown")
        author = book.get("author", "Unknown")
        url = book.get("url", "")
        rating = book.get("rating", "N/A")
        reason = book.get("reason", "")

        # convertir rating a estrellas (ej: "4.6 out of 5 stars")
        stars = ""
        if isinstance(rating, str) and rating:
            try:
                score = float(rating.split()[0])   # toma el 4.6
                full = int(score)
                half = 1 if score - full >= 0.5 else 0
                stars = "★" * full + ("½" if half else "")
            except:
                stars = rating

        print(f"#{i}: {title}")
        print(f"   👤 Author: {author}")
        print(f"   ⭐ Rating: {stars} ({rating})")
        print(f"   🔗 Link: {url}")
        print(f"   💡 Why: {reason}\n")


In [10]:
def show_recommendations(explained):
    import json
    print(f"🔎 Tipo de explained: {type(explained)}")
    try:
        print(f"📦 Cantidad de elementos: {len(explained)}")
    except Exception:
        print("⚠️ explained no tiene len() — no parece ser una lista")

    for i, book in enumerate(explained, 1):
        print(f"\nProcesando elemento {i}: tipo {type(book)}")
        if isinstance(book, str):
            try:
                book = json.loads(book)
            except Exception as e:
                print(f"   ❌ Error al parsear JSON: {e}")
                continue

        print(f"   Contenido: {book}")

        title = book.get("title", "Unknown")
        author = book.get("author", "Unknown")
        url = book.get("url", "")
        rating = book.get("rating", "N/A")
        reason = book.get("reason", "")

        print(f"#{i}: {title}")
        print(f"   👤 Author: {author}")
        print(f"   ⭐ Rating: {rating}")
        print(f"   🔗 Link: {url}")
        print(f"   💡 Why: {reason}")
        print()



In [11]:
print(type(explained))
print(explained)


<class 'dict'>
{'recommendations': [{'title': 'Project Hail Mary', 'reason': "This thrilling space adventure by Andy Weir is filled with clever science, unexpected twists, and a touching story of survival and friendship. Follow Ryland Grace as he embarks on a mission to save humanity, encountering fascinating alien life and challenges along the way. It's perfect for anyone who loves a blend of hard science fiction and emotional depth!"}, {'title': 'Red Rising', 'reason': "In this captivating tale by Pierce Brown, you'll be immersed in a complex society divided by color-coded class stratification. Follow Darrow, a lowly Red, as he infiltrates the ruling Gold class to revolutionize his world. This book is packed with action, betrayal, and powerful themes of freedom and rebellion, making it a must-read for fans of epic dystopian dramas!"}]}


## Second step: make the brochure!

Assemble all the details into another prompt to GPT4-o

In [12]:
import os
import json
import asyncio
from dotenv import load_dotenv
from openai import OpenAI
from playwright.async_api import async_playwright

# === 1️⃣ Cargar API Key y configurar OpenAI ===
load_dotenv(override=True)
api_key = os.getenv("OPENAI_API_KEY")

if api_key and api_key.startswith("sk-proj-"):
    print("✅ API key looks good so far")
else:
    print("⚠️ Check your API key in .env")

openai = OpenAI(api_key=api_key)
MODEL = "gpt-4o-mini"


# === 2️⃣ Scraper de Amazon ===
async def scrape_amazon_books(query="science fiction", max_books=5):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()

        # 🚀 Bloquear imágenes y CSS para velocidad
        await page.route("**/*", lambda route: route.abort()
                         if route.request.resource_type in ["image", "stylesheet", "font"]
                         else route.continue_())

        url = f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=stripbooks"
        await page.goto(url)
        await page.wait_for_selector("div.s-main-slot div[data-asin]", timeout=8000)

        items = await page.query_selector_all("div.s-main-slot div[data-asin]")
        books = []

        for item in items[:max_books]:
            asin = await item.get_attribute("data-asin")
            if not asin:
                continue

            # --- Title ---
            title = "Unknown"
            for selector in [
                "h2 > a > span",
                "span.a-size-medium.a-color-base.a-text-normal",
                ".a-link-normal.a-text-normal"
            ]:
                el = await item.query_selector(selector)
                if el:
                    text = (await el.inner_text()).strip()
                    if text and len(text) > 2 and not text.lower().startswith("visit the"):
                        title = text
                        break

            # --- Author ---
            author = "Unknown Author"
            author_candidates = await item.query_selector_all(
                ".a-row.a-size-base.a-color-secondary .a-size-base, "
                ".a-row.a-size-base.a-color-secondary .a-link-normal, "
                ".a-color-secondary .a-size-base"
            )
            for candidate in author_candidates:
                text = (await candidate.inner_text()).strip()
                if not text:
                    continue
                if text.lower() in ["by", "|", "audible audiobook", "paperback", "hardcover", "kindle edition"]:
                    continue
                if len(text) > 2:
                    author = text
                    break

            # --- Rating ---
            rating_el = await item.query_selector(".a-icon-alt")
            rating = (await rating_el.inner_text()).strip() if rating_el else "N/A"

            # --- Guardar ---
            if title != "Unknown":
                books.append({
                    "title": title,
                    "author": author,
                    "url": f"https://www.amazon.com/dp/{asin}",
                    "rating": rating
                })

        await browser.close()
        return books


# === 3️⃣ Construir prompt para el brochure ===
def get_books_details(explained):
    result = "## 📚 Books Extracted from Amazon\n\n"
    for i, book in enumerate(explained, 1):
        result += f"### {i}. {book['title']}\n"
        result += f"- 👤 Author: {book.get('author', 'Unknown')}\n"
        result += f"- ⭐ Rating: {book.get('rating', 'N/A')}\n"
        result += f"- 🔗 [Amazon Link]({book.get('url', '')})\n\n"
    return result


system_prompt = """You are a humorous assistant that writes a short, witty, and entertaining
brochure about a list of books. Describe each book as if it were part of a fun magazine
recommendation. Be creative, engaging, and respond in markdown.
"""

def build_user_prompt(topic, books):
    return f"These books were found on Amazon for the topic '{topic}':\n\n" + get_books_details(books)


# === 4️⃣ Flujo principal ===
async def main():
    topic = "data science"  # 🔍 Cambia la categoría que quieras
    print(f"🔎 Scraping Amazon for books about '{topic}'...")
    books = await scrape_amazon_books(topic, max_books=5)
    print(f"✅ Found {len(books)} books.")

    if not books:
        print("⚠️ No books found.")
        return

    user_prompt = build_user_prompt(topic, books)

    print("\n🧠 Generating brochure with GPT...\n")

    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )

    brochure = response.choices[0].message.content
    print("=== 📖 Generated Brochure ===\n")
    print(brochure)

    # Opcional: Guardar a archivo markdown
    filename = f"brochure_{topic.replace(' ', '_')}.md"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(brochure)
    print(f"\n💾 Saved brochure to {filename}")


await main()


✅ API key looks good so far
🔎 Scraping Amazon for books about 'data science'...
✅ Found 4 books.

🧠 Generating brochure with GPT...

=== 📖 Generated Brochure ===

# 📚 Data Science Dazzle: The Ultimate Bookshelf for Future Data Wizards! 🌟

Welcome, aspiring data detectives and number ninjas! If you're ready to dive into the world of data science and emerge with superpowers that even Captain DataBritain would envy, look no further. We’ve rounded up the hottest titles roaring through the Amazon jungle—each one guarantees to boost your knowledge while tickling your funny bone. 🤓 

---

### 1. **Data Science from Scratch: First Principles with Python**  
**👤 Author:** Joel Grus  
**⭐ Rating:** 4.4 out of 5 stars  
**🔗 [Amazon Link](https://www.amazon.com/dp/1492041130)**  

**Description:**  
Ever wanted to build a spaceship but didn't know where to begin? Well, buckle up! Joel Grus guides you through the cosmic chaos of data science with Python as your trusty spaceship engine. With no prio

In [13]:
# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = """You are a creative assistant that builds a short, entertaining brochure
# about a list of books. The brochure should be written in a humorous, engaging tone,
# like a magazine feature for curious readers. Respond in markdown format.
# Include highlights of why these books are great and what kind of readers might enjoy them."""

# def get_brochure_user_prompt(book_list_name, explained):
#     user_prompt = f"You are looking at a collection called: {book_list_name}\n"
#     user_prompt += "Here are the details of the recommended books; use them to build a short, funny, attractive brochure in markdown.\n\n"
#     user_prompt += get_books_details(explained)
#     user_prompt = user_prompt[:5_000]  # evita que el prompt sea demasiado largo
#     return user_prompt


## Finally - a minor improvement

With a small adjustment, we can change this so that the results stream back from OpenAI,
with the familiar typewriter animation

In [17]:
import os
import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv
from playwright.async_api import async_playwright

# === Configuración ===
load_dotenv(override=True)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# === Scraper de Amazon ===
async def scrape_amazon_books(query="fiction", max_books=5):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        await page.set_extra_http_headers({"User-Agent": "Mozilla/5.0"})
        url = f"https://www.amazon.es/s?k={query.replace(' ', '+')}&i=stripbooks"
        await page.goto(url, timeout=15000)
        items = await page.query_selector_all("div[data-component-type='s-search-result']")
        books = []
        for item in items[:max_books]:
            asin = await item.get_attribute("data-asin")
            if not asin:
                continue
            title_el = await item.query_selector("h2 a span")
            author_el = await item.query_selector(".a-color-secondary .a-size-base")
            rating_el = await item.query_selector(".a-icon-alt")
            title = (await title_el.inner_text()).strip() if title_el else "Unknown"
            author = (await author_el.inner_text()).strip() if author_el else "Unknown"
            rating = (await rating_el.inner_text()).strip() if rating_el else "N/A"
            if title != "Unknown":
                books.append({
                    "title": title,
                    "author": author,
                    "rating": rating,
                    "url": f"https://www.amazon.es/dp/{asin}"
                })
        await browser.close()
        return books


# === Buscar libros en Amazon ===
async def buscar_libros(topic):
    libros = await scrape_amazon_books(topic, max_books=5)
    if not libros:
        return f"⚠️ No se encontraron libros para **{topic}**."
    markdown = f"## 📚 Resultados para: *{topic}*\n\n"
    for i, b in enumerate(libros, 1):
        markdown += f"### {i}. {b['title']}\n"
        markdown += f"- 👤 Autor: {b['author']}\n"
        markdown += f"- ⭐ Rating: {b['rating']}\n"
        markdown += f"- 🔗 [Ver en Amazon]({b['url']})\n\n"
    return markdown


# === Recomendador IA (multi-shot prompting) ===
def recomendar_libro(contexto, pedido):
    system_prompt = """Eres un asistente experto en libros.
Analiza los gustos previos del usuario y su solicitud actual para recomendar un libro adecuado.
Sé breve, concreto y entretenido. Incluye título, autor y por qué lo recomiendas."""

    user_prompt = f"""
Ejemplos o gustos previos:
{contexto}

Nueva solicitud del usuario:
{pedido}
"""

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.9,
        max_tokens=400,
    )

    return response.choices[0].message.content


# === Interfaz ===
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 📚 Buscador de Libros + Asistente IA")

    # --- Bloque 1: Buscar en Amazon ---
    gr.Markdown("### 🔍 Buscar libros en Amazon")
    gr.Markdown("Escribe el nombre de un autor, género o palabra clave y haz clic en buscar.")

    with gr.Row():
        topic_input = gr.Textbox(label="Autor o género", placeholder="Ej: fantasía, Stephen King, data science")
        search_btn = gr.Button("🔎 Buscar")

    resultados_busqueda = gr.Markdown()
    search_btn.click(fn=buscar_libros, inputs=topic_input, outputs=resultados_busqueda, show_progress=True)

    gr.Markdown("---")  # línea divisoria

    # --- Bloque 2: Multi-shot prompting separado ---
    gr.Markdown("## 💬 Solicitud personalizada al asistente IA")
    gr.Markdown(
        "Describe qué tipo de libro buscás o contale al asistente tus preferencias. "
        "Podés incluir ejemplos de libros que te gustaron y qué querés ahora."
    )

    with gr.Row():
        contexto = gr.Textbox(
            label="📖 Ejemplos o gustos previos",
            placeholder="Ej: Me gusto las tortugas ninja que otro historieta recomiendas?.",
            lines=4,
        )

    with gr.Row():
        pedido = gr.Textbox(
            label="🧠 Nueva solicitud",
            placeholder="Ej: Recomendame un libro nutricional.",
            lines=3,
        )

    boton_recomendar = gr.Button("✨ Recomendar libro")
    resultado_recomendacion = gr.Markdown()

    boton_recomendar.click(fn=recomendar_libro, inputs=[contexto, pedido], outputs=resultado_recomendacion)

demo.launch(inline=True)



* Running on local URL:  http://127.0.0.1:7872
* To create a public link, set `share=True` in `launch()`.


