In [29]:
!pip install requests beautifulsoup4 python-dotenv




[notice] A new release of pip is available: 23.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [30]:
import os
import requests
import sqlite3
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from urllib.parse import urljoin
import time

# Load environment variables (from a local .env file, if one exists).
load_dotenv()
API_KEY = os.getenv("GOOGLE_BOOKS_API_KEY")

# Open (or create) the SQLite database file used by the rest of the notebook.
conn = sqlite3.connect("libros.db")
# SQLite only enforces FOREIGN KEY clauses when this pragma is enabled on the
# connection; without it the constraints declared in the schema are ignored.
conn.execute("PRAGMA foreign_keys = ON")
cursor = conn.cursor()

In [None]:
# Create the schema. Every statement uses IF NOT EXISTS, so re-running this
# cell against an existing database is a no-op.
#
# Note: a table-level PRIMARY KEY constraint cannot carry NOT NULL (that is a
# column constraint and makes the statement a syntax error); both key columns
# of autor_libro are already declared NOT NULL individually.
cursor.executescript("""
CREATE TABLE IF NOT EXISTS libros (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    titulo TEXT NOT NULL,
    precio REAL NOT NULL,
    stock TEXT NOT NULL,
    rating INTEGER NOT NULL,
    url TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS autores (
    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    nombre TEXT UNIQUE NOT NULL
);

CREATE TABLE IF NOT EXISTS autor_libro (
    libro_id INTEGER NOT NULL,
    autor_id INTEGER NOT NULL,
    PRIMARY KEY (libro_id, autor_id),
    FOREIGN KEY (libro_id) REFERENCES libros(id),
    FOREIGN KEY (autor_id) REFERENCES autores(id)
);
""")

conn.commit()

In [None]:
# Root of the site being scraped; relative book links are resolved against it.
base_url = "http://books.toscrape.com/"

def obtener_autor(titulo, timeout=10):
    """Look up a book's author(s) by title through the Google Books API.

    Args:
        titulo: Book title, sent as the ``q`` search parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A string, always non-empty:
        - the authors of the first search hit joined with ", "
          ("Desconocido" when that hit lists no authors);
        - "No encontrado" when the search returns no items;
        - "Error API" when the request fails, the server answers with an
          HTTP error status, or the payload does not have the expected shape.
    """
    params = {"q": titulo, "key": API_KEY}
    try:
        resp = requests.get(
            "https://www.googleapis.com/books/v1/volumes",
            params=params,
            timeout=timeout,
        )
        # An HTTP error (bad key, quota exceeded, ...) is an API failure,
        # not a "book not found" result.
        resp.raise_for_status()
        data = resp.json()
        items = data.get("items")
        if not items:
            return "No encontrado"
        autores = items[0]["volumeInfo"].get("authors", ["Desconocido"])
        return ", ".join(autores)
    except (requests.RequestException, ValueError, LookupError, TypeError, AttributeError):
        # Deliberately best-effort: network problems, invalid JSON and
        # unexpected payload shapes all map to the same sentinel, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return "Error API"

def rating_a_numero(rating_str):
    """Translate an English rating word ("One" .. "Five") into its number.

    Args:
        rating_str: Rating word; the match is case-sensitive.

    Returns:
        The integer 1-5 for a recognised word, otherwise 0.
    """
    palabras = ("One", "Two", "Three", "Four", "Five")
    valores = {palabra: numero for numero, palabra in enumerate(palabras, start=1)}
    try:
        return valores[rating_str]
    except KeyError:
        # Unknown words fall back to 0.
        return 0


In [33]:
def procesar_libro(url_relativa, timeout=10):
    """Scrape one book detail page and store the book and its authors.

    The page URL is ``url_relativa`` resolved against ``base_url``. A book
    already present in ``libros`` with the same title and URL is reported as
    a duplicate and skipped. Otherwise the book row, its author rows and the
    ``autor_libro`` links are written in a single transaction.

    Args:
        url_relativa: Link to the book page, relative to ``base_url``.
        timeout: Seconds to wait for the page before giving up.

    Raises:
        requests.RequestException: the page could not be fetched or the
            server answered with an HTTP error status.
        AttributeError, TypeError, IndexError, ValueError: the page does not
            contain the expected title / price / stock / rating markup.
        sqlite3.Error: a database statement failed (the transaction is
            rolled back before re-raising).
    """
    url = urljoin(base_url, url_relativa)
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    # Hand BeautifulSoup the raw bytes so it detects the page encoding itself;
    # `res.text` can mis-decode the pound sign when the response declares no
    # charset, which would turn the currency prefix into two characters.
    soup = BeautifulSoup(res.content, "html.parser")

    titulo = soup.h1.text.strip()
    # Keep only digits and the decimal point instead of assuming the currency
    # symbol is exactly one character long.
    precio_texto = soup.select_one(".price_color").text
    precio = float("".join(c for c in precio_texto if c.isdigit() or c == "."))
    stock = soup.select(".table.table-striped tr")[-1].td.text.strip()
    rating = rating_a_numero(soup.select_one("p.star-rating")["class"][1])

    cursor.execute("SELECT id FROM libros WHERE titulo = ? AND url = ?", (titulo, url))
    if cursor.fetchone():
        print(f"🔁 Libro duplicado: {titulo}")
        return

    # Call the external API before starting to write, so the write
    # transaction stays short.
    autor = obtener_autor(titulo)
    # dict.fromkeys de-duplicates while preserving order.
    nombres = dict.fromkeys(nombre.strip() for nombre in autor.split(", "))

    try:
        cursor.execute("INSERT INTO libros (titulo, precio, stock, rating, url) VALUES (?, ?, ?, ?, ?)",
                       (titulo, precio, stock, rating, url))
        libro_id = cursor.lastrowid

        for nombre in nombres:
            cursor.execute("INSERT OR IGNORE INTO autores (nombre) VALUES (?)", (nombre,))
            cursor.execute("SELECT id FROM autores WHERE nombre = ?", (nombre,))
            autor_id = cursor.fetchone()[0]
            # OR IGNORE: a repeated (libro_id, autor_id) pair must not abort the book.
            cursor.execute("INSERT OR IGNORE INTO autor_libro (libro_id, autor_id) VALUES (?, ?)", (libro_id, autor_id))

        conn.commit()
    except sqlite3.Error:
        # Do not leave a half-written book in the open transaction.
        conn.rollback()
        raise

    print(f"✅ Guardado: {titulo}")

In [66]:
def procesar_libro(url_relativa, timeout=10):
    """Scrape one catalogue page and store the book and its authors, skipping bad pages.

    NOTE: this definition replaces the earlier ``procesar_libro`` defined in
    this notebook. Here ``url_relativa`` is relative to the site's
    ``catalogue/`` directory.

    Pages missing the title, price, stock or rating element, or with an
    unparseable price, are reported and skipped instead of raising. A book
    already stored with the same title and URL is reused rather than inserted
    again.

    Args:
        url_relativa: Book page path relative to ``.../catalogue/``.
        timeout: Seconds to wait for the page before giving up.

    Raises:
        requests.RequestException: the page could not be fetched.
        sqlite3.Error: a database statement failed (the transaction is
            rolled back before re-raising).
    """
    url = f"http://books.toscrape.com/catalogue/{url_relativa}"
    res = requests.get(url, timeout=timeout)
    # Parse the raw bytes so BeautifulSoup detects the encoding itself;
    # `res.text` can mis-decode the pound sign when no charset is declared.
    soup = BeautifulSoup(res.content, "html.parser")

    # Validations
    titulo_tag = soup.h1
    precio_tag = soup.select_one(".price_color")
    filas = soup.select(".table.table-striped tr")
    stock_tag = filas[-1].td if filas else None
    rating_tag = soup.select_one("p.star-rating")

    if any(tag is None for tag in (titulo_tag, precio_tag, stock_tag, rating_tag)):
        print(f"⚠️ Elementos faltantes en {url}, se omite.")
        return

    titulo = titulo_tag.text.strip()
    try:
        # Keep only digits and the decimal point instead of assuming the
        # currency symbol is exactly one character long.
        precio = float("".join(c for c in precio_tag.text if c.isdigit() or c == "."))
    except ValueError:
        print(f"⚠️ Precio inválido en {url}")
        return

    stock = stock_tag.text.strip()
    clases = rating_tag.get("class") or []
    # The rating word is expected as the second CSS class; fall back to 0
    # (the same value rating_a_numero uses for unknown words) when absent.
    rating = rating_a_numero(clases[1]) if len(clases) > 1 else 0

    # NOTE(review): the original called `obtener_autor_google_books`, which is
    # not defined in this notebook; `obtener_autor` is the visible helper.
    autor = obtener_autor(titulo)

    try:
        cursor.execute("SELECT id FROM libros WHERE titulo = ? AND url = ?", (titulo, url))
        fila = cursor.fetchone()
        if fila:
            libro_id = fila[0]
        else:
            # `url` is NOT NULL in the schema and must be supplied; omitting it
            # under INSERT OR IGNORE silently dropped the row.
            cursor.execute("INSERT INTO libros (titulo, precio, stock, rating, url) VALUES (?, ?, ?, ?, ?)",
                           (titulo, precio, stock, rating, url))
            libro_id = cursor.lastrowid

        if autor:
            # obtener_autor joins several authors with ", "; store each one
            # separately (de-duplicated, order preserved).
            for nombre in dict.fromkeys(n.strip() for n in autor.split(", ")):
                cursor.execute("INSERT OR IGNORE INTO autores (nombre) VALUES (?)", (nombre,))
                cursor.execute("SELECT id FROM autores WHERE nombre = ?", (nombre,))
                autor_id = cursor.fetchone()[0]
                # The link table declared in the schema is `autor_libro`.
                cursor.execute("INSERT OR IGNORE INTO autor_libro (libro_id, autor_id) VALUES (?, ?)", (libro_id, autor_id))

        conn.commit()
    except sqlite3.Error:
        # Do not leave a half-written book in the open transaction.
        conn.rollback()
        raise

    print(f"✅ Libro guardado: {titulo}")


In [85]:
# Query 1: five-star books, most expensive first.
print("\n📊 Consulta 1: Libros con 5 estrellas ordenados por precio (desc)")

cursor.execute("""
    SELECT titulo, precio, rating
    FROM libros
    WHERE rating = 5
    ORDER BY precio DESC;
""")
for row in cursor.fetchall():
    print(row)


# Query 2: books with more than 10 units in stock.
# The stock text looks like "In stock (19 available)". Take everything after
# the opening parenthesis and let CAST read the leading integer, so the count
# may have any number of digits. Text without "(" casts to 0 and is excluded.
print("\n📊 Consulta 2: Libros con más de 10 unidades en stock")
cursor.execute("""
    SELECT titulo, stock
    FROM libros
    WHERE CAST(SUBSTR(stock, INSTR(stock, '(') + 1) AS INTEGER) > 10
""")
for row in cursor.fetchall():
    print(row)

# Query 3: all books, best rated first.
print("\n📊 Consulta 3: Libros ordenados por rating (mayor a menor)")
cursor.execute("""
    SELECT titulo, rating
    FROM libros
    ORDER BY rating DESC;
""")
for row in cursor.fetchall():
    print(row)

# Query 4: books whose title contains "science" (case-insensitive).
print("\n📊 Consulta 4: Libros con 'science' en el título")

cursor.execute("""
    SELECT titulo, precio, rating
    FROM libros
    WHERE LOWER(titulo) LIKE '%science%';
""")

for row in cursor.fetchall():
    print(row)


# Query 5: cheapest book per rating.
# Relies on SQLite's rule that, with a single MIN() aggregate, bare columns
# (titulo) are taken from the row that holds the minimum.
print("\n📊 Consulta 5: Libro más barato por cada rating")

cursor.execute("""
    SELECT rating, titulo, MIN(precio) as precio_minimo
    FROM libros
    GROUP BY rating
    ORDER BY rating DESC;
""")
for row in cursor.fetchall():
    print(f"⭐ {row[0]} estrellas - {row[1]} (£{row[2]})")




📊 Consulta 1: Libros con 5 estrellas ordenados por precio (desc)
('The Barefoot Contessa Cookbook', '£59.92', 5)
('Life Without a Recipe', '£59.04', 5)
('Approval Junkie: Adventures in Caring Too Much', '£58.81', 5)
('How to Speak Golf: An Illustrated Guide to Links Lingo', '£58.32', 5)
('Digital Fortress', '£58.00', 5)
('The Sound Of Love', '£57.84', 5)
('Travels with Charley: In Search of America', '£57.82', 5)
('El Deafo', '£57.62', 5)
('H is for Hawk', '£57.42', 5)
('Immunity: How Elie Metchnikoff Changed the Course of Modern Medicine', '£57.36', 5)
('The Disappearing Spoon: And Other True Tales of Madness, Love, and the History of the World from the Periodic Table of the Elements', '£57.35', 5)
('Kitchens of the Great Midwest', '£57.20', 5)
('A Piece of Sky, a Grain of Rice: A Memoir in Four Meditations', '£56.76', 5)
('Into the Wild', '£56.70', 5)
('Eleanor & Park', '£56.51', 5)
('Abstract City', '£56.37', 5)
('The False Prince (The Ascendance Trilogy #1)', '£56.00', 5)
('Future

In [76]:
# Print up to ten distinct raw stock strings to inspect their format.
for (texto_stock,) in cursor.execute("SELECT DISTINCT stock FROM libros LIMIT 10"):
    print(texto_stock)


In stock (19 available)
In stock (15 available)
In stock (14 available)
In stock (8 available)
In stock (7 available)
In stock (6 available)
In stock (3 available)
In stock (1 available)
In stock (20 available)
In stock (18 available)
