# Part-1

In [1]:
import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient
import time

# Database Configuration
def get_database():
    client = MongoClient("mongodb+srv://scarp-db-testing:aiNEpGbqdUzYXTun@scarp-test-db.2prrz.mongodb.net/")
    db = client["books_db"]
    return db

# Save data to MongoDB
def save_to_mongo(db, data):
    collection = db["books"]
    collection.update_one(
        {"name": data["name"]},
        {"$set": data},
        upsert=True
    )

# Extract book details from a book page
def extract_book_details(book_url):
    response = requests.get(book_url)
    soup = BeautifulSoup(response.content, "html.parser")

    # Extract data fields
    name = soup.h1.text.strip()
    description = soup.find("meta", attrs={"name": "description"})
    description = description["content"].strip() if description else "No description available"
    category = soup.find("ul", class_="breadcrumb").find_all("li")[2].text.strip()
    price_incl_tax = soup.find("th", string="Price (incl. tax)").find_next_sibling("td").text.strip()
    price_excl_tax = soup.find("th", string="Price (excl. tax)").find_next_sibling("td").text.strip()
    availability = soup.find("th", string="Availability").find_next_sibling("td").text.strip()
    num_reviews = int(soup.find("th", string="Number of reviews").find_next_sibling("td").text.strip())
    image_url = soup.find("img")["src"].replace("../../", "https://books.toscrape.com/")

    # Determine rating
    rating_tag = soup.find("p", class_="star-rating")
    rating_classes = rating_tag.get("class", [])
    rating = [r for r in rating_classes if r != "star-rating"]
    rating = rating[0] if rating else "No rating"

    return {
        "name": name,
        "description": description,
        "category": category,
        "price_incl_tax": price_incl_tax,
        "price_excl_tax": price_excl_tax,
        "availability": availability,
        "num_reviews": num_reviews,
        "image_url": image_url,
        "rating": rating,
    }

# Crawl all books from the website
def crawl_books():
    base_url = "https://books.toscrape.com/catalogue/page-{}.html"
    db = get_database()

    page = 1
    while True:
        print(f"Crawling page {page}...")
        response = requests.get(base_url.format(page))

        if response.status_code != 200:
            break

        soup = BeautifulSoup(response.content, "html.parser")
        books = soup.find_all("article", class_="product_pod")

        if not books:
            break

        for book in books:
            book_url = book.find("h3").a["href"]
            book_url = "https://books.toscrape.com/catalogue/" + book_url.replace("../../../", "")
            book_data = extract_book_details(book_url)
            save_to_mongo(db, book_data)

        page += 1
        time.sleep(1)  # Be respectful by adding delay

if __name__ == "__main__":
    crawl_books()
    print("Crawling completed and data stored in MongoDB.")


Crawling page 1...
Crawling page 2...
Crawling page 3...
Crawling page 4...
Crawling page 5...
Crawling page 6...
Crawling page 7...
Crawling page 8...
Crawling page 9...
Crawling page 10...
Crawling page 11...
Crawling page 12...
Crawling page 13...
Crawling page 14...
Crawling page 15...
Crawling page 16...
Crawling page 17...
Crawling page 18...
Crawling page 19...
Crawling page 20...
Crawling page 21...
Crawling page 22...
Crawling page 23...
Crawling page 24...
Crawling page 25...
Crawling page 26...
Crawling page 27...
Crawling page 28...
Crawling page 29...
Crawling page 30...
Crawling page 31...
Crawling page 32...
Crawling page 33...
Crawling page 34...
Crawling page 35...
Crawling page 36...
Crawling page 37...
Crawling page 38...
Crawling page 39...
Crawling page 40...
Crawling page 41...
Crawling page 42...
Crawling page 43...
Crawling page 44...
Crawling page 45...
Crawling page 46...
Crawling page 47...
Crawling page 48...
Crawling page 49...
Crawling page 50...
Crawling 

# Part-2

In [2]:
from apscheduler.schedulers.background import BackgroundScheduler

# Function to check for new or updated books
def check_and_update_books():
    print("Running scheduler to check for new or updated books...")
    crawl_books()  # Reuse the crawl_books function to handle updates
    print("Scheduler check completed.")

# Start the scheduler
def start_scheduler():
    scheduler = BackgroundScheduler()
    scheduler.add_job(check_and_update_books, 'interval', hours=24)  # Runs every 24 hours
    scheduler.start()
    print("Scheduler started and running every 24 hours.")

if __name__ == "__main__":
    start_scheduler()
    print("Scheduler is running. Press Ctrl+C to stop.")
    try:
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        print("Scheduler stopped.")


Scheduler started and running every 24 hours.
Scheduler is running. Press Ctrl+C to stop.
Scheduler stopped.


# Part-3

In [4]:
# Installing some necessary libraries
pip install fastapi uvicorn


Collecting fastapi
  Downloading fastapi-0.115.6-py3-none-any.whl (94 kB)
                                              0.0/94.8 kB ? eta -:--:--
     ------------                             30.7/94.8 kB ? eta -:--:--
     ---------------------------------------- 94.8/94.8 kB 1.4 MB/s eta 0:00:00
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
                                              0.0/62.3 kB ? eta -:--:--
     ---------------------------------------- 62.3/62.3 kB 3.5 MB/s eta 0:00:00
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl (73 kB)
                                              0.0/73.2 kB ? eta -:--:--
     ---------------------------------------- 73.2/73.2 kB 2.0 MB/s eta 0:00:00
Collecting pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 (from fastapi)
  Downloading pydantic-2.10.4-py3-none-any.whl (431 kB)
                                              0.0/431.8 kB ? eta -:--:--


In [25]:
from fastapi import FastAPI, Query, HTTPException, Depends
from fastapi.security.api_key import APIKeyHeader
from pymongo import MongoClient
from typing import List, Optional

# MongoDB Database Configuration
def get_database():
    client = MongoClient("mongodb+srv://scarp-db-testing:aiNEpGbqdUzYXTun@scarp-test-db.2prrz.mongodb.net/")
    db = client["books_db"]
    return db

# FastAPI app initialization
app = FastAPI()

# API Key Configuration for Authentication
API_KEY = "ocmrjohy"  # Replace with your actual API Key
API_KEY_NAME = "X-API-KEY"
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=True)

# API Key Validation
def validate_api_key(api_key: str = Depends(api_key_header)):
    if api_key != API_KEY:
        raise HTTPException(status_code=403, detail="Invalid API Key")

# Book List API (List of all books)
@app.get("/books")
async def list_books(
    category: Optional[str] = Query(None, description="Filter by category"),
    min_price: Optional[float] = Query(None, description="Filter by minimum price"),
    max_price: Optional[float] = Query(None, description="Filter by maximum price"),
    sort_by: Optional[str] = Query(None, description="Sort by price, rating, or reviews"),
    db = Depends(get_database),
    _: str = Depends(validate_api_key),
):
    collection = db["books"]
    query = {}

    # Apply filters if provided
    if category:
        query["category"] = category
    if min_price is not None:
        query["price_excl_tax"] = {"$gte": min_price}
    if max_price is not None:
        query["price_excl_tax"] = {"$lte": max_price}

    # Fetch books matching the query
    books = list(collection.find(query, {"_id": 0}))

    # Sorting
    if sort_by:
        reverse = False  # Default ascending order
        if sort_by in ["price", "rating"]:
            reverse = True  # Descending for these fields
        books = sorted(books, key=lambda x: x.get(sort_by, 0), reverse=reverse)

    print(f"Books found: {len(books)}")  # Print number of books found
    if not books:
        print("No books found matching the query.")
    else:
        for book in books:
            print(f"Book URL: {book.get('url', 'No URL found')}")  # Print book URL if it exists

    return {"books": books}

# Book Details API (Details of a single book)
@app.get("/books/{book_name}")
async def get_book(book_name: str, db = Depends(get_database), _: str = Depends(validate_api_key)):
    collection = db["books"]
    book = collection.find_one({"name": book_name}, {"_id": 0})

    if not book:
        print(f"Book '{book_name}' not found.")
        raise HTTPException(status_code=404, detail="Book not found")
    
    print(f"Book URL found: {book.get('url', 'No URL found')}")  # Print book URL if found
    return {"book": book}


Crawling page 45...
Crawling page 47...
Crawling page 46...
Crawling page 44...
Crawling page 46...
Crawling page 47...
Crawling page 48...
Crawling page 45...
Crawling page 47...
Crawling page 49...
Crawling page 48...
Crawling page 46...
Crawling page 48...
Crawling page 49...
Crawling page 50...
Crawling page 47...
Crawling page 49...
Crawling page 50...
Crawling page 51...
Scheduler check completed.
Crawling page 48...
Crawling page 50...
Crawling page 51...
Scheduler check completed.
Crawling page 49...
Crawling page 51...
Scheduler check completed.
Crawling page 50...
Crawling page 51...
Scheduler check completed.


In [44]:
import requests

# Send a GET request to the FastAPI root endpoint
response = requests.get("http://127.0.0.1:8000")

if response.status_code == 200:
    print("FastAPI is running!")
    print("Response:", response.json())
else:
    print("Error: FastAPI is not running.")


INFO:     127.0.0.1:51165 - "GET / HTTP/1.1" 200 OK
FastAPI is running!
Response: {'message': 'FastAPI is running!'}


In [39]:
from pymongo import MongoClient

# MongoDB connection (replace with your connection string)
client = MongoClient("mongodb+srv://scarp-db-testing:aiNEpGbqdUzYXTun@scarp-test-db.2prrz.mongodb.net/")
db = client["books_db"]

# Query the books collection
books = db.books.find({"category": "Fiction"})

# Print the results
for book in books:
    print(book)

# books = db.books.find({"category": "Fiction"})


{'_id': ObjectId('67641a1c05d0c9450158116e'), 'name': 'Soumission', 'availability': 'In stock (20 available)', 'category': 'Fiction', 'description': 'Dans une France assez proche de la nôtre, un homme s’engage dans la carrière universitaire. Peu motivé par l’enseignement, il s’attend à une vie ennuyeuse mais calme, protégée des grands drames historiques. Cependant les forces en jeu dans le pays ont fissuré le système politique jusqu’à provoquer son effondrement. Cette implosion sans soubresauts, sans vraie révolution, s Dans une France assez proche de la nôtre, un homme s’engage dans la carrière universitaire. Peu motivé par l’enseignement, il s’attend à une vie ennuyeuse mais calme, protégée des grands drames historiques. Cependant les forces en jeu dans le pays ont fissuré le système politique jusqu’à provoquer son effondrement. Cette implosion sans soubresauts, sans vraie révolution, se développe comme un mauvais rêve.Le talent de l’auteur, sa force visionnaire nous entraînent sur u

In [42]:
import requests

# Define the API URL
url = 'https://books.toscrape.com/catalogue/'

# Define the query parameters
params = {
    'category': 'Fiction',
    'min_price': 10,
    'sort_by': 'rating'
}

# Define the headers with the API key
headers = {
    'X-API-KEY': 'ocmrjohy'  # Your actual API key here
}

# Send the GET request
response = requests.get(url, params=params, headers=headers)

# Check the status code
print(response.status_code)  # 200 if successful, 404 if not found

# Print the response text (raw content)
print(response.text)  # This will print the raw response content


403
<html>
<head><title>403 Forbidden</title></head>
<body>
<center><h1>403 Forbidden</h1></center>
<hr><center>nginx/1.21.6</center>
</body>
</html>

