In [None]:
from bs4 import BeautifulSoup
import requests
from minio import Minio
from io import BytesIO
from datetime import datetime

def setup_minio_client(endpoint='localhost:9000',
                       access_key='ROOTUSER',
                       secret_key='DATAINCUBATOR',
                       secure=False,
                       bucket_name='bronze'):
    """Create a MinIO client and make sure the target bucket exists.

    Args:
        endpoint: ``host:port`` of the MinIO server.
        access_key: Access key used to authenticate.
        secret_key: Secret key used to authenticate.
        secure: Whether to connect over TLS.
        bucket_name: Bucket that is created when it does not exist yet.

    Returns:
        The configured ``Minio`` client.

    NOTE(review): the defaults look like local development settings; pass
    real values from configuration instead of relying on them elsewhere.
    """
    client = Minio(endpoint,
                   access_key=access_key,
                   secret_key=secret_key,
                   secure=secure)
    # Create the bucket only on first use; an existing bucket is left as is.
    if not client.bucket_exists(bucket_name):
        client.make_bucket(bucket_name)
        print(f"Bucket '{bucket_name}' created successfully")
    return client
# Build the client once at module/cell level; the upload step further down
# passes this same object to save_books_data_to_minio().
minio_client = setup_minio_client()

In [None]:
def scrape_books_data(url="https://books.toscrape.com/catalogue/page-1.html",
                      timeout=10):
    """Scrape title, price, availability and rating for every book on a page.

    Args:
        url: Catalogue page to fetch. Defaults to the first catalogue page
            of books.toscrape.com.
        timeout: Seconds to wait for the server, so a stalled connection
            cannot block the caller indefinitely.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``availability``
        and ``rating`` (one per ``article.product_pod`` element), or ``None``
        when the page could not be fetched.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures follow the same "print and return None"
        # contract as a non-200 status instead of escaping as an exception.
        print(f"Failed to fetch page: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch page, status code: {response.status_code}")
        return None

    # Parse the raw bytes so BeautifulSoup can detect the charset from the
    # document itself. response.text falls back to ISO-8859-1 when the server
    # sends no charset, which garbles non-ASCII characters such as "£".
    soup = BeautifulSoup(response.content, 'html.parser')

    books_data = []
    for book in soup.find_all('article', class_='product_pod'):
        books_data.append({
            'title': book.find('h3').find('a')['title'],
            'price': book.find('p', class_='price_color').text.strip(),
            'availability': book.find(
                'p', class_='instock availability').text.strip(),
            # The rating is taken from the second CSS class of the
            # star-rating element.
            'rating': book.find('p', class_='star-rating')['class'][1],
        })

    return books_data

book_data = scrape_books_data()
# scrape_books_data() returns None when the page cannot be fetched; iterate
# over an empty list in that case so the preview does not raise TypeError.
for book in book_data or []:
    print(book)


In [None]:
import csv
from io import StringIO
from datetime import datetime

def save_books_data_to_minio(books_data, minio_client, bucket_name='bronze'):
    """Serialize scraped books to CSV and upload the file to MinIO.

    The object is named ``books_data_<YYYYMMDD>.csv`` using the current local
    date, so a second run on the same day targets the same object name.

    Args:
        books_data: Iterable of dicts with the keys ``title``, ``price``,
            ``availability`` and ``rating``.
        minio_client: Client object exposing ``put_object``.
        bucket_name: Destination bucket.

    Returns:
        ``True`` when the upload succeeded, ``False`` when there was nothing
        to upload or the upload raised.
    """
    if not books_data:
        # Covers both None (failed scrape) and an empty result set; avoids
        # uploading a header-only file.
        print("No book data to save; skipping upload")
        return False

    object_name = f"books_data_{datetime.now().strftime('%Y%m%d')}.csv"

    csv_buffer = StringIO()
    writer = csv.DictWriter(
        csv_buffer, fieldnames=["title", "price", "availability", "rating"]
    )
    writer.writeheader()
    writer.writerows(books_data)

    # Encode once; the upload length must be the byte count, not the
    # character count.
    payload = csv_buffer.getvalue().encode('utf-8')

    try:
        minio_client.put_object(
            bucket_name, object_name, BytesIO(payload), len(payload),
            content_type='text/csv',
        )
    except Exception as e:
        # Best-effort upload: report the problem and signal failure to the
        # caller rather than raising.
        print("An error occurred while uploading to Minio:", e)
        return False

    print(f"Book data saved successfully as {object_name}")
    return True

# Upload only when the scrape produced at least one book; a None or empty
# result is skipped.
if book_data:
    save_books_data_to_minio(book_data, minio_client)
