In [4]:
import requests
import json
import csv
import pandas as pd
import aiohttp
import asyncio
import nest_asyncio
import logging

In [5]:
# Route INFO-and-above records to a log file; filemode="w" truncates it on every run.
logging.basicConfig(
    filename="API_gutendex_log.log",
    filemode="w",
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)

In [6]:
# Disabled snippet: the code below is wrapped in a string literal, so it never runs.
# Because the string is the last expression of the cell, it is echoed as the cell output.
'''
with open('scraping_log.log', 'w'):
    pass # Clearing logs
'''

"\nwith open('scraping_log.log', 'w'):\n    pass # Clearing logs\n"

In [9]:
def fetch_all_books(topic, max_retries=3, timeout=30, retry_delay=1.0, session=None):
    """Download every page of Gutendex results for ``topic`` and return the books.

    The function starts at ``https://gutendex.com/books`` and keeps following
    the ``next`` URL of each response until it is falsy.

    Args:
        topic: Value for the ``topic`` query parameter. A falsy value
            (``""`` or ``None``) sends no filter at all.
        max_retries: How many times a failing page is retried before the
            function gives up. ``0`` means a single attempt per page.
        timeout: Timeout in seconds handed to every ``get`` call.
        retry_delay: Base pause in seconds between retries; it is multiplied
            by the number of consecutive failures so far.
        session: Optional object exposing ``get(url, params=..., timeout=...)``
            (for example a ``requests.Session``) used instead of the
            module-level ``requests`` functions.

    Returns:
        list: The concatenated ``results`` entries of every page fetched.
        If a page keeps failing after ``max_retries`` retries, the books
        collected up to that point are returned and an error is logged.
    """
    # Local import: the notebook's import cell does not bring in `time`.
    import time

    logging.info('Book fetching started')
    http = session if session is not None else requests
    url = "https://gutendex.com/books"
    # Only the first request carries the filter; every `next` URL already embeds it.
    params = {"topic": topic} if topic else None
    all_books = []
    failures = 0  # consecutive failures for the page currently being fetched

    while url:
        try:
            response = http.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
            # Read both fields before touching all_books so that a malformed
            # payload cannot leave a half-processed page behind on retry.
            page_books = data["results"]
            next_url = data["next"]
        except Exception as e:
            failures += 1
            logging.error(f"Error while requesting {url} (attempt {failures})", exc_info=e)
            if failures > max_retries:
                # Give up instead of retrying the same URL forever.
                logging.error(
                    f"Giving up on {url} after {failures} failed attempts; "
                    f"returning {len(all_books)} books collected so far"
                )
                break
            time.sleep(retry_delay * failures)
            continue

        all_books.extend(page_books)
        url = next_url
        params = None
        failures = 0

    return all_books

def extract_book_data(books):
    """Flatten Gutendex book records into a tabular ``pandas.DataFrame``.

    Each book contributes one row holding its title, the details of its first
    listed author, comma-joined translators / subjects / bookshelves, the
    copyright flag and the download count. A book whose ``authors`` entry is
    missing or empty gets ``"N/A"`` for all three author columns.

    Args:
        books: Iterable of book dicts; every dict must contain ``"title"``.

    Returns:
        pandas.DataFrame: One row per input book.
    """
    logging.info('Book data extraction started')
    unknown_author = {"name": "N/A", "birth_year": "N/A", "death_year": "N/A"}
    rows = []

    for entry in books:
        title = entry["title"]
        logging.info(f'Getting info about {title} books')

        author_list = entry.get("authors", [])
        if not author_list:
            logging.warning(f'No info about {title} author')
            lead_author = unknown_author
        else:
            logging.info(f'Getting info about {title} author')
            lead_author = author_list[0]

        row = {
            "title": title,
            "author_name": lead_author["name"],
            "author_birth_year": lead_author.get("birth_year", "N/A"),
            "author_death_year": lead_author.get("death_year", "N/A"),
            "translators": ", ".join(person["name"] for person in entry.get("translators", [])),
            "subjects": ", ".join(entry.get("subjects", [])),
            "bookshelves": ", ".join(entry.get("bookshelves", [])),
            "copyright": entry.get("copyright"),
            "download_count": entry.get("download_count"),
        }
        rows.append(row)

    return pd.DataFrame(rows)

In [None]:
# Fetch the books matching the "children" topic, flatten them and store the table as CSV.
books_child = fetch_all_books(topic="children")
df_child = extract_book_data(books=books_child)
df_child.to_csv(path_or_buf="children_books.csv", index=False)
logging.info('Dataset with children books saved as children_books.csv')
logging.info(f'Number of books for children: {df_child.shape[0]}')

In [None]:
# An empty string (or None) sends no topic filter, so every book is fetched.
# NOTE(review): the output file is called non_children_books.csv, yet no topic is
# excluded here, so it holds whatever the unfiltered query returns. Confirm the name.
books_all = fetch_all_books(topic="")
df_all = extract_book_data(books=books_all)
df_all.to_csv(path_or_buf="non_children_books.csv", index=False)
logging.info('Dataset with all books saved as non_children_books.csv')
logging.info(f'Number of books for everyone: {df_all.shape[0]}')