In [10]:
pip install BeautifulSoup4

Collecting BeautifulSoup4
  Downloading beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from BeautifulSoup4)
  Downloading soupsieve-2.7-py3-none-any.whl.metadata (4.6 kB)
Collecting typing-extensions>=4.0.0 (from BeautifulSoup4)
  Downloading typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 kB)
Downloading beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Downloading soupsieve-2.7-py3-none-any.whl (36 kB)
Downloading typing_extensions-4.13.2-py3-none-any.whl (45 kB)
Installing collected packages: typing-extensions, soupsieve, BeautifulSoup4
Successfully installed BeautifulSoup4-4.13.4 soupsieve-2.7 typing-extensions-4.13.2
Note: you may need to restart the kernel to use updated packages.


In [7]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm
import time

API_KEY  = "dL9GWZZEdAUVGhf56KVzxTr9t60rJAe9" 
# Replace this with your key
LIST_NAMES = [
    "combined-print-and-e-book-fiction",
    "combined-print-and-e-book-nonfiction"
]
START_DATE = datetime(2020, 1, 5)  # NYT lists update Sundays
END_DATE = datetime(2025, 4, 27)

def get_bestseller_list(date_str, list_name):
    url = f"https://api.nytimes.com/svc/books/v3/lists.json"
    params = {
        "api-key": API_KEY,
        "published-date": date_str,
        "list": list_name
    }
    r = requests.get(url, params=params)
    if r.status_code == 200:
        return r.json().get("results", [])
    else:
        print(f"❌ Failed for {list_name} on {date_str} (Status {r.status_code})")
        return []

def collect_books():
    all_books = []
    current = START_DATE

    while current <= END_DATE:
        date_str = current.strftime("%Y-%m-%d")
        for list_name in LIST_NAMES:
            results = get_bestseller_list(date_str, list_name)
            if results:
                for book in results:
                    info = book["book_details"][0]
                    all_books.append({
                        "bestseller_date": date_str,
                        "list_name": list_name,
                        "rank": book["rank"],
                        "weeks_on_list": book["weeks_on_list"],
                        "title": info.get("title"),
                        "author": info.get("author"),
                        "publisher": info.get("publisher"),
                        "description": info.get("description"),
                        "primary_isbn13": info.get("primary_isbn13"),
                        "primary_isbn10": info.get("primary_isbn10"),
                    })
            else:
                print(f"⚠️ No results for {list_name} on {date_str}")
            time.sleep(0.3)  # NYT rate limit
        current += timedelta(days=7)

    return all_books

print("📦 Fetching full NYT bestseller lists from 2020 to 2025...")
books = collect_books()

df = pd.DataFrame(books)
df.drop_duplicates(subset=["primary_isbn13", "bestseller_date", "list_name"], inplace=True)

if df.empty:
    print("⚠️ No data collected. Please check your API key and network.")
else:
    df.to_csv("nyt_top_books_2020_2025.csv", index=False)
    print(f"✅ Saved {len(df)} records to 'nyt_top_books_2020_2025.csv'")

📦 Fetching full NYT bestseller lists from 2020 to 2025...
❌ Failed for combined-print-and-e-book-nonfiction on 2020-01-19 (Status 429)
⚠️ No results for combined-print-and-e-book-nonfiction on 2020-01-19
❌ Failed for combined-print-and-e-book-fiction on 2020-01-26 (Status 429)
⚠️ No results for combined-print-and-e-book-fiction on 2020-01-26
❌ Failed for combined-print-and-e-book-nonfiction on 2020-01-26 (Status 429)
⚠️ No results for combined-print-and-e-book-nonfiction on 2020-01-26
❌ Failed for combined-print-and-e-book-fiction on 2020-02-02 (Status 429)
⚠️ No results for combined-print-and-e-book-fiction on 2020-02-02
❌ Failed for combined-print-and-e-book-nonfiction on 2020-02-02 (Status 429)
⚠️ No results for combined-print-and-e-book-nonfiction on 2020-02-02
❌ Failed for combined-print-and-e-book-fiction on 2020-02-09 (Status 429)
⚠️ No results for combined-print-and-e-book-fiction on 2020-02-09
❌ Failed for combined-print-and-e-book-nonfiction on 2020-02-09 (Status 429)
⚠️ No r

KeyboardInterrupt: 

In [37]:
import requests
import csv

# Set your NYT API Key
api_key = "dL9GWZZEdAUVGhf56KVzxTr9t60rJAe9"  # Replace with your actual API key




# Define the API endpoint for different lists (hardcover, paperback, e-books)
lists = [
    'hardcover-fiction', 'hardcover-nonfiction', 'combined-print-and-e-book-fiction', 
    'combined-print-and-e-book-nonfiction', 'e-book-fiction', 'e-book-nonfiction', 
    'paperback-fiction', 'paperback-nonfiction'
]

# Prepare a list to store all book data
all_books_data = []

# Loop through all the lists to get data
for list_name in lists:
    url = f'https://api.nytimes.com/svc/books/v3/lists/current/{list_name}.json'
    
    # Make a GET request to the API
    response = requests.get(url, params={'api-key': api_key})
    
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        data = response.json()
        
        # Extract the book information
        books = data['results']['books']
        
        for book in books:
            title = book['title']
            author = book['author']
            url = book['url']
            category = list_name.replace('-', ' ').title()  # Format the list name into a category name

            # Append book data with category to the list
            all_books_data.append([title, author, category, url])
            print([title, author, category, url])  # Check the data being extracted
    else:
        print(f"Failed to retrieve data for list: {list_name}. Status code: {response.status_code}")

# Save the data to a




KeyError: 'url'