In [40]:
import csv
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scrape targets: the collection listing page, and the prefix that a product
# handle is appended to in order to form that product's page URL.
collection_url = "https://nylon.coffee/collections/coffee"
product_base_url = "https://nylon.coffee/products/"

# Request headers sent with every HTTP call; the User-Agent imitates a
# desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/89.0.4389.82 Safari/537.36"
    ),
}

# Destination path of the generated CSV report
output_file = "coffee_products_with_variants.csv"

# Step 1: Scrape the collection page to get product handles
# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops the run on an HTTP error instead of parsing an
# error page as if it were the collection listing.
response = requests.get(collection_url, headers=headers, timeout=30)
response.raise_for_status()
collection_soup = BeautifulSoup(response.text, "html.parser")
# Each <product-card> element on the collection page represents one product.
product_cards = collection_soup.find_all("product-card")

# Prepare data for CSV: one row per variant, or a single "Base Product" row
# for products whose page lists no variants.
products_data = []

for card in product_cards:
    # Extract collection-level details
    handle = card.get("handle", "N/A")

    primary_image = card.find("img", class_="product-card__image--primary")
    image_src = primary_image.get("src") if primary_image else None
    # urljoin resolves protocol-relative ("//host/..."), root-relative and
    # already-absolute src values against the collection URL; a missing src
    # attribute yields "N/A" instead of raising KeyError.
    primary_image_url = urljoin(collection_url, image_src) if image_src else "N/A"

    sold_out_badge = card.find("sold-out-badge")
    # Holds display text (the badge's own text, or "In Stock"), not a boolean.
    is_sold_out = sold_out_badge.text.strip() if sold_out_badge else "In Stock"

    tasting_notes_element = card.find("div", class_="product-tasting")
    tasting_notes = tasting_notes_element.text.strip() if tasting_notes_element else "N/A"

    # Step 2: Visit product page for additional details
    # A card without a handle has no product page to request, and a failed
    # request must not abort the whole scrape: in both cases the product is
    # still reported using its collection-level data only.
    product_soup = None
    if handle != "N/A":
        product_url = f"{product_base_url}{handle}"
        try:
            product_response = requests.get(product_url, headers=headers, timeout=30)
            product_response.raise_for_status()
        except requests.RequestException as error:
            print(f"Could not load product page for {handle}: {error}")
        else:
            product_soup = BeautifulSoup(product_response.text, "html.parser")

    features = {}
    variants = []

    if product_soup is not None:
        # Extract feature chart: each table row pairs a heading with a value.
        feature_chart = product_soup.find("div", class_="feature-chart__table")
        if feature_chart:
            for chart_row in feature_chart.find_all("div", class_="feature-chart__table-row"):
                heading = chart_row.find("div", class_="feature-chart__heading")
                value = chart_row.find("div", class_="feature-chart__value")
                if heading and value:
                    features[heading.text.strip()] = value.text.strip()

        # Extract product variants from JSON-LD. A page may carry several
        # ld+json scripts, so every one is inspected until a "hasVariant"
        # entry yields variants; empty or malformed payloads are skipped.
        for script_tag in product_soup.find_all("script", type="application/ld+json"):
            try:
                # .string is None for an empty script tag; "" makes that
                # case fail as a decode error and get skipped as well.
                product_data_json = json.loads(script_tag.string or "")
            except json.JSONDecodeError:
                continue
            if not isinstance(product_data_json, dict):
                continue

            listed_variants = product_data_json.get("hasVariant") or []
            if isinstance(listed_variants, dict):
                listed_variants = [listed_variants]

            for variant in listed_variants:
                if not isinstance(variant, dict):
                    continue
                # "offers" can be a single object or a list of objects;
                # for a list, the first offer is used.
                offers = variant.get("offers") or {}
                if isinstance(offers, list):
                    offers = offers[0] if offers else {}
                if not isinstance(offers, dict):
                    offers = {}
                variants.append(
                    {
                        "Variant Name": variant.get("name", "N/A"),
                        "Variant Price": offers.get("price", "N/A"),
                        "Variant Availability": offers.get("availability", "N/A"),
                    }
                )

            if variants:
                break

    # Columns shared by every row of this product.
    common_columns = {
        "Handle": handle,
        "Primary Image URL": primary_image_url,
        "Availability": is_sold_out,
        "Tasting Notes": tasting_notes,
    }

    # If variants exist, add them and skip the base product
    if variants:
        for variant in variants:
            variant_data = dict(common_columns)
            variant_data.update(features)
            variant_data.update(variant)
            products_data.append(variant_data)
    else:
        # If no variants, include the base product with price
        base_price = "N/A"
        if product_soup is not None:
            price_element = product_soup.find("meta", {"property": "product:price:amount"})
            if price_element:
                base_price = price_element.get("content", "N/A")

        base_product_data = dict(common_columns)
        base_product_data.update(
            {
                "Variant Name": "Base Product",
                "Variant Price": base_price,
                "Variant Availability": is_sold_out,
            }
        )
        base_product_data.update(features)
        products_data.append(base_product_data)

# Write data to CSV
if not products_data:
    # Nothing was scraped: report it rather than failing on products_data[0]
    # after the output file has already been truncated.
    print(f"No product data collected; {output_file} was not written")
else:
    # Rows can carry different keys (feature-chart headings vary by product),
    # so the header is the union of all keys in first-seen order. Taking only
    # the first row's keys makes DictWriter raise ValueError on any later row
    # that has an extra column.
    fieldnames = list(dict.fromkeys(key for row in products_data for key in row))

    with open(output_file, mode="w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Write header and rows; a column missing from a row is left empty.
        writer.writeheader()
        writer.writerows(products_data)

    print(f"Data successfully written to {output_file}")

Data successfully written to coffee_products_with_variants.csv
