In [4]:
import requests
import csv
import re
import json
from bs4 import BeautifulSoup

# Case-insensitive marker that separates the description from the tasting
# notes. Compiled once at import time instead of on every call.
_TASTING_MARKER_RE = re.compile(r"Tasting Notes", re.IGNORECASE)


def parse_description_and_tasting(body_html):
    """
    Split a product's body_html into a description and its tasting notes.

    The HTML is flattened to plain text (tags removed, text fragments joined
    with a single space) and then split at the first case-insensitive
    occurrence of the marker "Tasting Notes":
      - description: text before the marker
      - tasting_notes: text after the marker

    If the marker is not found, the full text is returned as description and
    tasting_notes is empty. If body_html is empty or None, both parts are
    empty strings.

    Returns:
        A (description, tasting_notes) tuple of whitespace-stripped strings.
    """
    # A missing body (None / JSON null) or an empty string has nothing to
    # parse; return early instead of handing a non-string to BeautifulSoup.
    if not body_html:
        return "", ""

    soup = BeautifulSoup(body_html, 'html.parser')
    # Extract all text from the HTML (using a space as separator)
    full_text = soup.get_text(separator=" ", strip=True)

    # maxsplit=1 keeps any later occurrence of the marker inside the notes.
    parts = _TASTING_MARKER_RE.split(full_text, maxsplit=1)

    if len(parts) > 1:
        return parts[0].strip(), parts[1].strip()
    return full_text.strip(), ""

def process_products(json_data, csv_filename):
    """
    Write one CSV row per variant of every coffee-tagged product.

    The CSV has the following columns:
      - Title
      - Description (text before "Tasting Notes")
      - Tasting Notes (text after "Tasting Notes")
      - Primary Image URL (the "src" of the first image, or empty)
      - Availability ("Available" if any variant is available)
      - Vendor
      - Product Type
      - Tags (as a comma-separated string)
      - Variant Title
      - Variant Price
      - Grams
      - Variant Availability
    Only products that have at least one tag containing the word "coffee"
    (case-insensitive) are processed. A product with no variants produces no
    rows but still counts as processed.

    Args:
        json_data: dict with a "products" list of product dicts.
        csv_filename: path of the CSV file to create (overwritten if present).

    Prints a summary line with the number of products that passed the tag
    filter.
    """
    # `or []` also covers keys that are present but set to JSON null, which
    # a plain .get(key, default) would pass through as None.
    products = json_data.get("products") or []

    fieldnames = [
        "Title",
        "Description",
        "Tasting Notes",
        "Primary Image URL",
        "Availability",
        "Vendor",
        "Product Type",
        "Tags",
        "Variant Title",
        "Variant Price",
        "Grams",
        "Variant Availability"
    ]

    # Number of products that pass the tag filter (not the number fetched).
    processed_count = 0

    with open(csv_filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for product in products:
            product_tags = product.get("tags") or []
            # Tolerate tags delivered as one comma-separated string; iterating
            # a bare string would otherwise test single characters.
            if isinstance(product_tags, str):
                product_tags = [
                    tag.strip() for tag in product_tags.split(",") if tag.strip()
                ]

            # Only keep products where at least one tag contains "coffee"
            if not any("coffee" in tag.lower() for tag in product_tags):
                continue
            processed_count += 1

            images = product.get("images") or []
            variants = product.get("variants") or []
            overall_available = any(v.get("available", False) for v in variants)

            description, tasting_notes = parse_description_and_tasting(
                product.get("body_html") or ""
            )

            # Fields shared by every variant row of this product.
            product_fields = {
                "Title": product.get("title", ""),
                "Description": description,
                "Tasting Notes": tasting_notes,
                "Primary Image URL": images[0].get("src", "") if images else "",
                "Availability": "Available" if overall_available else "Not Available",
                "Vendor": product.get("vendor", ""),
                "Product Type": product.get("product_type", ""),
                "Tags": ", ".join(product_tags),
            }

            # Process each variant for this product
            for variant in variants:
                variant_available = variant.get("available", False)
                writer.writerow({
                    **product_fields,
                    "Variant Title": variant.get("title", ""),
                    "Variant Price": variant.get("price", ""),
                    "Grams": variant.get("grams", ""),
                    "Variant Availability": (
                        "Available" if variant_available else "Not Available"
                    ),
                })

    print(f"CSV file '{csv_filename}' created with {processed_count} products processed.")

if __name__ == "__main__":
    # URL from which to fetch the JSON product data
    url = "https://homegroundcoffeeroasters.com/collections/coffees-specialty/products.json"

    # Without a timeout, requests waits indefinitely on a stalled server.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print("Error fetching JSON:", exc)
        raise SystemExit(1) from exc

    if response.status_code != 200:
        print("Error fetching JSON:", response.status_code)
        # SystemExit instead of exit(): exit() is an interactive helper
        # injected by the site module and is not meant for use in programs.
        raise SystemExit(1)
    json_data = response.json()

    csv_filename = "homeground_coffee_products.csv"
    process_products(json_data, csv_filename)

CSV file 'homeground_coffee_products.csv' created with 13 products processed.
