In [38]:
import csv
import html
import re

import requests

# JSON endpoint requested by scrape_products(); the response body is parsed
# as JSON and its "products" list is read.
url = "https://20gramscoffeeroastery.com/products.json"

# Function to scrape and clean product data
def scrape_products():
    """Fetch the product feed and return the cleaned coffee products.

    Requests the module-level ``url``, keeps only products whose title
    contains "Espresso" or "Filter", and merges the key/value pairs parsed
    from each product's HTML description into the result.

    Returns:
        A list with one dict per kept product. Each dict holds
        "Product Title", "Vendor", "Type", every key returned by
        ``extract_description_data`` and "Variants" (a list of dicts with
        "Title", "Price" as a float and "Available" as "Yes"/"No").
        An empty list is returned, after printing a message, when the
        request fails, the status code is not 200, or the body is not
        valid JSON.
    """
    try:
        # Without a timeout a stalled server would block the script forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError as exc:  # raised when the body is not valid JSON
        print(f"Failed to parse JSON response. Error: {exc}")
        return []

    # NOTE(review): only a single request is made; if the shop paginates this
    # feed, products beyond the first page are not included - confirm.
    cleaned_output = []
    for product in data.get("products", []):
        # "or" also covers an explicit null title, which .get() would pass on.
        product_title = product.get("title") or "N/A"
        # Include only products with "Espresso" or "Filter" in the title
        if "Espresso" not in product_title and "Filter" not in product_title:
            continue

        description_data = extract_description_data(product.get("body_html", ""))
        cleaned_output.append(
            {
                "Product Title": product_title,
                "Vendor": product.get("vendor", "N/A"),
                "Type": product.get("product_type", "N/A"),
                **description_data,  # Include extracted description data
                "Variants": [
                    {
                        "Title": variant.get("title", "N/A"),
                        "Price": _parse_price(variant.get("price")),
                        "Available": "Yes" if variant.get("available", False) else "No",
                    }
                    for variant in product.get("variants", [])
                ],
            }
        )
    return cleaned_output


def _parse_price(raw_price):
    """Return a variant price as a float; missing or malformed values give 0.0.

    Shopify's ``products.json`` feed reports variant prices as decimal strings
    in major currency units (for example "18.00"), so the value is used as-is
    and is not divided by 100.
    NOTE(review): confirm against the live payload of this shop.
    """
    try:
        return float(raw_price)
    except (TypeError, ValueError):
        return 0.0

# Helper function to clean the description and extract key-value pairs
def extract_description_data(description):
    """Parse "Key: Value" pairs out of a product's HTML description.

    ``<br>`` tags and closing block-level tags (p, div, li, tr, h1-h6) are
    treated as line separators, so markup such as
    ``<p>Origin: X</p><p>Farm: Y</p>`` yields one pair per element even when
    the HTML contains no literal newlines. All remaining tags are removed and
    HTML entities (``&amp;``, ``&nbsp;`` ...) are decoded.

    Args:
        description: HTML or plain-text description; may be empty or None.

    Returns:
        A dict mapping each key (text before the first colon on a line) to
        its value (text after it), both stripped of surrounding whitespace.
        Lines without a colon, or with nothing before the colon, are
        ignored. When a key occurs more than once the last occurrence wins.
    """
    if not description:
        return {}

    # Turn structural markup into newlines *before* stripping tags; otherwise
    # adjacent elements fuse into a single line and only one pair is found.
    text = re.sub(
        r"<br\s*/?>|</(?:p|div|li|tr|h[1-6])\s*>",
        "\n",
        description,
        flags=re.IGNORECASE,
    )
    text = re.sub(r"<[^>]+>", "", text)  # Remove the remaining HTML tags
    # Decode entities after tag removal so an escaped "&lt;" is kept as text.
    text = html.unescape(text)

    data = {}
    for line in text.split("\n"):
        key, colon, value = line.partition(":")
        key = key.strip()
        if colon and key:
            data[key] = value.strip()
    return data

# Function to write the scraped data to a CSV file
def write_to_csv(products, filename="products.csv"):
    """Dump scraped products to a CSV file, one row per product variant.

    Args:
        products: Iterable of product dicts. Each needs the keys
            "Product Title", "Vendor", "Type" and "Variants" (a list of
            dicts with "Title", "Price" and "Available"). The description
            fields are optional and default to "N/A" when absent.
        filename: Destination path; opened in write mode, so an existing
            file is overwritten.
    """
    required_columns = ("Product Title", "Vendor", "Type")
    optional_columns = (
        "Origin",
        "Farm",
        "Producer",
        "Location",
        "Varietal",
        "Processed",
        "Growing Altitude",
        "Harvest",
        "Flavour Notes",
        "Agtron Level",
        "Roasted Density",
    )
    variant_keys = ("Title", "Price", "Available")

    with open(filename, mode="w", newline="", encoding="utf-8") as out:
        sheet = csv.writer(out)

        # Header: product columns followed by the per-variant columns.
        sheet.writerow(
            [*required_columns, *optional_columns, "Variant Title", "Price", "Available"]
        )

        # Every variant repeats its parent product's columns on its own row.
        for item in products:
            for option in item["Variants"]:
                sheet.writerow(
                    [item[column] for column in required_columns]
                    + [item.get(column, "N/A") for column in optional_columns]
                    + [option[key] for key in variant_keys]
                )

# Main execution
if __name__ == "__main__":
    # Scrape the feed; scrape_products() returns an empty list when the
    # request does not succeed, in which case only the header row is written.
    products = scrape_products()
    # No filename is passed, so the default "products.csv" is used.
    write_to_csv(products)
    print("Products have been written to 'products.csv'.")


Products have been written to 'products.csv'.
