<a href="https://colab.research.google.com/github/EtownWholesaleDev/take-home/blob/main/SKU_Image_Export_from_the_Bigcommerce.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import os
import time
from datetime import datetime
from typing import Dict, List, Optional

import pandas as pd
import requests

# Install required packages (run this cell first in Colab)
# !pip install requests pandas

# Configuration - credentials are read from environment variables so that
# secrets are never stored in the notebook/source itself. Set them before
# running, e.g. os.environ["BIGCOMMERCE_STORE_HASH"] = "..." in a private cell.
# The fallback placeholders are the values the main block checks for, so a
# missing variable produces the "please update" error instead of a failed call.
# NOTE: any token that was ever committed in plain text should be revoked and
# regenerated.
STORE_HASH = os.environ.get("BIGCOMMERCE_STORE_HASH", "your_store_hash_here")  # Found in your store URL
API_TOKEN = os.environ.get("BIGCOMMERCE_API_TOKEN", "your_api_token_here")    # Generate from Advanced Settings > API Accounts
API_VERSION = "v3"

# API Headers sent with every request (token auth, JSON in and out)
headers = {
    'X-Auth-Token': API_TOKEN,
    'Content-Type': 'application/json',
    'Accept': 'application/json'
}

def get_variant_images(product_id: int, variant_id: int, timeout: float = 30.0) -> Optional[str]:
    """
    Get the image URL specifically associated with a variant

    Args:
        product_id: ID of the product that owns the variant.
        variant_id: ID of the variant to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value of the variant's 'image_url' field ('' when the field is
        absent), or None when the request fails.
    """
    url = f"https://api.bigcommerce.com/stores/{STORE_HASH}/{API_VERSION}/catalog/products/{product_id}/variants/{variant_id}"

    try:
        # Without a timeout, requests can block indefinitely on a stalled connection
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        # 'data' may be missing or null in the payload; treat both as "no variant data"
        variant_data = response.json().get('data') or {}

        # Get the image_url field which is the variant-specific image
        variant_image = variant_data.get('image_url', '')

        return variant_image
    except requests.exceptions.RequestException as e:
        # Best effort: report the failure and let the caller treat it as "no image"
        print(f"Error fetching variant {variant_id} for product {product_id}: {e}")
        return None

def get_product_images(product_id: int, timeout: float = 30.0) -> List[Dict]:
    """
    Get all images for a specific product

    Args:
        product_id: ID of the product whose images are requested.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The list found under the response's 'data' key, or an empty list when
        that key is missing/null or the request fails.
    """
    url = f"https://api.bigcommerce.com/stores/{STORE_HASH}/{API_VERSION}/catalog/products/{product_id}/images"

    try:
        # Without a timeout, requests can block indefinitely on a stalled connection
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        # Always hand back a list, even if the payload carries "data": null
        return response.json().get('data') or []
    except requests.exceptions.RequestException as e:
        # Best effort: report the failure and continue with no images
        print(f"Error fetching images for product {product_id}: {e}")
        return []

def match_variant_to_image(variant_image_url: str, product_images: List[Dict]) -> Optional[Dict]:
    """
    Match a variant's image URL to the full image data from product images

    An image matches when its 'url_thumbnail' or 'url_standard' contains the
    variant URL, or is contained in it.

    Args:
        variant_image_url: Image URL taken from the variant.
        product_images: Image dicts for the owning product.

    Returns:
        None when either argument is empty; the first matching image dict;
        otherwise a minimal image dict built from the variant URL alone.
    """
    if not variant_image_url or not product_images:
        return None

    # BigCommerce variant image_url is often a thumbnail or modified version
    # We need to match it to the actual image in the product images list
    for img in product_images:
        for key in ('url_thumbnail', 'url_standard'):
            candidate = img.get(key)
            # Skip missing/empty/non-string URLs: '' is a substring of every
            # string and would make the first image match unconditionally,
            # while None would raise TypeError in the containment test.
            if not candidate or not isinstance(candidate, str):
                continue
            if candidate in variant_image_url or variant_image_url in candidate:
                return img

    # If no exact match, return a basic structure with the variant image URL
    return {
        'id': None,
        'url_standard': variant_image_url,
        'url_thumbnail': variant_image_url,
        'url_zoom': variant_image_url,
        'sort_order': 0,
        'is_thumbnail': False
    }

def _sku_image_record(sku, product_id, product_name, variant_id, image):
    """
    Build one export row for a SKU.

    `image` is an image dict whose id/url/sort fields are copied into the row;
    pass None to produce the 'NO VARIANT IMAGE' placeholder row instead.
    """
    record = {
        'sku': sku,
        'product_id': product_id,
        'product_name': product_name,
        'variant_id': variant_id,
    }
    if image is None:
        record.update({
            'image_id': None,
            'image_url_standard': 'NO VARIANT IMAGE',
            'image_url_thumbnail': 'NO VARIANT IMAGE',
            'image_url_zoom': 'NO VARIANT IMAGE',
            'is_thumbnail': False,
            'sort_order': 0,
        })
    else:
        record.update({
            'image_id': image.get('id'),
            'image_url_standard': image.get('url_standard', ''),
            'image_url_thumbnail': image.get('url_thumbnail', ''),
            'image_url_zoom': image.get('url_zoom', ''),
            'is_thumbnail': image.get('is_thumbnail', False),
            'sort_order': image.get('sort_order', 0),
        })
    return record


def get_all_sku_specific_images(timeout: float = 30.0):
    """
    Fetch all products and extract SKU-specific images (variant images only)

    Pages through the catalog products endpoint (with variants included) until
    an empty page is returned. For every variant that has a SKU, one row is
    recorded: the matched image, or a 'NO VARIANT IMAGE' placeholder when the
    variant has no image URL. Products without variants contribute their
    thumbnail (or first) image under the base product SKU.

    Args:
        timeout: Seconds to wait for each product-page request made directly
            by this function.

    Returns:
        A list of row dicts. If a page request fails, the error is printed and
        the rows collected so far are returned.
    """
    base_url = f"https://api.bigcommerce.com/stores/{STORE_HASH}/{API_VERSION}/catalog/products"

    all_sku_image_data = []
    page = 1
    limit = 250  # Maximum allowed per request

    print("Starting to fetch products with variant-specific images...")
    print("=" * 60)

    while True:
        params = {
            'page': page,
            'limit': limit,
            'include': 'variants'  # Include variants
        }

        # Only the page request itself is guarded; the per-product helpers
        # handle their own request errors.
        try:
            # Without a timeout, requests can block indefinitely on a stalled connection
            response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()
            products = response.json().get('data', [])
        except requests.exceptions.RequestException as e:
            print(f"\n❌ Error on page {page}: {e}")
            break

        if not products:
            print(f"\nNo more products found. Finished at page {page-1}.")
            break

        print(f"\nProcessing page {page} - {len(products)} products...")

        for product in products:
            product_id = product['id']
            product_name = product['name']
            product_sku = product.get('sku', '')

            # Get all product images once for matching
            product_images = get_product_images(product_id)
            time.sleep(0.2)  # Rate limiting

            # Process variants (which have individual SKUs)
            variants = product.get('variants', [])

            if not variants:
                # Product has no variants, only base product with SKU
                if product_sku:
                    print(f"  Product: {product_name} (ID: {product_id}) - No variants (base product only)")

                    # For products without variants, we can optionally include the main product image
                    if product_images:
                        # Prefer the image flagged as thumbnail, else the first one
                        primary_image = next(
                            (img for img in product_images if img.get('is_thumbnail', False)),
                            product_images[0],
                        )

                        if primary_image:
                            all_sku_image_data.append(
                                _sku_image_record(product_sku, product_id, product_name, None, primary_image)
                            )
                continue

            print(f"  Product: {product_name} (ID: {product_id}) - {len(variants)} variants")

            for variant in variants:
                variant_id = variant['id']
                variant_sku = variant.get('sku', '')

                if not variant_sku:
                    continue

                # Get the variant-specific image URL
                variant_image_url = get_variant_images(product_id, variant_id)
                time.sleep(0.2)  # Rate limiting

                if not variant_image_url:
                    # Variant exists but has no specific image
                    all_sku_image_data.append(
                        _sku_image_record(variant_sku, product_id, product_name, variant_id, None)
                    )
                    print(f"    ⚠ SKU: {variant_sku} - No variant-specific image")
                    continue

                # Try to match to full image data
                matched_image = match_variant_to_image(variant_image_url, product_images)

                if matched_image:
                    all_sku_image_data.append(
                        _sku_image_record(variant_sku, product_id, product_name, variant_id, matched_image)
                    )
                    print(f"    ✓ SKU: {variant_sku} - Image found")
                else:
                    # NOTE(review): no row is recorded for this SKU in this
                    # case, so it will be absent from the export - confirm
                    # that this is intended.
                    print(f"    ✗ SKU: {variant_sku} - Image URL found but couldn't match to full data")

        page += 1
        time.sleep(0.5)  # Rate limiting between pages

    return all_sku_image_data

def export_to_csv(data: List[Dict], filename='bigcommerce_sku_specific_images.csv'):
    """
    Write the SKU image rows to a CSV file and print a short report

    Rows are ordered by 'sku' and then 'sort_order' before being saved
    (without the index). The report shows record counts, a ten-row preview
    and the first five SKUs.

    Returns the sorted DataFrame, or None when there is nothing to export.
    """
    if not data:
        print("\n❌ No data to export!")
        return None

    divider = "=" * 60
    placeholder = 'NO VARIANT IMAGE'

    # Build the table already ordered by SKU, then persist it
    frame = pd.DataFrame(data).sort_values(['sku', 'sort_order'])
    frame.to_csv(filename, index=False)

    # Headline numbers
    print("\n" + divider)
    print("✓ EXPORT COMPLETE")
    print(divider)
    print(f"File saved: {filename}")
    print(f"Total records: {len(frame)}")
    print(f"Unique SKUs: {frame['sku'].nunique()}")
    lacks_image = frame['image_url_standard'] == placeholder
    print(f"SKUs with images: {int((~lacks_image).sum())}")
    print(f"SKUs without images: {int(lacks_image.sum())}")

    # First ten rows as a plain-text table
    print("\n" + divider)
    print("PREVIEW OF DATA:")
    print(divider)
    print(frame.head(10).to_string())

    # A handful of SKUs in a more readable layout
    print("\n" + divider)
    print("SAMPLE SKUs:")
    print(divider)
    for _, record in frame.head(5).iterrows():
        print(f"\nSKU: {record['sku']}")
        print(f"  Product: {record['product_name']}")
        print(f"  Variant ID: {record['variant_id']}")
        print(f"  Image: {record['image_url_thumbnail'][:80]}...")

    return frame

# Script entry point: validate the configuration, run the export, write the
# per-SKU summary and print start/end timestamps.
if __name__ == "__main__":
    rule = "=" * 60
    stamp_format = '%Y-%m-%d %H:%M:%S'

    print("\n" + rule)
    print("BigCommerce SKU-SPECIFIC Image Export Tool")
    print("(Variant Images Only)")
    print(rule)

    # The run is refused while either setting still holds its template placeholder
    config_ready = not (STORE_HASH == "your_store_hash_here" or API_TOKEN == "your_api_token_here")

    if config_ready:
        print(f"\nStore Hash: {STORE_HASH}")
        print(f"API Version: {API_VERSION}")
        print(f"Start Time: {datetime.now().strftime(stamp_format)}")

        # Collect one row per SKU from the store
        sku_image_data = get_all_sku_specific_images()

        if not sku_image_data:
            print("\n❌ No data retrieved. Please check:")
            print("  1. Your API credentials are correct")
            print("  2. Your store hash is correct")
            print("  3. You have products with variants in your store")
            print("  4. Your API token has 'Products' read permissions")
        else:
            # Full export first, then a one-row-per-SKU summary derived from it
            df = export_to_csv(sku_image_data)

            print("\n" + rule)
            print("Creating summary report...")

            if df is not None:
                # Keep the first value of each column within every SKU group
                first_per_sku = {
                    'product_name': 'first',
                    'variant_id': 'first',
                    'image_url_standard': 'first',
                    'image_url_thumbnail': 'first'
                }
                summary_df = df.groupby('sku').agg(first_per_sku).reset_index()

                summary_filename = 'sku_images_summary.csv'
                summary_df.to_csv(summary_filename, index=False)
                print(f"✓ Summary saved to '{summary_filename}'")

        print(f"\nEnd Time: {datetime.now().strftime(stamp_format)}")
        print(rule)
    else:
        print("\n❌ ERROR: Please update STORE_HASH and API_TOKEN with your actual values!")

Streaming output truncated to the last 5000 lines.
    ✓ SKU: 54826 - Image found
    ✓ SKU: 54827 - Image found
    ✓ SKU: 54828 - Image found
    ✓ SKU: 54829 - Image found
  Product: Half Bak'd THC-A 2g Cart (ID: 1544) - 5 variants
    ✓ SKU: 54831 - Image found
    ✓ SKU: 54832 - Image found
    ✓ SKU: 54833 - Image found
    ✓ SKU: 54834 - Image found
    ✓ SKU: 54889 - Image found
  Product: Cake Glow 3g Cartridges (ID: 1545) - 12 variants
    ✓ SKU: 55188 - Image found
    ✓ SKU: 55189 - Image found
    ✓ SKU: 55190 - Image found
    ✓ SKU: 55191 - Image found
    ✓ SKU: 55192 - Image found
    ✓ SKU: 55193 - Image found
    ✓ SKU: 55194 - Image found
    ✓ SKU: 55195 - Image found
    ✓ SKU: 55196 - Image found
    ✓ SKU: 55197 - Image found
    ✓ SKU: 55198 - Image found
    ✓ SKU: 55199 - Image found
  Product: Cake Stax 3g Disposable (ID: 1546) - 12 variants
    ✓ SKU: 55200 - Image found
    ✓ SKU: 55201 - Image found
    ✓ SKU: 55202 - Image found
    ✓ SKU: 