In [None]:
! pip install requests

Collecting requests
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Downloading charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests)
  Downloading idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Downloading urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Downloading certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)
Downloading requests-2.32.3-py3-none-any.whl (64 kB)
Downloading certifi-2025.1.31-py3-none-any.whl (166 kB)
Downloading charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl (197 kB)
Downloading idna-3.10-py3-none-any.whl (70 kB)
Downloading urllib3-2.3.0-py3-none-any.whl (128 kB)
Installing collected packages: urllib3, idna, charset-normalizer, certifi, requests
Successfully installed certifi-2025.1.31 charset-normalizer-3.4.1 idna-3.10

In [17]:
import requests
import json
import os
import time
import logging
from datetime import datetime

# Eventbrite API Configuration
# The OAuth token is a secret and must not be committed with the notebook.
# Provide it through the environment before starting the kernel, e.g.
#   export EVENTBRITE_OAUTH_TOKEN="<your private token>"
# NOTE: any token that was previously hardcoded here should be treated as
# compromised and revoked in the Eventbrite account settings.
OAUTH_TOKEN = os.environ.get("EVENTBRITE_OAUTH_TOKEN", "")
# Event to scrape; override with EVENTBRITE_EVENT_ID, otherwise the default below is used
EVENT_ID = os.environ.get("EVENTBRITE_EVENT_ID", "1246852341919")

# API request headers
headers = {
    "Authorization": f"Bearer {OAUTH_TOKEN}",
    "Content-Type": "application/json"
}

# Base URL for Eventbrite API
BASE_URL = "https://www.eventbriteapi.com/v3"

# Configure logging: every record goes both to a log file and to the cell output
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("eventbrite_api.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Surface a missing token early instead of failing later with an opaque 401
if not OAUTH_TOKEN:
    logger.warning(
        "EVENTBRITE_OAUTH_TOKEN is not set; API requests will be rejected until a token is provided."
    )


def validate_token(timeout=10):
    """
    Validate the OAuth token by making a request to the user endpoint
    
    Args:
        timeout (float): Seconds to wait for the server before giving up
    
    Returns:
        bool: True if token is valid, False otherwise
    """
    url = f"{BASE_URL}/users/me/"
    logger.info("Validating OAuth token")
    
    try:
        # Without a timeout a stalled connection would block the notebook forever
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        user_data = response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Token validation failed: {e}")
        return False
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body
        logger.error(f"Token validation failed: {e}")
        return False

    logger.info(f"Token validated successfully. User: {user_data.get('name', 'Unknown')}")
    return True


def make_api_request(url, max_retries=3, backoff_factor=1.5, timeout=10):
    """
    Make API request with retry logic for rate limiting and transient failures
    
    Args:
        url (str): The API endpoint URL
        max_retries (int): Maximum number of attempts
        backoff_factor (float): Base of the exponential delay (backoff_factor ** attempt seconds)
        timeout (float): Seconds to wait for the server on each attempt
        
    Returns:
        dict: Response data or None if the request ultimately fails
    """
    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        wait_time = backoff_factor ** attempt

        try:
            logger.debug(f"Making API request to: {url}")
            # A timeout keeps a stalled connection from hanging the notebook
            response = requests.get(url, headers=headers, timeout=timeout)
            
            # If rate limited, wait and retry (but never sleep after the final attempt)
            if response.status_code == 429:
                if is_last_attempt:
                    logger.error("Rate limited on the final attempt. All retry attempts failed")
                    return None
                logger.warning(f"Rate limited. Waiting {wait_time} seconds before retry.")
                time.sleep(wait_time)
                continue
                
            response.raise_for_status()
            return response.json()
            
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed (attempt {attempt+1}/{max_retries}): {e}")

            error_response = getattr(e, 'response', None)
            status = getattr(error_response, 'status_code', None)
            # Client errors (bad token, unknown ID, no permission) will not succeed
            # on retry; 408 and 429 are the exceptions that are worth another attempt.
            non_retryable = (
                status is not None and 400 <= status < 500 and status not in (408, 429)
            )

            if is_last_attempt or non_retryable:
                if non_retryable and not is_last_attempt:
                    logger.error(f"HTTP {status} is not retryable; giving up")
                else:
                    logger.error("All retry attempts failed")
                if error_response is not None:
                    logger.debug(f"Error response: {error_response.text}")
                return None

            logger.info(f"Retrying in {wait_time} seconds...")
            time.sleep(wait_time)

        except ValueError as e:
            # Older requests versions raise a plain ValueError when the body is not JSON
            logger.error(f"Response was not valid JSON: {e}")
            return None

    # Reached only when max_retries <= 0
    return None


def get_event_details(event_id):
    """
    Look up the core record of a single event
    
    Args:
        event_id (str): Identifier of the Eventbrite event
        
    Returns:
        dict: Whatever make_api_request returns for the event endpoint
              (the event payload, or None when the request fails)
    """
    logger.info(f"Fetching event details for event ID: {event_id}")
    endpoint = f"{BASE_URL}/events/{event_id}/"
    event_payload = make_api_request(endpoint)
    return event_payload


def get_ticket_classes(event_id):
    """
    Retrieve the ticket classes defined for an event
    
    Args:
        event_id (str): Identifier of the Eventbrite event
        
    Returns:
        dict: The API payload when one is returned, otherwise a dict with a
              single "error" key describing the failure
    """
    logger.info(f"Fetching ticket classes for event ID: {event_id}")
    payload = make_api_request(f"{BASE_URL}/events/{event_id}/ticket_classes/")

    # A missing or empty payload is reported as an error dict, not as None
    if not payload:
        return {"error": "Failed to retrieve ticket classes"}

    logger.info(f"Successfully retrieved {len(payload.get('ticket_classes', []))} ticket classes")
    return payload


def get_venue_details(venue_id):
    """
    Look up a venue record by its ID
    
    Args:
        venue_id (str): Identifier of the Eventbrite venue
        
    Returns:
        dict: Whatever make_api_request returns for the venue endpoint
              (the venue payload, or None when the request fails)
    """
    logger.info(f"Fetching venue details for venue ID: {venue_id}")
    endpoint = f"{BASE_URL}/venues/{venue_id}/"
    venue_payload = make_api_request(endpoint)
    return venue_payload


def get_organizer_details(organizer_id):
    """
    Look up an organizer record by its ID
    
    Args:
        organizer_id (str): Identifier of the Eventbrite organizer
        
    Returns:
        dict: Whatever make_api_request returns for the organizer endpoint
              (the organizer payload, or None when the request fails)
    """
    logger.info(f"Fetching organizer details for ID: {organizer_id}")
    endpoint = f"{BASE_URL}/organizers/{organizer_id}/"
    organizer_payload = make_api_request(endpoint)
    return organizer_payload


def get_attendees(event_id, page=1, continuation=None):
    """
    Fetch attendee information for an event (requires appropriate permissions)
    
    All pages are fetched iteratively and merged into the first page's payload,
    so the call depth does not grow with the number of pages.
    
    Args:
        event_id (str): The Eventbrite event ID
        page (int): Page number to start from (used when no continuation token is given)
        continuation (str): Continuation token to start from
        
    Returns:
        dict: First page's payload with the 'attendees' of every fetched page
              merged in, or None if the first request fails. If a later page
              fails, the attendees collected so far are returned.
    """
    # Local import keeps this function self-contained
    from urllib.parse import urlencode

    base_url = f"{BASE_URL}/events/{event_id}/attendees/"
    combined = None
    current_page = page
    token = continuation
    seen_tokens = set()

    while True:
        # Add pagination parameters (encoded, since tokens are opaque strings)
        query = {"continuation": token} if token else {"page": current_page}
        url = f"{base_url}?{urlencode(query)}"

        logger.info(f"Fetching attendees for event ID: {event_id}, page: {current_page}")

        page_data = make_api_request(url)
        if not page_data:
            if combined is None:
                return None
            logger.warning(
                f"Failed to fetch attendee page {current_page}; returning the attendees collected so far"
            )
            break

        if combined is None:
            combined = page_data
        else:
            extra_attendees = page_data.get('attendees') or []
            if extra_attendees:
                # The first page may lack a usable 'attendees' list
                if not isinstance(combined.get('attendees'), list):
                    combined['attendees'] = []
                combined['attendees'].extend(extra_attendees)

        # Check if we need to fetch more pages
        pagination = page_data.get('pagination') or {}
        token = pagination.get('continuation')
        if not (pagination.get('has_more_items', False) and token):
            break
        if token in seen_tokens:
            # A repeated token would otherwise loop forever
            logger.warning("Continuation token repeated; stopping pagination")
            break
        seen_tokens.add(token)

        logger.info("Fetching additional attendee pages")
        current_page += 1

    logger.info(f"Successfully retrieved attendee data. Total attendees: {len(combined.get('attendees') or [])}")
    return combined


def get_event_with_expansions(event_id):
    """
    Fetch an event together with its related objects in one request
    
    Args:
        event_id (str): Identifier of the Eventbrite event
        
    Returns:
        dict: Whatever make_api_request returns for the expanded event endpoint
              (the event payload, or None when the request fails)
    """
    logger.info(f"Fetching expanded event details for event ID: {event_id}")
    # The expand parameter asks the API to inline the related records, avoiding separate calls
    expanded_endpoint = f"{BASE_URL}/events/{event_id}/?expand=organizer,venue,ticket_classes,category,subcategory,format"
    expanded_payload = make_api_request(expanded_endpoint)
    return expanded_payload


def get_event_orders(event_id, page=1):
    """
    Fetch order information for an event (requires appropriate permissions)
    
    All pages from `page` up to the reported page count are fetched iteratively
    and merged into the first fetched page's payload.
    
    Args:
        event_id (str): The Eventbrite event ID
        page (int): Page number to start from
        
    Returns:
        dict: First fetched page's payload with the 'orders' of every fetched
              page merged in, or None if the first request fails. If a later
              page fails, the orders collected so far are returned.
    """
    combined = None
    current_page = page

    while True:
        url = f"{BASE_URL}/events/{event_id}/orders/?page={current_page}"
        logger.info(f"Fetching orders for event ID: {event_id}, page: {current_page}")

        page_data = make_api_request(url)
        if not page_data:
            if combined is None:
                return None
            logger.warning(
                f"Failed to fetch order page {current_page}; returning the orders collected so far"
            )
            break

        if combined is None:
            combined = page_data
        else:
            extra_orders = page_data.get('orders') or []
            if extra_orders:
                # The first page may lack a usable 'orders' list
                if not isinstance(combined.get('orders'), list):
                    combined['orders'] = []
                combined['orders'].extend(extra_orders)

        # Check if we need to fetch more pages (a missing/null count means a single page)
        pagination = page_data.get('pagination') or {}
        page_count = pagination.get('page_count') or 1
        if current_page >= page_count:
            break

        logger.info(f"Fetching additional order pages. Current: {current_page}, Total: {page_count}")
        current_page += 1

    logger.info(f"Successfully retrieved order data. Total orders: {len(combined.get('orders') or [])}")
    return combined


def analyze_event_data(event_data):
    """
    Process and analyze event data
    
    Fields that are missing or explicitly null in the input (for example the
    'cost' of a donation ticket, or a null 'description') are treated as absent
    instead of raising.
    
    Args:
        event_data (dict): Dict with an 'event' entry (the event payload) and a
            'tickets' entry holding a 'ticket_classes' list
        
    Returns:
        dict: Summary with event name, shortened description, ticket statistics,
              timing, status, online flag, capacity and URL
    """
    # Extract basic event information ("or" guards against keys present with a null value)
    event = event_data.get('event') or {}
    tickets = (event_data.get('tickets') or {}).get('ticket_classes') or []
    
    # Calculate ticket statistics
    free_tickets = [t for t in tickets if t.get('free', False)]
    paid_tickets = [t for t in tickets if not t.get('free', False)]
    
    # Get price range; 'value' is divided by 100 to convert minor units to major units.
    # Tickets without a cost value (e.g. donation tickets) do not contribute a price.
    prices = []
    for ticket in paid_tickets:
        cost_value = (ticket.get('cost') or {}).get('value')
        if cost_value:
            prices.append(float(cost_value) / 100)
    min_price = min(prices) if prices else 0
    max_price = max(prices) if prices else 0
    
    # Shorten the description to 100 characters, marking it only when it was actually cut
    description_text = (event.get('description') or {}).get('text')
    if description_text:
        event_description = description_text[:100]
        if len(description_text) > 100:
            event_description += "..."
    else:
        event_description = "No description"
    
    # Format event date and time
    start = event.get('start') or {}
    end = event.get('end') or {}
    
    analysis = {
        "event_name": (event.get('name') or {}).get('text') or 'Unknown',
        "event_description": event_description,
        "ticket_summary": {
            "total_ticket_types": len(tickets),
            "free_ticket_types": len(free_tickets),
            "paid_ticket_types": len(paid_tickets),
            "price_range": f"${min_price} - ${max_price}" if min_price != max_price else f"${min_price}"
        },
        "event_timing": {
            "start_date": start.get('local', 'Unknown'),
            "end_date": end.get('local', 'Unknown'),
            "timezone": start.get('timezone', 'Unknown')
        },
        "status": event.get('status', 'Unknown'),
        "is_online": event.get('online_event', False),
        "capacity": event.get('capacity', 'Unknown'),
        "url": event.get('url', 'Unknown')
    }
    
    return analysis


def format_ticket_info(ticket_classes):
    """
    Format ticket class information for better readability
    
    Args:
        ticket_classes (list): List of ticket class dicts; None is treated as an empty list
        
    Returns:
        list: One dict per ticket class with name, description, free flag,
              quantities and sales window, plus a "price" entry for non-free
              tickets that carry a cost object
    """
    formatted_tickets = []
    
    for ticket in ticket_classes or []:
        formatted_ticket = {
            "name": ticket.get('name', 'Unknown'),
            "description": ticket.get('description', 'No description'),
            "free": ticket.get('free', False),
            "quantity_total": ticket.get('quantity_total', 0),
            "quantity_sold": ticket.get('quantity_sold', 0),
            "sales_start": ticket.get('sales_start', 'Unknown'),
            "sales_end": ticket.get('sales_end', 'Unknown')
        }
        
        # Add cost information if it's a paid ticket. The 'cost' key can be present
        # with a null value (e.g. donation tickets), so check the value, not the key.
        cost = ticket.get('cost')
        if not ticket.get('free', False) and isinstance(cost, dict):
            formatted_ticket["price"] = {
                "value": float(cost.get('value') or 0) / 100,  # Convert from cents to dollars/currency
                "currency": cost.get('currency', 'USD'),
                "display": cost.get('display', 'Unknown')
            }
        
        formatted_tickets.append(formatted_ticket)
    
    return formatted_tickets


def _save_json(payload, filename):
    """Write payload to filename as indented UTF-8 JSON."""
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=4)


def _print_event_summary(event_analysis, filename):
    """Print the short human-readable summary of an analysed event."""
    print("\n=== Event Summary ===")
    print(f"Event Name: {event_analysis['event_name']}")
    print(f"Start Date: {event_analysis['event_timing']['start_date']}")
    print(f"Ticket Types: {event_analysis['ticket_summary']['total_ticket_types']}")
    print(f"Price Range: {event_analysis['ticket_summary']['price_range']}")
    print(f"Data saved to: {filename}")


def main():
    """
    Main function to orchestrate the data collection process
    
    Validates the token, fetches the event (a single expanded request first,
    individual requests as a fallback), analyses it, saves the result as JSON
    under "eventbrite_data/" and prints a summary. Attendee collection is
    disabled unless the local try_attendees flag is switched on.
    """
    # Create directory for saving data
    output_dir = "eventbrite_data"
    os.makedirs(output_dir, exist_ok=True)
    
    # Get current timestamp for filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    logger.info(f"Starting data collection for event ID: {EVENT_ID}")
    
    # Validate token before making other requests
    if not validate_token():
        logger.error("Invalid OAuth token. Please check your token and try again.")
        return
    
    # Try to get event with expansions first (more efficient)
    expanded_event_data = get_event_with_expansions(EVENT_ID)
    
    if expanded_event_data:
        event_name = (expanded_event_data.get('name') or {}).get('text', 'Unknown')
        logger.info(f"Successfully retrieved expanded event data: {event_name}")
        
        # Extract and format ticket information ("or []" covers a null ticket_classes field)
        ticket_classes = expanded_event_data.get('ticket_classes') or []
        
        # Store the formatted version alongside the raw ticket data
        expanded_event_data['formatted_tickets'] = format_ticket_info(ticket_classes)
        
        # Create analysis of the event data
        event_analysis = analyze_event_data({"event": expanded_event_data, "tickets": {"ticket_classes": ticket_classes}})
        expanded_event_data['analysis'] = event_analysis
        
        # Save to JSON file
        filename = f"{output_dir}/event_{EVENT_ID}_{timestamp}_expanded.json"
        _save_json(expanded_event_data, filename)
        logger.info(f"Expanded data saved to file: {filename}")
        
        _print_event_summary(event_analysis, filename)
        
    else:
        logger.warning("Failed to get expanded event data. Falling back to individual requests.")
        
        # Get event details
        event_data = get_event_details(EVENT_ID)
        if not event_data:
            logger.error("Failed to retrieve event data. Exiting.")
            return
        
        event_name = (event_data.get('name') or {}).get('text', 'Unknown')
        logger.info(f"Successfully retrieved event information: {event_name}")
        
        # Try to get venue information. The key can be present with a null value,
        # so check the value itself to avoid requesting ".../venues/None/".
        venue_data = None
        venue_id = event_data.get('venue_id')
        if venue_id:
            venue_data = get_venue_details(venue_id)
            if venue_data:
                logger.info(f"Successfully retrieved venue information: {venue_data.get('name', 'Unknown')}")
        
        # Try to get organizer information (same null guard as for the venue)
        organizer_data = None
        organizer_id = event_data.get('organizer_id')
        if organizer_id:
            organizer_data = get_organizer_details(organizer_id)
            if organizer_data:
                logger.info(f"Successfully retrieved organizer information: {organizer_data.get('name', 'Unknown')}")
        
        # Try to get ticket class information
        ticket_data = get_ticket_classes(EVENT_ID)
        if ticket_data and 'error' not in ticket_data:
            ticket_classes = ticket_data.get('ticket_classes') or []
            logger.info(f"Successfully retrieved ticket information, {len(ticket_classes)} ticket types")
            
            # Format ticket information
            ticket_data['formatted_tickets'] = format_ticket_info(ticket_classes)
        
        # Combine all data
        complete_data = {
            "event": event_data,
            "venue": venue_data,
            "organizer": organizer_data,
            "tickets": ticket_data
        }
        
        # Create analysis of the event data
        event_analysis = analyze_event_data(complete_data)
        complete_data['analysis'] = event_analysis
        
        # Save to JSON file
        filename = f"{output_dir}/event_{EVENT_ID}_{timestamp}.json"
        _save_json(complete_data, filename)
        logger.info(f"Data saved to file: {filename}")
        
        _print_event_summary(event_analysis, filename)
    
    # Optionally get attendees (requires appropriate permissions)
    try_attendees = False  # Set to True if you want to try fetching attendees
    if try_attendees:
        logger.info("Attempting to fetch attendee data (requires appropriate permissions)")
        attendees_data = get_attendees(EVENT_ID)
        if attendees_data and 'attendees' in attendees_data:
            attendee_count = len(attendees_data['attendees'])
            logger.info(f"Successfully retrieved {attendee_count} attendees")
            
            # Save attendees to separate file
            attendees_filename = f"{output_dir}/event_{EVENT_ID}_{timestamp}_attendees.json"
            _save_json(attendees_data, attendees_filename)
            
            logger.info(f"Attendee data saved to file: {attendees_filename}")
        else:
            logger.warning("Failed to retrieve attendee data. This may be due to permission restrictions.")
    
    logger.info("Data collection complete")


# Start the data collection when this code is executed directly rather than imported
if __name__ == "__main__":
    main()


2025-03-20 16:30:56,574 - INFO - Starting data collection for event ID: 1246852341919
2025-03-20 16:30:56,575 - INFO - Validating OAuth token
2025-03-20 16:30:56,830 - INFO - Token validated successfully. User: Nathan Cao
2025-03-20 16:30:56,832 - INFO - Fetching expanded event details for event ID: 1246852341919
2025-03-20 16:30:57,410 - INFO - Successfully retrieved expanded event data: SET with EELKE KLEIJN (DAYS Like NIGHTS) at The Great Northern SF
2025-03-20 16:30:57,414 - INFO - Expanded data saved to file: eventbrite_data/event_1246852341919_20250320_163056_expanded.json
2025-03-20 16:30:57,415 - INFO - Data collection complete



=== Event Summary ===
Event Name: SET with EELKE KLEIJN (DAYS Like NIGHTS) at The Great Northern SF
Start Date: 2025-03-22T21:30:00
Ticket Types: 6
Price Range: $15.0 - $40.0
Data saved to: eventbrite_data/event_1246852341919_20250320_163056_expanded.json
