In [1]:
import requests
import string
import time
import random
from collections import deque
import logging

# Configure the root logger once at import time: show INFO and above, with
# each record rendered as "<timestamp> - <LEVEL> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger shared by the request and crawl code in this file.
logger = logging.getLogger(__name__)

class RateLimitException(Exception):
    """Signal that the server answered with HTTP 429 (Too Many Requests)."""

def make_request(url, max_retries=5, initial_delay=1, timeout=10):
    """GET *url*, backing off exponentially while the server answers 429.

    Args:
        url: Fully formed URL to request.
        max_retries: Maximum number of attempts made while the server keeps
            answering HTTP 429 (Too Many Requests).
        initial_delay: Base back-off in seconds. The wait after attempt *k*
            (0-based) is ``initial_delay * 2**k`` plus up to one second of
            random jitter.
        timeout: Seconds to wait on the server per attempt. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The ``requests.Response`` when the server answers 200. ``None`` when
        it answers with any other non-429 status, when the request itself
        fails (including a timeout), or when every attempt was rate limited.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            logger.error(f"Request failed: {e}")
            return None

        if response.status_code == 200:
            return response
        if response.status_code != 429:
            logger.warning(f"Unexpected status code: {response.status_code}")
            return None

        # Rate limited. Sleeping after the last attempt would only delay the
        # failure, so stop immediately once no retry is left.
        if attempt == max_retries - 1:
            break
        delay = initial_delay * (2 ** attempt) + random.uniform(0, 1)
        logger.info(f"Rate limited. Retrying in {delay:.2f} seconds")
        time.sleep(delay)

    logger.error("Max retries reached. Giving up.")
    return None

def _fetch_results(base_url, prefix):
    """Return the autocomplete results for *prefix*, or ``None`` on failure.

    ``None`` means the request failed or the body could not be used; an empty
    list means the server answered but had no matches.
    """
    # Percent-encode the prefix so characters such as '+' and '*' reach the
    # server literally instead of being reinterpreted in the query string.
    encoded_prefix = requests.utils.quote(prefix)
    response = make_request(f"{base_url}?query={encoded_prefix}")
    if response is None:
        return None

    try:
        data = response.json()
    except ValueError:
        # A single malformed body must not abort the crawl and throw away
        # every name collected so far.
        logger.warning(f"Prefix '{prefix}' returned a body that is not valid JSON. Skipping.")
        return None

    if not isinstance(data, dict):
        logger.warning(f"Prefix '{prefix}' returned unexpected JSON of type {type(data).__name__}. Skipping.")
        return None

    # Treat a missing or null 'results' entry as "no matches".
    return data.get('results') or []


def extract_all_names(base_url="http://35.200.185.69:8000/v3/autocomplete",
                      valid_chars=None, max_results=15):
    """Collect names from the autocomplete endpoint by breadth-first prefix search.

    Every single character of *valid_chars* is queried first. Each character
    that yields results seeds all of its two-character prefixes, and from
    there a prefix is extended by one character only when its response is
    full (``max_results`` entries or more).

    NOTE(review): this assumes the endpoint returns at most ``max_results``
    names per query and that a shorter response is complete - confirm
    against the API.

    Args:
        base_url: Autocomplete endpoint; the prefix is sent as ``?query=``.
        valid_chars: Characters used to build prefixes. Defaults to lowercase
            ASCII letters, digits and ``.-+*``.
        max_results: Number of results at which a response is considered
            capped and the prefix is explored further.

    Returns:
        A ``(names, request_count)`` tuple: the set of unique names seen and
        the number of prefixes queried.
    """
    if valid_chars is None:
        valid_chars = string.ascii_lowercase + string.digits + ".-+*"

    all_names = set()
    visited_prefixes = set()
    request_count = 0

    # Phase 1: find out which single characters return anything at all.
    valid_first_chars = []
    logger.info("Starting single character search...")

    for char in valid_chars:
        results = _fetch_results(base_url, char)
        request_count += 1
        if results is None:
            continue

        previous_count = len(all_names)
        all_names.update(results)
        new_names = len(all_names) - previous_count

        if results:
            valid_first_chars.append(char)
            logger.info(f"Character '{char}' is valid. Found {len(results)} names, {new_names} new. Total unique names: {len(all_names)}")
        else:
            logger.info(f"Character '{char}' has no results. Total unique names: {len(all_names)}")

    logger.info(f"Completed single character search. Valid characters: {', '.join(valid_first_chars)}")
    logger.info(f"Current total unique names: {len(all_names)}")

    # Phase 2: breadth-first search seeded with every two-character prefix
    # under a valid first character. The seeding is unconditional: it does
    # not check whether the single-character response was capped.
    queue = deque()
    for char in valid_first_chars:
        visited_prefixes.add(char)
        queue.extend(char + second_char for second_char in valid_chars)

    logger.info(f"Starting multi-character search with {len(queue)} prefixes in queue...")

    while queue:
        prefix = queue.popleft()
        if prefix in visited_prefixes:
            continue
        visited_prefixes.add(prefix)

        results = _fetch_results(base_url, prefix)
        request_count += 1

        if results is not None:
            previous_count = len(all_names)
            all_names.update(results)
            new_names = len(all_names) - previous_count

            if results:
                logger.info(f"Prefix '{prefix}' is valid. Found {len(results)} names, {new_names} new. Total unique names: {len(all_names)}")

                # A full response may be hiding further names, so descend
                # one character deeper under this prefix.
                if len(results) >= max_results:
                    for char in valid_chars:
                        new_prefix = prefix + char
                        if new_prefix not in visited_prefixes:
                            queue.append(new_prefix)
            else:
                logger.info(f"Prefix '{prefix}' has no results. Total unique names: {len(all_names)}")

        # Progress is reported on request count, including failed requests.
        if request_count % 50 == 0:
            logger.info(f"PROGRESS: {request_count} requests made. Found {len(all_names)} unique names. Queue size: {len(queue)}")

    logger.info(f"Extraction complete. Total requests made: {request_count}")
    return all_names, request_count

if __name__ == "__main__":
    # Script entry point: run the crawl, report totals, then save the names.
    output_path = "autocomplete_names_v3.txt"

    start_time = time.time()
    names, request_count = extract_all_names()
    elapsed = time.time() - start_time

    logger.info(f"SUMMARY: Total unique names found: {len(names)}")
    logger.info(f"SUMMARY: Total API requests made: {request_count}")
    logger.info(f"SUMMARY: Time taken: {elapsed:.2f} seconds")

    # Write one name per line in sorted order. The encoding is explicit so
    # the output does not depend on the platform's locale default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in sorted(names))
    logger.info(f"Results saved to {output_path}")


2025-03-24 20:01:02,513 - INFO - Starting single character search...
2025-03-24 20:01:02,620 - INFO - Character 'a' is valid. Found 15 names, 15 new. Total unique names: 15
2025-03-24 20:01:02,723 - INFO - Character 'b' is valid. Found 15 names, 15 new. Total unique names: 30
2025-03-24 20:01:02,895 - INFO - Character 'c' is valid. Found 15 names, 15 new. Total unique names: 45
2025-03-24 20:01:03,027 - INFO - Character 'd' is valid. Found 15 names, 15 new. Total unique names: 60
2025-03-24 20:01:03,127 - INFO - Character 'e' is valid. Found 15 names, 15 new. Total unique names: 75
2025-03-24 20:01:03,269 - INFO - Character 'f' is valid. Found 15 names, 15 new. Total unique names: 90
2025-03-24 20:01:03,372 - INFO - Character 'g' is valid. Found 15 names, 15 new. Total unique names: 105
2025-03-24 20:01:03,472 - INFO - Character 'h' is valid. Found 15 names, 15 new. Total unique names: 120
2025-03-24 20:01:03,613 - INFO - Character 'i' is valid. Found 15 names, 15 new. Total unique nam