In [1]:
import requests
import time
from collections import deque
import string

In [None]:
# Endpoint of the v2 autocomplete service that gets crawled.
API_URL = "http://35.200.185.69:8000/v2/autocomplete"

# Request budget and retry policy.
REQUESTS_PER_MINUTE = 100
DELAY = 60 / REQUESTS_PER_MINUTE  # seconds per request at that budget
MAX_RETRIES = 3
RETRY_DELAY = 30  # seconds

# Alphabet used to build prefixes: digits first, then lowercase letters.
characters = string.digits + string.ascii_lowercase  # '0123456789abcdefghijklmnopqrstuvwxyz'

# Crawl state: names collected so far, prefixes already queried, and the
# queue of pending prefixes, seeded with every single-character prefix.
found_names = set()
visited_queries = set()
search_queue = deque(characters)

In [None]:
def fetch_names(prefix):
    """Fetch the autocomplete results for ``prefix`` from ``API_URL``.

    Makes up to ``MAX_RETRIES`` attempts:

    * 200 -> returns the list stored under the JSON key ``'results'``.
    * 429 -> sleeps for the ``Retry-After`` delay, then tries again.
    * any other status -> reports the error and gives up immediately.
    * network error or undecodable JSON body -> reports it and tries again
      after ``RETRY_DELAY`` seconds.

    Args:
        prefix: String sent as the ``query`` parameter.

    Returns:
        The list of results, or ``[]`` when the request failed, the body was
        not a JSON object, or all attempts were used up.
    """
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.get(API_URL, params={'query': prefix}, timeout=10)

            if response.status_code == 200:
                payload = response.json()
                if not isinstance(payload, dict):
                    print(f"Unexpected response body for '{prefix}': {payload!r}")
                    return []
                # A missing or null 'results' entry is treated as "no results".
                return payload.get('results') or []

            if response.status_code != 429:
                print(f"Error {response.status_code} for '{prefix}': {response.text}")
                return []

            # Retry-After may be an HTTP-date instead of a number of seconds;
            # fall back to the configured delay rather than crash on int().
            try:
                retry_after = max(0, int(response.headers.get('Retry-After', RETRY_DELAY)))
            except (TypeError, ValueError):
                retry_after = RETRY_DELAY
            print(f"Rate limited. Retrying after {retry_after} seconds...")
            time.sleep(retry_after)
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on requests versions where
            # the decode error is not a RequestException subclass.
            print(f"Request failed for '{prefix}': {e}")
            # No point in waiting when there is no attempt left to make.
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)

    print(f"Max retries exceeded for prefix '{prefix}'")
    return []

In [None]:
def extract_all_names(page_size=12):
    """Crawl the autocomplete API breadth-first and collect every name seen.

    Works on the module-level state: prefixes are taken from ``search_queue``,
    recorded in ``visited_queries``, and every returned name is added to
    ``found_names``. When a query returns a full page (``page_size`` results),
    more names may exist under that prefix, so longer prefixes are queued,
    starting at the character that follows the prefix in the last result.

    NOTE(review): skipping the earlier characters assumes the service returns
    results sorted in the same order as ``characters`` - confirm against the API.

    Args:
        page_size: Number of results that signals a truncated response
            (12 for the v2 endpoint).

    Returns:
        Tuple ``(found_names, total_requests)`` where ``total_requests`` counts
        calls to ``fetch_names`` (retries inside it are not counted).
    """
    total_requests = 0
    start_time = time.time()

    while search_queue:
        query = search_queue.popleft()
        if query in visited_queries:
            continue
        visited_queries.add(query)

        names = fetch_names(query)
        total_requests += 1

        found_names.update(names)

        if len(names) == page_size:
            last_name = names[-1]
            if len(last_name) > len(query):
                next_char = last_name[len(query)]
                try:
                    start = characters.index(next_char)
                except ValueError:
                    # The character is outside the crawl alphabet (e.g. '.',
                    # space, uppercase): its position is unknown, so expand
                    # every character instead of aborting the crawl.
                    start = 0

                for letter in characters[start:]:  # Continue from the last found character
                    new_query = query + letter
                    if new_query not in visited_queries:
                        search_queue.append(new_query)

        print(f"Queried: '{query}' → Found {len(names)} names, Total: {len(found_names)}")

        # After each batch of REQUESTS_PER_MINUTE queries, wait out whatever is
        # left of the current 60-second window before starting the next one.
        if total_requests % REQUESTS_PER_MINUTE == 0:
            elapsed_time = time.time() - start_time
            if elapsed_time < 60:
                sleep_time = 60 - elapsed_time
                print(f"Rate limit reached. Sleeping for {sleep_time:.2f} seconds...")
                time.sleep(sleep_time)
            # Start timing the next window only after any sleep has finished.
            start_time = time.time()

    return found_names, total_requests

In [None]:
if __name__ == "__main__":
    # Run the full crawl; returns the collected names and the query count.
    all_names, request_count = extract_all_names()

    # Save results, one name per line, in sorted order. The explicit encoding
    # keeps the output independent of the platform's default.
    with open("extracted_names_v2.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in sorted(all_names))

    print(f"\nTotal unique names found: {len(all_names)}")
    print(f"Total API requests made: {request_count}")