In [1]:
import urllib.request
import json
import requests
import string
import time
import random
from collections import deque
import logging

In [13]:
url = "http://35.200.185.69:8000"
# Use the response as a context manager so the connection is closed even if
# reading/parsing fails, and bound the wait so a stalled server cannot hang the cell.
with urllib.request.urlopen(url, timeout=10) as response:
    result = json.loads(response.read())
result

{'message': 'This is the root endpoint of the API.',
 'tips': ['Try different endpoints (make a guess) to get started.',
  'Explore the different API versions also.']}

In [14]:
# Function to check if an endpoint returns data
def check_endpoint(version, query="a"):
    """Probe one API version's autocomplete endpoint and report whether it returned results.

    Args:
        version: API version path segment, e.g. "v1".
        query: Autocomplete query text; sent as a URL-encoded query parameter.

    Returns:
        True if the endpoint answered HTTP 200 with a non-empty result, False
        otherwise (non-200 status, empty results, request error, or a body that
        is not valid JSON). A one-line status message is printed in every case.
    """
    url = f"http://35.200.185.69:8000/{version}/autocomplete"
    try:
        # `params` percent-encodes the query (spaces, '+', ... would otherwise be
        # misread by the server); the timeout keeps a dead host from hanging the probe.
        response = requests.get(url, params={"query": query}, timeout=10)
    except requests.RequestException as exc:
        print(f"Error with {version} - Request failed: {exc}")
        return False

    if response.status_code != 200:
        print(f"Error with {version} - Status Code: {response.status_code}")
        return False

    try:
        data = response.json()
    except ValueError:
        print(f"Error with {version} - Response is not valid JSON")
        return False

    # The payload is a dict that is truthy even when it carries no matches, so
    # judge emptiness by its 'results' entry when there is one.
    results = data.get("results") if isinstance(data, dict) and "results" in data else data
    if results:
        print(f"Data found at {version}: {data}")
        return True

    print(f"No data at {version}")
    return False

# Function to test multiple API versions
def test_versions():
    """Probe API versions v1 through v5 in order, announcing each one before checking it."""
    for number in range(1, 6):
        candidate = f"v{number}"
        print(f"Testing {candidate}...")
        check_endpoint(candidate)

# Run the test: probes every candidate version and prints one status line per version
test_versions()


Testing v1...
Data found at v1: {'version': 'v1', 'count': 10, 'results': ['aa', 'aabdknlvkc', 'aabrkcd', 'aadgdqrwdy', 'aagqg', 'aaiha', 'aainmxg', 'aajfebume', 'aajwv', 'aakfubvxv']}
Testing v2...
Data found at v2: {'version': 'v2', 'count': 12, 'results': ['a0', 'a09p36zjy', 'a0d2vhq3i', 'a0ft3ec1tl', 'a0lnv81gm', 'a0pnt1', 'a0qm', 'a0twzs6', 'a1nvj3fpg', 'a1x1', 'a2', 'a2cqmcc7']}
Testing v3...
Data found at v3: {'version': 'v3', 'count': 15, 'results': ['a', 'a e+skbrns', 'a ifs1.-', 'a+woz7', 'a-.', 'a-g z', 'a-m.ffwo', 'a-o80', 'a.', 'a.-gowx3d', 'a..rmw83', 'a.1kh g', 'a.2xf', 'a.c', 'a.gi3m']}
Testing v4...
Error with v4 - Status Code: 404
Testing v5...
Error with v5 - Status Code: 404


NAME EXTRACTION FROM V1 VERSION OF API

In [None]:
# basicConfig() does nothing when the root logger already has handlers (for
# example when this cell is re-run), which would silently drop the level and
# format below; force=True replaces any existing root handlers so they apply.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logger = logging.getLogger(__name__)

class RateLimitException(Exception):
    """Signals that the API answered HTTP 429 (Too Many Requests)."""

def make_request(url, max_retries=5, initial_delay=1, timeout=10):
    """GET `url`, backing off exponentially while the server rate-limits us.

    Args:
        url: Fully formed URL to request.
        max_retries: Maximum number of attempts that may end in HTTP 429.
        initial_delay: Base backoff in seconds; after the k-th rate-limited
            attempt (k starting at 0) the wait is initial_delay * 2**k plus up
            to one second of random jitter.
        timeout: Seconds to wait on the server before the attempt is abandoned,
            so a stalled connection cannot hang the caller forever.

    Returns:
        The response object on HTTP 200. None on any other status, on a request
        error (timeouts included), or when every attempt was rate limited.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            logger.error(f"Request failed: {e}")
            return None

        status = response.status_code
        if status == 200:
            return response
        if status != 429:  # anything other than Too Many Requests is not retried
            logger.warning(f"Unexpected status code: {status}")
            return None

        # Rate limited: wait before the next attempt, but do not waste a full
        # backoff period after the final attempt when we are about to give up.
        if attempt + 1 < max_retries:
            delay = initial_delay * (2 ** attempt) + random.uniform(0, 1)
            logger.info(f"Rate limited. Retrying in {delay:.2f} seconds")
            time.sleep(delay)

    logger.error("Max retries reached. Giving up.")
    return None

def extract_all_names(base_url="http://35.200.185.69:8000/v1/autocomplete",
                      charset=string.ascii_lowercase, max_results=10):
    """Enumerate the names exposed by the autocomplete endpoint by crawling prefixes.

    A query returns only a limited number of names, so a full response may be
    truncated; such a prefix is extended by every character of `charset` and
    the longer prefixes are queried in turn (breadth-first). A response that is
    not full already lists every name under its prefix and is not expanded.

    Args:
        base_url: Autocomplete endpoint, without the query string.
        charset: Characters used to build prefixes.
        max_results: Page size of the endpoint; a response holding at least
            this many names is treated as possibly truncated.

    Returns:
        A tuple `(all_names, request_count)`: the set of distinct names seen
        and the number of requests attempted (failed ones included).
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    all_names = set()
    visited_prefixes = set()
    failed_prefixes = []  # prefixes whose subtree could not be explored
    request_count = 0

    def fetch_results(prefix):
        """Query one prefix; return its list of names, or None if the request failed."""
        nonlocal request_count
        encoded = quote(prefix, safe="")
        response = make_request(f"{base_url}?query={encoded}")
        request_count += 1

        if response is None:
            failed_prefixes.append(prefix)
            return None
        try:
            data = response.json()
        except ValueError:
            # A malformed body must not abort the crawl and lose what was collected.
            logger.warning(f"Prefix '{prefix}' returned a body that is not valid JSON")
            failed_prefixes.append(prefix)
            return None
        return data.get('results', []) if isinstance(data, dict) else []

    queue = deque()

    # First check which single characters return results
    valid_first_chars = []
    logger.info("Starting single character search...")

    for char in charset:
        visited_prefixes.add(char)
        results = fetch_results(char)
        if results is None:
            continue

        previous_count = len(all_names)
        all_names.update(results)
        new_names = len(all_names) - previous_count

        if results:
            valid_first_chars.append(char)
            logger.info(f"Character '{char}' is valid. Found {len(results)} names, {new_names} new. Total unique names: {len(all_names)}")
            # Only a full page can hide further names; a short page is complete.
            if len(results) >= max_results:
                queue.extend(char + second_char for second_char in charset)
        else:
            logger.info(f"Character '{char}' has no results. Total unique names: {len(all_names)}")

    logger.info(f"Completed single character search. Valid characters: {', '.join(valid_first_chars)}")
    logger.info(f"Current total unique names: {len(all_names)}")

    # Now explore further with BFS below every truncated prefix
    logger.info(f"Starting multi-character search with {len(queue)} prefixes in queue...")

    while queue:
        prefix = queue.popleft()

        if prefix in visited_prefixes:
            continue
        visited_prefixes.add(prefix)

        results = fetch_results(prefix)
        if results is not None:
            previous_count = len(all_names)
            all_names.update(results)
            new_names = len(all_names) - previous_count

            if results:
                logger.info(f"Prefix '{prefix}' is valid. Found {len(results)} names, {new_names} new. Total unique names: {len(all_names)}")
                # A full page may be truncated, so look one character deeper.
                if len(results) >= max_results:
                    queue.extend(prefix + char for char in charset)
            else:
                logger.info(f"Prefix '{prefix}' has no results. Total unique names: {len(all_names)}")

        if request_count % 50 == 0:
            logger.info(f"PROGRESS: {request_count} requests made. Found {len(all_names)} unique names. Queue size: {len(queue)}")

    if failed_prefixes:
        logger.warning(f"{len(failed_prefixes)} prefixes could not be fetched, so the result may be incomplete: {failed_prefixes}")

    logger.info(f"Extraction complete. Total requests made: {request_count}")
    return all_names, request_count

if __name__ == "__main__":
    # perf_counter() is monotonic, so the elapsed time cannot be skewed by a
    # system clock adjustment during a long crawl (time.time() can be).
    start_time = time.perf_counter()
    names, request_count = extract_all_names()
    end_time = time.perf_counter()

    logger.info(f"SUMMARY: Total unique names found: {len(names)}")
    logger.info(f"SUMMARY: Total API requests made: {request_count}")
    logger.info(f"SUMMARY: Time taken: {end_time - start_time:.2f} seconds")



INFO:__main__:Starting single character search...
INFO:__main__:Character 'a' is valid. Found 10 names, 10 new. Total unique names: 10
INFO:__main__:Character 'b' is valid. Found 10 names, 10 new. Total unique names: 20
INFO:__main__:Character 'c' is valid. Found 10 names, 10 new. Total unique names: 30
INFO:__main__:Character 'd' is valid. Found 10 names, 10 new. Total unique names: 40
INFO:__main__:Character 'e' is valid. Found 10 names, 10 new. Total unique names: 50
INFO:__main__:Character 'f' is valid. Found 10 names, 10 new. Total unique names: 60
INFO:__main__:Character 'g' is valid. Found 10 names, 10 new. Total unique names: 70
INFO:__main__:Character 'h' is valid. Found 10 names, 10 new. Total unique names: 80
INFO:__main__:Character 'i' is valid. Found 10 names, 10 new. Total unique names: 90
INFO:__main__:Character 'j' is valid. Found 10 names, 10 new. Total unique names: 100
INFO:__main__:Character 'k' is valid. Found 10 names, 10 new. Total unique names: 110
INFO:__main__

NAME EXTRACTION FROM V2 VERSION OF API

In [None]:
# basicConfig() does nothing when the root logger already has handlers (for
# example when logging was configured by an earlier cell), which would silently
# drop the level and format below; force=True replaces existing root handlers.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logger = logging.getLogger(__name__)

class RateLimitException(Exception):
    """Signals that the API answered HTTP 429 (Too Many Requests)."""

def make_request(url, max_retries=5, initial_delay=1, timeout=10):
    """GET `url`, backing off exponentially while the server rate-limits us.

    Args:
        url: Fully formed URL to request.
        max_retries: Maximum number of attempts that may end in HTTP 429.
        initial_delay: Base backoff in seconds; after the k-th rate-limited
            attempt (k starting at 0) the wait is initial_delay * 2**k plus up
            to one second of random jitter.
        timeout: Seconds to wait on the server before the attempt is abandoned,
            so a stalled connection cannot hang the caller forever.

    Returns:
        The response object on HTTP 200. None on any other status, on a request
        error (timeouts included), or when every attempt was rate limited.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            logger.error(f"Request failed: {e}")
            return None

        status = response.status_code
        if status == 200:
            return response
        if status != 429:  # anything other than Too Many Requests is not retried
            logger.warning(f"Unexpected status code: {status}")
            return None

        # Rate limited: wait before the next attempt, but do not waste a full
        # backoff period after the final attempt when we are about to give up.
        if attempt + 1 < max_retries:
            delay = initial_delay * (2 ** attempt) + random.uniform(0, 1)
            logger.info(f"Rate limited. Retrying in {delay:.2f} seconds")
            time.sleep(delay)

    logger.error("Max retries reached. Giving up.")
    return None

def extract_all_names(base_url="http://35.200.185.69:8000/v2/autocomplete",
                      charset=string.ascii_lowercase + string.digits, max_results=12):
    """Enumerate the names exposed by the v2 autocomplete endpoint by crawling prefixes.

    A query returns only a limited number of names, so a full response may be
    truncated; such a prefix is extended by every character of `charset` and
    the longer prefixes are queried in turn (breadth-first). A response that is
    not full already lists every name under its prefix and is not expanded.

    Args:
        base_url: Autocomplete endpoint, without the query string.
        charset: Characters used to build prefixes (lowercase letters and
            digits by default).
        max_results: Page size of the endpoint; a response holding at least
            this many names is treated as possibly truncated.

    Returns:
        A tuple `(all_names, request_count)`: the set of distinct names seen
        and the number of requests attempted (failed ones included).
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    all_names = set()
    visited_prefixes = set()
    failed_prefixes = []  # prefixes whose subtree could not be explored
    request_count = 0

    def fetch_results(prefix):
        """Query one prefix; return its list of names, or None if the request failed."""
        nonlocal request_count
        encoded = quote(prefix, safe="")
        response = make_request(f"{base_url}?query={encoded}")
        request_count += 1

        if response is None:
            failed_prefixes.append(prefix)
            return None
        try:
            data = response.json()
        except ValueError:
            # A malformed body must not abort the crawl and lose what was collected.
            logger.warning(f"Prefix '{prefix}' returned a body that is not valid JSON")
            failed_prefixes.append(prefix)
            return None
        return data.get('results', []) if isinstance(data, dict) else []

    queue = deque()

    # First check which single characters return results
    valid_first_chars = []
    logger.info("Starting single character search...")

    for char in charset:
        visited_prefixes.add(char)
        results = fetch_results(char)
        if results is None:
            continue

        previous_count = len(all_names)
        all_names.update(results)
        new_names = len(all_names) - previous_count

        if results:
            valid_first_chars.append(char)
            logger.info(f"Character '{char}' is valid. Found {len(results)} names, {new_names} new. Total unique names: {len(all_names)}")
            # Only a full page can hide further names; a short page is complete.
            if len(results) >= max_results:
                queue.extend(char + second_char for second_char in charset)
        else:
            logger.info(f"Character '{char}' has no results. Total unique names: {len(all_names)}")

    logger.info(f"Completed single character search. Valid characters: {', '.join(valid_first_chars)}")
    logger.info(f"Current total unique names: {len(all_names)}")

    # Now explore further with BFS below every truncated prefix
    logger.info(f"Starting multi-character search with {len(queue)} prefixes in queue...")

    while queue:
        prefix = queue.popleft()

        if prefix in visited_prefixes:
            continue
        visited_prefixes.add(prefix)

        results = fetch_results(prefix)
        if results is not None:
            previous_count = len(all_names)
            all_names.update(results)
            new_names = len(all_names) - previous_count

            if results:
                logger.info(f"Prefix '{prefix}' is valid. Found {len(results)} names, {new_names} new. Total unique names: {len(all_names)}")
                # A full page may be truncated, so look one character deeper.
                if len(results) >= max_results:
                    queue.extend(prefix + char for char in charset)
            else:
                logger.info(f"Prefix '{prefix}' has no results. Total unique names: {len(all_names)}")

        if request_count % 50 == 0:
            logger.info(f"PROGRESS: {request_count} requests made. Found {len(all_names)} unique names. Queue size: {len(queue)}")

    if failed_prefixes:
        logger.warning(f"{len(failed_prefixes)} prefixes could not be fetched, so the result may be incomplete: {failed_prefixes}")

    logger.info(f"Extraction complete. Total requests made: {request_count}")
    return all_names, request_count

if __name__ == "__main__":
    # perf_counter() is monotonic, so the elapsed time cannot be skewed by a
    # system clock adjustment during a long crawl (time.time() can be).
    start_time = time.perf_counter()
    names, request_count = extract_all_names()
    end_time = time.perf_counter()

    logger.info(f"SUMMARY: Total unique names found: {len(names)}")
    logger.info(f"SUMMARY: Total API requests made: {request_count}")
    logger.info(f"SUMMARY: Time taken: {end_time - start_time:.2f} seconds")

    # Save the results, one name per line in sorted order. The encoding is
    # pinned so the file does not depend on the platform's default.
    output_path = "autocomplete_names_v2.txt"
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in sorted(names))
    logger.info(f"Results saved to {output_path}")




INFO:__main__:Starting single character search...
INFO:__main__:Character 'a' is valid. Found 12 names, 12 new. Total unique names: 12
INFO:__main__:Character 'b' is valid. Found 12 names, 12 new. Total unique names: 24
INFO:__main__:Character 'c' is valid. Found 12 names, 12 new. Total unique names: 36
INFO:__main__:Character 'd' is valid. Found 12 names, 12 new. Total unique names: 48
INFO:__main__:Character 'e' is valid. Found 12 names, 12 new. Total unique names: 60
INFO:__main__:Character 'f' is valid. Found 12 names, 12 new. Total unique names: 72
INFO:__main__:Character 'g' is valid. Found 12 names, 12 new. Total unique names: 84
INFO:__main__:Character 'h' is valid. Found 12 names, 12 new. Total unique names: 96
INFO:__main__:Character 'i' is valid. Found 12 names, 12 new. Total unique names: 108
INFO:__main__:Character 'j' is valid. Found 12 names, 12 new. Total unique names: 120
INFO:__main__:Character 'k' is valid. Found 12 names, 12 new. Total unique names: 132
INFO:__main_