# Neptune Connection Testing

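This notebook tests a direct connection to an Amazon Neptune cluster, step by step (network reachability, IAM permissions, then a signed Gremlin connection), to help debug connection issues.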

In [None]:
import os
import time
import boto3
import logging
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.protocol import GremlinServerError
from gremlin_python.process.anonymous_traversal import traversal

# Set up logging
# Request INFO-level output on the root logger (basicConfig has no effect if the
# root logger already has handlers configured), then create this notebook's
# logger, which the connection test below uses for its progress messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

## 1. Connection Configuration

In [None]:
# Neptune endpoint from existing cluster
# Hostname only: the connectivity checks below add the port (8182), and the
# connection test also adds the wss:// scheme and /gremlin path.
ENDPOINT = "graph-rag-originofcovid19dataset-benchmark.cluster-c7m8ay28gj4o.us-west-2.neptune.amazonaws.com"

# Connection settings
# MAX_RETRIES: passed as max_retries, the total number of connection attempts.
MAX_RETRIES = 5
# RETRY_DELAY: passed as retry_delay, the base wait in seconds; the wait doubles
# after each failed attempt and is capped at 60 seconds.
RETRY_DELAY = 5.0
# CONNECTION_TIMEOUT: forwarded as message_timeout to the Gremlin driver connection.
CONNECTION_TIMEOUT = 60  # seconds

## 2. Test Network Connectivity

In [None]:
import socket

def test_network(endpoint):
    """Test basic network connectivity to endpoint."""
    try:
        # Try DNS resolution
        print(f"Resolving {endpoint}...")
        ip = socket.gethostbyname(endpoint)
        print(f"DNS resolution successful: {ip}")
        
        # Try TCP connection
        print("Testing TCP connection...")
        sock = socket.create_connection((endpoint, 8182), timeout=10)
        sock.close()
        print("TCP connection successful")
        
        return True
        
    except socket.gaierror as e:
        print(f"DNS resolution failed: {str(e)}")
        return False
    except socket.timeout as e:
        print(f"Connection timed out: {str(e)}")
        return False
    except Exception as e:
        print(f"Network test failed: {str(e)}")
        return False

# Test network connectivity
# Runs the DNS + TCP check against the configured cluster endpoint; the
# boolean result is the cell's value.
test_network(ENDPOINT)

## 3. Test IAM Permissions

In [None]:
def test_iam_permissions():
    """Test IAM permissions for Neptune access.

    Prints the ARN of the identity behind the current AWS credentials, then
    lists the identifiers of the Neptune DB clusters that identity can
    describe. Clients are created with boto3's default configuration, so the
    default region and credential chain apply.

    Note: only the clusters returned by a single ``describe_db_clusters``
    call are listed; results are not paginated here.

    Returns:
        True if both AWS calls succeed, False if any step raises (the error
        is printed rather than propagated).
    """
    try:
        # Get identity
        sts = boto3.client('sts')
        identity = sts.get_caller_identity()
        print(f"Current identity: {identity['Arn']}")

        # List Neptune resources. The Neptune management API can also return
        # Amazon RDS and DocumentDB clusters, so filter on the engine to keep
        # the "Neptune clusters" listing accurate.
        neptune = boto3.client('neptune')
        clusters = neptune.describe_db_clusters(
            Filters=[{'Name': 'engine', 'Values': ['neptune']}]
        )
        print("\nAccessible Neptune clusters:")
        for cluster in clusters['DBClusters']:
            print(f"- {cluster['DBClusterIdentifier']}")

        return True

    except Exception as e:
        print(f"IAM permission test failed: {str(e)}")
        return False

# Test IAM permissions
# Prints the caller identity and the clusters it can describe; the boolean
# result is the cell's value.
test_iam_permissions()

## 4. Test Direct Connection

In [None]:
def test_connection(endpoint, max_retries=5, retry_delay=5.0, timeout=60):
    """Test Neptune connection with detailed logging."""
    
    database_url = f"wss://{endpoint}:8182/gremlin"
    logger.info(f"Attempting to connect to: {database_url}")
    
    # Get AWS credentials
    try:
        creds = boto3.Session().get_credentials().get_frozen_credentials()
        logger.info("AWS credentials retrieved successfully")
    except Exception as e:
        logger.error(f"Failed to get AWS credentials: {str(e)}")
        raise
    
    # Set up request with IAM auth
    try:
        request = AWSRequest(method="GET", url=database_url)
        SigV4Auth(creds, "neptune-db", boto3.Session().region_name).add_auth(request)
        logger.info("Request signed with SigV4")
    except Exception as e:
        logger.error(f"Failed to set up authentication: {str(e)}")
        raise
    
    # Attempt connection with retries
    last_error = None
    for attempt in range(max_retries):
        try:
            logger.info(f"Connection attempt {attempt + 1}/{max_retries}")
            
            # Initialize connection
            connection = DriverRemoteConnection(
                database_url,
                'g',
                headers=request.headers.items(),
                message_timeout=timeout
            )
            logger.info("Connection initialized")
            
            # Create traversal source
            g = traversal().withRemote(connection)
            logger.info("Traversal source created")
            
            # Test query
            logger.info("Testing connection with simple query...")
            result = g.V().limit(1).toList()
            logger.info(f"Query successful! Result: {result}")
            
            return connection, g
            
        except Exception as e:
            last_error = e
            logger.error(f"Attempt {attempt + 1} failed: {str(e)}")
            
            # Clean up connection on failure
            if 'connection' in locals():
                try:
                    connection.close()
                except:
                    pass
            
            if attempt < max_retries - 1:
                delay = min(60, retry_delay * (2 ** attempt))
                logger.info(f"Waiting {delay} seconds before retry...")
                time.sleep(delay)
            else:
                logger.error("All connection attempts failed")
                raise ConnectionError(
                    f"Failed to connect to Neptune after {max_retries} attempts"
                ) from last_error

In [None]:
# Exercise the full connection path using the settings from the configuration cell
try:
    connection, g = test_connection(
        ENDPOINT, MAX_RETRIES, RETRY_DELAY, CONNECTION_TIMEOUT
    )
    print("Connection successful!")

    # The connection was only needed for the check, so release it right away
    connection.close()
except Exception as err:
    print(f"Connection failed: {err!s}")