In [2]:
# Prints a fixed greeting (presumably a kernel smoke test).
print("hello world")

hello world


In [3]:
# Prints the same fixed greeting again (identical to the preceding cell).
print("hello world")

hello world


In [4]:
from pathlib import Path

def find_project_root(start: Path, marker: str = "docker-compose.yml") -> Path:
    """
    Walk upwards from start directory to find project root.
    """
    for parent in [start, *start.parents]:
        if (parent / marker).exists():
            return parent
    return None

# Search for the project root starting at the current working directory.
current_dir = Path.cwd()
project_root = find_project_root(current_dir)

# Guard clause: abort when neither the working directory nor any ancestor
# contains the marker file.
if not project_root:
    raise RuntimeError(
        "✗ docker-compose.yml not found.\n"
        "Please run this notebook inside the ARXIV_PAPER_CURATOR project."
    )

print(f"✓ Project root detected: {project_root}")


✓ Project root detected: /home/thang/projects/ARXIV_PAPER_CURATOR


In [5]:
# Check Docker
import subprocess

# Verify that the Docker CLI can be launched and reports a version.
# Failures raise instead of calling exit(): exit() is an interactive-shell
# helper, and in a Jupyter kernel it requests a kernel shutdown rather than
# simply stopping this cell.
try:
    result = subprocess.run(["docker", "--version"], capture_output=True, text=True, timeout=5)
except (OSError, subprocess.SubprocessError) as exc:
    # OSError covers a missing or non-executable binary; SubprocessError covers the timeout.
    raise RuntimeError(f"✗ Docker: Not found ({exc})") from exc

if result.returncode != 0:
    raise RuntimeError(f"✗ Docker: Not working ({result.stderr.strip() or result.returncode})")

# strip() removes the trailing newline so no stray blank line is printed.
print(f"✓ Docker: {result.stdout.strip()}")

✓ Docker: Docker version 29.1.3, build f52814d



In [6]:
# Check Docker Compose
# Verify that the Docker Compose plugin responds and report its version.
# Failures raise instead of calling exit(): in a Jupyter kernel exit() requests
# a kernel shutdown rather than simply stopping this cell.
try:
    result = subprocess.run(["docker", "compose", "version"], capture_output=True, text=True, timeout=5)
except (OSError, subprocess.SubprocessError) as exc:
    # OSError covers a missing or non-executable binary; SubprocessError covers the timeout.
    raise RuntimeError(f"✗ Docker Compose: Not found ({exc})") from exc

if result.returncode != 0:
    raise RuntimeError(f"✗ Docker Compose: Not working ({result.stderr.strip() or result.returncode})")

# Use the last whitespace-separated token as the version instead of a fixed
# index, so a differently worded banner cannot raise IndexError.
version_tokens = result.stdout.split()
print(f"✓ Docker Compose: {version_tokens[-1] if version_tokens else 'unknown'}")

✓ Docker Compose: v2.40.3-desktop.1


In [7]:
# Check UV Package Manager
# Verify that the uv package manager can be launched and reports a version.
# Failures raise instead of calling exit(): in a Jupyter kernel exit() requests
# a kernel shutdown rather than simply stopping this cell.
try:
    result = subprocess.run(["uv", "--version"], capture_output=True, text=True, timeout=5)
except (OSError, subprocess.SubprocessError) as exc:
    # OSError covers a missing or non-executable binary; SubprocessError covers the timeout.
    raise RuntimeError(f"✗ UV: Not found ({exc})") from exc

if result.returncode != 0:
    raise RuntimeError(f"✗ UV: Not working ({result.stderr.strip() or result.returncode})")

print(f"✓ UV: {result.stdout.strip()}")
print("\n✓ All required software ready!")

✓ UV: uv 0.9.18

✓ All required software ready!


In [8]:
import subprocess, sys

# Ask Docker for the server version; a zero exit status is treated as
# "the daemon is running".
try:
    result = subprocess.run(
        ["docker", "version", "--format", "{{.Server.Version}}"],
        capture_output=True,  # same as piping both stdout and stderr
        text=True,
        timeout=3,
    )
    # Guard clause: report and stop on a non-zero exit status.
    if result.returncode != 0:
        print("✗ Docker not running")
        sys.exit(1)
    print("✓ Docker is running")
except Exception as e:
    # SystemExit is not an Exception subclass, so the sys.exit above passes through.
    print("✗ Docker not accessible:", e)
    sys.exit(1)


✓ Docker is running


In [9]:
# Check Current Containers
import json

# List the services of this Compose project together with their state.
# Best-effort: problems are reported (with their cause) but never raised.
try:
    result = subprocess.run(
        ["docker", "compose", "ps", "--format", "json"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
        timeout=10,
    )
except Exception as e:
    print(f"Could not check containers: {e}")
else:
    if result.returncode != 0:
        # A failed command is not the same thing as "nothing is running".
        print(f"Could not check containers: {result.stderr.strip() or result.returncode}")
    elif not result.stdout.strip():
        print("No containers running")
    else:
        print("Current containers:")
        for line in result.stdout.splitlines():
            if not line.strip():
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError:
                print(f"  ? skipped unparseable line: {line[:60]}")
                continue
            # Accept both output shapes: one JSON object per line, or a single
            # JSON array (seen in some Compose releases - TODO confirm).
            entries = parsed if isinstance(parsed, list) else [parsed]
            for container in entries:
                if not isinstance(container, dict):
                    continue
                service = container.get('Service', 'unknown')
                state = container.get('State', 'unknown')
                print(f"  • {service}: {state}")

Current containers:
  • clickhouse: running
  • langfuse-minio: running
  • langfuse-postgres: running
  • langfuse-redis: running
  • langfuse-web: running
  • langfuse-worker: restarting
  • opensearch: running
  • postgres: running
  • redis: running


In [10]:
import json

# List the services of this Compose project together with their state.
# Best-effort: problems are reported (with their cause) but never raised.
try:
    result = subprocess.run(
        ["docker", "compose", "ps", "--format", "json"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
        timeout=10,
    )
except Exception as e:
    print(f"Could not check containers: {e}")
else:
    if result.returncode != 0:
        # A failed command is not the same thing as "nothing is running".
        print(f"Could not check containers: {result.stderr.strip() or result.returncode}")
    elif not result.stdout.strip():
        print("No containers running")
    else:
        print("Current containers:")
        for line in result.stdout.splitlines():
            if not line.strip():
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError:
                print(f"  ? skipped unparseable line: {line[:60]}")
                continue
            # Accept both output shapes: one JSON object per line, or a single
            # JSON array (seen in some Compose releases - TODO confirm).
            entries = parsed if isinstance(parsed, list) else [parsed]
            for container in entries:
                if not isinstance(container, dict):
                    continue
                service = container.get('Service', 'unknown')
                state = container.get('State', 'unknown')
                print(f"  • {service}: {state}")

Current containers:
  • clickhouse: running
  • langfuse-minio: running
  • langfuse-postgres: running
  • langfuse-redis: running
  • langfuse-web: running
  • langfuse-worker: restarting
  • opensearch: running
  • postgres: running
  • redis: running


In [21]:
# Service Health Check
# Compose service names this notebook expects, each mapped to a short
# human-readable description used when reporting missing services.
EXPECTED_SERVICES = {
    "api": "FastAPI REST API server",
    "postgres": "PostgreSQL database",
    "opensearch": "OpenSearch search engine",
    "opensearch-dashboards": "OpenSearch web dashboard",
    "ollama": "Local LLM inference server",
    "airflow": "Workflow automation (optional - may be off)",
}

# Query Compose for the status of every service in the project.
# Failures raise instead of calling exit(): in a Jupyter kernel exit() requests
# a kernel shutdown and does not reliably stop the rest of this cell, so the
# parsing code below could still run against a missing or failed result.
try:
    result = subprocess.run(
        ["docker", "compose", "ps", "--format", "json"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
        timeout=15,
    )
except Exception as e:
    raise RuntimeError(f"Error checking services: {e}") from e

if result.returncode != 0:
    raise RuntimeError(f"Could not get service status ({result.stderr.strip() or result.returncode})")

# Table header; the column widths (20 / 15 / 15) are what the rows printed
# further down are expected to line up with.
print("SERVICE STATUS")
print("=" * 70)
print(f"{'Service':<20} {'State':<15} {'Status':<15} {'Notes'}")
print("-" * 70)

# Parse Service Status
# Builds `found_services` (names seen) and `service_states`
# (name -> {'state', 'health'}) from the `docker compose ps` JSON output in
# `result`, printing one table row per service.
found_services = set()
service_states = {}

for line in result.stdout.splitlines():
    if not line.strip():
        continue
    try:
        parsed = json.loads(line)
    except json.JSONDecodeError:
        # Deliberately skip lines that are not JSON so one bad line does not
        # hide the remaining services.
        continue

    # Accept both output shapes: one JSON object per line, or a single JSON
    # array (seen in some Compose releases - TODO confirm).
    entries = parsed if isinstance(parsed, list) else [parsed]
    for container in entries:
        if not isinstance(container, dict):
            continue
        service = container.get('Service', 'unknown')
        state = container.get('State', 'unknown')
        # `or` (not a .get default): the Health key can be present but empty
        # for services without a healthcheck, which must also read 'no check'.
        health = container.get('Health') or 'no check'

        found_services.add(service)
        service_states[service] = {'state': state, 'health': health}

        if state == 'running' and health in ('healthy', 'no check'):
            indicator = "✓"
            notes = "Ready"
        elif state == 'running' and health in ('starting', 'unhealthy'):
            # 'starting' is the health value reported before the first
            # successful probe; treat it like the existing unhealthy case.
            indicator = "⚠"
            notes = "Starting up..."
        elif state == 'exited':
            indicator = "✗"
            notes = "Failed to start"
        else:
            indicator = "?"
            notes = f"Status: {state}"

        # indicator + space + 18 = 20 columns, then 15 / 15: matches the
        # 20 / 15 / 15 header widths so the columns line up.
        print(f"{indicator} {service:<18} {state:<15} {health:<15} {notes}")

SERVICE STATUS
Service              State           Status          Notes
----------------------------------------------------------------------
✓ airflow            running        healthy        Ready
⚠ api                running        unhealthy      Starting up...
✓ clickhouse         running        healthy        Ready
✓ opensearch-dashboards running        healthy        Ready
✓ langfuse-minio     running        healthy        Ready
✓ langfuse-postgres  running        healthy        Ready
✓ langfuse-redis     running        healthy        Ready
⚠ langfuse-web       running        unhealthy      Starting up...
? langfuse-worker    restarting                    Status: restarting
✓ ollama             running        healthy        Ready
✓ opensearch         running        healthy        Ready
✓ postgres           running        healthy        Ready
✓ redis              running        healthy        Ready


In [20]:
# Check Missing Services
# Compare the expected services against those reported by Compose, then print
# troubleshooting hints for failed services and a start hint for missing ones.
missing_services = set(EXPECTED_SERVICES.keys()) - found_services
optional_services = {'airflow'}
# Only required services should trigger the "start missing services" hint.
required_missing = missing_services - optional_services

if missing_services:
    print("\nMISSING SERVICES:")
    print("-" * 70)
    # sorted() gives a stable order; plain set iteration order is arbitrary.
    for service in sorted(missing_services):
        description = EXPECTED_SERVICES[service]
        if service in optional_services:
            print(f"⚠ {service:<18} not running    {'(Optional)':<14} {description}")
        else:
            print(f"✗ {service:<18} not running    {'Required':<14} {description}")

failed_services = [
    s for s, info in service_states.items()
    if info['state'] in ('exited', 'restarting') or info['health'] == 'unhealthy'
]

if failed_services:
    print("\nTROUBLESHOOTING:")
    for service in failed_services:
        print(f"   docker compose logs {service}")

# Independent of the troubleshooting hint: a required service can be missing
# while another one is failing, and while the optional one is also missing.
if required_missing:
    print("\nACTION NEEDED:")
    print("Start missing services: docker compose up -d")


TROUBLESHOOTING:
   docker compose logs api
   docker compose logs langfuse-web
   docker compose logs langfuse-worker


In [17]:
# Test FastAPI Health
import requests

# Probe the API's /health endpoint on localhost:8000 and report the outcome.
try:
    response = requests.get("http://localhost:8000/health", timeout=5)
    if response.status_code != 200:
        # Reachable, but the endpoint did not answer with HTTP 200.
        print(f"⚠ API returned status: {response.status_code}")
    else:
        # Decode the body first so a malformed payload is reported as an error
        # before any success message is printed.
        data = response.json()
        print("✓ FastAPI is responding")
        print(f"Status: {data.get('status', 'unknown')}")
except requests.exceptions.ConnectionError:
    print("✗ API not responding - wait 1-2 minutes")
except Exception as e:
    # Anything else (timeout, invalid JSON, ...) is reported with its message.
    print(f"✗ API test error: {e}")

✗ API not responding - wait 1-2 minutes


In [6]:
# Environment Check
import sys
from pathlib import Path

python_version = sys.version_info
print(f"Python Version: {python_version.major}.{python_version.minor}.{python_version.micro}")
print(f"Environment: {sys.executable}")

if python_version >= (3, 12):
    print("✓ Python version compatible")
else:
    print("✗ Need Python 3.12+")
    exit()

Python Version: 3.12.3
Environment: /home/thang/projects/ARXIV_PAPER_CURATOR/.venv/bin/python
✓ Python version compatible


In [None]:
# Check Docker
import subprocess

# Verify that the Docker CLI can be launched and reports a version.
# Failures raise instead of calling exit(): exit() is an interactive-shell
# helper, and in a Jupyter kernel it requests a kernel shutdown rather than
# simply stopping this cell.
try:
    result = subprocess.run(["docker", "--version"], capture_output=True, text=True, timeout=5)
except (OSError, subprocess.SubprocessError) as exc:
    # OSError covers a missing or non-executable binary; SubprocessError covers the timeout.
    raise RuntimeError(f"✗ Docker: Not found ({exc})") from exc

if result.returncode != 0:
    raise RuntimeError(f"✗ Docker: Not working ({result.stderr.strip() or result.returncode})")

# strip() removes the trailing newline so no stray blank line is printed.
print(f"✓ Docker: {result.stdout.strip()}")

In [None]:
# Check Docker Compose
# Verify that the Docker Compose plugin responds and report its version.
# Failures raise instead of calling exit(): in a Jupyter kernel exit() requests
# a kernel shutdown rather than simply stopping this cell.
try:
    result = subprocess.run(["docker", "compose", "version"], capture_output=True, text=True, timeout=5)
except (OSError, subprocess.SubprocessError) as exc:
    # OSError covers a missing or non-executable binary; SubprocessError covers the timeout.
    raise RuntimeError(f"✗ Docker Compose: Not found ({exc})") from exc

if result.returncode != 0:
    raise RuntimeError(f"✗ Docker Compose: Not working ({result.stderr.strip() or result.returncode})")

# Use the last whitespace-separated token as the version instead of a fixed
# index, so a differently worded banner cannot raise IndexError.
version_tokens = result.stdout.split()
print(f"✓ Docker Compose: {version_tokens[-1] if version_tokens else 'unknown'}")

In [None]:
 #Check UV Package Manager
# Verify that the uv package manager can be launched and reports a version.
# Failures raise instead of calling exit(): in a Jupyter kernel exit() requests
# a kernel shutdown rather than simply stopping this cell.
try:
    result = subprocess.run(["uv", "--version"], capture_output=True, text=True, timeout=5)
except (OSError, subprocess.SubprocessError) as exc:
    # OSError covers a missing or non-executable binary; SubprocessError covers the timeout.
    raise RuntimeError(f"✗ UV: Not found ({exc})") from exc

if result.returncode != 0:
    raise RuntimeError(f"✗ UV: Not working ({result.stderr.strip() or result.returncode})")

print(f"✓ UV: {result.stdout.strip()}")
print("\n✓ All required software ready!")

In [None]:
# Check Docker Running
# Run `docker info`; a zero exit status is treated as "the daemon is running".
# Failures raise instead of calling exit(): in a Jupyter kernel exit() requests
# a kernel shutdown rather than simply stopping this cell.
try:
    result = subprocess.run(["docker", "info"], capture_output=True, timeout=5)
except (OSError, subprocess.SubprocessError) as exc:
    # OSError covers a missing or non-executable binary; SubprocessError covers the timeout.
    raise RuntimeError(f"✗ Docker daemon not accessible ({exc})") from exc

if result.returncode != 0:
    raise RuntimeError("✗ Docker not running - start Docker Desktop")

print("✓ Docker is running")

In [None]:
# Check Current Containers
import json

# List the services of this Compose project together with their state.
# Best-effort: problems are reported (with their cause) but never raised.
try:
    result = subprocess.run(
        ["docker", "compose", "ps", "--format", "json"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
        timeout=10,
    )
except Exception as e:
    print(f"Could not check containers: {e}")
else:
    if result.returncode != 0:
        # A failed command is not the same thing as "nothing is running".
        print(f"Could not check containers: {result.stderr.strip() or result.returncode}")
    elif not result.stdout.strip():
        print("No containers running")
    else:
        print("Current containers:")
        for line in result.stdout.splitlines():
            if not line.strip():
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError:
                print(f"  ? skipped unparseable line: {line[:60]}")
                continue
            # Accept both output shapes: one JSON object per line, or a single
            # JSON array (seen in some Compose releases - TODO confirm).
            entries = parsed if isinstance(parsed, list) else [parsed]
            for container in entries:
                if not isinstance(container, dict):
                    continue
                service = container.get('Service', 'unknown')
                state = container.get('State', 'unknown')
                print(f"  • {service}: {state}")

In [None]:
# Service Health Check
# Compose service names this notebook expects, each mapped to a short
# human-readable description used when reporting missing services.
EXPECTED_SERVICES = {
    "api": "FastAPI REST API server",
    "postgres": "PostgreSQL database",
    "opensearch": "OpenSearch search engine",
    "opensearch-dashboards": "OpenSearch web dashboard",
    "ollama": "Local LLM inference server",
    "airflow": "Workflow automation (optional - may be off)",
}

# Query Compose for the status of every service in the project.
# Failures raise instead of calling exit(): in a Jupyter kernel exit() requests
# a kernel shutdown and does not reliably stop the rest of this cell, so the
# parsing code below could still run against a missing or failed result.
try:
    result = subprocess.run(
        ["docker", "compose", "ps", "--format", "json"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
        timeout=15,
    )
except Exception as e:
    raise RuntimeError(f"Error checking services: {e}") from e

if result.returncode != 0:
    raise RuntimeError(f"Could not get service status ({result.stderr.strip() or result.returncode})")

# Table header; the column widths (20 / 15 / 15) are what the rows printed
# further down are expected to line up with.
print("SERVICE STATUS")
print("=" * 70)
print(f"{'Service':<20} {'State':<15} {'Status':<15} {'Notes'}")
print("-" * 70)

# Parse Service Status
# Builds `found_services` (names seen) and `service_states`
# (name -> {'state', 'health'}) from the `docker compose ps` JSON output in
# `result`, printing one table row per service.
found_services = set()
service_states = {}

for line in result.stdout.splitlines():
    if not line.strip():
        continue
    try:
        parsed = json.loads(line)
    except json.JSONDecodeError:
        # Deliberately skip lines that are not JSON so one bad line does not
        # hide the remaining services.
        continue

    # Accept both output shapes: one JSON object per line, or a single JSON
    # array (seen in some Compose releases - TODO confirm).
    entries = parsed if isinstance(parsed, list) else [parsed]
    for container in entries:
        if not isinstance(container, dict):
            continue
        service = container.get('Service', 'unknown')
        state = container.get('State', 'unknown')
        # `or` (not a .get default): the Health key can be present but empty
        # for services without a healthcheck, which must also read 'no check'.
        health = container.get('Health') or 'no check'

        found_services.add(service)
        service_states[service] = {'state': state, 'health': health}

        if state == 'running' and health in ('healthy', 'no check'):
            indicator = "✓"
            notes = "Ready"
        elif state == 'running' and health in ('starting', 'unhealthy'):
            # 'starting' is the health value reported before the first
            # successful probe; treat it like the existing unhealthy case.
            indicator = "⚠"
            notes = "Starting up..."
        elif state == 'exited':
            indicator = "✗"
            notes = "Failed to start"
        else:
            indicator = "?"
            notes = f"Status: {state}"

        # indicator + space + 18 = 20 columns, then 15 / 15: matches the
        # 20 / 15 / 15 header widths so the columns line up.
        print(f"{indicator} {service:<18} {state:<15} {health:<15} {notes}")