In [1]:
"""
=============================================================================
SETUP AND VERIFICATION SCRIPT
=============================================================================
Script that verifies all dependencies are installed correctly
and sets up the project structure.

Run this FIRST before running other scripts!

Author: ML Team
Version: 1.0
=============================================================================
"""

import subprocess
import sys
import os

def print_header():
    """Print the boxed title banner shown at the start of the setup run."""
    banner = """
    ‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
    ‚ïë                                                                      ‚ïë
    ‚ïë          üß™ CHEMICAL DISCOVERY AGENT - SETUP VERIFICATION üß™         ‚ïë
    ‚ïë                                                                      ‚ïë
    ‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
    """
    print(banner)

def check_python_version():
    """Report the running interpreter version and check it is 3.9 or newer.

    Returns:
        True when the interpreter is Python 3.9+, False otherwise.
    """
    print("\n" + "="*70)
    print("üêç CHECKING PYTHON VERSION")
    print("="*70)

    version = sys.version_info
    print(f"   Python version: {version.major}.{version.minor}.{version.micro}")

    # Compare (major, minor) as a tuple: checking the two fields independently
    # would reject any newer major release whose minor number is below 9.
    if (version.major, version.minor) >= (3, 9):
        print("   ‚úÖ Python version OK (3.9+)")
        return True

    print("   ‚ùå Python 3.9+ required")
    return False

def check_dependencies():
    """Try to import every required and optional package and report the result.

    Returns:
        A ``(missing_required, missing_optional)`` tuple. Each element is a
        list of pip package names whose import raised ``ImportError``.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üì¶ CHECKING DEPENDENCIES")
    print(rule)

    # (import name, pip name) pairs; the two differ for some distributions.
    required = (
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
        ('tqdm', 'tqdm'),
        ('pubchempy', 'pubchempy'),
        ('rdkit', 'rdkit'),
        ('langchain', 'langchain'),
        ('langchain_openai', 'langchain-openai'),
        ('langchain_community', 'langchain-community'),
        ('chromadb', 'chromadb'),
        ('openai', 'openai'),
        ('flask', 'flask'),
        ('dotenv', 'python-dotenv'),
    )
    optional = (
        ('chembl_webresource_client', 'chembl_webresource_client'),
    )

    # Required packages: a failed import is reported as an error.
    missing_required = []
    for module_name, dist_name in required:
        try:
            __import__(module_name)
        except ImportError:
            print(f"   ‚ùå {dist_name} - NOT INSTALLED")
            missing_required.append(dist_name)
        else:
            print(f"   ‚úÖ {dist_name}")

    # Optional packages: a failed import is only a warning.
    print("\n   Optional packages:")
    missing_optional = []
    for module_name, dist_name in optional:
        try:
            __import__(module_name)
        except ImportError:
            print(f"   ‚ö†Ô∏è  {dist_name} - NOT INSTALLED (optional)")
            missing_optional.append(dist_name)
        else:
            print(f"   ‚úÖ {dist_name}")

    return missing_required, missing_optional

def create_directory_structure():
    """Ensure the project folders and the ``tools`` package marker exist.

    Creates ``data``, ``tools`` and ``chroma_db`` relative to the current
    working directory when they are absent, then writes
    ``tools/__init__.py`` if it is not already there. Existing paths are
    left untouched.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üìÅ CREATING DIRECTORY STRUCTURE")
    print(rule)

    for folder in ('data', 'tools', 'chroma_db'):
        if os.path.exists(folder):
            print(f"   ‚úÖ Exists: {folder}/")
            continue
        os.makedirs(folder)
        print(f"   ‚úÖ Created: {folder}/")

    # Package marker for the tools folder; never overwrite an existing one.
    marker = 'tools/__init__.py'
    if os.path.exists(marker):
        return
    with open(marker, 'w') as handle:
        handle.write('# Tools package\n')
    print(f"   ‚úÖ Created: {marker}")

def _load_env_fallback(path):
    """Load simple ``KEY=VALUE`` lines from *path* into ``os.environ``.

    Minimal stand-in used only when python-dotenv cannot be imported. Blank
    lines, ``#`` comments and lines without ``=`` are skipped, an optional
    ``export `` prefix and surrounding quotes are stripped, and variables
    already present in the environment are left untouched.
    """
    with open(path, encoding='utf-8') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            name, _, value = line.partition('=')
            name = name.strip()
            if name.startswith('export '):
                name = name[len('export '):].strip()
            if name:
                os.environ.setdefault(name, value.strip().strip('\'"'))


def check_env_file():
    """Check that ``./.env`` exists and provides a usable OPENAI_API_KEY.

    When ``.env`` is missing, a template file is written to the current
    working directory. When it exists, it is loaded and the key is reported
    with most of its characters masked.

    Returns:
        True when OPENAI_API_KEY is set to something other than the template
        placeholder; False when the key is missing, empty, still the
        placeholder, or when the template file had to be created.
    """
    env_path = '.env'
    placeholder = 'your_openai_api_key_here'

    print("\n" + "="*70)
    print("üîë CHECKING ENVIRONMENT FILE")
    print("="*70)

    if not os.path.exists(env_path):
        print("   ‚ö†Ô∏è  .env file not found")
        print("\n   Creating template .env file...")

        with open(env_path, 'w', encoding='utf-8') as f:
            f.write("# API Keys\n")
            f.write(f"OPENAI_API_KEY={placeholder}\n")
            f.write("\n# Optional\n")
            f.write("# ANTHROPIC_API_KEY=your_anthropic_api_key_here\n")

        print("   ‚úÖ Created .env template")
        print("   ‚ö†Ô∏è  Please edit .env and add your API key!")
        return False

    print("   ‚úÖ .env file found")

    # python-dotenv is itself one of the dependencies this script verifies,
    # so it may be missing here; fall back to a minimal parser, not a crash.
    try:
        from dotenv import load_dotenv
    except ImportError:
        _load_env_fallback(env_path)
    else:
        # Load exactly the file whose existence was checked above rather
        # than letting python-dotenv search for one on its own.
        load_dotenv(dotenv_path=env_path)

    key = (os.getenv('OPENAI_API_KEY') or '').strip()

    # The untouched template value is not a real key and must not pass.
    if not key or key == placeholder:
        print("   ‚ö†Ô∏è  OPENAI_API_KEY not found in .env")
        return False

    # Show only the ends of the key so it can be recognised without leaking.
    masked_key = key[:8] + '...' + key[-4:] if len(key) > 12 else '***'
    print(f"   ‚úÖ OPENAI_API_KEY is set: {masked_key}")
    return True

def install_missing_packages(packages):
    """Install each named package with pip, reporting success or failure.

    Args:
        packages: Iterable of pip package names. Nothing happens when it is
            empty.

    A failed install is reported and the remaining packages are still
    attempted. pip is run with the current interpreter so packages land in
    the environment this script is running in.
    """
    if not packages:
        return

    # Local import keeps this block self-contained; only needed on this path.
    import importlib

    print("\n" + "="*70)
    print("üì• INSTALLING MISSING PACKAGES")
    print("="*70)

    installed_any = False
    for package in packages:
        print(f"\n   Installing {package}...")
        try:
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', package, '-q'])
        except subprocess.CalledProcessError:
            print(f"   ‚ùå Failed to install {package}")
        else:
            print(f"   ‚úÖ {package} installed successfully")
            installed_any = True

    # Modules created after interpreter start may be hidden by the import
    # system's finder caches; flush them so a re-check in this same process
    # can see what pip just installed.
    if installed_any:
        importlib.invalidate_caches()

def print_next_steps(has_api_key, missing_required):
    """Print the actions still needed, or the pipeline commands if none are.

    Args:
        has_api_key: Whether a usable OpenAI API key was found.
        missing_required: Pip names of required packages that are still
            missing; they are listed in a suggested ``pip install`` command.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üìã NEXT STEPS")
    print(rule)

    packages_missing = bool(missing_required)
    todo = []

    if packages_missing:
        todo.extend([
            "1. Install missing required packages:",
            f"   pip install {' '.join(missing_required)}",
        ])

    if not has_api_key:
        # The API-key step is numbered after the install step when both apply.
        ordinal = 2 if packages_missing else 1
        todo.extend([
            f"{ordinal}. Add your OpenAI API key to .env file:",
            "   Open .env and replace 'your_openai_api_key_here' with your actual key",
        ])

    if not todo:
        todo = [
            "‚úÖ All checks passed! You're ready to go!",
            "",
            "Run the pipeline:",
            "   python 01_download_datasets.py",
            "   python 02_enrich_dataset.py",
            "   python 03_build_vector_database.py",
        ]

    print("\n".join(f"   {entry}" for entry in todo))

def verify_rdkit():
    """Smoke-test RDKit by parsing ethanol and computing its molecular weight.

    Returns:
        True when the SMILES string parses and the weight is computed; False
        when parsing yields a falsy result or any exception is raised
        (including a failed import).
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üî¨ VERIFYING RDKIT")
    print(rule)

    try:
        from rdkit import Chem
        from rdkit.Chem import Descriptors

        # Ethanol is the probe molecule.
        ethanol = Chem.MolFromSmiles('CCO')
        if not ethanol:
            print("   ‚ùå RDKit failed to parse molecule")
            return False

        weight = Descriptors.MolWt(ethanol)
        print("   ‚úÖ RDKit working correctly")
        print(f"   Test: Ethanol (CCO) MW = {weight:.2f} g/mol")
        return True
    except Exception as err:
        print(f"   ‚ùå RDKit error: {err}")
        return False

def verify_pubchempy():
    """Smoke-test PubChemPy with a name lookup for 'water'.

    Returns:
        True when the lookup completes, even with no results (an empty
        answer is treated as a possible temporary network problem); False
        when any exception is raised, including a failed import.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üåê VERIFYING PUBCHEMPY")
    print(rule)

    try:
        import pubchempy as pcp

        hits = pcp.get_compounds('water', 'name')
        if not hits:
            # Not treated as a failure: may just be a transient network issue.
            print("   ‚ö†Ô∏è  PubChemPy returned no results (might be network issue)")
            return True

        print("   ‚úÖ PubChemPy working correctly")
        print(f"   Test: Found {len(hits)} result(s) for 'water'")
        return True
    except Exception as err:
        print(f"   ‚ùå PubChemPy error: {err}")
        return False

def _confirm(prompt):
    """Ask a yes/no question on stdin; return True only for 'y' or 'yes'.

    Surrounding whitespace and letter case are ignored. A closed or
    non-interactive stdin (``EOFError``) counts as "no", so the script can
    run unattended instead of crashing at the prompt.
    """
    try:
        answer = input(prompt)
    except EOFError:
        return False
    return answer.strip().lower() in ('y', 'yes')


def main():
    """Run every setup check in order and print a summary of next steps.

    Stops early when the Python version is too old. Otherwise it checks
    dependencies (offering to install missing ones), creates the project
    directories, checks the .env file, runs the RDKit and PubChemPy smoke
    tests when no required package is missing, and prints the next steps.
    """
    print_header()

    # Check Python version
    python_ok = check_python_version()
    if not python_ok:
        print("\n‚ùå Please upgrade Python to 3.9+")
        return

    # Check dependencies
    missing_required, missing_optional = check_dependencies()

    # Offer to install missing packages
    if missing_required:
        print("\n" + "="*70)
        if _confirm("Would you like to install missing required packages? (y/n): "):
            install_missing_packages(missing_required)
            # Re-check so the summary reflects what is still missing.
            missing_required, _ = check_dependencies()

    if missing_optional:
        print("\n" + "="*70)
        if _confirm("Would you like to install optional packages? (y/n): "):
            install_missing_packages(missing_optional)

    # Create directories
    create_directory_structure()

    # Check .env
    has_api_key = check_env_file()

    # Library smoke tests only run when every required package is present.
    if not missing_required:
        verify_rdkit()
        verify_pubchempy()

    # Print next steps
    print_next_steps(has_api_key, missing_required)

    print("\n" + "="*70)
    print("‚úÖ SETUP VERIFICATION COMPLETE")
    print("="*70)


# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


    ‚ïî‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïó
    ‚ïë                                                                      ‚ïë
    ‚ïë          üß™ CHEMICAL DISCOVERY AGENT - SETUP VERIFICATION üß™         ‚ïë
    ‚ïë                                                                      ‚ïë
    ‚ïö‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïù
    

üêç CHECKING PYTHON VERSION
   Python version: 3.12.7
   ‚úÖ Python version OK (3.9+)

üì¶ CHECKING DEPENDENCIES
   ‚úÖ pandas
   ‚úÖ numpy
   ‚úÖ tqdm
   ‚úÖ pubchempy
   ‚úÖ rdkit
   ‚úÖ langchain
   ‚úÖ langchain-openai
   ‚úÖ langchain-community
   ‚ùå chromadb - NOT INSTALLED
   ‚úÖ openai
   ‚ùå