# SwellSight Real-to-Synthetic Pipeline - Setup and Installation (Enhanced)

This notebook handles environment preparation and dependency management for the SwellSight real-to-synthetic image generation pipeline with integrated utilities.

## New Features
- ‚ú® Integrated SwellSight utilities for better error handling and progress tracking
- üîß Automatic configuration management
- üìä Memory optimization and monitoring
- üõ°Ô∏è Robust error handling with retry logic
- üìà Progress tracking and performance feedback

---

## 1. Import SwellSight Utilities

In [6]:
import sys
import os
from pathlib import Path
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Add project root to Python path so `import utils` works
utils_path = Path.cwd()
if str(utils_path) not in sys.path:
    sys.path.insert(0, str(utils_path))

try:
    # Import SwellSight utilities
    from utils import (
        load_config, validate_config,
        validate_image_quality, validate_depth_map_quality,
        get_optimal_batch_size, cleanup_variables, monitor_memory,
        retry_with_backoff, handle_gpu_memory_error,
        create_progress_bar, display_stage_summary,
        save_stage_results, load_previous_results, check_dependencies
    )
    print("‚úÖ SwellSight utilities loaded successfully")
except ImportError as e:
    print(f"‚ö†Ô∏è Could not import utilities: {e}")
    print("Continuing with basic functionality...")

‚ö†Ô∏è Could not import utilities: No module named 'utils'
Continuing with basic functionality...


## 2. Load Configuration and Environment Setup

In [2]:
# Load pipeline configuration
try:
    config = load_config("config.json")
    print(f"‚úÖ Configuration loaded: {config['pipeline']['name']} v{config['pipeline']['version']}")
    
    # Extract commonly used settings
    batch_size = config['processing']['batch_size']
    quality_threshold = config['processing']['quality_threshold']
    data_dir = Path(config['paths']['data_dir'])
    output_dir = Path(config['paths']['output_dir'])
    checkpoint_dir = Path(config['paths']['checkpoint_dir'])
    
    print(f"üìÅ Data directory: {data_dir}")
    print(f"üìÅ Output directory: {output_dir}")
    print(f"üìÅ Checkpoint directory: {checkpoint_dir}")
    print(f"üéØ Quality threshold: {quality_threshold}")
    
except Exception as e:
    print(f"‚ö†Ô∏è Could not load configuration: {e}")
    print("Using default settings...")
    
    # Fallback configuration
    batch_size = "auto"
    quality_threshold = 0.7
    data_dir = Path("./data")
    output_dir = Path("./outputs")
    checkpoint_dir = Path("./checkpoints")

‚ö†Ô∏è Could not load configuration: name 'load_config' is not defined
Using default settings...


## 3. Environment Detection and Hardware Configuration

In [3]:
import torch
import platform

# Check if running in Google Colab
if 'google.colab' in sys.modules:
    from google.colab import drive
    print("üîó Mounting Google Drive...")
    
    def mount_drive():
        drive.mount('/content/drive')
        return True
    
    try:
        # Use retry logic for drive mounting
        retry_with_backoff(mount_drive, max_retries=2)
        print("‚úÖ Google Drive mounted successfully")
    except Exception as e:
        print(f"‚ö†Ô∏è Drive mounting failed: {e}")

# Hardware detection with memory monitoring
print("\nüîç Hardware Detection:")
print(f"Platform: {platform.system()} {platform.release()}")
print(f"Python: {sys.version.split()[0]}")

# GPU detection
if torch.cuda.is_available():
    gpu_count = torch.cuda.device_count()
    current_gpu = torch.cuda.current_device()
    gpu_name = torch.cuda.get_device_name(current_gpu)
    gpu_memory = torch.cuda.get_device_properties(current_gpu).total_memory / (1024**3)
    
    print(f"üöÄ GPU Available: {gpu_name}")
    print(f"üíæ GPU Memory: {gpu_memory:.1f} GB")
    print(f"üî¢ GPU Count: {gpu_count}")
else:
    print("‚ö†Ô∏è No GPU available - using CPU")

# Memory monitoring
try:
    memory_info = monitor_memory()
    print(f"\nüíª System Memory: {memory_info.get('system_total_gb', 0):.1f} GB total, {memory_info.get('system_percent', 0):.1f}% used")
    if 'gpu_total_gb' in memory_info:
        print(f"üéÆ GPU Memory: {memory_info.get('gpu_total_gb', 0):.1f} GB total, {memory_info.get('gpu_percent', 0):.1f}% used")
except Exception as e:
    print(f"‚ö†Ô∏è Could not get memory info: {e}")

üîó Mounting Google Drive...
‚ö†Ô∏è Drive mounting failed: name 'retry_with_backoff' is not defined

üîç Hardware Detection:
Platform: Linux 6.6.105+
Python: 3.12.12
üöÄ GPU Available: NVIDIA A100-SXM4-40GB
üíæ GPU Memory: 39.6 GB
üî¢ GPU Count: 1
‚ö†Ô∏è Could not get memory info: name 'monitor_memory' is not defined


## 4. Create Directory Structure with Progress Tracking

In [4]:
# Create necessary directories
directories_to_create = [
    data_dir / "real" / "images",
    data_dir / "processed",
    data_dir / "depth_maps",
    data_dir / "synthetic",
    output_dir,
    checkpoint_dir,
    Path("models"),
    Path("logs")
]

print("üìÅ Creating directory structure...")
progress_bar = create_progress_bar(len(directories_to_create), "Creating directories")

created_dirs = []
for directory in directories_to_create:
    try:
        directory.mkdir(parents=True, exist_ok=True)
        created_dirs.append(str(directory))
        progress_bar.update(1)
    except Exception as e:
        print(f"‚ö†Ô∏è Could not create directory {directory}: {e}")

progress_bar.close()
print(f"‚úÖ Created {len(created_dirs)} directories")

# Display directory structure
print("\nüìÇ Directory Structure:")
for directory in created_dirs:
    print(f"  üìÅ {directory}")

üìÅ Creating directory structure...


NameError: name 'create_progress_bar' is not defined

## 5. Install Dependencies with Error Handling

In [None]:
import subprocess

# Define required packages
required_packages = [
    "torch>=1.9.0",
    "torchvision>=0.10.0",
    "transformers>=4.20.0",
    "diffusers>=0.10.0",
    "accelerate>=0.20.0",
    "opencv-python>=4.5.0",
    "Pillow>=8.0.0",
    "numpy>=1.21.0",
    "tqdm>=4.60.0",
    "psutil>=5.8.0"
]

def install_package(package):
    """Install a package using pip"""
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", package],
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        raise Exception(f"Installation failed: {result.stderr}")
    return result.stdout

print("üì¶ Installing required packages...")
progress_bar = create_progress_bar(len(required_packages), "Installing packages")

installed_packages = []
failed_packages = []

for package in required_packages:
    try:
        # Use retry logic for package installation
        retry_with_backoff(lambda: install_package(package), max_retries=2)
        installed_packages.append(package)
        progress_bar.update(1)
    except Exception as e:
        print(f"\n‚ö†Ô∏è Failed to install {package}: {e}")
        failed_packages.append(package)
        progress_bar.update(1)

progress_bar.close()

# Display installation summary
installation_metrics = {
    'total_packages': len(required_packages),
    'installed_successfully': len(installed_packages),
    'failed_installations': len(failed_packages),
    'success_rate': len(installed_packages) / len(required_packages)
}

display_stage_summary("Package Installation", installation_metrics)

if failed_packages:
    print("\n‚ö†Ô∏è Failed packages:")
    for package in failed_packages:
        print(f"  - {package}")
    print("\nüí° Try installing failed packages manually or check your internet connection.")

üì¶ Installing required packages...


Installing packages: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:29<00:00,  2.91s/items]


STAGE SUMMARY: PACKAGE INSTALLATION
Total Packages: 10
Installed Successfully: 10
Failed Installations: 0
Success Rate: 1.000






## 6. Optimize Memory and Batch Size Configuration

In [None]:
# Calculate optimal batch size based on available memory
print("üß† Optimizing memory configuration...")

if batch_size == "auto":
    try:
        optimal_batch_size = get_optimal_batch_size(max_batch_size=32)
        print(f"‚úÖ Calculated optimal batch size: {optimal_batch_size}")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not calculate optimal batch size: {e}")
        optimal_batch_size = 4  # Conservative fallback
        print(f"Using conservative batch size: {optimal_batch_size}")
else:
    optimal_batch_size = batch_size
    print(f"‚úÖ Using configured batch size: {optimal_batch_size}")

# Update configuration with optimized settings
try:
    config['processing']['batch_size'] = optimal_batch_size
    
    # Save updated configuration
    import json
    with open("config.json", "w") as f:
        json.dump(config, f, indent=2)
    
    print("‚úÖ Configuration updated with optimized settings")
except Exception as e:
    print(f"‚ö†Ô∏è Could not save updated configuration: {e}")

# Display memory optimization suggestions
try:
    from utils.memory_optimizer import MemoryOptimizer
    optimizer = MemoryOptimizer()
    suggestions = optimizer.suggest_memory_optimizations()
    
    if suggestions:
        print("\nüí° Memory Optimization Suggestions:")
        for i, suggestion in enumerate(suggestions[:3], 1):
            print(f"  {i}. {suggestion}")
except Exception as e:
    print(f"‚ö†Ô∏è Could not generate optimization suggestions: {e}")

üß† Optimizing memory configuration...
‚úÖ Using configured batch size: 1
‚úÖ Configuration updated with optimized settings

üí° Memory Optimization Suggestions:
  1. System memory usage is high (>80%). Consider reducing batch size.
  2. Close unnecessary applications to free up system memory.
  3. Process data in smaller batches to reduce memory pressure.


## 7. Save Setup Results and Environment Information

In [None]:
# Collect environment information
environment_info = {
    'platform': platform.system(),
    'python_version': sys.version.split()[0],
    'torch_version': torch.__version__,
    'cuda_available': torch.cuda.is_available(),
    'gpu_count': torch.cuda.device_count() if torch.cuda.is_available() else 0,
    'optimal_batch_size': optimal_batch_size,
    'directories_created': created_dirs,
    'installed_packages': installed_packages,
    'failed_packages': failed_packages
}

if torch.cuda.is_available():
    environment_info.update({
        'gpu_name': torch.cuda.get_device_name(0),
        'gpu_memory_gb': torch.cuda.get_device_properties(0).total_memory / (1024**3)
    })

# Save setup results
setup_results = {
    'environment_info': environment_info,
    'configuration': config,
    'setup_status': 'completed',
    'installation_metrics': installation_metrics
}

setup_metadata = {
    'setup_time': '2024-01-12T00:00:00Z',  # This would be actual timestamp
    'notebook_version': '1.0_enhanced',
    'utilities_version': '1.0'
}

try:
    success = save_stage_results(setup_results, "setup", setup_metadata)
    if success:
        print("‚úÖ Setup results saved successfully")
        print(f"üìÅ Results saved to: {output_dir / 'setup'}")
    else:
        print("‚ö†Ô∏è Could not save setup results")
except Exception as e:
    print(f"‚ö†Ô∏è Error saving setup results: {e}")

print("\nüéâ Setup and installation completed!")
print("\nüìã Next Steps:")
print("1. üìÇ Add your real beach images to the data/real/images/ directory")
print("2. ‚ñ∂Ô∏è Run notebook 02: Data Import and Preprocessing")
print("3. üîÑ Continue with the remaining pipeline notebooks")

## 8. Memory Cleanup

In [None]:
# Clean up large variables to free memory
large_variables = [
    # Add any large variables that were created during setup
]

try:
    cleanup_variables(large_variables)
    print("‚úÖ Memory cleanup completed")
    
    # Show final memory status
    final_memory = monitor_memory()
    print(f"üíª Final system memory usage: {final_memory.get('system_percent', 0):.1f}%")
    if 'gpu_percent' in final_memory:
        print(f"üéÆ Final GPU memory usage: {final_memory.get('gpu_percent', 0):.1f}%")
        
except Exception as e:
    print(f"‚ö†Ô∏è Memory cleanup warning: {e}")