# Streamlit Deployment Demonstration\n\nThis notebook demonstrates how to deploy and run our exoplanet detection Streamlit application in various environments, including Google Colab, Kaggle, and local setups.\n\n## Objectives:\n1. **Environment Setup**: Configure Streamlit in notebook environments\n2. **Model Loading**: Load trained models for the web interface\n3. **App Deployment**: Launch the Streamlit application\n4. **Testing**: Verify functionality with sample data\n5. **Troubleshooting**: Handle common deployment issues

In [None]:
# Environment detection and setup
import sys
import os
from pathlib import Path
import subprocess
import time
import threading


# Detect environment
def detect_environment():
    """Detect the current execution environment.

    Checks, in order:
      1. whether the ``google.colab`` module is already loaded -> 'colab'
      2. whether any Kaggle marker is present -> 'kaggle'
      3. otherwise -> 'local'

    Returns:
        str: one of 'colab', 'kaggle' or 'local'.
    """
    if 'google.colab' in sys.modules:
        return 'colab'

    # The cwd / loaded-module markers alone miss a Kaggle session whose
    # working directory was changed, so the environment variable is checked too.
    # NOTE(review): KAGGLE_KERNEL_RUN_TYPE is expected to be exported by the
    # Kaggle notebook runtime - confirm against the target image.
    if (
        'kaggle_secrets' in sys.modules
        or os.environ.get('KAGGLE_KERNEL_RUN_TYPE')
        or '/kaggle/' in os.getcwd()
    ):
        return 'kaggle'

    return 'local'


environment = detect_environment()
print(f"Detected environment: {environment}")

# Add src to path (only once, so re-running this cell does not stack duplicates)
src_dir = str(Path.cwd().parent / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

import warnings
warnings.filterwarnings('ignore')

print("Environment setup complete!")

## 1. Install Dependencies\n\nFirst, let's ensure all required packages are installed.

In [None]:
# Install required packages
import importlib

required_packages = [
    'streamlit',
    'plotly',
    'torch',
    'scikit-learn',
    'pandas',
    'numpy',
    'matplotlib',
    'seaborn'
]


def install_packages(packages, import_names=None):
    """Install every package in ``packages`` that cannot be imported.

    Args:
        packages: iterable of pip distribution names.
        import_names: optional mapping from distribution name to importable
            module name, for packages where the two differ. Entries are merged
            over the built-in mapping below.

    Raises:
        subprocess.CalledProcessError: if a ``pip install`` call fails.
    """
    # The pip name and the import name are not always the same; probing the
    # pip name ('scikit-learn') always fails and forces a reinstall every run.
    module_for = {'scikit-learn': 'sklearn'}
    if import_names:
        module_for.update(import_names)

    for package in packages:
        module_name = module_for.get(package, package)
        try:
            importlib.import_module(module_name)
            print(f"✓ {package} already installed")
        except ImportError:
            print(f"Installing {package}...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])
            print(f"✓ {package} installed")


# Install packages
install_packages(required_packages)

# Special handling for environment-specific packages
if environment == 'colab':
    # Install pyngrok for Colab tunneling
    try:
        import pyngrok
        print("✓ pyngrok already available")
    except ImportError:
        print("Installing pyngrok for Colab...")
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pyngrok'])
        print("✓ pyngrok installed")

print("\nAll dependencies installed successfully!")

## 2. Prepare Application Files\n\nLet's ensure our Streamlit application files are properly set up.

In [None]:
# Check Streamlit app structure
streamlit_dir = Path.cwd().parent / 'streamlit_app'

required_files = [
    'main.py',
    'pages/beginner_mode.py',
    'pages/research_mode.py',
    'pages/model_comparison.py',
    'pages/about.py',
    'utils/app_utils.py',
    'requirements.txt'
]

print(f"Checking Streamlit app structure in: {streamlit_dir}")

# Report each expected file and remember the ones that are absent.
missing_files = []
for relative_name in required_files:
    if (streamlit_dir / relative_name).exists():
        print(f"✓ {relative_name}")
        continue
    print(f"✗ {relative_name} (missing)")
    missing_files.append(relative_name)

if not missing_files:
    print("\n✓ All required files are present!")
else:
    print(f"\nWarning: {len(missing_files)} files are missing. The app may not work correctly.")

# Check for model files
models_dir = Path.cwd().parent / 'models'
print(f"\nChecking for trained models in: {models_dir}")

if not models_dir.exists():
    # Only the (empty) directory is created here; no model files are written.
    print("Models directory not found. Creating mock models for demo...")
    models_dir.mkdir(parents=True, exist_ok=True)
else:
    model_files = list(models_dir.glob('*.pt'))
    if not model_files:
        print("No trained models found. You may need to train models first.")
    else:
        print(f"Found {len(model_files)} model files:")
        # List at most five names, then summarise the remainder.
        for checkpoint in model_files[:5]:
            print(f"  - {checkpoint.name}")
        not_shown = len(model_files) - 5
        if not_shown > 0:
            print(f"  ... and {not_shown} more")

## 3. Create Sample Data\n\nLet's create some sample data for testing the application.

In [None]:
import numpy as np
import pandas as pd


def create_sample_data(n_samples=10, n_planets=3, seed=42):
    """Create sample light curve data for testing.

    Args:
        n_samples: number of synthetic light curves to generate.
        n_planets: the first ``n_planets`` samples receive a transit dip and
            are labelled ``has_planet=True``.
        seed: value passed to ``np.random.seed`` (resets NumPy's global RNG).

    Returns:
        list[dict]: one dict per star with keys 'star_id', 'time', 'flux',
        'has_planet', 'magnitude' and 'temperature'. 'time' and 'flux' are
        plain Python lists of length 2048.
    """
    np.random.seed(seed)

    sequence_length = 2048
    sample_data = []

    for i in range(n_samples):
        # Create base stellar signal: flat flux around 1.0 with Gaussian noise.
        # (Named time_steps so the stdlib `time` module is not shadowed here.)
        time_steps = np.arange(sequence_length)
        flux = np.random.normal(1.0, 0.01, sequence_length)

        # Add some stellar variability to roughly 30% of the samples
        if np.random.random() < 0.3:
            period = np.random.uniform(10, 50)
            amplitude = np.random.uniform(0.005, 0.02)
            flux += amplitude * np.sin(2 * np.pi * time_steps / period)

        # Add a box-shaped transit for the leading samples
        has_planet = i < n_planets
        if has_planet:
            transit_center = np.random.randint(500, 1500)
            transit_width = np.random.randint(20, 60)
            transit_depth = np.random.uniform(0.005, 0.02)

            start = max(0, transit_center - transit_width // 2)
            end = min(sequence_length, transit_center + transit_width // 2)
            flux[start:end] -= transit_depth

        sample_data.append({
            'star_id': f'SAMPLE_{i:03d}',
            'time': time_steps.tolist(),
            'flux': flux.tolist(),
            'has_planet': has_planet,
            'magnitude': np.random.uniform(10, 16),
            'temperature': np.random.uniform(3500, 7000)
        })

    return sample_data


# Create and save sample data
sample_data = create_sample_data()

# Save as CSV for the app (metadata only; the time/flux arrays are not exported)
sample_df = pd.DataFrame([
    {
        'star_id': item['star_id'],
        'magnitude': item['magnitude'],
        'temperature': item['temperature'],
        'has_planet': item['has_planet']
    }
    for item in sample_data
])

# The structure check only warns when the app directory is missing, so make
# sure it exists before writing; otherwise to_csv raises.
streamlit_dir.mkdir(parents=True, exist_ok=True)
sample_csv_path = streamlit_dir / 'sample_data.csv'
sample_df.to_csv(sample_csv_path, index=False)

print(f"Created sample data with {len(sample_data)} light curves")
print(f"Saved sample CSV to: {sample_csv_path}")
print(f"Planet distribution: {sample_df['has_planet'].value_counts().to_dict()}")

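## 4. Launch Streamlit Application\n\nNow let's launch the Streamlit application based on the detected environment.\n\n**Note:** In the local and Kaggle environments the launch call blocks this cell until the server is stopped (interrupt the kernel to continue with the cells below). In Google Colab the server is started in the background and the cell returns once the tunnel is created.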

In [None]:
def launch_streamlit_local(port=8501):
    """Launch Streamlit application locally.

    Blocks until the Streamlit process exits or the kernel is interrupted.

    Args:
        port: TCP port passed to ``--server.port``.
    """

    app_path = streamlit_dir / 'main.py'

    print("Launching Streamlit application locally...")
    print(f"App path: {app_path}")
    print("\nThe application will open in your default browser.")
    print("To stop the application, interrupt the kernel or press Ctrl+C in the terminal.\n")

    # Launch Streamlit
    cmd = [
        sys.executable, '-m', 'streamlit', 'run',
        str(app_path),
        '--server.port', str(port),
        '--server.headless', 'false'
    ]

    try:
        subprocess.run(cmd, cwd=str(streamlit_dir))
    except KeyboardInterrupt:
        print("\nStreamlit application stopped.")


def _wait_for_server(process, port, timeout=30.0, poll_interval=0.5):
    """Wait until ``localhost:port`` accepts TCP connections.

    Args:
        process: the ``subprocess.Popen`` handle of the server.
        port: TCP port the server is expected to listen on.
        timeout: maximum number of seconds to wait.
        poll_interval: pause between connection attempts, in seconds.

    Returns:
        bool: True once a connection succeeds, False if ``timeout`` elapses.

    Raises:
        RuntimeError: if the server process exits before the port opens.
    """
    import socket  # local import: only this helper needs it

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if process.poll() is not None:
            raise RuntimeError(
                f"Streamlit server exited early with code {process.returncode}"
            )
        try:
            with socket.create_connection(('localhost', port), timeout=1):
                return True
        except OSError:
            time.sleep(poll_interval)
    return False


def launch_streamlit_colab(port=8501, startup_timeout=30.0):
    """Launch Streamlit application in Google Colab using ngrok.

    Starts Streamlit as a background process, waits for it to accept
    connections and then opens an ngrok tunnel to it.

    Args:
        port: TCP port for the Streamlit server and the tunnel.
        startup_timeout: seconds to wait for the server before continuing
            with tunnel creation anyway.

    Returns:
        tuple: ``(process, public_url)`` - the ``subprocess.Popen`` handle of
        the server and the value returned by ``ngrok.connect``.

    Raises:
        RuntimeError: if the server process exits during startup.
    """

    try:
        from pyngrok import ngrok
    except ImportError:
        print("pyngrok not available. Installing...")
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pyngrok'])
        from pyngrok import ngrok

    app_path = streamlit_dir / 'main.py'

    print("Setting up Streamlit for Google Colab...")

    # Kill any existing ngrok tunnels
    ngrok.kill()

    # Start Streamlit in background
    # NOTE(review): CORS and XSRF protection are disabled here and the app is
    # exposed through a public tunnel - acceptable for a demo only.
    cmd = [
        sys.executable, '-m', 'streamlit', 'run',
        str(app_path),
        '--server.port', str(port),
        '--server.headless', 'true',
        '--server.enableCORS', 'false',
        '--server.enableXsrfProtection', 'false'
    ]

    print("Starting Streamlit server...")
    process = subprocess.Popen(cmd, cwd=str(streamlit_dir))

    try:
        # Poll for readiness instead of sleeping a fixed time: returns as soon
        # as the server is up and detects a server that died on startup.
        if not _wait_for_server(process, port, timeout=startup_timeout):
            print(f"⚠️  Streamlit did not respond on port {port} within {startup_timeout:.0f}s; continuing anyway.")

        # Create ngrok tunnel
        print("Creating ngrok tunnel...")
        public_url = ngrok.connect(port)
    except BaseException:
        # Do not leave an orphaned server behind if startup or tunnelling
        # fails (or is interrupted); the error is re-raised unchanged.
        process.terminate()
        raise

    print(f"\n🚀 Streamlit app is running!")
    print(f"📱 Access your app at: {public_url}")
    print(f"\n⚠️  Keep this notebook running to maintain the connection.")

    return process, public_url


def launch_streamlit_kaggle(port=8501):
    """Launch Streamlit application in Kaggle environment.

    Blocks until the Streamlit process exits or the kernel is interrupted.

    Args:
        port: TCP port passed to ``--server.port``.
    """

    app_path = streamlit_dir / 'main.py'

    print("Setting up Streamlit for Kaggle...")

    # Kaggle-specific configuration: headless, listening on all interfaces
    cmd = [
        sys.executable, '-m', 'streamlit', 'run',
        str(app_path),
        '--server.port', str(port),
        '--server.headless', 'true',
        '--server.address', '0.0.0.0'
    ]

    print("Starting Streamlit server...")
    print("Note: In Kaggle, you may need to use port forwarding to access the app.")

    try:
        subprocess.run(cmd, cwd=str(streamlit_dir))
    except KeyboardInterrupt:
        print("\nStreamlit application stopped.")


# Launch based on environment
if environment == 'local':
    print("Launching Streamlit locally...")
    launch_streamlit_local()
elif environment == 'colab':
    print("Launching Streamlit in Google Colab...")
    process, url = launch_streamlit_colab()
elif environment == 'kaggle':
    print("Launching Streamlit in Kaggle...")
    launch_streamlit_kaggle()
else:
    # Defensive fallback for an unrecognised environment value.
    print(f"Unknown environment: {environment}. Trying local launch...")
    launch_streamlit_local()

## 5. Test Application Functionality\n\nLet's test key functionality of our application programmatically.

In [None]:
# Test application components
import requests
import json


def test_app_health(url="http://localhost:8501"):
    """Test if the Streamlit app is responding.

    Args:
        url: base URL of the running Streamlit server.

    Returns:
        bool: True when ``GET {url}/healthz`` answers with HTTP 200, False on
        any other status or on a request error.
    """
    # NOTE(review): newer Streamlit releases may prefer a different health
    # endpoint than /healthz - confirm against the installed version.
    try:
        response = requests.get(f"{url}/healthz", timeout=5)
        if response.status_code == 200:
            print("✓ Application is healthy and responding")
            return True
        else:
            print(f"⚠️  Application responded with status: {response.status_code}")
            return False
    except requests.exceptions.RequestException as e:
        print(f"✗ Application health check failed: {e}")
        return False


def test_model_loading():
    """Test model loading functionality.

    Imports ``load_models`` / ``create_mock_models`` from the app's
    ``utils.app_utils`` module and falls back to mock models when
    ``load_models()`` returns nothing.

    Returns:
        bool: True when loading (or the mock fallback) completed, False if
        any exception was raised.
    """

    try:
        # Import app utilities; add the app directory to the path only once so
        # repeated calls do not accumulate duplicate entries.
        app_dir = str(streamlit_dir)
        if app_dir not in sys.path:
            sys.path.insert(0, app_dir)
        from utils.app_utils import load_models, create_mock_models

        print("Testing model loading...")

        # Try to load models
        models = load_models()

        if models:
            print(f"✓ Successfully loaded {len(models)} models")
            for model_name in models.keys():
                print(f"  - {model_name}")
        else:
            print("⚠️  No models loaded, creating mock models...")
            mock_models = create_mock_models()
            print(f"✓ Created {len(mock_models)} mock models for demo")

        return True

    except Exception as e:
        print(f"✗ Model loading test failed: {e}")
        return False


def test_data_processing():
    """Test data processing functionality.

    Reads ``sample_data.csv`` from the app directory and checks that the
    'star_id', 'magnitude' and 'temperature' columns are present.

    Returns:
        bool: True only when the file exists and all required columns are
        present; False when the file is missing, columns are missing, or an
        exception is raised.
    """

    try:
        # Test with sample data
        sample_csv = streamlit_dir / 'sample_data.csv'

        if not sample_csv.exists():
            print("✗ Sample data file not found")
            return False

        df = pd.read_csv(sample_csv)
        print(f"✓ Sample data loaded: {len(df)} samples")

        # Test data validation
        required_columns = ['star_id', 'magnitude', 'temperature']
        missing_cols = [col for col in required_columns if col not in df.columns]

        if missing_cols:
            # A failed validation must fail the test, otherwise the summary
            # reports success for a malformed file.
            print(f"⚠️  Missing columns: {missing_cols}")
            return False

        print("✓ Data format validation passed")
        return True

    except Exception as e:
        print(f"✗ Data processing test failed: {e}")
        return False


# Run tests
print("Running application tests...\n")

test_results = {
    'model_loading': test_model_loading(),
    'data_processing': test_data_processing()
}

# Summary
passed_tests = sum(test_results.values())
total_tests = len(test_results)

print(f"\n📊 Test Results: {passed_tests}/{total_tests} tests passed")

if passed_tests == total_tests:
    print("🎉 All tests passed! The application is ready to use.")
else:
    print("⚠️  Some tests failed. Check the output above for details.")
    failed_tests = [test for test, result in test_results.items() if not result]
    print(f"Failed tests: {failed_tests}")

## 6. Troubleshooting Guide\n\nCommon issues and solutions for deployment.

In [None]:
def print_troubleshooting_guide():
    """Print comprehensive troubleshooting guide."""

    # The port example uses main.py, the entry point the launch functions and
    # quick-start commands run.
    guide = """
🔧 TROUBLESHOOTING GUIDE
========================

1. **Port Already in Use**
   - Kill existing Streamlit processes: `pkill -f streamlit`
   - Use different port: `streamlit run main.py --server.port 8502`

2. **Module Import Errors**
   - Ensure all dependencies are installed: `pip install -r requirements.txt`
   - Check Python path includes src directory
   - Verify file structure is correct

3. **Model Loading Issues**
   - Check if model files exist in models/ directory
   - Verify model file format (.pt files)
   - Use mock models for demo if real models unavailable

4. **Google Colab Issues**
   - Install pyngrok: `!pip install pyngrok`
   - Set ngrok auth token if required
   - Keep notebook running to maintain tunnel

5. **Kaggle Issues**
   - Enable internet access in Kaggle settings
   - Use port forwarding for external access
   - Check Kaggle's networking restrictions

6. **Performance Issues**
   - Reduce model complexity for demo
   - Use smaller sample datasets
   - Enable caching in Streamlit

7. **File Permission Issues**
   - Check file permissions: `chmod +x run_app.py`
   - Ensure write access to cache directories

8. **Memory Issues**
   - Reduce batch size for predictions
   - Use CPU instead of GPU if memory limited
   - Clear cache periodically

📞 **Getting Help**
- Check Streamlit documentation: https://docs.streamlit.io
- Review application logs for detailed error messages
- Ensure all requirements are met for your environment
    """

    print(guide)


def check_system_requirements():
    """Check system requirements and configuration.

    Prints the Python version, available memory (when ``psutil`` is
    installed), free disk space of the current directory and whether an
    HTTPS request to google.com succeeds. Every probe is best-effort: a
    failing probe prints a notice instead of raising.
    """

    print("🔍 SYSTEM REQUIREMENTS CHECK")
    print("============================\n")

    # Python version
    python_version = sys.version_info
    print(f"Python version: {python_version.major}.{python_version.minor}.{python_version.micro}")

    if python_version >= (3, 7):
        print("✓ Python version is compatible")
    else:
        print("✗ Python 3.7+ required")

    # Memory check
    try:
        import psutil
        memory = psutil.virtual_memory()
        print(f"Available memory: {memory.available / (1024**3):.1f} GB")

        if memory.available > 2 * (1024**3):  # 2GB
            print("✓ Sufficient memory available")
        else:
            print("⚠️  Low memory - consider using smaller models")
    except ImportError:
        print("Memory check skipped (psutil not available)")

    # Disk space
    try:
        import shutil
        disk_usage = shutil.disk_usage('.')
        free_gb = disk_usage.free / (1024**3)
        print(f"Free disk space: {free_gb:.1f} GB")

        if free_gb > 1:
            print("✓ Sufficient disk space")
        else:
            print("⚠️  Low disk space")
    except OSError:
        # Narrowed from a bare except so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        print("Disk space check skipped")

    # Network connectivity
    try:
        import urllib.request
        # Context manager closes the HTTP response instead of leaking it.
        with urllib.request.urlopen('https://www.google.com', timeout=5):
            pass
        print("✓ Internet connectivity available")
    except Exception:
        # Still best-effort for any network/HTTP error, but an interrupt
        # now propagates.
        print("⚠️  Limited internet connectivity")

    print("\n" + "="*50)


# Run system check
check_system_requirements()

# Print troubleshooting guide
print_troubleshooting_guide()

## 7. Quick Start Commands\n\nReady-to-use commands for different scenarios.

In [None]:
def print_quick_start_commands():
    """Print quick start commands for different environments.

    Each scenario is rendered as a bold title followed by a fenced code
    block; the scenarios are separated by blank lines.
    """

    # (title, fence language, command lines) for every scenario, in print order
    scenarios = [
        ("Local Development", "bash", [f"cd {streamlit_dir}", "streamlit run main.py"]),
        ("With Custom Port", "bash", ["streamlit run main.py --server.port 8502"]),
        ("Background Mode", "bash", ["nohup streamlit run main.py &"]),
        ("Google Colab", "python", [
            "!pip install pyngrok streamlit",
            "# Run this notebook's Colab launch function",
        ]),
        ("Kaggle", "python", [
            "!pip install streamlit",
            "# Run this notebook's Kaggle launch function",
        ]),
        ("Docker (if available)", "bash", [
            "docker build -t exoplanet-app .",
            "docker run -p 8501:8501 exoplanet-app",
        ]),
        ("Testing", "bash", [
            "# Test with sample data",
            "curl http://localhost:8501/healthz",
        ]),
    ]

    fence = "`" * 3
    rendered = [
        "\n".join([f"**{title}:**", f"{fence}{lang}", *lines, fence])
        for title, lang, lines in scenarios
    ]

    banner = "\n🚀 QUICK START COMMANDS\n======================\n\n"
    # Trailing "\n    " keeps the output identical to the original literal,
    # whose closing quotes sat on an indented line.
    commands = banner + "\n\n".join(rendered) + "\n    "

    print(commands)


# Print commands
print_quick_start_commands()

# Final summary, emitted as one newline-joined block
rule = "=" * 60
summary_lines = [
    "\n" + rule,
    "🎯 DEPLOYMENT SUMMARY",
    rule,
    f"Environment: {environment}",
    f"Streamlit app directory: {streamlit_dir}",
    f"Sample data available: {(streamlit_dir / 'sample_data.csv').exists()}",
    f"Models directory: {models_dir}",
    "",
    "✅ Deployment notebook completed successfully!",
    "🚀 Your exoplanet detection app is ready to launch!",
    "",
    "Next steps:",
    "1. Run the launch command for your environment",
    "2. Open the provided URL in your browser",
    "3. Test with the sample data or upload your own",
    "4. Explore both Beginner and Research modes",
    rule,
]
print("\n".join(summary_lines))