In [None]:
import os
import warnings
import joblib
import numpy as np
import pandas as pd
import panel as pn
import holoviews as hv
import hvplot.pandas
from pathlib import Path

# Activate the Bokeh backend for HoloViews, then load Panel's front-end resources
hv.extension('bokeh')
pn.extension()
# Suppress every Python warning from here on
warnings.filterwarnings('ignore')

# Startup diagnostics: where the process runs and what it can see there
print("==== DASHBOARD INITIALIZATION ====")
print(f"Current working directory: {os.getcwd()}")
print(f"Directory contents: {os.listdir('.')}")

# Ensure a Model directory is present next to the working directory, then report on it
_model_dir = Path('Model')
_model_dir.mkdir(exist_ok=True)
_model_dir_present = _model_dir.exists()
print(f"Model directory exists: {_model_dir_present}")
if _model_dir_present:
    print(f"Model directory contents: {list(_model_dir.glob('*'))}")

class ToxinPredictionDashboard:
    """Panel dashboard that scores HAB (Microcystis) risk with two saved models.

    Nine sliders supply environmental inputs. When both model pipelines are
    found on disk, every slider change re-runs a Random Forest and an XGBoost
    pipeline and refreshes the result text, a bar chart comparing the two
    probabilities, and a one-feature sensitivity curve. When the pipelines
    cannot be loaded, the same layout is built with static placeholders and no
    callbacks are registered.
    """

    # Column order of every DataFrame handed to the two pipelines.
    # NOTE(review): presumably the column order used at training time - confirm.
    FEATURE_ORDER = ("Q", "Q_1m", "SpecCond.", "OrgC", "OrgN", "PO4", "WaterTemp", "DO", "pH")

    def __init__(self):
        """Load the models, build widgets and panes, and draw the first prediction."""
        # Must run first: widget wiring and placeholder panes depend on models_loaded.
        self.models_loaded = self._load_models()

        # Slider bounds and starting value for each model input
        self.feature_ranges = {
            'Q':          {'min': -333, 'max': 283, 'initial': 0},
            'Q_1m':       {'min': -300, 'max': 1020, 'initial': 0},
            'SpecCond.':  {'min': 56,  'max': 10834, 'initial': 400},
            'OrgC':       {'min': 1.4,  'max': 5.5,  'initial': 3},
            'OrgN':       {'min': 0.0,  'max': 1.6,  'initial': 1},
            'PO4':        {'min': 0.0,  'max': 0.53,  'initial': 0.3},
            'WaterTemp':  {'min': 13,   'max': 26.6,   'initial': 25},
            'DO':         {'min': 6.6,    'max': 10.7,   'initial': 7.5},
            'pH':         {'min': 7.0,  'max': 8.8,  'initial': 8}
        }

        # Human-readable labels; also used as the options of the sensitivity selector
        self.feature_descriptions = {
            'Q':          'Flow (Q, m³/s)',
            'Q_1m':       'Antecedent Flow (Q_1mon, m³/s)',
            'SpecCond.':  'Specific Conductance (μS/cm)',
            'OrgC':       'Dissolved Organic Carbon (DOC, mg/l)',
            'OrgN':       'Dissolved Organic Nitrogen (DON, mg/l)',
            'PO4':        'Dissolved Orthophosphate (PO4, mg/l)',
            'WaterTemp':  'Water Temperature (°C)',
            'DO':         'Dissolved Oxygen (DO, mg/l)',
            'pH':         'pH'
        }

        self.setup_components()
        self.create_layout_components()

        # Only update predictions if models are loaded
        if self.models_loaded:
            self.update_predictions(None)
        else:
            print("Dashboard is created with default placeholders due to model loading issues.")

    def _load_models(self) -> bool:
        """Locate and load both model pipelines.

        Probes a fixed list of candidate directories and loads
        ``rf_pipeline.joblib`` and ``xgb_pipeline.joblib`` from the first one
        that contains both files.

        Returns:
            True when both pipelines were loaded; False when no directory held
            both files or loading raised. Failures are printed, not raised.
        """
        # Always define the attributes so a failed load never leaves the
        # instance half-initialised.
        self.random_forest_model = None
        self.xgboost_model = None

        # Path('./Model') normalises to Path('Model'), so it is not listed separately.
        candidate_dirs = [
            Path('Model'),
            Path('../Model'),
            Path('/home/site/wwwroot/Model'),
        ]

        try:
            for model_dir in candidate_dirs:
                rf_path = model_dir / 'rf_pipeline.joblib'
                xgb_path = model_dir / 'xgb_pipeline.joblib'
                print(f"Trying model paths: {rf_path}, {xgb_path}")
                if rf_path.exists() and xgb_path.exists():
                    print(f"Found models at: {rf_path}, {xgb_path}")
                    # SECURITY: joblib.load unpickles arbitrary objects; these
                    # files must come from a trusted source.
                    rf_model = joblib.load(rf_path)
                    xgb_model = joblib.load(xgb_path)
                    # Assign only after both loads succeeded.
                    self.random_forest_model = rf_model
                    self.xgboost_model = xgb_model
                    return True

            # If we got here, no models were found
            print("No models found in any of the potential locations")
            return False

        except Exception as e:
            # Deliberate best-effort boundary: the dashboard falls back to placeholders.
            print(f"Error loading models: {str(e)}")
            return False

    def setup_components(self):
        """Create the input sliders and the sensitivity-feature selector.

        Callbacks are attached only when the models are loaded, so moving a
        widget in placeholder mode does nothing.
        """
        # One FloatSlider per feature, stepping in 1/100 of the feature's range
        self.feature_sliders = {}
        for feature, range_info in self.feature_ranges.items():
            slider = pn.widgets.FloatSlider(
                name=self.feature_descriptions[feature],
                start=range_info['min'],
                end=range_info['max'],
                value=range_info['initial'],
                step=(range_info['max'] - range_info['min']) / 100,
                width=450,
                styles={'font-size': '16px'}
            )
            self.feature_sliders[feature] = slider

        # Add callback only if models are loaded
        if self.models_loaded:
            for slider in self.feature_sliders.values():
                slider.param.watch(self.update_predictions, 'value')

        # The selector lists the descriptions; they are mapped back to feature
        # keys in update_sensitivity_analysis_plot.
        self.sensitivity_feature_selector = pn.widgets.Select(
            name='Select Feature for Sensitivity Analysis',
            options=list(self.feature_descriptions.values()),
            value=list(self.feature_descriptions.values())[0],
            width=450,
            styles={'font-size': '16px'}
        )

        # Add callback only if models are loaded
        if self.models_loaded:
            self.sensitivity_feature_selector.param.watch(
                self.update_sensitivity_analysis_plot, 'value'
            )

    def create_layout_components(self):
        """Create the static panes: logo, map, result text, plots and disclaimer.

        Missing image files are replaced by Markdown placeholders. When the
        models are not loaded, the result panes and both plots are filled with
        placeholder content; otherwise they start empty and are populated by
        update_predictions.
        """
        # Check if image files exist and use placeholders if not
        logo_path = Path('Logo.png')
        map_path = Path('Map.png')

        if logo_path.exists():
            self.logo = pn.pane.Image('Logo.png', width=600)
            print(f"Using logo from: {logo_path.absolute()}")
        else:
            self.logo = pn.pane.Markdown("# HAB Prediction Dashboard", styles={'color': 'navy', 'font-size': '24px'})
            print(f"Logo file not found at {logo_path.absolute()}")

        if map_path.exists():
            self.map_image = pn.pane.Image('Map.png', width=500)
            print(f"Using map from: {map_path.absolute()}")
        else:
            self.map_image = pn.pane.Markdown("""
            **Map image not found.**
            
            The map should show the Sacramento – San Joaquin Delta (Delta) with data collection locations.
            """, width=500, styles={'background': '#f9f9f9', 'padding': '10px', 'border': '1px solid #ddd', 'font-size': '14px'})
            print(f"Map file not found at {map_path.absolute()}")

        # Caption shown under the map
        self.map_description = pn.pane.Markdown("""
            Study area map demonstrating data collection locations within the Sacramento – San Joaquin Delta (Delta): 
            (a) Selected 14 data collection locations where *Microcystis* (cells/ml) and other 14 environmental variables' data were collected between 2014 and 2019 within Delta and 
            (b) Range of maximum values of *Microcystis* (cells/ml) throughout the Delta. 
            Note: *Microcystis* (cells/ml) represents qPCR-based lab-analyzed toxin-producing *Microcystis* (cells/ml)
            """, width=500, styles={'font-size': '16px'})

        # Result panes: empty when models are loaded (filled by update_predictions),
        # explanatory placeholder text otherwise
        self.random_forest_markdown = pn.pane.Markdown(
            "### Random Forest Prediction\n**Results will appear here when models are loaded**"
            if not self.models_loaded else "",
            width=400,
            styles={'font-size': '18px'}
        )
        self.xgboost_markdown = pn.pane.Markdown(
            "### XGBoost Prediction\n**Results will appear here when models are loaded**"
            if not self.models_loaded else "",
            width=400,
            styles={'font-size': '18px'}
        )

        if self.models_loaded:
            # Empty panes; their .object is assigned on the first prediction
            self.model_comparison_plot = pn.pane.HoloViews(width=450)
            self.sensitivity_analysis_plot = pn.pane.HoloViews(width=700)
        else:
            # Flat zero-valued plots so the layout keeps its shape
            placeholder_df = pd.DataFrame({
                'Model': ['Random Forest', 'XGBoost'],
                'Probability': [0, 0]
            })

            self.model_comparison_plot = pn.pane.HoloViews(
                placeholder_df.hvplot.bar(
                    x='Model', y='Probability',
                    title='Comparison of Models\' Predicted Probabilities',
                    width=450, height=350
                ),
                width=450
            )

            placeholder_sensitivity_df = pd.DataFrame({
                'Value': np.linspace(0, 1, 20),
                'Random Forest': [0] * 20,
                'XGBoost': [0] * 20
            })

            self.sensitivity_analysis_plot = pn.pane.HoloViews(
                placeholder_sensitivity_df.hvplot.line(
                    x='Value', y=['Random Forest', 'XGBoost'],
                    title='Sensitivity Analysis (Models Not Loaded)',
                    width=700, height=450
                ),
                width=700
            )

        # Footer disclaimer and contact details
        self.disclaimer = pn.pane.Markdown("""
        ---
        **Disclaimer: This dashboard is still in beta.**
        
        **Thank you for evaluating the HAB Dashboard.**
        
        **If you have feedback, suggestions or questions, please contact:**
        - **Peyman Namadi (Peyman.Hosseinzadehnamadi@Water.ca.gov)**
        """, styles={'font-size': '14px'})

    @staticmethod
    def _describe_label(label) -> str:
        """Return the result text shown for a predicted class label (1 means risky)."""
        if label == 1:
            return "Risky (caution) HAB event (Microcystis > 4,000 cells/ml)"
        return "Low (≤4000 cells/ml)"

    @staticmethod
    def _sensitivity_ylim(rf_probs, xgb_probs, floor=25, padding=5):
        """Return ``(lower, upper)`` y-axis limits, in percent, for the sensitivity plot.

        The limits are the data range widened by ``padding`` and clamped so the
        lower bound is at least ``floor`` and the upper bound at most 100. A
        curve that only partly dips below ``floor`` is therefore still cut off
        at the floor. When every point lies at or below ``floor``, the floor is
        dropped (the lower bound is clamped at 0 instead) so the range is never
        inverted or empty.
        """
        data_min = min(min(rf_probs), min(xgb_probs))
        data_max = max(max(rf_probs), max(xgb_probs))

        upper = min(100, data_max + padding)
        if data_max <= floor:
            # Applying the floor here would hide the whole curve.
            lower = max(0, data_min - padding)
        else:
            lower = max(floor, data_min - padding)
        return lower, upper

    def _sensitivity_probabilities(self, feature, x_values):
        """Sweep one feature over ``x_values`` with all other sliders held fixed.

        Builds a single frame with one row per sweep value and calls each
        pipeline's ``predict_proba`` once.

        Returns:
            Two arrays (Random Forest, XGBoost) holding column 1 of
            ``predict_proba`` scaled to percent, one entry per sweep value.
        """
        current_values = {name: slider.value for name, slider in self.feature_sliders.items()}
        x_values = np.asarray(x_values)

        batch = pd.DataFrame([current_values] * len(x_values))
        batch[feature] = x_values
        # list(...) is required: indexing with a tuple would be read as a single key.
        batch = batch[list(self.FEATURE_ORDER)]

        rf_probs = np.asarray(self.random_forest_model.predict_proba(batch))[:, 1] * 100
        xgb_probs = np.asarray(self.xgboost_model.predict_proba(batch))[:, 1] * 100
        return rf_probs, xgb_probs

    def update_predictions(self, event):
        """Re-run both models on the current slider values and refresh the outputs.

        Args:
            event: the param watcher event, or None when called directly; unused.
        """
        if not self.models_loaded:
            print("Cannot update predictions: models not loaded")
            return

        # One-row frame with the current slider values in model column order
        current_values = {f: s.value for f, s in self.feature_sliders.items()}
        sample_df = pd.DataFrame([current_values])[list(self.FEATURE_ORDER)]

        # Column 1 of predict_proba is used as the "risky" probability.
        # NOTE(review): assumes the pipelines' classes are ordered [0, 1] - confirm.
        rf_label = self.random_forest_model.predict(sample_df)[0]
        rf_prob = self.random_forest_model.predict_proba(sample_df)[0][1]
        xgb_label = self.xgboost_model.predict(sample_df)[0]
        xgb_prob = self.xgboost_model.predict_proba(sample_df)[0][1]

        rf_result_text = self._describe_label(rf_label)
        xgb_result_text = self._describe_label(xgb_label)

        self.random_forest_markdown.object = f"""
        ### Random Forest Prediction
        **Result:** {rf_result_text}
        **Probability:** {rf_prob:.2%} chance of having Risky (caution) HAB events
        """

        self.xgboost_markdown.object = f"""
        ### XGBoost Prediction
        **Result:** {xgb_result_text}
        **Probability:** {xgb_prob:.2%} chance of having Risky (caution) HAB events
        """

        # Bar chart comparing the two probabilities, in percent
        comparison_df = pd.DataFrame({
            'Model': ['Random Forest', 'XGBoost'],
            'Probability': [rf_prob*100, xgb_prob*100]
        })

        self.model_comparison_plot.object = comparison_df.hvplot.bar(
            x='Model',
            y='Probability',
            width=450,
            height=350,
            color=['#1f77b4', '#ff7f0e']
        ).opts(
            tools=['hover'],
            yformatter='%.0f%%',  # Format as percentage
            ylabel="Probability of having\nRisky (caution) HAB event (%)",
            toolbar=None,
            fontsize={'labels': 16, 'xticks': 16, 'yticks': 16},
            bar_width=0.3
        )

        # The sensitivity curve depends on every slider, so redraw it as well
        self.update_sensitivity_analysis_plot(None)

    def update_sensitivity_analysis_plot(self, event):
        """Redraw the sensitivity curve for the feature chosen in the selector.

        The selected feature is swept over 20 evenly spaced values between its
        slider bounds while the other inputs keep their current slider values.

        Args:
            event: the param watcher event, or None when called directly; unused.
        """
        if not self.models_loaded:
            print("Cannot update sensitivity analysis: models not loaded")
            return

        # Map the selected description back to its feature key
        selected_feature_desc = self.sensitivity_feature_selector.value
        selected_feature = next(k for k, v in self.feature_descriptions.items() if v == selected_feature_desc)
        range_info = self.feature_ranges[selected_feature]

        x_values = np.linspace(range_info['min'], range_info['max'], 20)
        rf_probs, xgb_probs = self._sensitivity_probabilities(selected_feature, x_values)
        min_val, max_val = self._sensitivity_ylim(rf_probs, xgb_probs)

        # Column names carry a "Model: " prefix so the legend entries read well
        sensitivity_df = pd.DataFrame({
            'Value': x_values,
            'Model: Random Forest': rf_probs,
            'Model: XGBoost': xgb_probs
        })

        plot = sensitivity_df.hvplot.line(
            x='Value',
            y=['Model: Random Forest', 'Model: XGBoost'],
            title=f'Sensitivity Analysis: {selected_feature_desc}',
            width=700,
            height=450,
            xlabel=selected_feature_desc,
            ylabel='Probability of having\nRisky (caution) HAB event\n(Microcystis > 4,000 cells/ml) (%)',
            color=['#1f77b4', '#ff7f0e'],
            ylim=(min_val, max_val),
            line_width=3
        ).opts(
            legend_position='bottom_right',
            tools=['hover'],
            yformatter='%.0f%%',  # Format as percentage
            toolbar=None,
            fontsize={'title': 16, 'labels': 14, 'xticks': 14, 'yticks': 14}
        )

        # Attempt to clear the legend title ("Variable").
        # NOTE(review): the HoloViews object returned by hvplot may not expose a
        # `legend` attribute, in which case this is a no-op - confirm.
        if hasattr(plot, 'legend') and hasattr(plot.legend[0], 'title'):
            plot.legend[0].title = None

        self.sensitivity_analysis_plot.object = plot

    def create_dashboard_layout(self):
        """Assemble all components into the final page layout.

        Also registers the dashboard's raw CSS through ``pn.extension``.

        Returns:
            A ``pn.Column`` holding the logo row, the two-column body (inputs
            and sensitivity analysis on the left, results, map and comparison
            on the right) and the disclaimer.
        """
        # Section headers share one CSS class
        input_header = pn.pane.Markdown("## Input Environmental Variables", css_classes=['section-header'])
        results_header = pn.pane.Markdown("## Prediction Results", css_classes=['section-header'])
        comparison_header = pn.pane.Markdown("## Model Comparison", css_classes=['section-header'])
        sensitivity_header = pn.pane.Markdown("## Sensitivity Analysis", css_classes=['section-header'])

        # Apply CSS styles with larger text
        style = """
        .section-header {
            background-color: #2c3e50;
            color: white;
            padding: 10px;
            margin-bottom: 10px;
            border-radius: 5px;
            font-size: 22px;  /* Increased font size from 18px to 22px */
        }
        
        .bk-root {
            font-family: Arial, sans-serif;
            font-size: 16px;  /* Increased base font size from 14px to 16px */
        }
        
        /* Better visibility for elements */
        .panel-widget-box {
            border: 1px solid #ddd;
            border-radius: 5px;
            background-color: #f9f9f9;
            padding: 5px;
            margin-bottom: 10px;
        }
        
        /* Make all widget labels larger */
        .bk-root .bk-input-group label {
            font-size: 16px !important;  /* Increased from 14px to 16px */
        }
        
        /* Make axis labels and tick labels larger */
        .bk-root .bk-axis .bk-axis-label {
            font-size: 16px !important;  /* Increased from 14px to 16px */
        }
        
        .bk-root .bk-axis .bk-tick text {
            font-size: 14px !important;  /* Increased from 12px to 14px */
        }
        
        /* Make plot titles larger */
        .bk-root .bk-plot-title {
            font-size: 18px !important;  /* Increased from 16px to 18px */
        }
        
        /* Make legend text larger */
        .bk-root .bk-legend-label {
            font-size: 16px !important;  /* Increased from 14px to 16px */
        }
        """
        pn.extension(raw_css=[style])

        # Left side layout: inputs and sensitivity analysis
        left_col = pn.Column(
            input_header,
            *[self.feature_sliders[feat] for feat in self.feature_ranges.keys()],
            sensitivity_header,
            self.sensitivity_feature_selector,
            self.sensitivity_analysis_plot,
            width=750,
            scroll=True
        )

        # Right side layout with map next to predictions
        model_results = pn.Column(
            results_header,
            pn.Row(
                pn.Column(
                    self.random_forest_markdown,
                    self.xgboost_markdown,
                    width=450
                ),
                pn.Column(
                    self.map_image,
                    self.map_description,
                    width=500
                )
            ),
            comparison_header,
            self.model_comparison_plot,
            width=950
        )

        # Main layout
        layout = pn.Column(
            pn.Row(self.logo, sizing_mode='stretch_width'),
            pn.Row(
                left_col,
                model_results
            ),
            self.disclaimer,
            sizing_mode='stretch_width'
        )

        return layout

# Build the dashboard when this cell/module runs. Instantiating the class
# attempts to load the model files and prints diagnostics along the way.
dashboard_app = ToxinPredictionDashboard()
# Assemble the page layout from the widgets and panes created above.
dashboard = dashboard_app.create_dashboard_layout()
# Mark the layout as servable so it is rendered when run under `panel serve`.
dashboard.servable()
# Alternative for a standalone local run (starts its own server):
# dashboard.show()

==== DASHBOARD INITIALIZATION ====
Current working directory: c:\DWR\HAB\HAB-main
Directory contents: ['Data_2', 'environment.yml', 'habdashboard.ipynb', 'HAB_Dashboard_9features_RFandXGB.ipynb', 'Logo.png', 'Map.png', 'Map.tif', 'Map1.png', 'Model', 'README.md', 'readme.pdf', 'run_server.sh', 'Sensitivity_Analysis_All_Variables.eps', 'Sensitivity_Analysis_All_Variables.png', 'Testing_RF_XGB_for _reproducability_V3.ipynb']
Model directory exists: True
Model directory contents: [WindowsPath('Model/1_results_with_9params.csv'), WindowsPath('Model/confusion_matrix_Random Forest_1.png'), WindowsPath('Model/confusion_matrix_XGBoost_1.png'), WindowsPath('Model/feature_importance_1.png'), WindowsPath('Model/rf_pipeline.joblib'), WindowsPath('Model/scaler.pkl'), WindowsPath('Model/test_results.csv'), WindowsPath('Model/xgb_pipeline.joblib')]
Trying model paths: Model\rf_pipeline.joblib, Model\xgb_pipeline.joblib
Found models at: Model\rf_pipeline.joblib, Model\xgb_pipeline.joblib
Using logo fr

Launching server at http://localhost:60002


<panel.io.server.Server at 0x154a1746af0>