In [None]:
# Install the H2O library
!pip install -q h2o
!pip install -q ydata-profiling
!pip install -q scikit-learn matplotlib seaborn
# Import necessary libraries
import pandas as pd
from google.colab import files, userdata
import google.generativeai as genai
import io
import h2o
from h2o.automl import H2OAutoML
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.9/265.9 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import base64
from google.colab import files
import io
import re
import numpy as np

In [None]:
# --- Securely configure the Gemini API ---
# The key is read from the Colab secret named 'GOOGLE_API_KEY' rather than
# being hard-coded in the notebook.
try:
    GEMINI_API_KEY = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=GEMINI_API_KEY)
except userdata.SecretNotFoundError as err:
    # Report the secret name that was actually looked up, and keep the
    # original exception attached for debugging.
    raise ValueError("GOOGLE_API_KEY not found. Please set it in Colab Secrets.") from err

# --- Initialize the H2O Cluster ---
# max_mem_size caps the memory handed to the H2O instance.
h2o.init(max_mem_size="4G")

print("✅ Gemini API and H2O Cluster configured successfully.")

Checking whether there is an H2O instance running at http://localhost:54321. connected.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,2 mins 09 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.7
H2O_cluster_version_age:,5 months and 16 days
H2O_cluster_name:,H2O_from_python_unknownUser_hapr55
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.170 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


✅ Gemini API and H2O Cluster configured successfully.


In [None]:
# --- Upload the Dataset ---
print("Please upload your CSV data file.")
from google.colab import files
uploaded = files.upload()

if not uploaded:
    raise ValueError("No file uploaded. Please run the cell again to upload your data.")

# Only the first uploaded entry is used; any further uploads are ignored.
file_name = next(iter(uploaded))
# Parse before announcing success, so a malformed CSV is never reported as "loaded".
df = pd.read_csv(io.BytesIO(uploaded[file_name]))
clear_output(wait=True)
print(f"📄 Dataset '{file_name}' loaded successfully.")


# --- Clean the Data ---
print("\n🧹 Cleaning data...")

# 1. Clean column names (remove special characters and extra spaces)
def clean_col_names(df):
    """Normalize the column labels of ``df`` in place and return ``df``.

    Each label is first stripped of bracketed numeric markers such as
    ``[50]`` and then of every character that is not an ASCII letter,
    digit or underscore. The two substitutions are chained, so the digits
    inside a bracketed marker are removed together with the brackets.

    Args:
        df: DataFrame whose ``columns`` attribute is rewritten.

    Returns:
        The same DataFrame object, with cleaned column names.
    """
    cleaned = []
    for col in df.columns:
        # str() lets non-string labels (e.g. integer headers) pass through re.sub.
        name = re.sub(r'\[\d+\]', '', str(col))  # Remove bracketed numbers like [50]
        # Operate on the already-stripped name, not on the original label.
        name = re.sub(r'[^a-zA-Z0-9_]', '', name)  # Remove non-alphanumeric characters
        cleaned.append(name)
    df.columns = cleaned
    return df

df = clean_col_names(df)

# 2. Convert object columns with numbers into proper numeric types
for col in df.columns:
    # Only object-typed columns are candidates for conversion.
    if df[col].dtype != 'object':
        continue
    try:
        # Strip thousands separators, then attempt a float cast of the whole column.
        without_commas = df[col].str.replace(',', '')
        df[col] = without_commas.astype(float)
    except (ValueError, AttributeError):
        # The cast failed, so this is a genuine text column: keep it untouched.
        continue

print("✅ Data cleaning complete.")
print("\n--- Cleaned Data Preview ---")
display(df.head())
print("\n--- Final Data Types ---")
df.info()

📄 Dataset 'insurance (3).csv' loaded successfully.

🧹 Cleaning data...
✅ Data cleaning complete.

--- Cleaned Data Preview ---


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552



--- Final Data Types ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [None]:
# --- User Defines the Problem Type ---
# Dropdown offering the two supported task types; its current value is read
# by the modelling cell to decide how the target column is treated.
problem_type_widget = widgets.Dropdown(
    description='Problem Type:',
    options=['Regression', 'Classification'],
    value='Regression',  # Defaults to Regression
    disabled=False,
)

print("Please select the type of machine learning problem.")
display(problem_type_widget)

Please select the type of machine learning problem.


Dropdown(description='Problem Type:', options=('Regression', 'Classification'), value='Regression')

In [None]:
# --- Convert data to an H2OFrame ---
h2o_df = h2o.H2OFrame(df)

# --- Define Target and Predictors ---
target_column = 'charges'  # We know the target is 'charges'

if target_column not in h2o_df.columns:
    raise ValueError(f"Target column '{target_column}' not found in the cleaned dataset.")

# The task type comes from the dropdown selection made in the previous cell.
problem_type = problem_type_widget.value
print(f"Task identified by user as: {problem_type.upper()}")

# Only a classification target is converted to a factor (categorical) column.
if problem_type == 'Classification':
    h2o_df[target_column] = h2o_df[target_column].asfactor()

# Every column except the target is used as a predictor.
predictors = [name for name in h2o_df.columns if name != target_column]


# --- Run H2O AutoML ---
print("\n💪 Training and comparing models with H2O AutoML...")
aml = H2OAutoML(max_runtime_secs=300, seed=1)
aml.train(x=predictors, y=target_column, training_frame=h2o_df)

# --- Display the Performance Leaderboard ---
print("\n🏆 Model Performance Leaderboard:")
leaderboard = aml.leaderboard
display(leaderboard.as_data_frame())

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Task identified by user as: REGRESSION

💪 Training and comparing models with H2O AutoML...
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%

🏆 Model Performance Leaderboard:





Unnamed: 0,model_id,rmse,mse,mae,rmsle,mean_residual_deviance
0,StackedEnsemble_BestOfFamily_4_AutoML_3_202509...,4511.406038,2.035278e+07,2464.804255,0.405988,2.035278e+07
1,GBM_grid_1_AutoML_3_20250913_180758_model_2,4537.487750,2.058880e+07,2533.684130,0.421353,2.058880e+07
2,GBM_grid_1_AutoML_3_20250913_180758_model_54,4538.646577,2.059931e+07,2562.340370,0.425482,2.059931e+07
3,XGBoost_grid_1_AutoML_3_20250913_180758_model_29,4560.349192,2.079678e+07,2503.212796,0.420254,2.079678e+07
4,GBM_grid_1_AutoML_3_20250913_180758_model_36,4582.605418,2.100027e+07,2591.165262,0.420036,2.100027e+07
...,...,...,...,...,...,...
117,GBM_grid_1_AutoML_3_20250913_180758_model_13,6521.342632,4.252791e+07,4586.840638,,4.252791e+07
118,GBM_grid_1_AutoML_3_20250913_180758_model_18,6544.966847,4.283659e+07,4544.830755,0.630293,4.283659e+07
119,GBM_grid_1_AutoML_3_20250913_180758_model_30,6853.573406,4.697147e+07,4823.443330,0.588216,4.697147e+07
120,XGBoost_grid_1_AutoML_3_20250913_180758_model_41,11906.034190,1.417537e+08,8066.548800,1.128345,1.417537e+08


In [None]:
# ProfileReport is not imported by the earlier cells, so import it here;
# without this the cell raises NameError on a fresh kernel.
from ydata_profiling import ProfileReport

# minimal=True is passed to keep the generated report lightweight.
profile = ProfileReport(df, minimal=True, title="Auto EDA Report")
profile.to_file("eda_report.html")

print("✅ EDA report generated: eda_report.html")
# Offer the generated HTML file as a browser download from Colab.
files.download("eda_report.html")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


100%|██████████| 7/7 [00:00<00:00, 106.80it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

✅ EDA report generated: eda_report.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import base64
from google.colab import files
import io
import json
import pandas as pd
import numpy as np

print("\n🚀 Generating Interactive Analysis Dashboard (True Correlation Heatmap Version)...")

# --- 0. DEBUG: Confirm required variables ---
# Names that must already exist in the notebook namespace, checked in order,
# each paired with the message raised when it is missing.
_required_globals = (
    ('df', "❌ 'df' DataFrame not found. Please load your dataset first."),
    ('target_column', "❌ 'target_column' variable not defined. Please set it (e.g., target_column = 'target')"),
    ('problem_type', "❌ 'problem_type' not defined. Set to 'Regression' or 'Classification'"),
)
for _name, _message in _required_globals:
    if _name not in globals():
        raise NameError(_message)
# The AutoML object must exist and expose a `leader` attribute.
if not ('aml' in globals() and hasattr(aml, 'leader')):
    raise NameError("❌ H2O AutoML model 'aml' not trained or not available. Train aml first!")
if 'h2o_df' not in globals():
    raise NameError("❌ 'h2o_df' (H2O Frame) not defined. Convert df to H2O frame with h2o.H2OFrame(df)")

# Display name used in the report; note this replaces any earlier `file_name` value.
file_name = "uploaded_dataset.csv"  # Customize if needed

print(f"✅ Using target_column: {target_column}")
print(f"✅ Problem type: {problem_type}")
print(f"✅ Best model: {aml.leader.model_id}")

# --- 1. Prepare Data for All Visualizations ---
viz_data = {}
numeric_df = df.select_dtypes(include=np.number)

# --- Viz 1: Target Variable Distribution ---
print("  - 📊 Preparing Target Distribution data...")
try:
    if problem_type != 'Regression':  # Classification
        # One bar per class, labelled with the class value as text.
        counts = df[target_column].value_counts()
        viz_data['target_dist'] = {
            'type': 'bar',
            'labels': counts.index.astype(str).tolist(),
            'values': counts.values.tolist()
        }
    else:
        # 20-bin histogram of the non-missing target values; each label is "low-high".
        hist, edges = np.histogram(df[target_column].dropna(), bins=20)
        viz_data['target_dist'] = {
            'type': 'bar',
            'labels': [f"{low:.2f}-{high:.2f}" for low, high in zip(edges[:-1], edges[1:])],
            'values': hist.tolist()
        }
except Exception as e:
    # Best effort: a failure here only drops this chart from the dashboard.
    print(f"    - ⚠️ Could not generate target distribution: {e}")

# --- Viz 2: Correlation Heatmap (TRUE HEATMAP WITH COLORBAR) ---
print("  - 📊 Preparing Correlation Heatmap data...")
try:
    if numeric_df.shape[1] < 2:
        print("    - ⚠️ Not enough numeric columns (>1) for correlation matrix.")
    else:
        corr = numeric_df.corr()
        columns = corr.columns.tolist()

        # Flatten the matrix into one {x, y, v} record per cell, row by row.
        heatmap_data = []
        for row_col in columns:
            for col_col in columns:
                val = corr.loc[row_col, col_col]
                heatmap_data.append({
                    'x': str(col_col),
                    'y': str(row_col),
                    # Missing or infinite coefficients are exported as None.
                    'v': None if (pd.isna(val) or np.isinf(val)) else float(val)
                })

        viz_data['correlation_heatmap'] = {
            'data': heatmap_data,
            'columns': columns
        }
        print(f"    - ✅ Generated correlation matrix: {len(columns)} × {len(columns)}")
except Exception as e:
    # Best effort: a failure here only drops this chart from the dashboard.
    print(f"    - ⚠️ Could not generate correlation heatmap: {e}")

# --- Viz 3: Cluster Analysis ---
print("  - 📊 Preparing Cluster Analysis data...")
try:
    if numeric_df.shape[1] <= 1 or len(numeric_df) <= 10:
        print("    - ⚠️ Not enough data or features for clustering.")
    else:
        # Standardize the numeric columns (missing values filled with 0),
        # project to 2 components for plotting, and cluster the scaled data.
        scaled_data = StandardScaler().fit_transform(numeric_df.fillna(0))
        pca = PCA(n_components=2)
        components = pca.fit_transform(scaled_data)
        # At most 6 clusters, and no more than one cluster per 10 rows.
        kmeans = KMeans(
            n_clusters=min(6, len(numeric_df)//10),
            random_state=42,
            n_init=10,
        ).fit(scaled_data)

        # Vibrant, distinct cluster colors
        cluster_colors = [
            '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
            '#DDA0DD', '#FFB347', '#87CEEB', '#F08080', '#98D8C8',
            '#FFD166', '#06D6A0', '#118AB2', '#EF476F', '#073B4C'
        ]

        viz_data['clusters'] = {
            'data': [{'x': float(pc1), 'y': float(pc2)} for pc1, pc2 in components],
            'labels': kmeans.labels_.tolist(),
            # The palette is indexed modulo its length, one color per point.
            'colors': [cluster_colors[label % len(cluster_colors)] for label in kmeans.labels_]
        }
        print(f"    - ✅ Clustered {len(kmeans.labels_)} points into {len(set(kmeans.labels_))} clusters.")
except Exception as e:
    # Best effort: a failure here only drops this chart from the dashboard.
    print(f"    - ⚠️ Could not generate cluster plot: {e}")

# --- Viz 4: Model Performance ---
print("  - 📊 Preparing Model Performance data...")
try:
    best_model = aml.leader
    # NOTE: predictions are made on h2o_df, the same frame passed to aml.train,
    # so this chart shows in-sample fit rather than held-out performance.
    preds = best_model.predict(h2o_df).as_data_frame()
    actuals = df[target_column]

    if problem_type == 'Regression':
        # Plot at most 1000 points. A seeded generator keeps the sample (and
        # therefore the chart) identical across runs, matching the fixed seeds
        # used for AutoML and KMeans elsewhere in the notebook.
        sample_size = min(len(actuals), 1000)
        sample_indices = np.random.default_rng(42).choice(len(actuals), sample_size, replace=False)
        viz_data['model_performance'] = {
            'type': 'regression',
            # Pairs with a missing actual or predicted value are skipped.
            'data': [{'x': float(actuals.iloc[i]), 'y': float(preds.iloc[i, 0])}
                     for i in sample_indices
                     if pd.notna(actuals.iloc[i]) and pd.notna(preds.iloc[i, 0])]
        }
        print(f"    - ✅ Regression performance: {len(viz_data['model_performance']['data'])} points sampled.")
    else: # Classification
        # Use the union of observed and predicted classes so the matrix has every label.
        labels = sorted(list(set(actuals.unique()) | set(preds['predict'].unique())))
        cm = confusion_matrix(actuals, preds['predict'], labels=labels)
        viz_data['model_performance'] = {
            'type': 'classification',
            'labels': [str(l) for l in labels],
            'matrix': [[int(v) for v in row] for row in cm]
        }
        print(f"    - ✅ Classification confusion matrix: {len(labels)} classes.")
except Exception as e:
    # Best effort: a failure here only drops this chart from the dashboard.
    print(f"    - ⚠️ Could not generate model performance plot: {e}")

# --- 2. Sanitize All Data for JSON ---
def sanitize_data(data):
    """Recursively convert ``data`` into plain values that are safe for JSON.

    Conversions applied:
      * dict            -> dict with sanitized values (keys are left as-is)
      * list / tuple    -> list of sanitized items
      * numpy.ndarray   -> sanitized nested list
      * numpy bool      -> Python bool
      * numpy integer   -> Python int
      * float or any numpy floating type -> Python float, or ``None`` when
        the value is NaN or infinite

    Anything else is returned unchanged.

    Args:
        data: Arbitrarily nested structure of the types listed above.

    Returns:
        A structure of the same shape made of built-in Python types.
    """
    if isinstance(data, dict):
        return {k: sanitize_data(v) for k, v in data.items()}
    if isinstance(data, (list, tuple)):
        return [sanitize_data(item) for item in data]
    if isinstance(data, np.ndarray):
        return sanitize_data(data.tolist())
    if isinstance(data, np.bool_):
        return bool(data)
    if isinstance(data, np.integer):
        return int(data)
    if isinstance(data, (float, np.floating)):
        # Covers every NumPy float width: only float64 subclasses Python float,
        # so narrower types would otherwise bypass the NaN/inf check.
        value = float(data)
        return value if np.isfinite(value) else None
    return data

viz_data_clean = sanitize_data(viz_data)

# --- 3. Generate JavaScript for Charts (ONLY 4 TABS NOW) ---
# js_chart_functions collects the JS source of one init function per chart;
# js_switch_cases collects the matching `case '<tab>': ...; break;` lines.
js_chart_functions = []
js_switch_cases = []

# Color palette
chart_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7']

if viz_data_clean.get('target_dist'):
    # Darker border derived from the fill color. The palette is upper-case hex,
    # so the swap has to match "FF"; a lower-case "ff" search never matches and
    # leaves the border identical to the fill.
    target_border_color = chart_colors[0].upper().replace("FF", "CC")
    # In the f-string below, doubled braces are literal JavaScript braces and
    # single braces interpolate Python values.
    js_chart_functions.append(f"""
        function initTargetChart() {{
            try {{
                if (vizData.target_dist && !chartInstances.target) {{
                    chartInstances.target = new Chart(document.getElementById('targetChart'), {{
                        type: 'bar',
                        data: {{ labels: vizData.target_dist.labels, datasets: [{{ label: '{target_column} Distribution', data: vizData.target_dist.values, backgroundColor: '{chart_colors[0]}', borderColor: '{target_border_color}', borderWidth: 1 }}] }},
                        options: {{
                            maintainAspectRatio: false,
                            plugins: {{ title: {{ display: true, text: 'Distribution of Target Variable: {target_column}' }} }},
                            scales: {{ y: {{ beginAtZero: true }} }}
                        }}
                    }});
                }}
            }} catch (err) {{ console.error("Error initializing target chart:", err); }}
        }}
    """)
    js_switch_cases.append("case 'target': initTargetChart(); break;")

# Model Performance tab: a scatter plot for regression, a confusion matrix for
# classification. In the f-strings below, doubled braces are literal JavaScript
# braces and single braces interpolate Python values.
if viz_data_clean.get('model_performance'):
    if viz_data_clean['model_performance']['type'] == 'regression':
        # Darker border derived from the fill color; the palette is upper-case
        # hex, so the swap must look for "FF" (a lower-case search never matches).
        performance_border_color = chart_colors[1].upper().replace("FF", "CC")
        js_chart_functions.append(f"""
            function initPerformanceChart() {{
                try {{
                    if (vizData.model_performance && !chartInstances.performance) {{
                        chartInstances.performance = new Chart(document.getElementById('performanceChart'), {{
                            type: 'scatter',
                            data: {{ datasets: [{{ label: 'Actual vs. Predicted', data: vizData.model_performance.data, backgroundColor: '{chart_colors[1]}', borderColor: '{performance_border_color}', borderWidth: 1 }}] }},
                            options: {{
                                maintainAspectRatio: false,
                                plugins: {{ title: {{ display: true, text: 'Model Performance: Actual vs. Predicted Values (Sample)' }} }},
                                scales: {{
                                    x: {{ title: {{ display: true, text: 'Actual Values' }} }},
                                    y: {{ title: {{ display: true, text: 'Predicted Values' }} }}
                                }}
                            }}
                        }});
                    }}
                }} catch (err) {{ console.error("Error initializing performance chart:", err); }}
            }}
        """)
    else: # Classification
        # NOTE(review): 'matrix' is not a built-in Chart.js chart type; it is
        # provided by the chartjs-chart-matrix plugin. The visible part of the
        # HTML template only loads chart.js itself - confirm the plugin script
        # is included, otherwise this chart fails to initialize.
        # The tooltip label returns an array of strings (one per line) because
        # Chart.js draws tooltips on the canvas and would show "<br>" literally.
        js_chart_functions.append(f"""
            function initPerformanceChart() {{
                try {{
                    if (vizData.model_performance && !chartInstances.performance) {{
                        const labels = vizData.model_performance.labels;
                        const matrix = vizData.model_performance.matrix;
                        const datasetData = [];
                        for (let i = 0; i < matrix.length; i++) {{
                            for (let j = 0; j < matrix[i].length; j++) {{
                                datasetData.push({{ x: labels[j], y: labels[i], v: matrix[i][j] }});
                            }}
                        }}
                        chartInstances.performance = new Chart(document.getElementById('performanceChart'), {{
                            type: 'matrix',
                            data: {{
                                datasets: [{{
                                    label: 'Confusion Matrix',
                                    data: datasetData,
                                    backgroundColor(ctx) {{
                                        const v = ctx.raw.v;
                                        const max = Math.max(...matrix.flat());
                                        const alpha = v === 0 ? 0.1 : 0.8 + (v / max) * 0.2;
                                        return `rgba(75, 192, 192, ${{alpha}})`;
                                    }},
                                    borderColor: '#333',
                                    borderWidth: 1,
                                    width: (ctx) => ctx.chart.width / labels.length,
                                    height: (ctx) => ctx.chart.height / labels.length
                                }}]
                            }},
                            options: {{
                                maintainAspectRatio: false,
                                plugins: {{
                                    title: {{ display: true, text: 'Confusion Matrix' }},
                                    legend: {{ display: false }},
                                    tooltip: {{
                                        callbacks: {{
                                            title: () => '',
                                            label: (c) => [`Predicted: ${{c.raw.x}}, Actual: ${{c.raw.y}}`, `Count: ${{c.raw.v}}`]
                                        }}
                                    }}
                                }},
                                scales: {{
                                    x: {{ ticks: {{ display: true, autoSkip: false }} }},
                                    y: {{ ticks: {{ display: true, offset: true, autoSkip: false }} }}
                                }}
                            }}
                        }});
                    }}
                }} catch (err) {{ console.error("Error initializing confusion matrix:", err); }}
            }}
        """)
    js_switch_cases.append("case 'performance': initPerformanceChart(); break;")

# Clusters tab: the JS initializer is emitted only when viz_data_clean holds a
# non-empty 'clusters' entry.
if viz_data_clean.get('clusters'):
    # The f-string below is JavaScript source. Doubled braces are literal JS
    # braces; the only Python value interpolated is chart_colors[2], used as the
    # fallback color when a cluster has no color entry. The generated function
    # builds one scatter dataset per distinct cluster label.
    js_chart_functions.append(f"""
        function initClustersChart() {{
            try {{
                if (vizData.clusters && !chartInstances.clusters) {{
                    const uniqueLabels = [...new Set(vizData.clusters.labels)];
                    const datasets = uniqueLabels.map((label, i) => ({{
                        label: `Cluster ${{label}}`,
                        data: vizData.clusters.data.filter((d, idx) => vizData.clusters.labels[idx] === label),
                        backgroundColor: vizData.clusters.colors.filter((c, idx) => vizData.clusters.labels[idx] === label)[0] || '{chart_colors[2]}',
                        borderColor: '#333',
                        borderWidth: 1
                    }}));
                    chartInstances.clusters = new Chart(document.getElementById('clusterChart'), {{
                        type: 'scatter',
                        data: {{ datasets }},
                        options: {{
                            maintainAspectRatio: false,
                            plugins: {{
                                title: {{ display: true, text: 'Data Clusters (PCA Visualization)' }},
                                legend: {{ display: true, position: 'right' }}
                            }},
                            scales: {{
                                x: {{ title: {{ display: true, text: 'Principal Component 1' }} }},
                                y: {{ title: {{ display: true, text: 'Principal Component 2' }} }}
                            }}
                        }}
                    }});
                }}
            }} catch (err) {{ console.error("Error initializing clusters chart:", err); }}
        }}
    """)
    # Case line for the 'clusters' tab; presumably spliced into a JS switch on
    # the tab name further down the template - confirm against the HTML section.
    js_switch_cases.append("case 'clusters': initClustersChart(); break;")

# Correlation tab: a hand-drawn heatmap on a raw <canvas> (not a Chart.js chart).
# In the f-string below, doubled braces are literal JavaScript braces.
# Fixes relative to the earlier version of this template:
#   * the canvas drawing buffer is sized to its container (a raw canvas is
#     otherwise 300x150, since nothing else resizes it);
#   * the right margin leaves room for the colorbar and its tick labels, and a
#     small bottom margin keeps the last tick label inside the canvas;
#   * colorbar tick labels no longer carry a stray "$ " prefix;
#   * the tooltip is filled via textContent, since its content is plain text.
if viz_data_clean.get('correlation_heatmap'):
    js_chart_functions.append(f"""
        function initCorrelationChart() {{
            try {{
                if (vizData.correlation_heatmap && !chartInstances.correlation) {{
                    const data = vizData.correlation_heatmap.data;
                    const columns = vizData.correlation_heatmap.columns;
                    const n = columns.length;
                    const canvas = document.getElementById('correlationChart');
                    const ctx = canvas.getContext('2d');

                    // Size the drawing buffer to the container; fall back to fixed
                    // dimensions if the container has no layout size yet.
                    const container = canvas.parentElement;
                    canvas.width = (container && container.clientWidth) || 800;
                    canvas.height = (container && container.clientHeight) || 500;

                    // Create tooltip if not exists
                    let tooltip;
                    if (!document.getElementById('correlation-tooltip')) {{
                        tooltip = document.createElement('div');
                        tooltip.id = 'correlation-tooltip';
                        tooltip.style.cssText = `
                            position: absolute;
                            background: rgba(0, 0, 0, 0.8);
                            color: white;
                            padding: 5px 8px;
                            border-radius: 3px;
                            pointer-events: none;
                            display: none;
                            z-index: 1000;
                            font-size: 12px;
                            white-space: nowrap;
                            font-family: Arial, sans-serif;
                        `;
                        document.body.appendChild(tooltip);
                    }} else {{
                        tooltip = document.getElementById('correlation-tooltip');
                    }}

                    // Margins for labels and colorbar
                    const leftMargin = 80;
                    const topMargin = 40;
                    const rightMargin = 70;
                    const bottomMargin = 10;
                    const gridWidth = canvas.width - leftMargin - rightMargin;
                    const gridHeight = canvas.height - topMargin - bottomMargin;
                    const cellSize = Math.min(gridWidth / n, gridHeight / n);
                    const gridX = leftMargin;
                    const gridY = topMargin;

                    // Draw function
                    function drawHeatmap() {{
                        ctx.clearRect(0, 0, canvas.width, canvas.height);

                        // Draw cells
                        for (let i = 0; i < n; i++) {{
                            for (let j = 0; j < n; j++) {{
                                const val = data.find(d => d.x === columns[j] && d.y === columns[i])?.v;
                                if (val === null || val === undefined) continue;

                                const x = gridX + j * cellSize;
                                const y = gridY + i * cellSize;

                                // Color based on value
                                const hue = val > 0 ? 200 : 0; // Blue for positive, Red for negative
                                const alpha = Math.abs(val);
                                const color = `hsla(${{hue}}, 80%, 50%, ${{alpha}})`;

                                ctx.fillStyle = color;
                                ctx.fillRect(x, y, cellSize, cellSize);

                                // Draw border
                                ctx.strokeStyle = '#ddd';
                                ctx.lineWidth = 0.5;
                                ctx.strokeRect(x, y, cellSize, cellSize);

                                // Draw value with black text
                                ctx.fillStyle = 'black';
                                ctx.font = `bold ${{Math.max(8, cellSize * 0.3)}}px Arial`;
                                ctx.textAlign = 'center';
                                ctx.textBaseline = 'middle';
                                ctx.fillText(val.toFixed(2), x + cellSize / 2, y + cellSize / 2);
                            }}
                        }}

                        // Draw column labels (top)
                        ctx.fillStyle = 'black';
                        ctx.font = '12px Arial';
                        ctx.textAlign = 'center';
                        ctx.textBaseline = 'bottom';
                        for (let j = 0; j < n; j++) {{
                            ctx.fillText(columns[j], gridX + j * cellSize + cellSize / 2, gridY - 5);
                        }}

                        // Draw row labels (left)
                        ctx.textAlign = 'right';
                        ctx.textBaseline = 'middle';
                        for (let i = 0; i < n; i++) {{
                            ctx.fillText(columns[i], gridX - 5, gridY + i * cellSize + cellSize / 2);
                        }}
                        ctx.textAlign = 'left'; // Reset

                        // Add colorbar
                        const barWidth = 20;
                        const barX = gridX + gridWidth + 10;
                        const barY = gridY;
                        const barHeight = gridHeight;

                        // Gradient from red to blue
                        const gradient = ctx.createLinearGradient(barX, barY, barX, barY + barHeight);
                        gradient.addColorStop(0, 'red');
                        gradient.addColorStop(0.5, 'white');
                        gradient.addColorStop(1, 'blue');

                        ctx.fillStyle = gradient;
                        ctx.fillRect(barX, barY, barWidth, barHeight);

                        // Add colorbar ticks
                        ctx.fillStyle = 'black';
                        ctx.font = '10px Arial';
                        ctx.textAlign = 'left';
                        ctx.textBaseline = 'middle';
                        for (let i = 0; i <= 5; i++) {{
                            const v = -1 + (i / 5) * 2;
                            const y = barY + (i / 5) * barHeight;
                            ctx.fillText(v.toFixed(1), barX + barWidth + 5, y);
                        }}
                    }}

                    // Initial draw
                    drawHeatmap();

                    // Mouse events for interactivity
                    function handleMouseMove(event) {{
                        const rect = canvas.getBoundingClientRect();
                        const scaleX = canvas.width / rect.width;
                        const scaleY = canvas.height / rect.height;
                        const mouseX = (event.clientX - rect.left) * scaleX;
                        const mouseY = (event.clientY - rect.top) * scaleY;

                        const col = Math.floor((mouseX - gridX) / cellSize);
                        const row = Math.floor((mouseY - gridY) / cellSize);

                        if (col >= 0 && col < n && row >= 0 && row < n) {{
                            const val = data.find(d => d.x === columns[col] && d.y === columns[row])?.v;
                            if (val !== null && val !== undefined) {{
                                tooltip.textContent = `${{columns[row]}} vs ${{columns[col]}}: ${{val.toFixed(3)}}`;
                                tooltip.style.left = (event.pageX + 10) + 'px';
                                tooltip.style.top = (event.pageY - 10) + 'px';
                                tooltip.style.display = 'block';
                                return;
                            }}
                        }}
                        tooltip.style.display = 'none';
                    }}

                    canvas.addEventListener('mousemove', handleMouseMove);
                    canvas.addEventListener('mouseleave', () => tooltip.style.display = 'none');

                    // Store reference
                    chartInstances.correlation = {{ canvas: canvas, ctx: ctx, tooltip: tooltip }};
                }}
            }} catch (err) {{ console.error("Error initializing correlation chart:", err); }}
        }}
    """)
    js_switch_cases.append("case 'correlation': initCorrelationChart(); break;")

# Concatenate the collected JS snippets for insertion into the HTML template.
all_js_chart_functions = "\n".join(js_chart_functions)
all_js_switch_cases = "\n            ".join(js_switch_cases)

# --- 4. Generate the Interactive HTML Report (Summary + 4 chart tabs) ---
# Builds one self-contained HTML page as a string. Chart.js is loaded from a CDN,
# the prepared visualisation data is embedded as a JS object, and each chart is
# created by the injected switch cases the first time its tab is opened.
from html import escape as _html_escape  # stdlib; aliased to avoid clashing with notebook globals

# Text interpolated into HTML markup is escaped so that characters such as
# '<', '>' or '&' in a dataset / column name cannot break (or inject into) the page.
report_file_name_html = _html_escape(str(file_name))
report_target_html = _html_escape(str(target_column))
report_leader_html = _html_escape(str(aml.leader.model_id))

# JSON placed inside an inline <script> must not contain a literal '<': a string
# value like "</script>" would terminate the script element early. In JSON output
# '<' can only occur inside string literals, where "\u003c" is an equivalent escape.
# Computed outside the f-string because f-string expressions cannot contain
# backslashes on older Python versions.
viz_data_json = json.dumps(viz_data_clean, indent=4).replace("<", "\\u003c")

html_report = f'''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Interactive AutoML Analysis for {report_file_name_html}</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
    <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; margin: 0; background-color: #f0f2f5; color: #333; }}
        .container {{ max-width: 1200px; margin: 20px auto; background: #fff; border-radius: 8px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }}
        .header {{ background-color: #0052cc; color: white; padding: 20px; text-align: center; border-radius: 8px 8px 0 0; }}
        .header h1 {{ margin: 0; font-size: 2em; }}
        .header p {{ margin: 5px 0 0; opacity: 0.8; }}
        .tabs {{ display: flex; flex-wrap: wrap; background-color: #f8f9fa; border-bottom: 1px solid #dee2e6; }}
        .tab-button {{ padding: 15px 20px; cursor: pointer; border: none; background: none; font-size: 1em; color: #495057; }}
        .tab-button.active {{ background-color: #fff; color: #0052cc; border-bottom: 3px solid #0052cc; }}
        .tab-content {{ display: none; padding: 20px; animation: fadeIn 0.5s; }}
        .tab-content.active {{ display: block; }}
        .chart-container {{ position: relative; height: 500px; width: 100%; }}
        .info-box {{ padding: 15px; background-color: #e6f7ff; border-left: 5px solid #0052cc; margin-bottom: 20px; border-radius: 4px; line-height: 1.5; }}
        @keyframes fadeIn {{ from {{ opacity: 0; }} to {{ opacity: 1; }} }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>Interactive AutoML Analysis</h1>
            <p>Dataset: <strong>{report_file_name_html}</strong> | Target: <strong>{report_target_html}</strong></p>
        </div>
        <div class="tabs">
            <button class="tab-button active" onclick="openTab(event, 'summary')">Summary</button>
            <button class="tab-button" onclick="openTab(event, 'target')">Target Distribution</button>
            <button class="tab-button" onclick="openTab(event, 'performance')">Model Performance</button>
            <button class="tab-button" onclick="openTab(event, 'clusters')">Clusters</button>
            <button class="tab-button" onclick="openTab(event, 'correlation')">Correlation Matrix</button>
        </div>

        <div id="summary" class="tab-content active">
            <h2>Executive Summary</h2>
            <div class="info-box"><strong>Best Model:</strong> {report_leader_html}</div>
            <div class="info-box"><strong>Key Findings:</strong> This dashboard focuses on core insights: target behavior, model accuracy, data clusters, and feature relationships via correlation. Explore each tab to uncover patterns.</div>
        </div>

        <!-- Placeholder divs for charts -->
        <div id="target" class="tab-content"><div class="chart-container"><canvas id="targetChart"></canvas></div></div>
        <div id="performance" class="tab-content"><div class="chart-container"><canvas id="performanceChart"></canvas></div></div>
        <div id="clusters" class="tab-content"><div class="chart-container"><canvas id="clusterChart"></canvas></div></div>
        <div id="correlation" class="tab-content"><div class="chart-container"><canvas id="correlationChart"></canvas></div></div>
    </div>

    <script>
        // --- Data from Python ---
        const vizData = {viz_data_json};
        const chartInstances = {{}};

        // --- Tab Switching Logic ---
        function openTab(evt, tabName) {{
            let i, tabcontent, tabbuttons;
            tabcontent = document.getElementsByClassName("tab-content");
            for (i = 0; i < tabcontent.length; i++) {{
                tabcontent[i].style.display = "none";
            }}
            tabbuttons = document.getElementsByClassName("tab-button");
            for (i = 0; i < tabbuttons.length; i++) {{
                tabbuttons[i].className = tabbuttons[i].className.replace(" active", "");
            }}
            document.getElementById(tabName).style.display = "block";
            evt.currentTarget.className += " active";

            // Initialize chart on first view
            switch(tabName) {{
                {all_js_switch_cases}
            }}
        }}

        // --- Chart Generation Functions ---
        const CHART_COLORS = {{
            red: 'rgb(255, 99, 132)',
            orange: 'rgb(255, 159, 64)',
            yellow: 'rgb(255, 205, 86)',
            green: 'rgb(75, 192, 192)',
            blue: 'rgb(54, 162, 235)',
            purple: 'rgb(153, 102, 255)',
            grey: 'rgb(201, 203, 207)'
        }};
        const chartColorsArray = Object.values(CHART_COLORS);

        {all_js_chart_functions}

        // --- Initialize Summary Tab on Load ---
        window.onload = function() {{
            document.querySelector('.tab-button.active').click();
        }};
    </script>
</body>
</html>
'''

# --- 5. Save and Download the Report ---
# Persist the generated HTML next to the notebook, then hand it to the browser
# through Colab's download helper.
report_filename = "Interactive_AutoML_Dashboard_Clean.html"
with open(report_filename, mode="w", encoding='utf-8') as report_handle:
    report_handle.write(html_report)
    # Confirmation is printed while the handle is still open, right after the write.
    print(f"\n  - ✅ Report content written to '{report_filename}'.")

# Triggers the browser-side download of the file written above.
files.download(report_filename)
print(f"\n✅ Success! The interactive dashboard '{report_filename}' is now downloading to your computer.")


🚀 Generating Interactive Analysis Dashboard (True Correlation Heatmap Version)...
✅ Using target_column: charges
✅ Problem type: Regression
✅ Best model: StackedEnsemble_BestOfFamily_4_AutoML_3_20250913_180758
  - 📊 Preparing Target Distribution data...
  - 📊 Preparing Correlation Heatmap data...
    - ✅ Generated correlation matrix: 4 × 4
  - 📊 Preparing Cluster Analysis data...
    - ✅ Clustered 1338 points into 6 clusters.
  - 📊 Preparing Model Performance data...
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
    - ✅ Regression performance: 1000 points sampled.






  - ✅ Report content written to 'Interactive_AutoML_Dashboard_Clean.html'.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✅ Success! The interactive dashboard 'Interactive_AutoML_Dashboard_Clean.html' is now downloading to your computer.


In [None]:
# --- Save the Best Model ---
# Bind the AutoML leader here instead of relying on kernel state: in the visible
# part of this notebook `best_model_h2o` is only assigned in the following
# (prediction-interface) cell, so on a fresh Restart & Run All this cell could
# otherwise fail with a NameError. The following cell assigns the same value.
best_model_h2o = aml.leader
print("\n🔧 Saving the best model for predictions...")
# Writes the model under the current directory; force=True allows overwriting
# an existing saved copy. The returned path is reused when the model is loaded.
model_path = h2o.save_model(model=best_model_h2o, path="./", force=True)
print(f"\n✅ Best model has been saved as '{model_path}'")


🔧 Saving the best model for predictions...

✅ Best model has been saved as '/content/StackedEnsemble_BestOfFamily_4_AutoML_3_20250913_180758'


In [None]:
# --- Create an Interactive Prediction Interface ---
# Leader model and a pandas copy of the leaderboard.
best_model_h2o = aml.leader
leaderboard_df = leaderboard.as_data_frame()
print("\n🚀 Loading prediction pipeline and creating interactive interface...")
# Reload the model from the path produced when it was saved.
prediction_pipeline = h2o.load_model(model_path)

# One free-text box per predictor column, keyed by column name.
input_widgets = dict(
    (feature_name, widgets.Text(description=feature_name))
    for feature_name in predictors
)
predict_button = widgets.Button(description="Predict", button_style='success')
# Empty label that the click handler fills with the prediction or an error.
output_label = widgets.Label()

def on_predict_button_clicked(b):
    """Score the values currently typed into the input widgets and show the result.

    Click handler for the predict button. Reads the value of every entry in
    ``input_widgets``, builds a one-row frame from them, runs it through
    ``prediction_pipeline`` and writes the outcome to ``output_label``.

    Args:
        b: Object passed in by the button's click callback; not used.

    Returns:
        None. The prediction (green) or the error text (red) is written to
        ``output_label``.
    """
    try:
        # Each widget value becomes a single-element column -> a one-row DataFrame.
        input_data = {col: [widget.value] for col, widget in input_widgets.items()}
        input_df = pd.DataFrame.from_dict(input_data)
        input_h2o_df = h2o.H2OFrame(input_df)
        prediction = prediction_pipeline.predict(input_h2o_df)
        # The first cell of the prediction frame is used as the predicted value.
        predicted_value = prediction.as_data_frame().iloc[0,0]

        if problem_type == 'Classification':
            output_label.value = f'Predicted Class: {predicted_value}'
        else:
            output_label.value = f'Predicted Value: {predicted_value:.2f}'

        output_label.style.text_color = 'green'
    except Exception as e:
        # Broad on purpose: any failure in parsing or scoring is surfaced in the
        # label instead of being raised out of the widget callback.
        output_label.value = f'Error: {str(e)}'
        # Without this, an error following a successful prediction would keep
        # the green colour set by the earlier success.
        output_label.style.text_color = 'red'

# Run the prediction handler whenever the button is pressed.
predict_button.on_click(on_predict_button_clicked)

print("\n📋 Enter new data below and click 'Predict':")
# Render the input boxes first, then the button, then the result label.
interface_elements = [*input_widgets.values(), predict_button, output_label]
display(*interface_elements)


🚀 Loading prediction pipeline and creating interactive interface...

📋 Enter new data below and click 'Predict':





Text(value='', description='age')

Text(value='', description='sex')

Text(value='', description='bmi')

Text(value='', description='children')

Text(value='', description='smoker')

Text(value='', description='region')

Button(button_style='success', description='Predict', style=ButtonStyle())

Label(value='')

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%





Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%





Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%





Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%





Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%





Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%





Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%



