## Section 1: Import Libraries

In [None]:
# Core data science libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Image processing
import cv2
from PIL import Image
import os
from pathlib import Path

# Statistics
from scipy import stats
from scipy.stats import mannwhitneyu, f_oneway, chi2_contingency
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Utilities
import json
import warnings
warnings.filterwarnings('ignore')

# Import analytics pipeline
import sys
sys.path.insert(0, '.')
from analytics_pipeline import (
    load_images_from_dataset,
    preprocess_image,
    extract_crack_features,
    extract_vegetation_features,
    compute_risk_score,
    build_dataframes,
    run_statistical_tests,
    export_dataset_analytics,
    export_image_insights
)

print('✅ All libraries imported successfully')

## Section 2: Load and Preprocess Images

In [None]:
# Set dataset paths
base_path = Path('.')
crack_dir = base_path / 'Dataset' / 'crack_preprocess'
vegetation_dir = base_path / 'Dataset' / 'vegetation_preprocess'

print(f'Crack dataset path: {crack_dir}')
print(f'Vegetation dataset path: {vegetation_dir}')

# Load all images. The two returned objects are indexed below by the
# 'images' and 'filenames' keys.
print('\nLoading datasets...')
crack_data, vegetation_data = load_images_from_dataset(
    crack_dir=str(crack_dir),
    vegetation_dir=str(vegetation_dir),
    target_size=(640, 640)
)

print(f'\n✅ Crack images: {len(crack_data["images"])}')
print(f'✅ Vegetation images: {len(vegetation_data["images"])}')

# Display one sample per dataset that actually contains images.
# Building the panel list first means a missing dataset neither suppresses
# the preview of the other one nor leaves an empty framed axes in the figure.
# len(...) > 0 is used instead of truthiness because the image container may
# be an array, whose truth value is ambiguous.
sample_panels = [
    (label, dataset['images'][0], dataset['filenames'][0])
    for label, dataset in (('Crack', crack_data), ('Vegetation', vegetation_data))
    if len(dataset['images']) > 0
]

if sample_panels:
    # squeeze=False keeps `axes` two-dimensional even for a single panel,
    # so the same loop handles one or two samples.
    fig, axes = plt.subplots(
        1, len(sample_panels),
        figsize=(5 * len(sample_panels), 4),
        squeeze=False
    )
    for ax, (label, image, filename) in zip(axes[0], sample_panels):
        ax.imshow(image)
        ax.set_title(f'Sample {label}: {filename}')
        ax.axis('off')

    plt.tight_layout()
    plt.show()

## Section 3: Extract Features from All Images

In [None]:
# Feature names used to build an all-zero placeholder when an image cannot be
# processed, so every image still contributes exactly one feature dict.
_CRACK_FALLBACK_FEATURES = [
    'crack_pixel_ratio', 'edge_density', 'skeleton_length_proxy',
    'glcm_entropy', 'brightness', 'color_mean_r', 'color_mean_g',
    'color_mean_b', 'roughness',
]
_VEGETATION_FALLBACK_FEATURES = [
    'vegetation_coverage', 'green_index_mean', 'glcm_entropy', 'brightness',
    'color_mean_r', 'color_mean_g', 'color_mean_b', 'roughness',
    'saturation_mean',
]


def _extract_features_and_risks(images, extractor, feature_type, fallback_names):
    """Run feature extraction and risk scoring over a sequence of images.

    Args:
        images: Iterable of images to process.
        extractor: Callable taking one image and returning its feature dict.
        feature_type: Label passed to ``compute_risk_score`` as
            ``feature_type`` and used in the error message.
        fallback_names: Feature names for the all-zero placeholder dict
            appended when processing an image raises.

    Returns:
        Tuple ``(features_list, risk_scores, failures)``. The first two lists
        hold one entry per input image; ``failures`` is the number of images
        that raised and received the placeholder features and a 0.0 risk.
    """
    features_list = []
    risk_scores = []
    failures = 0

    for i, img in enumerate(images):
        try:
            features = extractor(img)
            risk_score = compute_risk_score(features, feature_type=feature_type)
        except Exception as e:
            # Deliberate best effort: one bad image must not abort the run.
            print(f'Error processing {feature_type} image {i}: {e}')
            features = {name: 0.0 for name in fallback_names}
            risk_score = 0.0
            failures += 1
        features_list.append(features)
        risk_scores.append(risk_score)

    return features_list, risk_scores, failures


# Extract crack features
print('Extracting crack features...')
crack_features_list, crack_risk_scores, crack_failures = _extract_features_and_risks(
    crack_data['images'], extract_crack_features, 'crack', _CRACK_FALLBACK_FEATURES
)

print(f'✅ Extracted {len(crack_features_list)} crack feature sets')
if crack_failures:
    # Placeholder rows are counted above, so report them explicitly.
    print(f'⚠️ {crack_failures} crack image(s) failed and were filled with zero-valued defaults')

# Extract vegetation features
print('\nExtracting vegetation features...')
vegetation_features_list, vegetation_risk_scores, vegetation_failures = _extract_features_and_risks(
    vegetation_data['images'], extract_vegetation_features, 'vegetation',
    _VEGETATION_FALLBACK_FEATURES
)

print(f'✅ Extracted {len(vegetation_features_list)} vegetation feature sets')
if vegetation_failures:
    print(f'⚠️ {vegetation_failures} vegetation image(s) failed and were filled with zero-valued defaults')

# Show sample features
print('\nSample Crack Features:')
print(crack_features_list[0] if crack_features_list else 'No crack images')

print('\nSample Vegetation Features:')
print(vegetation_features_list[0] if vegetation_features_list else 'No vegetation images')

## Section 4: Build Analytical DataFrames

In [None]:
# Assemble the per-image features and risk scores into one DataFrame per dataset.
print('Building DataFrames...')
df_crack, df_vegetation = build_dataframes(
    crack_data=crack_data,
    vegetation_data=vegetation_data,
    crack_features_list=crack_features_list,
    vegetation_features_list=vegetation_features_list,
    crack_risk_scores=crack_risk_scores,
    vegetation_risk_scores=vegetation_risk_scores
)

print(f'✅ Crack DataFrame: {df_crack.shape}')
print(f'✅ Vegetation DataFrame: {df_vegetation.shape}')

# Each entry pairs a heading with the bound method that produces the table
# printed under it; the method is only called after its heading is printed.
report_sections = (
    ('\nCrack DataFrame Info:', df_crack.head),
    ('\nCrack Statistics:', df_crack.describe),
    ('\nVegetation DataFrame Info:', df_vegetation.head),
    ('\nVegetation Statistics:', df_vegetation.describe),
)
for heading, render_table in report_sections:
    print(heading)
    print(render_table())

## Section 5: Generate Visualizations

In [None]:
# Create comprehensive visualization dashboard
print('Creating visualization dashboard...')


def _plot_histogram(ax, frame, column, *, color, title, xlabel):
    """Draw a 20-bin histogram of frame[column]; leave ax blank if the column is absent."""
    if column not in frame.columns:
        return
    ax.hist(frame[column], bins=20, color=color, alpha=0.7, edgecolor='black')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')


def _plot_category_counts(ax, frame, column, *, color, title, xlabel):
    """Draw a bar chart of value counts for frame[column]; leave ax blank if absent."""
    if column not in frame.columns:
        return
    counts = frame[column].value_counts()
    ax.bar(counts.index, counts.values, color=color, edgecolor='black')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    ax.tick_params(axis='x', rotation=45)


def _plot_risk_scatter(ax, frame, x_column, y_column, *, title, xlabel, ylabel):
    """Scatter x_column against y_column, coloured by 'risk_score' when that column exists."""
    if x_column not in frame.columns or y_column not in frame.columns:
        return
    if 'risk_score' in frame.columns:
        points = ax.scatter(frame[x_column], frame[y_column],
                            c=frame['risk_score'], cmap='RdYlGn_r', s=50, alpha=0.6)
        plt.colorbar(points, ax=ax, label='Risk Score')
    else:
        # Without a risk column the relationship is still worth showing;
        # only the colour coding and colour bar are dropped.
        ax.scatter(frame[x_column], frame[y_column], s=50, alpha=0.6)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)


def _plot_correlation(ax, frame, *, title):
    """Draw a correlation heatmap over the first eight numeric columns of frame."""
    numeric_cols = frame.select_dtypes(include=[np.number]).columns[:8]
    if len(numeric_cols) <= 1:
        return
    sns.heatmap(frame[numeric_cols].corr(), annot=False, cmap='coolwarm',
                center=0, ax=ax, cbar=True)
    ax.set_title(title)
    ax.tick_params(axis='x', rotation=45)


fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(4, 3, hspace=0.3, wspace=0.3)

# Row 1: Crack features
ax1 = fig.add_subplot(gs[0, 0])
_plot_histogram(ax1, df_crack, 'crack_pixel_ratio', color='crimson',
                title='Crack Pixel Ratio Distribution', xlabel='Ratio')

ax2 = fig.add_subplot(gs[0, 1])
_plot_histogram(ax2, df_crack, 'edge_density', color='orange',
                title='Edge Density (Canny) Distribution', xlabel='Density')

ax3 = fig.add_subplot(gs[0, 2])
_plot_category_counts(ax3, df_crack, 'severity', color='steelblue',
                      title='Crack Severity Distribution', xlabel='Severity')

# Row 2: Crack analysis
ax4 = fig.add_subplot(gs[1, 0])
_plot_risk_scatter(ax4, df_crack, 'crack_pixel_ratio', 'edge_density',
                   title='Crack Density vs Edge Density',
                   xlabel='Crack Pixel Ratio', ylabel='Edge Density')

ax5 = fig.add_subplot(gs[1, 1])
_plot_histogram(ax5, df_crack, 'risk_score', color='darkred',
                title='Crack Risk Score Distribution', xlabel='Risk Score')

ax6 = fig.add_subplot(gs[1, 2])
_plot_correlation(ax6, df_crack, title='Crack Features Correlation')

# Row 3: Vegetation features
ax7 = fig.add_subplot(gs[2, 0])
_plot_histogram(ax7, df_vegetation, 'vegetation_coverage', color='green',
                title='Vegetation Coverage Distribution', xlabel='Coverage %')

ax8 = fig.add_subplot(gs[2, 1])
_plot_category_counts(ax8, df_vegetation, 'type', color='forestgreen',
                      title='Vegetation Type Distribution', xlabel='Type')

ax9 = fig.add_subplot(gs[2, 2])
_plot_risk_scatter(ax9, df_vegetation, 'vegetation_coverage', 'green_index_mean',
                   title='Coverage vs Green Index',
                   xlabel='Coverage', ylabel='Green Index')

# Row 4: Overall metrics
ax10 = fig.add_subplot(gs[3, 0])
_plot_histogram(ax10, df_vegetation, 'risk_score', color='darkgreen',
                title='Vegetation Risk Score Distribution', xlabel='Risk Score')

ax11 = fig.add_subplot(gs[3, 1])
_plot_correlation(ax11, df_vegetation, title='Vegetation Features Correlation')

ax12 = fig.add_subplot(gs[3, 2])
# Only frames that carry both columns take part, so a dataset lacking
# 'split' or 'risk_score' no longer raises KeyError here.
risk_frames = [
    frame[['split', 'risk_score']].assign(type=label)
    for label, frame in (('Crack', df_crack), ('Vegetation', df_vegetation))
    if 'split' in frame.columns and 'risk_score' in frame.columns
]
if risk_frames:
    combined_risk = pd.concat(risk_frames)
    split_order = ['train', 'test', 'valid']
    splits_present = [s for s in split_order if s in combined_risk['split'].values]
    if splits_present:
        data_by_split = [
            combined_risk[combined_risk['split'] == s]['risk_score'].values
            for s in splits_present
        ]
        # Tick labels are set separately: boxplot's `labels=` keyword is
        # deprecated since Matplotlib 3.9, and this notebook silences warnings
        # globally, so the deprecation would otherwise go unnoticed.
        ax12.boxplot(data_by_split)
        ax12.set_xticklabels(splits_present)
    ax12.set_title('Risk Score by Dataset Split')
    ax12.set_ylabel('Risk Score')

plt.suptitle('Infrastructure Health Analytics Dashboard', fontsize=16, fontweight='bold', y=0.995)
plt.savefig('analytics_dashboard.png', dpi=150, bbox_inches='tight')
print('✅ Dashboard saved as analytics_dashboard.png')
plt.show()

## Section 6: Run Statistical Hypothesis Tests

In [None]:
# Run the hypothesis tests over both DataFrames.
print('Running statistical hypothesis tests...\n')
statistical_tests = run_statistical_tests(df_crack, df_vegetation)

# Print one numbered report per result. Each result is read by key:
# 'test_name', 'p_value', 'significant' and 'interpretation' are required,
# while 'description' and 'r_squared' are optional.
for number, outcome in enumerate(statistical_tests, start=1):
    print(f"Test {number}: {outcome['test_name']}")
    print(f"  Description: {outcome.get('description', 'N/A')}")
    print(f"  P-value: {outcome['p_value']:.6f}")
    print(f"  Significant (α=0.05): {outcome['significant']}")
    print(f"  Interpretation: {outcome['interpretation']}")
    has_r_squared = 'r_squared' in outcome
    if has_r_squared:
        print(f"  R²: {outcome['r_squared']:.4f}")
    # Blank line between consecutive reports.
    print()

## Section 7: Export Dataset Analytics JSON

In [None]:
# Summary statistics for each dataset, computed with a per-dataset key prefix.
from analytics_pipeline.statistics import compute_dataset_statistics

print('Computing dataset statistics...')
crack_stats, vegetation_stats = (
    compute_dataset_statistics(frame, prefix=prefix)
    for frame, prefix in ((df_crack, 'crack_'), (df_vegetation, 'vegetation_'))
)

# Write the combined analytics document to disk.
print('\nExporting dataset analytics JSON...')
json_path = export_dataset_analytics(
    df_crack=df_crack,
    df_vegetation=df_vegetation,
    statistical_tests=statistical_tests,
    crack_stats=crack_stats,
    vegetation_stats=vegetation_stats,
    output_path='dataset_analytics.json'
)

# Read the file back and summarise it: the type of each top-level value,
# then the size of the re-serialised document in kilobytes.
with open('dataset_analytics.json', 'r') as f:
    data = json.load(f)

print('\nExported JSON structure:')
top_level_types = {key: type(value).__name__ for key, value in data.items()}
print(json.dumps(top_level_types, indent=2))
serialized_length = len(json.dumps(data))
print(f"\nTotal size: {serialized_length / 1024:.1f} KB")

## Section 8: ImageInsightsAnalyzer Class

In [None]:
class ImageInsightsAnalyzer:
    """Analyzes a single image relative to dataset statistics.

    The dataset statistics are read once, at construction time, from a JSON
    file. ``analyze_image`` expects that document to contain
    ``crack_analysis.metrics`` and ``vegetation_analysis.metrics``.
    """

    # (metric key, exclusive threshold, warning message), evaluated in order.
    _WARNING_RULES = (
        ('crack_risk_score', 0.5,
         'High crack density detected. Surface integrity at risk. Schedule inspection.'),
        ('vegetation_risk_score', 0.5,
         'Significant biological growth detected. Accelerates moisture retention and degradation.'),
        ('moisture_intensity', 0.6,
         'High moisture detected in multiple zones. Risk of accelerated corrosion.'),
    )

    # Metrics averaged into the combined-risk check.
    _COMBINED_RISK_KEYS = ('crack_risk_score', 'vegetation_risk_score', 'moisture_intensity')

    def __init__(self, dataset_analytics_path='dataset_analytics.json'):
        """Initialize with dataset statistics.

        Args:
            dataset_analytics_path: Path of the dataset analytics JSON file.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        with open(dataset_analytics_path, 'r') as f:
            self.dataset = json.load(f)

    @staticmethod
    def _metric(image_metrics, key):
        """Return image_metrics[key], treating a missing or None value as 0."""
        value = image_metrics.get(key, 0)
        return 0 if value is None else value

    def analyze_image(self, image_metrics):
        """
        Analyze a single image.

        Args:
            image_metrics: Dict with keys:
                - crack_risk_score
                - vegetation_risk_score
                - moisture_intensity
                - stress_index
                - etc.

        Returns:
            Dict with keys 'summary', 'health_score' (converted with int()),
            'risk_level', 'radar_chart_data', 'overlap_analysis',
            'contribution_breakdown' and 'insights'.

        Raises:
            KeyError: If the loaded dataset lacks 'crack_analysis' or
                'vegetation_analysis', or their 'metrics' entry.
        """
        # Imported lazily so the class can be defined without the pipeline
        # package being importable. Only helpers that are actually called are
        # imported; insights come from generate_detailed_insights below.
        from analytics_pipeline.export_json import (
            compute_health_score,
            get_risk_level,
            compute_overlap_analysis,
            compute_contribution_breakdown,
            generate_radar_chart_data,
            generate_summary
        )

        # Extract dataset stats
        crack_stats = self.dataset['crack_analysis']['metrics']
        vegetation_stats = self.dataset['vegetation_analysis']['metrics']

        # On a key collision the vegetation entry overrides the crack entry.
        all_stats = {**crack_stats, **vegetation_stats}

        # Compute health and risk
        health_score = compute_health_score(image_metrics)
        risk_level = get_risk_level(health_score)

        # Generate insights
        insights = self.generate_detailed_insights(image_metrics, all_stats)

        # Overlap and contribution
        overlap = compute_overlap_analysis(image_metrics)
        contribution = compute_contribution_breakdown(image_metrics)

        # Radar chart
        radar_data = generate_radar_chart_data(image_metrics, all_stats)

        return {
            'summary': generate_summary(health_score, risk_level, image_metrics),
            'health_score': int(health_score),
            'risk_level': risk_level,
            'radar_chart_data': radar_data,
            'overlap_analysis': overlap,
            'contribution_breakdown': contribution,
            'insights': insights
        }

    def generate_detailed_insights(self, image_metrics, dataset_stats):
        """Generate actionable insights.

        Args:
            image_metrics: Dict of per-image metrics. Missing or None values
                for the inspected keys are treated as 0.
            dataset_stats: Accepted for interface compatibility; not used by
                the current rules.

        Returns:
            List of at most five dicts with 'type' ('warning' or 'ok') and
            'message'. A single 'ok' entry is returned when no rule fires.
        """
        insights = [
            {'type': 'warning', 'message': message}
            for key, threshold, message in self._WARNING_RULES
            if self._metric(image_metrics, key) > threshold
        ]

        # Combined risk: plain mean of the three headline metrics.
        combined_risk = sum(
            self._metric(image_metrics, key) for key in self._COMBINED_RISK_KEYS
        ) / 3

        if combined_risk > 0.6:
            insights.append({
                'type': 'warning',
                'message': 'Multiple degradation factors detected. Recommend immediate detailed assessment.'
            })
        elif not insights:
            insights.append({
                'type': 'ok',
                'message': 'Surface condition is stable. Continue routine monitoring.'
            })

        return insights[:5]  # Limit to top 5


print('✅ ImageInsightsAnalyzer class defined')

## Section 9: Example Per-Image Analysis

In [None]:
# Simulated metrics for one image, used to demonstrate the analyzer.
example_image_metrics = dict(
    crack_risk_score=0.62,
    vegetation_risk_score=0.35,
    moisture_intensity=0.58,
    stress_index=0.42,
    thermal_hotspot_score=0.21,
    material_durability=0.68,
)

# Run the analysis against the exported dataset statistics.
print('Analyzing example image...\n')
analyzer = ImageInsightsAnalyzer('dataset_analytics.json')
image_analysis = analyzer.analyze_image(example_image_metrics)

# Headline results.
print('Image Analysis Results:')
print(f"  Health Score: {image_analysis['health_score']}/100")
print(f"  Risk Level: {image_analysis['risk_level']}")
print(f"  Summary: {image_analysis['summary']}")

# One line per insight, tagged with its upper-cased type.
print(f"\n  Insights:")
for entry in image_analysis['insights']:
    tag = entry['type'].upper()
    print(f"    [{tag}] {entry['message']}")

# Overlap values are formatted with one decimal and a percent sign.
print(f"\n  Overlap Analysis:")
for name, amount in image_analysis['overlap_analysis'].items():
    print(f"    {name}: {amount:.1f}%")

# Each contribution entry is read by 'feature', 'contribution_to_risk' and 'weight'.
print(f"\n  Feature Contribution:")
for item in image_analysis['contribution_breakdown']:
    print(f"    {item['feature']}: {item['contribution_to_risk']:.1f} (weight: {item['weight']:.2f})")

# Persist the full analysis result next to the notebook.
with open('example_image_insights.json', 'w') as out_file:
    json.dump(image_analysis, out_file, indent=2)
print('\n✅ Example image insights exported to example_image_insights.json')

## Section 10: React Architecture & Data Persistence Guide

### Problem: Data Disappears on Tab Switch

When users switch between tabs (e.g., ImageAnalysis → ImageInsights → ImageAnalysis), the analysis results disappear.

**Root Cause:** Component unmount. When you leave a tab, the component is destroyed, and local state is lost.

### Solution: Shared State Pattern ("Lift State Up")

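Move the `lastAnalysis` state out of the individual tab components and into a React Context provider rendered by the parent component (MainDashboard). The provider stays mounted while tabs are switched, so the state persists across tab switches.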

#### Implementation:

**1. Create AnalysisContext (new file: `frontend/src/contexts/AnalysisContext.js`):**

```jsx
import React, { createContext, useState } from 'react';

export const AnalysisContext = createContext();

export const AnalysisProvider = ({ children }) => {
  const [lastAnalysis, setLastAnalysis] = useState(null);
  
  return (
    <AnalysisContext.Provider value={{ lastAnalysis, setLastAnalysis }}>
      {children}
    </AnalysisContext.Provider>
  );
};
```

**2. Update MainDashboard.jsx:**

```jsx
import { AnalysisProvider } from './contexts/AnalysisContext';
import ImageAnalysis from './pages/ImageAnalysis';
import ImageInsights from './pages/ImageInsights';

const MainDashboard = () => {
  return (
    <AnalysisProvider>
      <Navbar />
      <div className="tabs-container">
        <Tab label="Image Analysis"><ImageAnalysis /></Tab>
        <Tab label="Image Insights"><ImageInsights /></Tab>
        {/* other tabs */}
      </div>
    </AnalysisProvider>
  );
};
```

**3. Update ImageAnalysis.jsx:**

```jsx
import { useContext } from 'react';
import { AnalysisContext } from '../contexts/AnalysisContext';

const ImageAnalysis = () => {
  const { setLastAnalysis } = useContext(AnalysisContext);
  
  const handleAnalyze = async (file) => {
    const response = await fetch('/api/analyze', { /* ... */ });
    const results = await response.json();
    setLastAnalysis(results); // ← Persist the results
    setImages(results.images);
  };
  
  return (
    <div>{/* render 9 output images */}</div>
  );
};
```

**4. Create ImageInsights.jsx (new component):**

```jsx
import { useContext, useEffect, useState } from 'react';
import { AnalysisContext } from '../contexts/AnalysisContext';

const ImageInsights = () => {
  const { lastAnalysis } = useContext(AnalysisContext);
  const [insights, setInsights] = useState(null);
  
  useEffect(() => {
    if (lastAnalysis) {
      // Fetch insights from backend
      fetch('/api/analytics/last_image')
        .then(r => r.json())
        .then(data => setInsights(data));
    }
  }, [lastAnalysis]);
  
  if (!lastAnalysis || !insights) {
    return <div>No image analyzed yet. Analyze an image first.</div>;
  }
  
  return (
    <div>
      <h2>Image Insights</h2>
      <div className="health-card">
        <h3>Health Score: {insights.health_score}/100</h3>
        <p>Risk Level: {insights.risk_level}</p>
      </div>
      {/* Render radar chart, overlap, contribution, insights */}
    </div>
  );
};
```

### Result:

✅ User uploads image in ImageAnalysis  
✅ Results saved to context (lastAnalysis)  
✅ User switches to ImageInsights → data persists  
✅ User switches back to ImageAnalysis → data still there  
✅ Can switch tabs infinitely without losing results  

### Backend Requirements:

```python
# /api/analyze - Returns metrics + image URLs
@app.route('/api/analyze', methods=['POST'])
def analyze_image():
    """Handle POST /api/analyze and return the analysis result as JSON.

    The response carries two keys: 'images' (a list of output image URLs)
    and 'metrics' (a dict of per-image scores). The values shown here are
    illustrative placeholders; the actual image processing is elided.
    """
    # ... process image ...
    return jsonify({
        'images': ['url1', 'url2', ...],  # 9 output images
        'metrics': {
            'crack_risk_score': 0.62,
            'vegetation_risk_score': 0.35,
            # ... etc
        }
    })

# /api/analytics/last_image - Returns detailed insights
@app.route('/api/analytics/last_image')
def get_last_image_insights():
    """Handle /api/analytics/last_image and return detailed insights as JSON.

    This is a sketch: the two steps below are described only as comments.
    """
    # Load last_analysis.json
    # Use ImageInsightsAnalyzer to compute insights
    # NOTE(review): `insights` is not assigned anywhere in this sketch; the two
    # steps above must be implemented to produce it before this will run.
    return jsonify(insights)
```