In [1]:
import shap
import sys
sys.path.append("D:/ASOML/SNOCONE")
import numpy as np
import pandas as pd
import os
from CNN_benchmarks import*
from CNN_memoryOptimization import*
from CNN_preProcessing import*
from CNN_benchmarks import*
from CNN_modelArchitectureBlocks import*
import tensorflow as tf
import gc
import psutil
import matplotlib.pyplot as plt
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import threading
# Activation name handed to model_implementation() when a model is rebuilt later in the notebook.
final_activation = 'relu'

  from .autonotebook import tqdm as notebook_tqdm


modules imported


In [2]:
def run_shap_optimized_gpu_cpu(weights_path, X_sample, feature_names, featNo, architecture, final_activation, custom_loss_fn, output_dir=None):
    """
    Rank input features by mean absolute SHAP value for a trained CNN.

    The model is rebuilt with ``model_implementation``, its weights are loaded
    from ``weights_path``, and it is wrapped so that SHAP operates on inputs
    downsampled to 128x128 (the wrapper resizes them back to 256x256 before
    calling the real model). A handful of samples are drawn at random from
    ``X_sample`` for the SHAP background and for the explained set, SHAP values
    are computed with ``shap.GradientExplainer``, and the absolute values are
    averaged per feature channel.

    Parameters
    ----------
    weights_path : str
        Path passed to ``load_weights``.
    X_sample : numpy.ndarray
        Input stack; axes 1 and 2 are treated as the spatial dimensions and the
        array is resized with ``tf.image.resize``.
        NOTE(review): assumed to be (sample, height, width, feature) - confirm.
    feature_names : sequence of str
        One label per feature channel; must match the length of the computed
        importance vector.
    featNo : int
        Number of feature channels; used for the wrapper's input shape and
        forwarded to ``model_implementation``.
    architecture, final_activation
        Forwarded unchanged to ``model_implementation``.
    custom_loss_fn : callable
        Loss used when compiling the rebuilt model.
    output_dir : str or None
        When given, a CSV and a bar-chart PNG are written there (the directory
        is created if needed).

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature``, ``SHAP_Importance``, ``Normalized_Importance`` and
        ``Rank``, sorted by descending importance.

    Raises
    ------
    ValueError
        If ``X_sample`` is empty or the importance vector does not line up with
        ``feature_names``.
    """
    import tensorflow as tf
    import gc
    import psutil
    import matplotlib.pyplot as plt
    import multiprocessing as mp

    # Spatial size fed to the real model, and the reduced size SHAP works on.
    model_input_size = (256, 256)
    shap_input_size = (128, 128)

    if len(X_sample) == 0:
        raise ValueError("X_sample is empty; nothing to explain")

    def monitor_resources():
        """Print current RAM, CPU and (when GPUtil is importable) GPU usage."""
        memory = psutil.virtual_memory()
        cpu_percent = psutil.cpu_percent(interval=1)
        gpu_info = ""
        try:
            import GPUtil
            gpus = GPUtil.getGPUs()
            if gpus:
                gpu = gpus[0]
                gpu_info = f"GPU: {gpu.memoryUtil*100:.1f}% memory, {gpu.load*100:.1f}% load"
        except Exception:
            # Best effort only: GPUtil may be missing or the driver query may
            # fail. KeyboardInterrupt/SystemExit are deliberately not caught.
            gpu_info = "GPU info unavailable"

        print(f"Memory: {memory.percent:.1f}% ({memory.used/1024**3:.1f}GB/{memory.total/1024**3:.1f}GB)")
        print(f"CPU: {cpu_percent:.1f}% | {gpu_info}")

    def configure_gpu():
        """Enable memory growth on every visible GPU; return True when one is usable."""
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            try:
                # Set memory growth - this is the most important setting
                for gpu in gpus:
                    tf.config.experimental.set_memory_growth(gpu, True)

                print(f"GPU configured: {len(gpus)} device(s) with memory growth enabled")
                return True
            except RuntimeError as e:
                print(f"GPU setup error: {e}")
                return False
        else:
            print("No GPUs found")
            return False

    def configure_cpu():
        """Request default TensorFlow threading and report the core count."""
        try:
            # 0 lets TensorFlow choose the thread counts itself.
            tf.config.threading.set_intra_op_parallelism_threads(0)
            tf.config.threading.set_inter_op_parallelism_threads(0)
        except RuntimeError as e:
            # The setters refuse to run once the TF runtime is initialised,
            # which is the normal state in a re-run notebook cell; keep going
            # with whatever threading configuration is already active.
            print(f"CPU threading setup skipped: {e}")

        cpu_cores = mp.cpu_count()
        optimal_threads = max(1, cpu_cores - 2)  # Leave 2 cores for system

        print(f"CPU configured: {cpu_cores} cores available, using {optimal_threads} threads")
        return optimal_threads

    def sample_subset(X_data, max_samples, name):
        """Return at most ``max_samples`` rows of ``X_data`` as float32."""
        print(f"Preprocessing {name} data using CPU...")

        if len(X_data) > max_samples:
            indices = np.random.choice(len(X_data), max_samples, replace=False)
            result = X_data[indices]
        else:
            result = X_data[:max_samples]

        return result.astype(np.float32)

    def downsample_for_shap(X_data, target_size=shap_input_size):
        """Downsample images for SHAP to reduce memory usage"""
        print(f"Downsampling from {X_data.shape[1:3]} to {target_size}")
        X_resized = tf.image.resize(X_data, target_size)
        return X_resized.numpy()

    def create_downsampled_model(original_model, original_size=model_input_size, target_size=shap_input_size):
        """Wrap ``original_model`` so it accepts ``target_size`` inputs.

        The wrapper resizes its input back to ``original_size`` with a Keras
        ``Resizing`` layer (bilinear, the same default method as
        ``tf.image.resize``) so the graph is built from layers only.
        """
        downsampled_input = tf.keras.layers.Input(shape=(*target_size, featNo))
        upsampled = tf.keras.layers.Resizing(
            original_size[0], original_size[1], interpolation="bilinear"
        )(downsampled_input)
        output = original_model(upsampled)
        return tf.keras.Model(inputs=downsampled_input, outputs=output)

    def calculate_feature_importance(shap_vals):
        """Average |SHAP| over every axis except the feature axis."""
        print("Calculating feature importance using CPU...")

        abs_vals = np.abs(shap_vals)
        if abs_vals.ndim >= 4:
            # Axis 3 is the feature axis of (samples, height, width, features).
            # NOTE(review): arrays with extra trailing axes (e.g. a per-output
            # axis from newer SHAP releases) are averaged over those too -
            # confirm the layout for the installed SHAP version.
            reduce_axes = tuple(ax for ax in range(abs_vals.ndim) if ax != 3)
            return np.mean(abs_vals, axis=reduce_axes)
        return np.mean(abs_vals, axis=0)

    print("Initial resources:")
    monitor_resources()

    # Configure hardware
    gpu_available = configure_gpu()
    configure_cpu()
    device_name = '/GPU:0' if gpu_available else '/CPU:0'

    # Sample sizing based on available resources (kept small for memory).
    if gpu_available:
        max_background = 10
        max_explain = 5
    else:
        max_background = 5
        max_explain = 3
    batch_size = 1  # Process one sample at a time on either device

    # The two draws are independent random subsets of the same array. They are
    # run sequentially: the work is trivial and threads added nothing here.
    background = sample_subset(X_sample, max_background, "background")
    X_explain = sample_subset(X_sample, max_explain, "explain")

    print(f"Background samples: {len(background)}, Explain samples: {len(X_explain)}")

    # Apply downsampling before SHAP
    print("Downsampling images for SHAP...")
    original_shape = background.shape
    background = downsample_for_shap(background, shap_input_size)
    X_explain = downsample_for_shap(X_explain, shap_input_size)
    print(f"Downsampled from {original_shape[1:3]} to {background.shape[1:3]}")

    # Load model on GPU if available
    print("Loading model...")
    with tf.device(device_name):
        original_model = model_implementation(featNo, architecture, final_activation)
        original_model.load_weights(weights_path)
        original_model.compile(optimizer='adam', loss=custom_loss_fn, metrics=[masked_rmse, masked_mae, masked_mse])

        # Create wrapper model for downsampled inputs
        model = create_downsampled_model(original_model, model_input_size, shap_input_size)
        print("Created wrapper model for downsampled inputs")

    print("After model loading:")
    monitor_resources()

    print("Creating SHAP GradientExplainer...")

    try:
        # GradientExplainer expects the model object, not a function
        explainer = shap.GradientExplainer(model, background)

        print("After explainer creation:")
        monitor_resources()

        print("Calculating SHAP values...")
        all_shap_values = []
        total_batches = (len(X_explain) - 1) // batch_size + 1

        for i in range(0, len(X_explain), batch_size):
            batch = X_explain[i:min(i + batch_size, len(X_explain))]
            batch_no = i // batch_size + 1

            print(f"Processing batch {batch_no}/{total_batches}")

            try:
                with tf.device(device_name):
                    shap_val = explainer.shap_values(batch, nsamples=10)  # Reduced nsamples

                if isinstance(shap_val, list):
                    shap_val = shap_val[0]

                all_shap_values.append(shap_val)

                # Monitor every few batches
                if i % 2 == 0:
                    monitor_resources()

            except Exception as e:
                # A failed batch is skipped so the remaining ones still count.
                print(f"Error processing batch {batch_no}: {e}")

        if all_shap_values:
            print("Combining results using CPU...")
            shap_values = np.concatenate(all_shap_values, axis=0)
        else:
            raise RuntimeError("No SHAP values calculated successfully")

    except Exception as e:
        print(f"Primary SHAP calculation failed: {e}")
        print("Trying fallback with minimal samples...")

        tiny_background = background[:2]  # Only 2 background samples
        tiny_explain = X_explain[:1]      # Only 1 explain sample

        with tf.device('/CPU:0'):  # Force CPU
            explainer = shap.GradientExplainer(model, tiny_background)
            shap_values = explainer.shap_values(tiny_explain, nsamples=5)

        if isinstance(shap_values, list):
            shap_values = shap_values[0]

    print(f"Final SHAP values shape: {shap_values.shape}")
    print("After SHAP calculation:")
    monitor_resources()

    feature_importance = np.asarray(calculate_feature_importance(shap_values))

    # Fail with a readable message instead of an opaque DataFrame error when
    # the SHAP layout or the supplied labels do not line up.
    if feature_importance.ndim != 1 or len(feature_importance) != len(feature_names):
        raise ValueError(
            f"Feature importance has shape {feature_importance.shape} but "
            f"{len(feature_names)} feature names were supplied"
        )

    # Guard the normalisation: if every importance is 0 (or NaN) the plain
    # division would fill the column with NaN/inf.
    peak = np.max(feature_importance) if feature_importance.size else 0.0
    if peak > 0:
        normalized_importance = feature_importance / peak
    else:
        normalized_importance = np.zeros_like(feature_importance)

    results = pd.DataFrame({
        'Feature': feature_names,
        'SHAP_Importance': feature_importance,
        'Normalized_Importance': normalized_importance
    }).sort_values('SHAP_Importance', ascending=False).reset_index(drop=True)

    results['Rank'] = range(1, len(results) + 1)

    print("\nFeature Importance Rankings (GPU+CPU Optimized):")
    print(results[['Rank', 'Feature', 'SHAP_Importance']].to_string(index=False))

    # Release the model and SHAP buffers before plotting / file output.
    del original_model, model, explainer, shap_values
    if gpu_available:
        tf.keras.backend.clear_session()
    gc.collect()

    if output_dir is not None:
        print("Saving results using CPU...")

        os.makedirs(output_dir, exist_ok=True)

        csv_path = os.path.join(output_dir, 'feature_importance_gpu_cpu_optimized.csv')
        results.to_csv(csv_path, index=False)
        print(f"CSV saved: {csv_path}")

        # Plot on the calling thread: pyplot is not thread-safe, and the
        # previous single-worker executor was awaited immediately anyway.
        fig = None
        try:
            fig, ax = plt.subplots(1, 1, figsize=(12, 8))
            top_features = results.head(15)

            colors = plt.cm.viridis(top_features['Normalized_Importance'].to_numpy())
            bars = ax.barh(range(len(top_features)), top_features['SHAP_Importance'], color=colors)
            ax.set_yticks(range(len(top_features)))
            ax.set_yticklabels(top_features['Feature'])
            ax.set_xlabel('SHAP Importance (GPU+CPU Optimized)')
            ax.set_title('Top 15 SWE Feature Importance (GPU+CPU Optimized)')
            ax.invert_yaxis()

            for bar in bars:
                width = bar.get_width()
                ax.text(width, bar.get_y() + bar.get_height()/2,
                        f'{width:.3f}', ha='left', va='center', fontsize=8)

            fig.tight_layout()

            plot_path = os.path.join(output_dir, 'feature_importance_gpu_cpu_plot.png')
            fig.savefig(plot_path, dpi=300, bbox_inches='tight')
            print(f"Plot saved: {plot_path}")
            plt.show()

        except Exception as e:
            # Plotting is a convenience; the table has already been saved.
            print(f"Plotting failed: {e}")
        finally:
            if fig is not None:
                plt.close(fig)

    print("Final resources:")
    monitor_resources()

    return results

In [None]:
# establish parameters
Domain = "Rockies"
basin_name = "Conejos"
WorkspaceBase = f"D:/ASOML/{Domain}/"
# WorkspaceBase and ModelOutputs both end in "/", so derived paths are
# concatenated without inserting another separator (avoids "//" in paths).
ModelOutputs = f"{WorkspaceBase}modelOutputs/BasinSpecifics/"
time_code = "20250716_121023"
model_interation = f"{basin_name}_{time_code}"
feature_Listcsv = f"{ModelOutputs}{Domain}_{basin_name}_model_featureList_summary.csv"
best_weights = f"{ModelOutputs}{model_interation}/best_weights_{model_interation}.h5"
start_year = 2022
end_year = 2024
shap_output = f"{ModelOutputs}{model_interation}/shap_results/"
architecture = "AdvancedBaseline"
# NOTE(review): this flag is not used below; the stack builder is called with
# shapeChecks="Y". Confirm which value is intended.
shapeChecks = "N"

# Upper bound on samples kept for SHAP, and the seed that makes the random
# subsampling (here and inside run_shap_optimized_gpu_cpu) repeatable.
MAX_SHAP_SAMPLES = 100
RANDOM_SEED = 42

# workspaces
phv_features = WorkspaceBase + "features/scaled/"
tree_workspace = WorkspaceBase + "treeCover/"
land_workspace = WorkspaceBase + "landCover/"
modelOuptuts = WorkspaceBase + "modelOutputs/"
DMFSCAWorkspace = WorkspaceBase + "Rockies_DMFSCA/"

nonFreezeLayers = -3
learningRateTesting = 1e-4
penalty_weight = 0.6
penalties_used = ["fSCA", "LowSnow"]
low_snow_weight = 0.2
swe_threshold = 0.01
fsca_threshold = 0.01
low_threshold = 0.05
penalty_scale = 2.0

## get list of features
# The summary CSV holds one column per model run, keyed by its time code.
feat_df = pd.read_csv(feature_Listcsv)
feat_names = feat_df[[time_code]].dropna().astype(str)
feature_names = feat_names[time_code].tolist()
featNo = len(feature_names)
print(feature_names)

print(basin_name)
X_sample, y_sample, featureNames = target_feature_stacks_basins(start_year=start_year,
                                                   end_year=end_year,
                                                   WorkspaceBase=WorkspaceBase,
                                                   ext="nonull_fnl.tif",
                                                   vegetation_path=tree_workspace,
                                                   landCover_path=land_workspace,
                                                   phv_path=phv_features,
                                                   target_shape=(256, 256),
                                                   basin_name=basin_name,
                                                   shapeChecks="Y")

print("Preprocessing data for memory efficiency...")
print(featureNames)

# The stacked features, not the CSV list, determine the model's input depth.
# Flag any disagreement, since the weights were saved for a specific feature set.
if list(featureNames) != feature_names:
    print(f"WARNING: stacked features ({len(featureNames)}) differ from the "
          f"feature list recorded for {time_code} ({len(feature_names)})")
featNo = len(featureNames)

# Reduce sample size if too large
np.random.seed(RANDOM_SEED)
if len(X_sample) > MAX_SHAP_SAMPLES:
    print(f"Reducing sample size from {len(X_sample)} to {MAX_SHAP_SAMPLES}")
    sample_indices = np.random.choice(len(X_sample), MAX_SHAP_SAMPLES, replace=False)
    X_sample = X_sample[sample_indices]
    y_sample = y_sample[sample_indices]

# Convert to float32 to save memory
X_sample = X_sample.astype(np.float32)
y_sample = y_sample.astype(np.float32)

print(f"Final sample shapes: X={X_sample.shape}, y={y_sample.shape}")
print(f"Memory usage: X={X_sample.nbytes/1024**3:.2f}GB, y={y_sample.nbytes/1024**3:.2f}GB")

# Force garbage collection
gc.collect()

# Create your loss function first
custom_loss_fn = make_combined_swe_fsca_lowsnow_loss(
                base_loss_fn=MeanSquaredError(),
                penalty_weight=penalty_weight,
                low_snow_weight=low_snow_weight,
                swe_threshold=swe_threshold,
                fsca_threshold=fsca_threshold,
                low_threshold=low_threshold,
                penalty_scale=penalty_scale,
                mask_value=-1
            )

# Run the optimized SHAP analysis. The notebook-level final_activation is
# passed through so this run and the model-inspection cell build the same model.
results = run_shap_optimized_gpu_cpu(weights_path=best_weights,
                                     X_sample=X_sample,
                                     feature_names=featureNames,
                                     featNo=featNo,
                                     architecture=architecture,
                                     final_activation=final_activation,
                                     custom_loss_fn=custom_loss_fn,
                                     output_dir=shap_output)

print("\n=== SHAP Analysis Complete ===")
print(f"Results saved to: {shap_output}")
print("Top 3 most important features:")
for i in range(min(3, len(results))):
    print(f"{i+1}. {results.iloc[i]['Feature']}: {results.iloc[i]['SHAP_Importance']:.4f}")

['fSCA', 'DMFSCA', 'DOY', 'Tree Density', 'LandCover', 'ASOML_CON_waterbodies_1_0_60_albn83', 'ASOML_CON_windScour_60_albn83_scl', 'ASO_CON_aspect_albn83_60m_scl', 'ASO_CON_curv_albn83_60m_scl', 'ASO_CON_DaH_albn83_60m_scl', 'ASO_CON_dem_albn83_60m_scl', 'ASO_CON_GausCurv_albn83_60m_scl', 'ASO_CON_gradMag_60_albn83_scl', 'ASO_CON_lat_albn83_60m_scl', 'ASO_CON_lon_albn83_60m_scl', 'ASO_CON_PlanCurv_albn83_60m_scl', 'ASO_CON_ProCurv_albn83_60m_scl', 'ASO_CON_slope_albn83_60m_scl', 'ASO_CON_stdElv_60_albn83_scl', 'ASO_CON_STDslope_albn83_60m_scl', 'ASO_CON_TPI_albn83_60m_scl', 'ASO_CON_TRASP_albn83_60m_scl']
Conejos
Processing year 2022
Processing year 2023
Processing year 2024
Preprocessing data for memory efficiency...
['fSCA', 'DMFSCA', 'DOY', 'Tree Density', 'LandCover', 'ASOML_CON_waterbodies_1_0_60_albn83', 'ASOML_CON_windScour_60_albn83_scl', 'ASO_CON_aspect_albn83_60m_scl', 'ASO_CON_curv_albn83_60m_scl', 'ASO_CON_DaH_albn83_60m_scl', 'ASO_CON_dem_albn83_60m_scl', 'ASO_CON_GausCurv





In [None]:
import h5py

# List the top-level HDF5 groups stored in the saved weights file.
print("=== WEIGHTS FILE ANALYSIS ===")
with h5py.File(best_weights, 'r') as weights_file:
    print(f"Weights file path: {best_weights}")
    print("Layer names in weights file:")
    # Collect the keys up front, then echo them one per line.
    layer_names = list(weights_file.keys())
    for layer_key in layer_names:
        print(f"  {layer_key}")
    print(f"Total layers in file: {len(layer_names)}")

In [None]:
# Rebuild the architecture in memory and list its layers by index, name and class.
print("\n=== CURRENT MODEL ANALYSIS ===")
model = model_implementation(featNo, architecture, final_activation)
print(f"Current model layers: {len(model.layers)}")
print("Current model layer names:")
for position, keras_layer in enumerate(model.layers):
    layer_kind = type(keras_layer).__name__
    print(f"  {position}: {keras_layer.name} ({layer_kind})")