In [None]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:

# ==============================================================================
# Damage Cost Estimation Script for RescueNet (using Ground Truth Labels)
# Includes Pixel Count Validation Checks
# ==============================================================================
# Project: Semantic Segmentation on RescueNet Dataset with fine-tuned model
# Task: Damage Model Implementation (Task 2)
# Team: Bryant Sundell, Josiah Keime, Jonathan Hansen (Based on Proposal)
# Date: 2025-04-06 # Current date placeholder

import os
import cv2 as cv  # OpenCV for image loading
import numpy as np
import time # To time the processing

# ==============================================================================
# CONFIGURATION SECTION - EDIT VALUES HERE
# ==============================================================================

# --- Dataset Class Labels ---
# Class name -> integer id used in the label masks. These should match the
# RescueNet dataset definitions. The ids are consecutive starting at 0, so a
# name's id is simply its position in the tuple below.
CLASS_LABELS = {
    class_name: class_id
    for class_id, class_name in enumerate((
        'Background',
        'Water',
        'Building_No_Damage',
        'Building_Minor_Damage',
        'Building_Major_Damage',
        'Building_Total_Destruction',
        'Vehicle',
        'Road-Clear',
        'Road-Blocked',
        'Tree',
        'Pool',
    ))
}

# --- Cost Factors (per Square Meter) ---
# EDIT THESE VALUES TO REFINE COST ESTIMATION
# Costs are written per class name ($/sq meter, 0.0 = no cost) and re-keyed by
# the class's integer label id, which is what the calculation function looks up.
# These are initial ROUGH ESTIMATES.
COST_FACTORS_PER_SQ_M = {
    CLASS_LABELS[class_name]: dollars_per_sq_m
    for class_name, dollars_per_sq_m in {
        'Background': 0.0,
        'Water': 0.0,
        'Building_No_Damage': 0.0,            # undamaged buildings cost nothing
        'Building_Minor_Damage': 50.0,        # rough estimate: minor repairs
        'Building_Major_Damage': 300.0,       # rough estimate: major repairs / partial rebuild
        'Building_Total_Destruction': 1200.0, # rough estimate: demolition / rebuild
        'Vehicle': 0.0,                       # vehicles are not priced in this model
        'Road-Clear': 0.0,                    # clear roads cost nothing
        'Road-Blocked': 20.0,                 # rough estimate: debris clearing / road access
        'Tree': 0.0,                          # standing/fallen trees are not priced in this model
        'Pool': 0.0,                          # pools are not priced
    }.items()
}

# --- Physical Area Assumption ---
# !! IMPORTANT: ASSUMPTION !! Ground Sample Distance (GSD), in meters per pixel.
# 0.1 is only an example; replace it with the real GSD from the RescueNet
# documentation if it can be found.
GSD_METERS_PER_PIXEL = 0.1
# One pixel covers a GSD x GSD square on the ground.
AREA_PER_PIXEL_SQ_M = GSD_METERS_PER_PIXEL * GSD_METERS_PER_PIXEL

# --- Data Path ---
# Directory holding the ground truth label images to analyse
# (e.g. the test, validation or training split). Example path; adjust as needed.
LABEL_DATA_DIR = '/content/drive/MyDrive/comp-vision-project/data/RescueNet.zip_(Unzipped Files)/test/test-label-img/'

# ==============================================================================
# CORE CALCULATION FUNCTION (with Validation)
# ==============================================================================

def calculate_damage_cost_with_validation(label_image_path, class_labels, cost_factors, area_per_pixel):
    """
    Estimate the damage cost of one label image and sanity-check the pixel counts.

    For every class in ``class_labels`` the pixels equal to that class's label
    value are counted, converted to an area with ``area_per_pixel`` and priced
    with ``cost_factors``. Two validation checks then print a ``!!! WARNING``
    message when they fail (they never raise and never change the result):

    1. The per-class counts must add up to the number of pixels in the image.
       This fails when the image contains label values that are missing from
       ``class_labels`` (those values are listed in the warning).
    2. Each per-class count must agree with an independent ``np.unique`` tally.

    Args:
        label_image_path (str): Path to the grayscale label image file.
        class_labels (dict): Dictionary mapping class names to integer label values.
        cost_factors (dict): Dictionary mapping integer label values to cost/sq meter.
            Labels without an entry are treated as costing 0.0.
        area_per_pixel (float): Real-world area represented by one pixel (in sq meters).

    Returns:
        tuple: (total_cost, pixel_counts_dict)
               total_cost (float): Total estimated damage cost for the image.
               pixel_counts_dict (dict): Dictionary mapping class names to their
                                         pixel counts (plain Python ints).
               ``(0.0, {})`` is returned if the image cannot be loaded or any
               error occurs while processing it; a message is printed in that case.
    """
    try:
        # NOTE(review): IMREAD_GRAYSCALE assumes the label files are single-channel
        # masks whose pixel values are the class ids. If they are colour/palette
        # encoded, the grayscale conversion would not preserve the ids -- confirm.
        label_image = cv.imread(label_image_path, cv.IMREAD_GRAYSCALE)
        if label_image is None:
            print(f"Warning: Could not load image {label_image_path}. Skipping.")
            return 0.0, {}

        image_name = os.path.basename(label_image_path)

        # Image dimensions, used by the sum validation below
        img_height, img_width = label_image.shape
        total_image_pixels = img_height * img_width

        total_cost = 0.0
        pixel_counts_loop = {}  # class name -> pixel count (primary method)
        total_pixels_counted_loop = 0

        # --- Method 1: one boolean mask per class ---
        for class_name, label_value in class_labels.items():
            # int(...) keeps numpy scalar types out of the returned values
            num_pixels = int(np.count_nonzero(label_image == label_value))
            pixel_counts_loop[class_name] = num_pixels
            total_pixels_counted_loop += num_pixels

            cost_per_sq_m = cost_factors.get(label_value, 0.0)
            # Only classes with a positive cost factor contribute
            if cost_per_sq_m > 0:
                total_cost += num_pixels * area_per_pixel * cost_per_sq_m
        # --- End Method 1 ---

        # Independent tally of every label value present in the image
        unique_labels, unique_counts_array = np.unique(label_image, return_counts=True)
        unique_counts_dict = dict(zip(unique_labels.tolist(), unique_counts_array.tolist()))

        # === VALIDATION CHECKS ===

        # --- Validation 1: Total Pixel Sum Check ---
        if total_pixels_counted_loop != total_image_pixels:
            print(f"\n!!! WARNING: Pixel count SUM mismatch for {image_name} !!!")
            print(f"    Total pixels in image (H*W): {total_image_pixels}")
            print(f"    Sum of pixels counted across classes (Loop): {total_pixels_counted_loop}")
            # Name the label values that no class accounts for, if any
            known_values = set(class_labels.values())
            unknown_values = sorted(v for v in unique_counts_dict if v not in known_values)
            if unknown_values:
                print(f"    Label values present but not in class_labels: {unknown_values}")
            print()
        # --- End Validation 1 ---

        # --- Validation 2: Compare Loop Counts with np.unique ---
        for class_name, label_value in class_labels.items():
            count_from_loop = pixel_counts_loop.get(class_name, 0)
            # A label value absent from the image has no np.unique entry -> 0
            count_from_unique = unique_counts_dict.get(label_value, 0)

            # Differing counts imply at least one is non-zero, so no extra zero check
            if count_from_loop != count_from_unique:
                print(f"\n!!! WARNING: np.unique validation mismatch for class '{class_name}' ({label_value}) in {image_name} !!!")
                print(f"    Count from loop: {count_from_loop}")
                print(f"    Count from np.unique: {count_from_unique}\n")
        # --- End Validation 2 ---

        # === END VALIDATION CHECKS ===

        # The counts returned are the ones from the primary (mask) method
        return total_cost, pixel_counts_loop

    except Exception as e:
        # Deliberate best-effort boundary: one bad image must not abort a batch run
        print(f"Error processing image {label_image_path}: {e}")
        return 0.0, {}

# ==============================================================================
# MAIN PROCESSING SCRIPT
# ==============================================================================

if __name__ == "__main__":
    # Batch driver: prints the configuration, runs the cost estimation on every
    # .png in LABEL_DATA_DIR, then prints totals and a small sample of results.

    # Only the script entry point needs these (stdout capture for warning detection).
    import contextlib
    import io

    print("======================================================")
    print(" Starting Damage Cost Estimation Process ")
    print("======================================================")
    print("\n--- Configuration ---")
    print(f"  Label Directory: {LABEL_DATA_DIR}")
    print(f"  GSD Assumption (m/pixel): {GSD_METERS_PER_PIXEL}")
    print(f"  Area per Pixel (sq m): {AREA_PER_PIXEL_SQ_M:.4f}")
    print("  Cost Factors ($/sq m):")
    # Reverse lookup label value -> class name; the first name wins if two
    # classes share a label value.
    label_to_name = {}
    for name, val in CLASS_LABELS.items():
        label_to_name.setdefault(val, name)
    # Sort by label value for consistent order
    for label_val in sorted(COST_FACTORS_PER_SQ_M):
        cost = COST_FACTORS_PER_SQ_M[label_val]
        if label_val in label_to_name:
            print(f"    - {label_to_name[label_val]} ({label_val}): ${cost:.2f}")
    print("-" * 30)

    start_time = time.time()

    # Check if the label directory exists
    if not os.path.isdir(LABEL_DATA_DIR):
        print(f"\nERROR: Label directory not found: {LABEL_DATA_DIR}")
        print("Please check the LABEL_DATA_DIR path in the configuration section.")

    else:
        # Get list of all image files (assuming .png, adjust if needed).
        # Sorted so the processing order and the sample below are reproducible
        # (os.listdir returns entries in arbitrary order).
        try:
            all_label_files = sorted(
                os.path.join(LABEL_DATA_DIR, f)
                for f in os.listdir(LABEL_DATA_DIR)
                if f.lower().endswith('.png')
            )
            print(f"\nFound {len(all_label_files)} label images to process.")
        except OSError as e:
            print(f"\nERROR: Cannot access or list files in directory: {LABEL_DATA_DIR}")
            print(f"  {e}")
            all_label_files = [] # Prevent further errors

        if not all_label_files:
             print("\nNo label files found or accessible to process.")

        else:
            total_dataset_cost = 0.0
            all_results = {} # Store results for all images: filename -> {total_cost, pixel_counts}
            images_with_warnings = 0

            print("\n--- Processing Images ---")
            # --- Process each image ---
            for i, label_file_path in enumerate(all_label_files):
                image_filename = os.path.basename(label_file_path)

                # The calculation function reports validation problems only by
                # printing "!!! WARNING" lines, so capture what it prints, echo it
                # unchanged, and look for that marker to count affected images.
                captured_output = io.StringIO()
                with contextlib.redirect_stdout(captured_output):
                    cost, counts = calculate_damage_cost_with_validation( # Use the function with validation
                        label_file_path,
                        CLASS_LABELS,
                        COST_FACTORS_PER_SQ_M,
                        AREA_PER_PIXEL_SQ_M
                    )
                messages = captured_output.getvalue()
                if messages:
                    print(messages, end="")
                if "!!! WARNING" in messages:
                    images_with_warnings += 1

                # Store results even if warnings occurred
                if counts: # Only store if image was loaded successfully
                    all_results[image_filename] = {'total_cost': cost, 'pixel_counts': counts}
                    total_dataset_cost += cost

                # Optional: Print progress periodically
                if (i + 1) % 50 == 0 or (i + 1) == len(all_label_files):
                    print(f"  Processed {i + 1}/{len(all_label_files)} images...")

            print("\n--- Processing Complete ---")
            end_time = time.time()
            print(f"Total processing time: {end_time - start_time:.2f} seconds")
            if images_with_warnings > 0:
                 print(f"!!! Found pixel count validation warnings in {images_with_warnings} image(s). Please review logs above. !!!")
            print(f"Total estimated cost across {len(all_results)} successfully processed images: ${total_dataset_cost:,.2f}")

            # --- Display Sample Results ---
            if all_results:
                print("\nSample Results (first 5 images):")
                for i, (filename, data) in enumerate(all_results.items()):
                    if i >= 5:
                        break
                    # Format pixel counts for readability
                    counts_str_parts = []
                    for class_name in sorted(data['pixel_counts'], key=lambda name: CLASS_LABELS[name]): # Sort by label value
                         count = data['pixel_counts'][class_name]
                         if count > 0: # Only show classes present in the image
                             counts_str_parts.append(f"{class_name}: {count:,}") # Add comma formatting to counts
                    counts_str = "; ".join(counts_str_parts)

                    print(f"\n- {filename}:")
                    print(f"    Estimated Cost = ${data['total_cost']:,.2f}")
                    print(f"    Pixel Counts: {counts_str if counts_str else 'None found'}")
            else:
                 print("\nNo results to display.")


            # --- Optional: Save results to CSV ---
            # Uncomment the block below if you want to save all results to a CSV file.
            # Requires pandas: pip install pandas
            # ---
            # import pandas as pd
            # print("\n--- Saving Results to CSV ---")
            # results_list = []
            # for filename, data in all_results.items():
            #      row = {'filename': filename, 'total_cost': data['total_cost']}
            #      # Add pixel counts for each class as separate columns
            #      for class_name, label_value in CLASS_LABELS.items():
            #           row[f'pixels_{class_name}'] = data['pixel_counts'].get(class_name, 0)
            #      results_list.append(row)

            # if results_list:
            #     df = pd.DataFrame(results_list)
            #     # Define output path (adjust as needed)
            #     output_csv_path = '/content/drive/MyDrive/comp-vision-project/results/damage_cost_estimates_gt.csv'
            #     try:
            #         os.makedirs(os.path.dirname(output_csv_path), exist_ok=True) # Ensure directory exists
            #         df.to_csv(output_csv_path, index=False, float_format='%.2f')
            #         print(f"Results saved to: {output_csv_path}")
            #     except Exception as e:
            #          print(f"ERROR saving CSV to {output_csv_path}: {e}")
            # else:
            #      print("No results to save to CSV.")
            # ---

    print("\n======================================================")
    print(" Script Finished ")
    print("======================================================")


# %% [markdown]
# **How to Use:**
#
# 1.  **Configure:** Carefully edit the `CONFIGURATION SECTION`. Pay special attention to `COST_FACTORS_PER_SQ_M` (adjust your rough estimates), `GSD_METERS_PER_PIXEL` (find the real value if possible!), and `LABEL_DATA_DIR`.
# 2.  **Run:** Execute the entire script.
# 3.  **Check Output:**
#     * Look for any `!!! WARNING:` messages printed during processing. These indicate potential issues with the pixel counting for specific images, either the total sum doesn't match or the loop count differs from `np.unique`. Investigate these images if warnings appear.
#     * Review the summary statistics and the sample results to see if the costs seem plausible.
# 4.  **Save Results (Optional):** Uncomment the pandas CSV saving block at the end if you want a detailed file of costs and pixel counts per image.

# %% [markdown]
# **Explanation and How to Use:**
#
# 1.  **Configuration:** Edit the values in the `CONFIGURATION SECTION` block. This is where you set:
#     * `COST_FACTORS_PER_SQ_M`: Change the dollar values per square meter for any class label. Making a value `0.0` means that class won't contribute to the cost.
#     * `GSD_METERS_PER_PIXEL`: **Crucially, update this** if you find the correct Ground Sample Distance for RescueNet, or refine your assumption.
#     * `LABEL_DATA_DIR`: Make sure this points to the correct directory holding the `.png` ground truth label files you want to analyze (e.g., from the `test` split).
# 2.  **Run the Script:** Execute the script. It will:
#     * Print the configuration being used.
#     * Find all `.png` files in the specified directory.
#     * Call `calculate_damage_cost_with_validation` for each image.
#     * The function loads the image, counts pixels for every class defined in `CLASS_LABELS`, calculates the cost for classes with a non-zero cost factor, and sums them up.
#     * Print the total estimated cost for all images processed.
#     * Print detailed cost and pixel counts for the first few images as a sample.
# 3.  **Output (Optional):** The script includes commented-out code using `pandas` to save the detailed results (filename, total cost, pixel counts per class) to a CSV file. You can uncomment this section if you want to save the output for further analysis in a spreadsheet or other tools. Make sure `pandas` is installed (`pip install pandas`).
# 4.  **Integration:** Later, you will adapt the part that gets the `label_image` inside the `calculate_damage_cost_with_validation` function (or adapt the main loop) to use the output masks from your segmentation model instead of reading files from `LABEL_DATA_DIR`.

 Starting Damage Cost Estimation Process 

--- Configuration ---
  Label Directory: /content/drive/MyDrive/comp-vision-project/data/RescueNet.zip_(Unzipped Files)/test/test-label-img/
  GSD Assumption (m/pixel): 0.1
  Area per Pixel (sq m): 0.0100
  Cost Factors ($/sq m):
    - Background (0): $0.00
    - Water (1): $0.00
    - Building_No_Damage (2): $0.00
    - Building_Minor_Damage (3): $50.00
    - Building_Major_Damage (4): $300.00
    - Building_Total_Destruction (5): $1200.00
    - Vehicle (6): $0.00
    - Road-Clear (7): $0.00
    - Road-Blocked (8): $20.00
    - Tree (9): $0.00
    - Pool (10): $0.00
------------------------------

Found 450 label images to process.

--- Processing Images ---
  Processed 50/450 images...
  Processed 100/450 images...
  Processed 150/450 images...
  Processed 200/450 images...
  Processed 250/450 images...
  Processed 300/450 images...
  Processed 350/450 images...
  Processed 400/450 images...
  Processed 450/450 images...

--- Processing Comp