# WallCrack Using CycleGAN

📩**Downloading dataset from Source**

In [6]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("arunrk7/surface-crack-detection")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\sreeh\.cache\kagglehub\datasets\arunrk7\surface-crack-detection\versions\1


In [8]:
import shutil
import random
from pathlib import Path

# --- 1. CONFIGURE YOUR FOLDERS AND SETTINGS HERE ---

# The folder where all your original images are currently located.
source_folder = Path("Dataset/") 

# The new folder where you want the 500 images to be copied.
destination_folder = Path("Dataset_500/")

# The number of images you want to select.
num_to_select = 500

# You can add other extensions if needed (e.g., ".gif")
image_extensions = [".jpg", ".jpeg", ".png"]


# --- 2. THE SCRIPT ---

try:
    # Create the destination folder if it doesn't exist
    destination_folder.mkdir(parents=True, exist_ok=True)

    print(f"Searching for images in: {source_folder}")

    # Find all files that match the image extensions
    all_images = [f for f in source_folder.glob("*") if f.suffix.lower() in image_extensions]

    if not all_images:
        print("Error: No images found in the source folder.")
    else:
        print(f"Found {len(all_images)} total images.")

        # Make sure we don't try to select more images than are available
        if len(all_images) < num_to_select:
            print(f"Warning: Only {len(all_images)} images available. Selecting all of them.")
            num_to_select = len(all_images)

        # Randomly select the specified number of images
        selected_images = random.sample(all_images, num_to_select)

        print(f"Copying {num_to_select} random images to: {destination_folder}")

        # Copy each selected file to the destination
        for image_path in selected_images:
            shutil.copy(image_path, destination_folder)

        print("\n✅ Done. All files copied successfully.")

except FileNotFoundError:
    print(f"Error: The source folder was not found at '{source_folder}'")
except Exception as e:
    print(f"An error occurred: {e}")

Searching for images in: Dataset
Found 20000 total images.
Copying 500 random images to: Dataset_500

✅ Done. All files copied successfully.


**Adding Blur**

In [14]:
from pathlib import Path
from PIL import Image, ImageFilter
from tqdm import tqdm

# --- 1. CONFIGURE YOUR SETTINGS HERE ---

# Folder with your 500 clean, original images.
source_folder = Path("Dataset_500/")

# Folder where the new blurred images will be saved (this will be part of trainA).
destination_folder = Path("trainA_blurred/")

# Adjust the blur intensity. Higher numbers mean more blur. Try values between 2 and 8.
blur_radius = 5

# --- 2. THE SCRIPT ---

try:
    # Create the destination folder if it doesn't exist
    destination_folder.mkdir(parents=True, exist_ok=True)

    # Get a list of all image files in the source folder
    image_files = list(source_folder.glob("*.jpg")) + \
                  list(source_folder.glob("*.jpeg")) + \
                  list(source_folder.glob("*.png"))

    if not image_files:
        print(f"Error: No images found in '{source_folder}'")
    else:
        print(f"Found {len(image_files)} images. Starting blur process...")

        # Loop through all images and show a progress bar
        for image_path in tqdm(image_files, desc="Applying blur"):
            try:
                # Open the image
                with Image.open(image_path) as img:
                    # Apply a Gaussian blur filter
                    blurred_img = img.filter(ImageFilter.GaussianBlur(radius=blur_radius))
                    
                    # Construct the new filename/path for the blurred image
                   
                    save_path = destination_folder / f"blur_{image_path.name}"
                    # Save the blurred image
                    blurred_img.save(save_path)
            except Exception as e:
                print(f"\nCould not process {image_path.name}: {e}")

        print(f"\n✅ Done. All blurred images saved to '{destination_folder}'.")

except FileNotFoundError:
    print(f"Error: Source folder not found at '{source_folder}'")

Found 500 images. Starting blur process...


Applying blur: 100%|██████████| 500/500 [00:00<00:00, 634.64it/s]


✅ Done. All blurred images saved to 'trainA_blurred'.





**Adding Shadow**

In [15]:
import cv2
import numpy as np
import random
from pathlib import Path
from tqdm import tqdm

# --- 1. CONFIGURE YOUR SETTINGS HERE ---

# Folder with your 500 clean, original images.
source_folder = Path("Dataset_500/")

# Folder where the new shadowed images will be saved.
destination_folder = Path("trainA_shadowed/")

# --- 2. THE SCRIPT ---

def add_shadow(image):
    """Adds a random shadow effect to an image."""
    h, w = image.shape[:2]
    
    # Create a completely black mask
    mask = np.zeros_like(image, dtype=np.float32)
    
    # --- Define a random polygon for the shadow ---
    # Choose a random corner to start the shadow from (0: top-left, 1: top-right)
    start_corner = random.choice([0, 1])
    
    if start_corner == 0:
        # Shadow from top-left
        points = np.array([
            [0, 0],
            [random.randint(w//2, w), 0],
            [random.randint(0, w//2), h]
        ], dtype=np.int32)
    else:
        # Shadow from top-right
        points = np.array([
            [w, 0],
            [random.randint(0, w//2), 0],
            [random.randint(w//2, w), h]
        ], dtype=np.int32)

    # Draw the white polygon on the black mask
    cv2.fillPoly(mask, [points], (1, 1, 1))
    
    # --- Make the shadow realistic ---
    # Add a strong blur to the mask to create soft edges
    # The kernel size must be odd
    blur_kernel_size = random.randrange(101, 251, 2)
    mask = cv2.GaussianBlur(mask, (blur_kernel_size, blur_kernel_size), 0)
    
    # Control the shadow's darkness (0.5=dark, 0.8=light)
    shadow_intensity = random.uniform(0.5, 0.8)
    
    # Blend the shadow mask with the original image
    # We convert image to float for multiplication, then back to uint8
    shadowed_image = image.astype(np.float32) * (1 - (mask * shadow_intensity))
    shadowed_image = np.clip(shadowed_image, 0, 255).astype(np.uint8)
    
    return shadowed_image

# --- Main script logic ---
try:
    destination_folder.mkdir(parents=True, exist_ok=True)
    
    image_files = list(source_folder.glob("*.jpg")) + \
                  list(source_folder.glob("*.jpeg")) + \
                  list(source_folder.glob("*.png"))

    if not image_files:
        print(f"Error: No images found in '{source_folder}'")
    else:
        print(f"Found {len(image_files)} images. Adding shadows...")

        for image_path in tqdm(image_files, desc="Adding shadows"):
            try:
                # Read image using OpenCV
                img = cv2.imread(str(image_path))
                if img is None:
                    print(f"\nCould not read {image_path.name}")
                    continue
                
                # Add the shadow
                shadowed_img = add_shadow(img)
                
                # Save the new image
                save_path = destination_folder / f"shadow_{image_path.name}"
                cv2.imwrite(str(save_path), shadowed_img)
            except Exception as e:
                print(f"\nCould not process {image_path.name}: {e}")
        
        print(f"\n✅ Done. All shadowed images saved to '{destination_folder}'.")

except FileNotFoundError:
    print(f"Error: Source folder not found at '{source_folder}'")

Found 500 images. Adding shadows...


Adding shadows: 100%|██████████| 500/500 [00:02<00:00, 225.72it/s]


✅ Done. All shadowed images saved to 'trainA_shadowed'.





**Add Both Blur And Shadow**

In [16]:
import cv2
import numpy as np
import random
from pathlib import Path
from tqdm import tqdm
import shutil

# --- 1. CONFIGURE YOUR SETTINGS HERE ---

# Folder with your 500 clean, original images.
source_folder = Path("Dataset_500/")

# Folder where the new combined-effect images will be saved.
destination_folder = Path("trainA_blur_and_shadow/")

# The kernel size for the blur. Must be a pair of odd numbers. (e.g., (9, 9))
blur_kernel_size = (9, 9)


# --- 2. HELPER FUNCTION (same as before) ---

def add_shadow(image):
    """Adds a random shadow effect to an image."""
    h, w = image.shape[:2]
    mask = np.zeros_like(image, dtype=np.float32)
    start_corner = random.choice([0, 1])
    
    if start_corner == 0: # Top-left
        points = np.array([[0, 0], [random.randint(w//2, w), 0], [random.randint(0, w//2), h]], dtype=np.int32)
    else: # Top-right
        points = np.array([[w, 0], [random.randint(0, w//2), 0], [random.randint(w//2, w), h]], dtype=np.int32)

    cv2.fillPoly(mask, [points], (1, 1, 1))
    blur_k = random.randrange(101, 251, 2)
    mask = cv2.GaussianBlur(mask, (blur_k, blur_k), 0)
    shadow_intensity = random.uniform(0.5, 0.8)
    shadowed_image = image.astype(np.float32) * (1 - (mask * shadow_intensity))
    return np.clip(shadowed_image, 0, 255).astype(np.uint8)


# --- 3. MAIN SCRIPT LOGIC ---

try:
    destination_folder.mkdir(parents=True, exist_ok=True)
    image_files = list(source_folder.glob("*.jpg")) + list(source_folder.glob("*.png"))

    if not image_files:
        print(f"Error: No images found in '{source_folder}'")
    else:
        print(f"Found {len(image_files)} images. Applying shadow and blur...")

        for image_path in tqdm(image_files, desc="Processing images"):
            try:
                img = cv2.imread(str(image_path))
                shadowed_img = add_shadow(img)
                final_img = cv2.GaussianBlur(shadowed_img, blur_kernel_size, 0)
                save_path = destination_folder / f"both_{image_path.name}"
                cv2.imwrite(str(save_path), final_img)
            except Exception as e:
                print(f"\nCould not process {image_path.name}: {e}")
        
        print(f"\n✅ Done. All images with shadow and blur saved to '{destination_folder}'.")

except FileNotFoundError:
    print(f"Error: Source folder not found at '{source_folder}'")

Found 500 images. Applying shadow and blur...


Processing images: 100%|██████████| 500/500 [00:02<00:00, 213.74it/s]


✅ Done. All images with shadow and blur saved to 'trainA_blur_and_shadow'.





Combining Dataset For Train A

In [17]:
import shutil
from pathlib import Path

# List of folders containing your augmented images
source_folders = [
    Path("trainA_blurred/"),
    Path("trainA_shadowed/"),
    Path("trainA_blur_and_shadow/")
]

# The final destination folder for your CycleGAN
final_trainA_folder = Path("trainA/")

# --- Script to combine folders ---
final_trainA_folder.mkdir(parents=True, exist_ok=True)
print(f"Assembling final 'trainA' folder...")

for folder in source_folders:
    print(f"- Copying files from {folder}...")
    if folder.is_dir():
        for file in folder.iterdir():
            shutil.copy(file, final_trainA_folder)
    else:
        print(f"  Warning: Folder not found.")

print("\n✅ All augmented images have been combined into 'trainA'.")
print("You are now ready to prepare your 'trainB' folder and start training!")

Assembling final 'trainA' folder...
- Copying files from trainA_blurred...
- Copying files from trainA_shadowed...
- Copying files from trainA_blur_and_shadow...

✅ All augmented images have been combined into 'trainA'.
You are now ready to prepare your 'trainB' folder and start training!


In [18]:
from pathlib import Path

# --- 1. Set the folder you want to count ---
folder_to_check = Path("trainA/")

# --- You can customize these if needed ---
image_extensions = [".jpg", ".jpeg", ".png"]

# --- 2. The script to count files ---
print(f"Checking image count in: {folder_to_check}\n")

if not folder_to_check.is_dir():
    print(f"Error: Directory not found at '{folder_to_check}'")
else:
    # Count all files with the specified image extensions
    count = sum(1 for f in folder_to_check.iterdir() if f.suffix.lower() in image_extensions)
    
    print(f"✅ The folder '{folder_to_check}' contains: {count} images.")

    # Check if the count matches the expected total
    if count == 1500:
        print("The count is exactly 1500. Your trainA dataset is ready!")
    else:
        print(f"Warning: The count is {count}, not 1500. You may want to re-run the final copy script.")

Checking image count in: trainA

✅ The folder 'trainA' contains: 1500 images.
The count is exactly 1500. Your trainA dataset is ready!


In [19]:
import shutil
import random
from pathlib import Path

# --- Configuration ---
source_folder = Path("Dataset_500/")
train_b_folder = Path("trainB/")
test_b_folder = Path("testB/")
train_split_ratio = 0.8  # 80% for training, 20% for testing

# --- Script ---
try:
    # Create destination folders
    train_b_folder.mkdir(parents=True, exist_ok=True)
    test_b_folder.mkdir(parents=True, exist_ok=True)

    # Get all image files
    image_files = list(source_folder.glob("*.jpg")) + list(source_folder.glob("*.png"))
    random.shuffle(image_files) # Shuffle for a random split

    # Determine the split index
    split_index = int(len(image_files) * train_split_ratio)

    # Split the list into training and testing sets
    train_files = image_files[:split_index]
    test_files = image_files[split_index:]

    print(f"Splitting {len(image_files)} images into:")
    print(f"- {len(train_files)} for trainB")
    print(f"- {len(test_files)} for testB")

    # Copy files
    for f in train_files:
        shutil.copy(f, train_b_folder)
    for f in test_files:
        shutil.copy(f, test_b_folder)

    print("\n✅ Done. trainB and testB folders created successfully.")

except Exception as e:
    print(f"An error occurred: {e}")

Splitting 500 images into:
- 400 for trainB
- 100 for testB

✅ Done. trainB and testB folders created successfully.


In [20]:
import cv2
import numpy as np
import random
from pathlib import Path
from tqdm import tqdm

# --- Configuration ---
source_folder = Path("testB/") # Source is the clean test images
destination_folder = Path("testA/") # Destination for augmented test images
blur_kernel_size = (9, 9)

# --- Helper function for shadows ---
def add_shadow(image):
    h, w = image.shape[:2]
    mask = np.zeros_like(image, dtype=np.float32)
    start_corner = random.choice([0, 1])
    if start_corner == 0:
        points = np.array([[0, 0], [random.randint(w//2, w), 0], [random.randint(0, w//2), h]], dtype=np.int32)
    else:
        points = np.array([[w, 0], [random.randint(0, w//2), 0], [random.randint(w//2, w), h]], dtype=np.int32)
    cv2.fillPoly(mask, [points], (1, 1, 1))
    blur_k = random.randrange(101, 251, 2)
    mask = cv2.GaussianBlur(mask, (blur_k, blur_k), 0)
    shadow_intensity = random.uniform(0.5, 0.8)
    return np.clip(image.astype(np.float32) * (1 - (mask * shadow_intensity)), 0, 255).astype(np.uint8)

# --- Main Script ---
try:
    destination_folder.mkdir(parents=True, exist_ok=True)
    image_files = list(source_folder.glob("*.jpg")) + list(source_folder.glob("*.png"))

    print(f"Generating augmented test set 'testA' from {len(image_files)} images...")

    for image_path in tqdm(image_files, desc="Creating testA"):
        img = cv2.imread(str(image_path))
        
        # 1. Create and save blurred version
        blurred_img = cv2.GaussianBlur(img, blur_kernel_size, 0)
        cv2.imwrite(str(destination_folder / f"blur_{image_path.name}"), blurred_img)

        # 2. Create and save shadowed version
        shadowed_img = add_shadow(img)
        cv2.imwrite(str(destination_folder / f"shadow_{image_path.name}"), shadowed_img)
        
        # 3. Create and save combined version (shadow then blur)
        final_img = cv2.GaussianBlur(shadowed_img, blur_kernel_size, 0)
        cv2.imwrite(str(destination_folder / f"both_{image_path.name}"), final_img)
        
    print(f"\n✅ Done. 'testA' created with {len(list(destination_folder.glob('*')))} images.")

except Exception as e:
    print(f"An error occurred: {e}")

Generating augmented test set 'testA' from 100 images...


Creating testA: 100%|██████████| 100/100 [00:00<00:00, 121.51it/s]


✅ Done. 'testA' created with 300 images.





In [3]:
import cv2
import numpy as np
from pathlib import Path
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim
from tqdm import tqdm

# --- 1. CONFIGURE YOUR FOLDERS HERE ---

# Path to your original clean test images (the ground truth)
ground_truth_folder = Path("D:/Machine Learning/WallCrack_CG/Data/testB/")

# Path to the images generated by the test.py script
predicted_folder = Path(r"D:\Machine Learning\WallCrack_CG\pytorch-CycleGAN-and-pix2pix\results\crack_wall_cleanup_model\test_50\images")

# --- 2. THE SCRIPT (FINAL VERSION) ---
psnr_scores = []
ssim_scores = []

# Get the list of original clean images
ground_truth_files = sorted(list(ground_truth_folder.glob("*.jpg"))) + \
                     sorted(list(ground_truth_folder.glob("*.png")))

print(f"Comparing {len(ground_truth_files)} ground truth images...")

for gt_path in tqdm(ground_truth_files, desc="Calculating Metrics"):
    base_name = gt_path.stem
    
    # --- THIS IS THE CORRECTED LINE ---
    predicted_path = next(predicted_folder.glob(f"*_{base_name}_fake.png"), None)

    if predicted_path is None:
        print(f"\nWarning: No corresponding prediction found for {gt_path.name}")
        continue

    try:
        # Load images in grayscale for standard calculation
        img_gt = cv2.imread(str(gt_path), cv2.IMREAD_GRAYSCALE)
        img_pred = cv2.imread(str(predicted_path), cv2.IMREAD_GRAYSCALE)

        # Ensure images are the same size
        if img_gt.shape != img_pred.shape:
            h, w = img_gt.shape
            img_pred = cv2.resize(img_pred, (w, h))

        # Calculate metrics
        psnr_scores.append(psnr(img_gt, img_pred, data_range=255))
        ssim_scores.append(ssim(img_gt, img_pred, data_range=255))

    except Exception as e:
        print(f"\nError processing {gt_path.name}: {e}")

# Calculate and print the average scores
if psnr_scores and ssim_scores:
    avg_psnr = np.mean(psnr_scores)
    avg_ssim = np.mean(ssim_scores)

    print("\n--- Evaluation Results ---")
    print(f"✅ Average PSNR: {avg_psnr:.2f} dB")
    print(f"✅ Average SSIM: {avg_ssim:.4f}")
else:
    print("\nNo images were successfully processed. Please check folder paths and filenames.")

Comparing 100 ground truth images...


Calculating Metrics: 100%|██████████| 100/100 [00:01<00:00, 83.18it/s]


--- Evaluation Results ---
✅ Average PSNR: 25.11 dB
✅ Average SSIM: 0.8903





In [4]:
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

def measure_crack_width(image_path):
    """Measures the average width of a crack in an image."""
    if not Path(image_path).exists():
        return 0
    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    threshold_value = 100
    _, mask = cv2.threshold(image, threshold_value, 255, cv2.THRESH_BINARY_INV)
    kernel = np.ones((3, 3), np.uint8)
    cleaned_mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=2)
    dist_transform = cv2.distanceTransform(cleaned_mask, cv2.DIST_L2, 5)
    crack_pixels = dist_transform[cleaned_mask > 0]
    if len(crack_pixels) == 0:
        return 0
    return np.mean(crack_pixels) * 2

# --- 1. CONFIGURE YOUR FOLDER PATHS ---
test_b_folder = Path("D:/Machine Learning/WallCrack_CG/Data/testB/")
test_a_folder = Path("D:/Machine Learning/WallCrack_CG/Data/testA/")
# --- IMPORTANT: Point this to your final epoch 50 results ---
results_folder = Path(r"D:\Machine Learning\WallCrack_CG\pytorch-CycleGAN-and-pix2pix\results\crack_wall_cleanup_model\test_50\images")

# --- 2. THE AUTOMATED SCRIPT ---
results_data = []
clean_files = sorted(list(test_b_folder.glob("*.jpg"))) + sorted(list(test_b_folder.glob("*.png")))

for clean_path in tqdm(clean_files, desc="Processing all test images"):
    base_name = clean_path.stem
    
    paths = {
        'clean': clean_path,
        'blur': test_a_folder / f"blur_{base_name}.jpg",
        'shadow': test_a_folder / f"shadow_{base_name}.jpg",
        'both': test_a_folder / f"both_{base_name}.jpg",
        'restored_blur': results_folder / f"blur_{base_name}_fake.png",
        'restored_shadow': results_folder / f"shadow_{base_name}_fake.png",
        'restored_both': results_folder / f"both_{base_name}_fake.png"
    }
    
    widths = {key: measure_crack_width(path) for key, path in paths.items()}
    widths['filename'] = base_name
    results_data.append(widths)

# --- 3. CREATE AND SAVE THE REPORT ---
df = pd.DataFrame(results_data)
df = df[['filename', 'clean', 'blur', 'restored_blur', 'shadow', 'restored_shadow', 'both', 'restored_both']]

print("\n--- Crack Width Analysis (in pixels) ---")
print(df.head())

output_csv_path = "crack_width_analysis_epoch50.csv"
df.to_csv(output_csv_path, index=False)

print(f"\n✅ Full analysis saved to '{output_csv_path}'")

Processing all test images: 100%|██████████| 100/100 [00:01<00:00, 57.97it/s]



--- Crack Width Analysis (in pixels) ---
  filename      clean       blur  restored_blur     shadow  restored_shadow  \
0    00343  10.117659  10.914913      10.288900  33.172344        11.366459   
1    00741   3.835974   3.769678       3.878775  68.316444         6.237106   
2    00786   6.742176   6.834746       7.202435  54.824524         7.287296   
3    01420  15.785818  16.339111      15.449415  51.647896        17.587482   
4    01474  11.130548  12.265622      10.631347  18.486263        11.060898   

        both  restored_both  
0  35.023777      11.349206  
1  77.179230      10.466269  
2  57.933693       7.345812  
3  52.986858      17.953011  
4  26.166599      11.222775  

✅ Full analysis saved to 'crack_width_analysis_epoch50.csv'


In [6]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import shutil
import pandas as pd
from pathlib import Path
from skimage.morphology import skeletonize
from scipy.ndimage import distance_transform_edt
from zipfile import ZipFile
from tqdm import tqdm

# ===============================================================
# ===== 1. PASTE ALL OF YOUR PROFESSOR'S FUNCTIONS HERE =========
# ===============================================================

def classify_crack_width(width_mm):
    if width_mm <= 1.0:
        return "Light Crack"
    elif width_mm <= 2.0:
        return "Medium Crack"
    else:
        return "Heavy Crack"

def crack_width_measure(binary_image, pixel_size_mm=0.1, display_results=False):
    if binary_image.dtype not in [np.uint8, np.float32, np.float64]:
        binary_image = binary_image.astype(np.uint8)

    if np.sum(binary_image) == 0:
        return np.array([0]), 0

    skeleton = skeletonize(binary_image > 0)
    distance = distance_transform_edt(binary_image)
    crack_widths_pixels = distance[skeleton]
    if len(crack_widths_pixels) == 0:
        return np.array([0]), 0
        
    crack_widths_pixels = crack_widths_pixels * 2
    crack_widths_mm = crack_widths_pixels * pixel_size_mm
    
    if display_results:
        plt.figure(figsize=(6, 6))
        plt.imshow(binary_image, cmap='gray')
        y, x = np.where(skeleton)
        sc = plt.scatter(x, y, c=crack_widths_mm, cmap='jet', s=10)
        plt.colorbar(sc, label='Crack Width (mm)')
        plt.title('Crack Width Measurement (mm)')
        plt.axis('off')
        plt.show()

    return crack_widths_mm, np.max(crack_widths_mm)

def analyse_crack_image(image_path, pixel_size_mm=0.1, display_results=False):
    img = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        return np.array([0]), 0, "Error"
    _, binary_image = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    crack_widths_mm, max_crack_width_mm = crack_width_measure(binary_image, pixel_size_mm, display_results)
    crack_classification = classify_crack_width(max_crack_width_mm)
    return crack_widths_mm, max_crack_width_mm, crack_classification

def analyse_folder(folder_path, output_folder, pixel_size_mm=0.1, display_results=False):
    os.makedirs(output_folder, exist_ok=True)
    results = {}
    
    image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    for filename in tqdm(image_files, desc=f"Analyzing {Path(folder_path).name}"):
        image_path = os.path.join(folder_path, filename)
        _, max_width, classification = analyse_crack_image(image_path, pixel_size_mm, display_results)
        results[filename] = (max_width, classification)
    return results, None # Simplified to not create folders/zip

# ===============================================================
# =========== 2. DEFINE YOUR FOLDERS AND RUN ANALYSIS ===========
# ===============================================================

# --- Define Your Folders ---
folder_clean = Path("D:/Machine Learning/WallCrack_CG/Data/testB/")
folder_augmented = Path("D:/Machine Learning/WallCrack_CG/Data/testA/")
folder_restored = Path(r"D:\Machine Learning\WallCrack_CG\pytorch-CycleGAN-and-pix2pix\results\crack_wall_cleanup_model\test_50\images")
output_base = Path("./crack_analysis_results")

# --- Run Analysis on Each Folder ---
results_clean, _ = analyse_folder(str(folder_clean), str(output_base / "clean"))
results_augmented, _ = analyse_folder(str(folder_augmented), str(output_base / "augmented"))
results_restored, _ = analyse_folder(str(folder_restored), str(output_base / "restored"))

# --- 3. COMBINE AND COMPARE THE RESULTS ---
comparison_data = []

# Loop through the clean images as the ground truth
for clean_filename, (clean_width, clean_class) in results_clean.items():
    base_name = Path(clean_filename).stem

    for prefix in ['blur', 'shadow', 'both']:
        aug_filename = f"{prefix}_{base_name}.jpg"
        res_filename = f"{prefix}_{base_name}_fake.png"
        
        aug_width, aug_class = results_augmented.get(aug_filename, (None, "N/A"))
        res_width, res_class = results_restored.get(res_filename, (None, "N/A"))
        
        comparison_data.append({
            'base_filename': base_name,
            'augmentation_type': prefix,
            'clean_width_mm': clean_width,
            'clean_class': clean_class,
            'augmented_width_mm': aug_width,
            'augmented_class': aug_class,
            'restored_width_mm': res_width,
            'restored_class': res_class
        })

# Create a final DataFrame
df_comparison = pd.DataFrame(comparison_data)

print("\n--- Final Comparison Report ---")
print(df_comparison.head(6))

# Save the final report to a CSV
df_comparison.to_csv("final_crack_width_comparison.csv", index=False)
print("\n✅ Final comparison report saved to 'final_crack_width_comparison.csv'")

Analyzing testB: 100%|██████████| 100/100 [00:00<00:00, 369.22it/s]
Analyzing testA: 100%|██████████| 300/300 [00:01<00:00, 228.89it/s]
Analyzing images: 100%|██████████| 600/600 [00:06<00:00, 99.20it/s] 


--- Final Comparison Report ---
  base_filename augmentation_type  clean_width_mm   clean_class  \
0         00343              blur        2.800000   Heavy Crack   
1         00343            shadow        2.800000   Heavy Crack   
2         00343              both        2.800000   Heavy Crack   
3         00741              blur        1.216553  Medium Crack   
4         00741            shadow        1.216553  Medium Crack   
5         00741              both        1.216553  Medium Crack   

   augmented_width_mm augmented_class  restored_width_mm restored_class  
0            2.973214     Heavy Crack           2.952965    Heavy Crack  
1           11.014536     Heavy Crack           3.124100    Heavy Crack  
2           11.200000     Heavy Crack           3.104835    Heavy Crack  
3            1.264911    Medium Crack           1.166190   Medium Crack  
4           11.343721     Heavy Crack           5.099020    Heavy Crack  
5           13.441726     Heavy Crack           6.794


