In [51]:
import cv2
from skimage import data, img_as_float
from skimage.metrics import structural_similarity as ssim
import glob
import os
import re
import pandas as pd
import numpy as np

In [52]:
def extract_number(filename):
    """Return the integer that follows ``NDWI_Mask_`` in ``filename``.

    Only the first occurrence of ``NDWI_Mask_<digits>_`` is considered; other
    numbers in the string are ignored.

    Args:
        filename: File name or path to search.

    Returns:
        The captured digits converted to ``int``, or ``None`` when the
        pattern does not occur in ``filename``.
    """
    found = re.search(r'NDWI_Mask_(\d+)_', filename)
    return int(found.group(1)) if found else None

def extract_number_from_path(path):
    """Return the digits that follow ``train_`` in ``path``.

    Only the first occurrence of ``train_<digits>_`` is considered.

    Args:
        path: Directory or file path to search.

    Returns:
        The captured digits as a string (not converted to ``int``), or
        ``None`` when the pattern does not occur in ``path``.
    """
    found = re.search(r'train_(\d+)_', path)
    if found is None:
        return None
    return found.group(1)

In [53]:
def ssim_compare(img1_path, img2_path):
    """Compute the structural similarity (SSIM) score of two image files.

    Both files are loaded as single-channel grayscale images and compared
    with ``skimage.metrics.structural_similarity``.

    Args:
        img1_path: Path to the first image.
        img2_path: Path to the second image.

    Returns:
        The SSIM score of the image pair, as returned by
        ``structural_similarity``.

    Raises:
        OSError: If OpenCV cannot read either file.
    """
    img1 = cv2.imread(img1_path, cv2.IMREAD_GRAYSCALE)
    img2 = cv2.imread(img2_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with the offending path rather than deep inside
    # the SSIM computation.
    for path, img in ((img1_path, img1), (img2_path, img2)):
        if img is None:
            raise OSError(f"Could not read image: {path}")

    # Only the scalar score is needed, so the full per-pixel SSIM map is not
    # requested.
    return ssim(img1, img2)

In [54]:
def checkAssertion(src_tif_files, target_tif_files):
    """Check that two file lists pair up one-to-one by image number.

    Args:
        src_tif_files: Reference file paths.
        target_tif_files: File paths to compare, in the same order.

    Returns:
        ``True`` when every check passes.

    Raises:
        AssertionError: If the lists differ in length, or if
            ``extract_number`` yields different values for the two paths at
            the same index.
    """
    # The lists are compared position by position, so they must be equally long.
    assert len(src_tif_files) == len(target_tif_files), "The two lists do not have the same size!"

    # Each pair must carry the same image number in its file name.
    for i, src_path in enumerate(src_tif_files):
        num1 = extract_number(src_path)
        num2 = extract_number(target_tif_files[i])

        assert num1 == num2, f"Number mismatch at index {i}: {num1} != {num2}"

    return True
    

In [55]:
# Root folder: holds the reference '*_resized.tif' files directly, plus one
# sub-directory per variant to compare against them.
parent_dir = './GEE_Masks/GEE_resized/train_gee'

# os.listdir returns entries in arbitrary order; sort so the processing order
# (and the printed list) is the same on every run and platform.
dirs = sorted(
    path
    for path in (os.path.join(parent_dir, name) for name in os.listdir(parent_dir))
    if os.path.isdir(path)
)
print(dirs)

# The reference images live in the same root folder as the sub-directories.
source_dir = parent_dir
src_tif_files = sorted(glob.glob(os.path.join(source_dir, '*_resized.tif')))

['./GEE_Masks/GEE_resized/train_gee\\train_12_gee_with_diff_kernels', './GEE_Masks/GEE_resized/train_gee\\train_15_gee_with_diff_kernels', './GEE_Masks/GEE_resized/train_gee\\train_17_gee_with_diff_kernels', './GEE_Masks/GEE_resized/train_gee\\train_20_gee_with_diff_kernels', './GEE_Masks/GEE_resized/train_gee\\train_25_gee_with_diff_kernels', './GEE_Masks/GEE_resized/train_gee\\train_2_gee_with_diff_kernels', './GEE_Masks/GEE_resized/train_gee\\train_30_gee_with_diff_kernels']


In [56]:
# Image numbers parsed from the reference file names; first column of the table.
numbers = [extract_number(src_path) for src_path in src_tif_files]

# Column name -> list of values, one entry per reference image.
dictionary = {
    "Image_Index": numbers
}

for target_dir in dirs:
    corruption = extract_number_from_path(target_dir)
    if corruption is None:
        # A directory without 'train_<number>_' in its path (for example a
        # '.ipynb_checkpoints' folder) is not one of the expected inputs.
        # Skip it before doing any work, instead of failing the size check
        # or producing an 'SSIM_None' column.
        print(f"Skipping {target_dir}: no 'train_<number>_' in path")
        continue

    target_tif_files = sorted(glob.glob(os.path.join(target_dir, '*_resized_corrupt.tif')))
    # Guarantees the two sorted lists line up image-for-image before comparing.
    checkAssertion(src_tif_files, target_tif_files)

    key = f"SSIM_{corruption}"

    # One SSIM score per (reference, target) pair, in reference order.
    ssims = [
        ssim_compare(src_path, target_path)
        for src_path, target_path in zip(src_tif_files, target_tif_files)
    ]

    dictionary[key] = ssims
    print(f"Done {corruption}%")

Done 12%
Done 15%
Done 17%
Done 20%
Done 25%
Done 2%
Done 30%


In [59]:
# One row per image: 'Image_Index' plus one 'SSIM_<n>' column per directory.
df = pd.DataFrame(dictionary)

ssim_cols = [col for col in df.columns if col.startswith('SSIM_')]

# Sort SSIM columns by the numeric part (so e.g. SSIM_2 precedes SSIM_12,
# which a plain string sort would not give).
sorted_ssim_cols = sorted(ssim_cols, key=lambda x: int(x.split('_')[1]))

# Reorder the DataFrame columns: keep Image_Index and add sorted SSIM columns
df = df[['Image_Index'] + sorted_ssim_cols]

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
output_path = "./SSIM_Analysis/Demo.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)