In [None]:
import sys
from pathlib import Path
import os 
import pandas as pd
ROOT = Path().resolve().parent.parent
sys.path.append(str(ROOT))

In [None]:
from src.preprocess.image_quality_metrics import ImageQualityMetrics

processed_folder = ROOT / 'data/processed'
image_folders = os.listdir(processed_folder)
image_folders.sort()

df = pd.DataFrame()
rows = []

for folder in image_folders:
    reference_path = processed_folder / folder / 'groundtruth.fits'

    sim_folders = [f for f in os.listdir(processed_folder / folder) if 'sim' in f]

    for sim_folder in sim_folders:
        sim_images = [f for f in os.listdir(processed_folder / folder / sim_folder) if 'tclean' in f]
        for rec in sim_images:
            rec_folder = processed_folder / folder / sim_folder / rec

            image_path    = rec_folder / f"{rec}.image"
            residual_path = rec_folder / f"{rec}.residual"

            iqm = ImageQualityMetrics(
                image_path=str(image_path), 
                residual_path=str(residual_path), 
                reference_path=str(reference_path)
            )
            snr = iqm.snr()
            ssim = iqm.ssim()
            psnr_no_reference = iqm.psnr_no_reference()
            psnr_reference = iqm.psnr_reference()
            peak = iqm.peak()
            rms = iqm.rms()

            data_map = {
                'object': folder,
                'sim': sim_folder,
                'reconstruction': rec,
                'snr': snr,
                'ssim': ssim,
                'psnr_no_reference': psnr_no_reference,
                'psnr_reference': psnr_reference,
                'peak': peak,
                'rms': rms
            }
            rows.append(data_map)


df = pd.DataFrame(rows)
df.to_csv(ROOT /'data' / 'image_quality_metrics.csv', index=False)

In [43]:
df = pd.read_csv(ROOT /'data' / 'image_quality_metrics.csv')
df

Unnamed: 0,object,sim,reconstruction,snr,ssim,psnr_no_reference,psnr_reference,peak,rms
0,img_0000,sim2,tclean_multiscale_2500,7.978179,0.382072,1.765622,23.657766,255.0,144.425051
1,img_0000,sim2,tclean_hogbom_40000,9.336039,0.379462,1.431027,24.745150,255.0,178.193650
2,img_0000,sim1,tclean_multiscale_2500,3.915633,0.457655,1.617618,11.070400,255.0,157.639236
3,img_0000,sim1,tclean_hogbom_40000,4.868770,0.489562,1.483328,13.613603,255.0,171.910701
4,img_0001,sim2,tclean_multiscale_2500,5.614248,0.320323,1.502934,23.148172,255.0,169.668138
...,...,...,...,...,...,...,...,...,...
1131,img_0282,sim1,tclean_hogbom_40000,4.770417,0.448883,1.697019,11.344150,255.0,150.263500
1132,img_0283,sim2,tclean_multiscale_2500,12.309434,0.555116,1.668059,25.023753,255.0,152.872315
1133,img_0283,sim2,tclean_hogbom_40000,16.121807,0.591912,1.966662,28.479432,255.0,129.661297
1134,img_0283,sim1,tclean_multiscale_2500,5.308037,0.349318,1.625265,13.827060,255.0,156.897536


In [54]:
def normalize_metrics(group):
    group = group.copy()
    
    score = (group['snr'] + group['psnr_reference'] + group['psnr_no_reference']) * group['ssim']
    min_val = score.min()
    max_val = score.max()

    group['label'] = (score - min_val) / (max_val - min_val)
    return group

df = df.groupby(['object', 'sim'], group_keys=False).apply(normalize_metrics)
df_model = df[['object', 'sim', 'reconstruction', 'label']]
df_model.to_csv(ROOT /'data' / 'image_quality_metrics_model.csv', index=False)

  df = df.groupby(['object', 'sim'], group_keys=False).apply(normalize_metrics)
