# Exploration of features

In [2]:
import os
import glob
import nibabel as nib
import pandas as pd
from data_loader_folders import load_patient_files

In [3]:
def check_qsm_dimensions(root_dir, folder_type, output_csv_path, tol=0.0):
    """
    Report every QSM voxel spacing that differs from 1 and save the report as CSV.

    Iterates over the patient records returned by
    ``load_patient_files(root_dir, folder_type)``, loads the QSM image of each
    record with nibabel, and inspects the first three zooms of its header
    (labelled 'x', 'y' and 'z').

    For each dimension whose spacing is flagged, a row is recorded with:
      - PatientID
      - FolderType (e.g., "baseline" or "follow_up")
      - Year (folder name)
      - Dimension (e.g., 'x', 'y', or 'z')
      - Spacing value

    The rows are written to ``output_csv_path``. The header row is always
    written, even when nothing is flagged, so the file can be read back with
    ``pd.read_csv`` in every case.

    Parameters
    ----------
    root_dir : str
        Path to the root folder (e.g., baseline or follow-up).
    folder_type : str
        Label indicating the folder type ("baseline" or "follow_up").
    output_csv_path : str
        File path for the output CSV.
    tol : float, optional
        Absolute tolerance around 1. A spacing is flagged when
        ``abs(spacing - 1) > tol``. The default of 0.0 flags every spacing
        that is not exactly 1.

    Returns
    -------
    None
        The report is only written to disk.

    Raises
    ------
    ValueError
        If ``tol`` is negative.
    """
    if tol < 0:
        raise ValueError("tol must be non-negative")

    # Fixed column order so an empty report still gets a header row.
    report_columns = ["PatientID", "FolderType", "Year", "Dimension", "Spacing"]
    dim_labels = ("x", "y", "z")

    # Each record is expected to expose the keys "QSM", "PatientID",
    # "FolderType" and "Year" (those are the ones read below).
    records = load_patient_files(root_dir, folder_type)

    results = []
    for rec in records:
        # Only the header is needed here; the zooms hold the voxel spacing.
        img = nib.load(rec["QSM"])
        spacing = img.header.get_zooms()[:3]

        for label, sp in zip(dim_labels, spacing):
            # Phrased as "not within tolerance" so a NaN spacing is flagged
            # too, exactly as the plain `sp != 1` comparison would do.
            if not (abs(sp - 1) <= tol):
                results.append({
                    "PatientID": rec["PatientID"],
                    "FolderType": rec["FolderType"],
                    "Year": rec["Year"],
                    "Dimension": label,
                    "Spacing": sp
                })

    # Passing `columns` keeps the header even when `results` is empty.
    df = pd.DataFrame(results, columns=report_columns)
    df.to_csv(output_csv_path, index=False)


In [4]:
# Root folders holding the cleaned baseline and follow-up patient data.
baseline_dir = "/home/jbetancur/Desktop/codes/python_qsm/exploratory_pipeline/data_automatization/clean_baseline"
followup_dir = "/home/jbetancur/Desktop/codes/python_qsm/exploratory_pipeline/data_automatization/clean_follow_up"

# One entry per cohort: (root folder, folder-type label, report destination).
# NOTE(review): the follow-up label is "followup" here, while the docstring of
# check_qsm_dimensions spells it "follow_up" -- confirm which spelling
# load_patient_files expects.
spacing_jobs = [
    (baseline_dir, "baseline", "/home/jbetancur/Desktop/codes/clustering/feature_extraction/output/baseline_spacing.csv"),
    (followup_dir, "followup", "/home/jbetancur/Desktop/codes/clustering/feature_extraction/output/follow_up_spacing.csv"),
]

# Write one spacing report per cohort, in the order listed above.
for qsm_root, folder_label, csv_path in spacing_jobs:
    check_qsm_dimensions(qsm_root, folder_label, output_csv_path=csv_path)

In [3]:
import pandas as pd

In [None]:
# Aggregated per-lesion pyradiomics features; the path is relative to the
# notebook's working directory.
features_csv = "feature_extraction/output/wavelet_pyradiomic_aggregated_lesion_features.csv"
pyradiomics_features = pd.read_csv(features_csv)

# Preview the first five rows as the cell's rich output.
pyradiomics_features.head(5)

  pyradiomics_features = pd.read_csv("/home/jbetancur/Desktop/codes/clustering/feature_extraction/output/pyradiomic_aggregated_lesion_features.csv")


Unnamed: 0,label_id,num_voxels,volume_physical,T1_original_shape_Elongation,T1_original_shape_Flatness,T1_original_shape_LeastAxisLength,T1_original_shape_MajorAxisLength,T1_original_shape_Maximum2DDiameterColumn,T1_original_shape_Maximum2DDiameterRow,T1_original_shape_Maximum2DDiameterSlice,...,QSM_original_ngtdm_Coarseness,QSM_original_ngtdm_Complexity,QSM_original_ngtdm_Contrast,QSM_original_ngtdm_Strength,PatientID,FolderType,YearFolder,T1_error,T2_error,QSM_error
0,1,17,16.999091,0.956726,0.5912,1.986722,3.360493,3.162125,3.162278,3.605492,...,0.894085,0.131584,0.006923,0.940793,14109676,baseline,2022,,,
1,2,2,1.999893,,,,,,,,...,,,,,14109676,baseline,2022,mask has too few dimensions (number of dimensi...,mask has too few dimensions (number of dimensi...,mask has too few dimensions (number of dimensi...
2,3,12,11.999358,0.612405,0.612405,2.0,3.265812,3.162125,2.236068,3.162125,...,1.235294,0.134921,0.011409,1.115942,14109676,baseline,2022,,,
3,4,45,44.997593,0.557967,0.0,0.0,10.303593,5.999679,9.0,10.2955,...,0.151588,0.293192,0.067895,0.145467,14109676,baseline,2022,,,
4,5,4,3.999786,0.999947,0.0,0.0,2.0,1.999893,2.0,2.236044,...,1.0,0.5,0.09375,1.0,14109676,baseline,2022,,,
