# Baseline dice score

In [1]:
import os
import sys
import glob
import numpy as np
import pandas as pd


In [2]:
def dice_score(baseline_seg, gt_seg, target_class):
    """
    Compute the Dice score for a specific class.
    
    Parameters:
    - res_seg: np.array, predicted segmentation
    - gt_seg: np.array, ground truth segmentation
    - target_class: int, the class for which the Dice score is computed
    
    Returns:
    - dice: float, Dice score for the target class
    """
    # Create binary masks for the target class
    baseline_mask = (baseline_seg == target_class)
    gt_mask = (gt_seg == target_class)
    
    # Compute intersection and union
    intersection = np.sum(baseline_mask & gt_mask)
    total_pixels = np.sum(baseline_mask) + np.sum(gt_mask)
    
    # Compute Dice score
    if total_pixels == 0:  # Avoid division by zero
        return 1.0 if np.sum(gt_mask) == 0 else 0.0
    
    dice = (2 * intersection) / total_pixels
    return dice


In [3]:
%%capture
# Import from different folder
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_preprocess_dir = os.path.join(parent_dir, "DataPreprocess")

sys.path.append(data_preprocess_dir)
from main_preprocess import load_nifti_convert_to_numpy

In [4]:
# Define paths
data_path = '/Users/bruger/Desktop/Bachelor/resampled_lung_pilot_data'
cropped_lung_ct_path = data_path + '/cropped_lungs_ct/*.nii.gz'
cropped_lung_ct_paths = glob.glob(cropped_lung_ct_path)

cropped_lung_gt_path = data_path + '/cropped_lungs_seg/*.nii.gz'
cropped_lung_gt_paths = glob.glob(cropped_lung_gt_path)

output_dir = data_path + "/numpy_files/"

In [5]:
# rows = []
# for path in cropped_lung_ct_paths:
#     arr = load_nifti_convert_to_numpy(input_path=path).flatten()
#     baseline_seg = np.where(
#     arr == -10000, 0,  # If the value is -10000, classify as 0
#     np.where(
#         (arr >= -720) & (arr <= -300), 2,  # If within the range [-720, -300], classify as 2
#         1  # Otherwise, classify as 1
#     )
# )
#     patient_id = os.path.basename(path)[7:10]

#     np.save(os.path.join(output_dir, f"patient_{patient_id}_attenuation.npy"), arr)
#     np.save(os.path.join(output_dir, f"patient_{patient_id}_baseline_seg.npy"), baseline_seg)

#     new_row = {
#         'attenuation': arr,
#         'res_seg': baseline_seg,
#         'patient': patient_id,
#         'label': 'w_ggo' if int(patient_id) < 14 else 'wo_ggo',
#     }
#     rows.append(new_row)

In [None]:
# gt_rows = []
# for path in cropped_lung_gt_paths:
#     patient_id = os.path.basename(path)[8:11]
#     output_file = os.path.join(output_dir, f"patient_{patient_id}_gt_seg.npy")
    
#     # Check if the output file already exists
#     if os.path.exists(output_file):
#         print(f"Output file for patient {patient_id} already exists. Skipping...")
#         continue
    
#     # Process the file and save it
#     gt_seg = load_nifti_convert_to_numpy(input_path=path).flatten()
#     np.save(output_file, gt_seg)
    
#     # Prepare the metadata
#     new_gt_row = {
#         'gt_seg': gt_seg,
#         'patient': patient_id,
#         'label': 'w_ggo' if int(patient_id) < 14 else 'wo_ggo',
#     }
#     print(patient_id)
#     gt_rows.append(new_gt_row)


: 

In [None]:
# Directory containing the numpy files
data_dir = output_dir

# Initialize a dictionary to store data
data = {'patient_id': [], 'baseline_seg': [], 'gt_seg': []}

# Loop through all files in the directory
for file in os.listdir(data_dir):
    if file.endswith('.npy'):
        # Extract patient ID and type of file from the filename
        parts = file.split('_')
        patient_id = parts[1]  # Assuming format is 'patient_XXX_...'
        file_type = parts[2].split('.')[0]  # Extract 'baseline', or 'gt_seg'

        # Load the numpy file
        file_path = os.path.join(data_dir, file)
        data_array = np.load(file_path)

        # Check if patient_id is already in data
        if patient_id not in data['patient_id']:
            data['patient_id'].append(patient_id)
            data['baseline_seg'].append(None)
            data['gt_seg'].append(None)

        # Update the respective field based on the file type
        idx = data['patient_id'].index(patient_id)
        if file_type == 'baseline':
            data['baseline_seg'][idx] = data_array
        elif file_type == 'gt':
            data['gt_seg'][idx] = data_array
        print(patient_id, file_type)

# Convert the dictionary to a pandas DataFrame
df = pd.DataFrame(data).sort_values('patient_id')
df = df.reset_index(drop=True)

# Display the DataFrame
print(df)


024 gt
013 gt
014 attenuation
008 baseline
026 attenuation
022 baseline
002 gt
003 baseline
002 attenuation
007 baseline
005 attenuation
007 gt
021 attenuation
018 baseline
026 baseline
021 gt
013 attenuation
013 baseline
009 baseline
004 attenuation
020 attenuation
011 gt
026 gt
000 gt
012 attenuation
002 baseline
023 baseline
019 baseline
027 baseline
005 gt
019 gt
006 baseline
008 gt
012 baseline
027 attenuation
003 attenuation
023 gt
014 gt
022 attenuation
001 baseline
006 gt
020 baseline
006 attenuation
009 attenuation
020 gt
010 attenuation
025 gt
012 gt
011 baseline
001 attenuation
024 baseline
005 baseline
003 gt
025 attenuation
018 attenuation
004 gt
021 baseline
000 baseline
018 gt
014 baseline
000 attenuation
009 gt
024 attenuation
019 attenuation
022 gt
023 attenuation
010 baseline
007 attenuation


In [None]:
df

NameError: name 'df' is not defined

In [None]:
for base, gt in zip(df['baseline_seg'], df['gt_seg']):
    print(len(base), len(gt))

TypeError: object of type 'NoneType' has no len()

In [31]:
target_class = 2

dice_scores = df.apply(
    lambda row: dice_score(baseline_seg=row['baseline_seg'], gt_seg=row['gt_seg'], target_class=target_class), 
    axis=1
)
