# Baseline Dice score

In [1]:
import os
import sys
import glob
import numpy as np
import pandas as pd


In [15]:
def dice_score(baseline_seg, gt_seg, target_class):
    """
    Compute the Dice score for a specific class.

    The score is 2 * |P and G| / (|P| + |G|), where P and G are the sets of
    elements labelled `target_class` in the prediction and the ground truth.

    Parameters:
    - baseline_seg: array-like, predicted (baseline) segmentation
    - gt_seg: array-like, ground truth segmentation, same shape as baseline_seg
    - target_class: int, the class for which the Dice score is computed

    Returns:
    - dice: float, Dice score for the target class; 1.0 when the class is
      absent from both segmentations (nothing to find, nothing predicted)

    Raises:
    - ValueError: if the two segmentations do not have the same shape
    """
    # Accept lists/tuples as well as arrays: comparing a plain list with an int
    # yields a single False instead of an element-wise mask.
    baseline_seg = np.asarray(baseline_seg)
    gt_seg = np.asarray(gt_seg)

    # Refuse silent broadcasting; a per-element overlap is only meaningful
    # when both segmentations cover exactly the same elements.
    if baseline_seg.shape != gt_seg.shape:
        raise ValueError(
            f"Shape mismatch: baseline_seg {baseline_seg.shape} vs gt_seg {gt_seg.shape}"
        )

    # Create binary masks for the target class
    baseline_mask = baseline_seg == target_class
    gt_mask = gt_seg == target_class

    # Overlap, and the sum of both mask sizes (the Dice denominator, not the union)
    intersection = np.count_nonzero(baseline_mask & gt_mask)
    total_pixels = np.count_nonzero(baseline_mask) + np.count_nonzero(gt_mask)

    # Both masks are empty: define the score as 1.0 and avoid division by zero
    if total_pixels == 0:
        return 1.0

    return float(2 * intersection / total_pixels)


In [3]:
%%capture
# Import from different folder
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_preprocess_dir = os.path.join(parent_dir, "DataPreprocess")

sys.path.append(data_preprocess_dir)
from main_preprocess import load_nifti_convert_to_numpy

In [4]:
# Define paths
# Root folder of the resampled pilot data. Defaults to the original local
# location; set the LUNG_DATA_PATH environment variable to run elsewhere.
data_path = os.environ.get(
    'LUNG_DATA_PATH',
    '/Users/bruger/Desktop/Bachelor/resampled_lung_pilot_data',
)

# glob returns matches in arbitrary order; sort for a reproducible file order
cropped_lung_ct_path = data_path + '/cropped_lungs_ct/*.nii.gz'
cropped_lung_ct_paths = sorted(glob.glob(cropped_lung_ct_path))

cropped_lung_gt_path = data_path + '/cropped_lungs_seg/*.nii.gz'
cropped_lung_gt_paths = sorted(glob.glob(cropped_lung_gt_path))

# Folder for the per-patient .npy files
output_dir = data_path + "/numpy_files/"

In [5]:
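# Disabled one-time preprocessing step: builds the threshold-based baseline
# segmentation from each cropped CT and saves it, together with the flattened
# attenuation values, as .npy files in output_dir. Uncomment to regenerate.
# rows = []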
# for path in cropped_lung_ct_paths:
#     arr = load_nifti_convert_to_numpy(input_path=path).flatten()
#     baseline_seg = np.where(
#     arr == -10000, 0,  # If the value is -10000, classify as 0
#     np.where(
#         (arr >= -720) & (arr <= -300), 2,  # If within the range [-720, -300], classify as 2
#         1  # Otherwise, classify as 1
#     )
# )
#     patient_id = os.path.basename(path)[7:10]

#     np.save(os.path.join(output_dir, f"patient_{patient_id}_attenuation.npy"), arr)
#     np.save(os.path.join(output_dir, f"patient_{patient_id}_baseline_seg.npy"), baseline_seg)

#     new_row = {
#         'attenuation': arr,
#         'res_seg': baseline_seg,
#         'patient': patient_id,
#         'label': 'w_ggo' if int(patient_id) < 14 else 'wo_ggo',
#     }
#     rows.append(new_row)

In [6]:
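# Disabled one-time preprocessing step: saves each ground-truth segmentation as
# a flattened .npy file in output_dir, skipping patients whose file already
# exists. Uncomment to regenerate.
# gt_rows = []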
# for path in cropped_lung_gt_paths:
#     patient_id = os.path.basename(path)[8:11]
#     output_file = os.path.join(output_dir, f"patient_{patient_id}_gt_seg.npy")
    
#     # Check if the output file already exists
#     if os.path.exists(output_file):
#         print(f"Output file for patient {patient_id} already exists. Skipping...")
#         continue
    
#     # Process the file and save it
#     gt_seg = load_nifti_convert_to_numpy(input_path=path).flatten()
#     np.save(output_file, gt_seg)
    
#     # Prepare the metadata
#     new_gt_row = {
#         'gt_seg': gt_seg,
#         'patient': patient_id,
#         'label': 'w_ggo' if int(patient_id) < 14 else 'wo_ggo',
#     }
#     print(patient_id)
#     gt_rows.append(new_gt_row)


In [7]:
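# Disabled draft: loads every baseline / ground-truth .npy file into one DataFrame.
# NOTE(review): the dice_score(...) call below uses `row`, which this cell never defines.
# # Directory containing the numpy files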
# data_dir = output_dir

# target_class = 2

# dice_score(baseline_seg=row['baseline_seg'], gt_seg=row['gt_seg'], target_class=target_class)

# # Initialize a dictionary to store data
# data = {'patient_id': [], 'baseline_seg': [], 'gt_seg': []}

# # Loop through all files in the directory
# for file in os.listdir(data_dir):
#     print('new')
#     if file.endswith('.npy'):
#         # Extract patient ID and type of file from the filename
#         parts = file.split('_')
#         patient_id = parts[1]  # Assuming format is 'patient_XXX_...'
#         file_type = parts[2].split('.')[0]  # Extract 'baseline', or 'gt_seg'
#         if file_type == 'attenuation':
#             continue

#         # Load the numpy file
#         file_path = os.path.join(data_dir, file)
#         data_array = np.load(file_path)

#         # Check if patient_id is already in data
#         if patient_id not in data['patient_id']:
#             data['patient_id'].append(patient_id)
#             data['baseline_seg'].append(None)
#             data['gt_seg'].append(None)

#         # Update the respective field based on the file type
#         idx = data['patient_id'].index(patient_id)
#         if file_type == 'baseline':
#             data['baseline_seg'][idx] = data_array
#         elif file_type == 'gt':
#             data['gt_seg'][idx] = data_array
#         print(patient_id, file_type)

# # Convert the dictionary to a pandas DataFrame
# df = pd.DataFrame(data).sort_values('patient_id')
# df = df.reset_index(drop=True)

# # Display the DataFrame
# print(df)


In [None]:
# Directory containing the numpy files
data_dir = output_dir

# Class label whose Dice score is reported.
# NOTE(review): the saved output of this notebook shows 0.0 for every patient;
# verify that the ground-truth arrays really use this label for the class the
# baseline marks as 2.
target_class = 2

# Initialize a list to store results
dice_scores = []

# Patients already handled (each patient has several .npy files in data_dir)
processed_patients = set()

# Sorted so patients are processed in a reproducible order
for file in sorted(os.listdir(data_dir)):
    if not file.endswith('.npy'):
        continue

    # Expected format is 'patient_XXX_<type>...'; skip anything else instead of
    # failing with an IndexError on parts[2].
    parts = file.split('_')
    if len(parts) < 3 or parts[0] != 'patient':
        continue
    patient_id = parts[1]
    file_type = parts[2].split('.')[0]  # 'attenuation', 'baseline' or 'gt'

    # Skip if attenuation file
    if file_type == 'attenuation':
        continue

    # Check if the patient has already been processed
    if patient_id in processed_patients:
        continue

    # Locate the corresponding baseline and gt files
    baseline_path = os.path.join(data_dir, f'patient_{patient_id}_baseline_seg.npy')
    gt_path = os.path.join(data_dir, f'patient_{patient_id}_gt_seg.npy')

    # Only load the (large) arrays once both files are known to exist
    if os.path.exists(baseline_path) and os.path.exists(gt_path):
        baseline_seg = np.load(baseline_path)
        gt_seg = np.load(gt_path)

        # Calculate Dice score for the target class
        dice = dice_score(baseline_seg, gt_seg, target_class)

        # Append results to the list
        dice_scores.append({'patient_id': patient_id, 'dice_score': dice})

        print(f"Processed patient {patient_id} with Dice score: {dice}")
    else:
        print(f"Failed for patient {patient_id}")

    # Mark the patient as processed
    processed_patients.add(patient_id)
    print('\n')

In [18]:
# Convert results to a DataFrame.
# The explicit column list keeps sort_values from raising a KeyError when no
# patient could be scored (empty dice_scores).
df_dice = pd.DataFrame(dice_scores, columns=['patient_id', 'dice_score']).sort_values('patient_id')
df_dice = df_dice.reset_index(drop=True)

# Display the DataFrame
print(df_dice)

# Save results to a CSV file next to the numpy files
df_dice.to_csv(os.path.join(data_dir, 'dice_scores.csv'), index=False)


   patient_id  dice_score
0         000         0.0
1         001         0.0
2         002         0.0
3         003         0.0
4         004         0.0
5         005         0.0
6         006         0.0
7         007         0.0
8         008         0.0
9         009         0.0
10        010         0.0
11        011         0.0
12        012         0.0
13        013         0.0
14        014         0.0
