In [3]:
import pandas as pd
import numpy as np

def dice_coefficient(df1: pd.DataFrame, df2: pd.DataFrame) -> float:
    '''
    Calculates the Dice Coefficient between two binary lesion masks.

    The coefficient is 2 * |A and B| / (|A| + |B|), where |A| and |B| are the
    number of non-zero elements in each input and |A and B| is the number of
    positions that are non-zero in both. Any non-zero value counts as lesion.

    Parameters:
    -----------
    df1 : pd.DataFrame
        The first mask. Documented use is a dataframe where columns represent
        flattened nifti files and rows represent voxels, with all values zero
        except for lesions which are binarized at 1. A pd.Series, np.ndarray
        or other array-like of the same shape as df2 is also accepted.

    df2 : pd.DataFrame
        The second mask, with the same layout and the same shape as df1.

    Returns:
    --------
    float
        The Dice Coefficient, a value between 0 and 1, where 1 represents a perfect overlap.
        NaN is returned when both inputs contain no non-zero elements, because
        the coefficient is undefined (0 / 0) in that case.

    Raises:
    -------
    ValueError
        If the two inputs do not have the same shape.

    '''
    # np.asarray passes ndarrays through untouched and converts dataframes,
    # series and plain sequences to arrays.
    array1 = np.asarray(df1)
    array2 = np.asarray(df2)

    # Refuse mismatched shapes up front: np.logical_and would otherwise
    # broadcast (e.g. (n, 1) against (n,) becomes (n, n)) and give a wrong score.
    if array1.shape != array2.shape:
        raise ValueError(
            f"Inputs must have the same shape, got {array1.shape} and {array2.shape}"
        )

    # Number of positions that are non-zero in both masks
    intersection = np.count_nonzero(np.logical_and(array1, array2))

    # Total number of non-zero elements across the two masks
    total_nonzero = np.count_nonzero(array1) + np.count_nonzero(array2)

    # Both masks empty: the coefficient is undefined, so return NaN explicitly
    # instead of triggering a divide-by-zero RuntimeWarning.
    if total_nonzero == 0:
        return float('nan')

    return float(2 * intersection / total_nonzero)

Import Niftis to be Diced
- Make sure they have equivalent basenames, as these will be used to compare them. 

In [None]:
# First set of files: root directory and file pattern handed to the importer
import_path_1, file_target_1 = 'data/path/root', '*target.nii'

# Second set of files: root directory and file pattern handed to the importer
import_path_2, file_target_2 = 'data/path/root', '*target.nii'

In [None]:
from calvin_utils.file_utils.import_functions import GiiNiiFileImport
# Import the first set of files, selected by import_path_1 / file_target_1
giinii = GiiNiiFileImport(import_path=import_path_1, file_column=None, file_pattern=file_target_1)
df1 = giinii.run()

# Import the second set of files with its own importer.
# df2 must come from giinii2: running giinii again would simply reload the
# first set and make df2 a duplicate of df1.
giinii2 = GiiNiiFileImport(import_path=import_path_2, file_column=None, file_pattern=file_target_2)
df2 = giinii2.run()

Get Dice of Each Column Pair in 2 Dataframes, Matching Columns by Name (both dataframes must contain the same column names)

In [None]:
# One Dice coefficient per column of df1; the partner column in df2 is looked
# up by the same column label.
dice_coefficients_per_column = [
    dice_coefficient(df1[col], df2[col])
    for col in df1.columns
]

print('Dice coefficients per column:', dice_coefficients_per_column)

Save Dice Coefficients to a CSV

In [None]:
# Assuming df1 and df2 have the same columns and order
subject_ids = df1.columns

# Create a DataFrame to store the results
dice_coefficients_df = pd.DataFrame({
    'Subject_ID': subject_ids,
    'Dice_Coefficient': dice_coefficients_per_column
})

# Save the DataFrame to a CSV file
dice_coefficients_df.to_csv('dice_coefficients.csv', index=False)

# Permute Dice Coefficients

In [None]:
# Same permutation test, but with multiprocessing
import concurrent.futures

from tqdm import tqdm
from calvin_utils.matrix_utilities import dice_coefficient
from calvin_utils.permutation_analysis_utils.permutation_utils.palm import brain_permutation
# tqdm and brain_permutation are imported here because this block uses them;
# without these imports the block raises NameError on a fresh kernel.
# NOTE: importing dice_coefficient rebinds the name, replacing any function of
# the same name defined earlier in the notebook.

n_permutations = 1000
dice_coefficients = []
voxel_index = 0  # not used in this block; kept in case another cell reads it

with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
    # Launch one Dice job per permutation. The permutation itself runs here in
    # the parent process; only the Dice computation is handed to a worker.
    results = []
    for i in tqdm(range(n_permutations), desc="Jobs Launched"):
        # Each dataframe is copied, flattened to a single row and permuted
        permuted_df_1 = brain_permutation(df1.copy().to_numpy().reshape(1,-1), looped_permutation=True)
        permuted_df_2 = brain_permutation(df2.copy().to_numpy().reshape(1,-1), looped_permutation=True)

        result = executor.submit(dice_coefficient, permuted_df_1, permuted_df_2)
        results.append(result)

    # Gather results as jobs finish (completion order, not submission order).
    # The context manager closes the progress bar even if a job raises.
    with tqdm(total=n_permutations, desc="Jobs Finalized") as progress_bar:
        for result in concurrent.futures.as_completed(results):

            # Store the Dice coefficient of this permutation
            permuted_dice_coefficient = result.result()
            dice_coefficients.append(permuted_dice_coefficient)

            # Update visualization
            progress_bar.update()

# Convert the list to a numpy array so it supports element-wise comparison
dice_coefficients = np.array(dice_coefficients)
    
    # Permute the Dice Coefficient
from calvin_utils.permutation_analysis_utils.permutation_utils.palm import brain_permutation
from tqdm import tqdm 

# Serial permutation run; df1 and df2 are the original (unpermuted) dataframes
n_permutations = 1000
voxel_index = 0
dice_coefficients = []

for i in tqdm(range(n_permutations)):
    # Copy each dataframe and flatten it to a single row before permuting
    flattened_df_1 = df1.copy().to_numpy().reshape(1,-1)
    flattened_df_2 = df2.copy().to_numpy().reshape(1,-1)
    permuted_df_1 = brain_permutation(flattened_df_1, looped_permutation=True)
    permuted_df_2 = brain_permutation(flattened_df_2, looped_permutation=True)

    # Dice coefficient of the permuted pair (no thresholding is applied here)
    permuted_dice_coefficient = dice_coefficient(permuted_df_1, permuted_df_2)

    # Keep the value for the null distribution
    dice_coefficients.append(permuted_dice_coefficient)

# Turn the collected values into a numpy array
dice_coefficients = np.array(dice_coefficients)

In [None]:
# Empirical p-value: the fraction (not the raw count) of permuted Dice
# coefficients that exceed the observed one.
# NOTE(review): observed_dice_coefficient is not defined in any cell visible
# here; it must be computed on the unpermuted data before this cell runs.
print('empiric p: ', np.mean(np.asarray(dice_coefficients) > observed_dice_coefficient))