# Optional - Discover Files

Find Files to Add Together

In [None]:
import glob
import pandas as pd

def find_files(root_dir, target_str):
    """
    Recursively collect paths below a root whose last component contains a target string.

    The search runs ``glob.glob`` with ``recursive=True`` on the pattern
    ``<root_dir>/**/*<target_str>*``. Wildcards that appear in either argument
    are therefore interpreted by glob as well.

    Args:
        root_dir (str): Directory (or glob expression) where the search starts.
        target_str (str): Text that must appear in the final path component.

    Returns:
        list: Every path matched by the pattern, in the order glob returns them.
        The list is empty when nothing matches.

    Example:
        matches = find_files('/path/to/root/directory', 'example')
        print(matches)
    """
    # "**" as a whole path component lets glob descend through any number of sub-directories.
    pattern = "{}/**/*{}*".format(root_dir, target_str)
    return glob.glob(pattern, recursive=True)

def create_dataframe_and_save(paths, output_path):
    """
    Build a one-column DataFrame ('paths') from a list of file paths and write it to CSV.

    The function is best-effort: any exception raised while building or saving
    the DataFrame is caught and reported with a printed message instead of
    being propagated.

    Parameters:
    - paths (list): A list of file paths to include in the DataFrame.
    - output_path (str): The file path where the DataFrame should be saved as a CSV.
      The CSV is written without the index column.

    Returns:
    - pandas.DataFrame or None: The DataFrame that was built. It is still returned
      when only the save step failed. None is returned when the DataFrame itself
      could not be created.
    """
    # Bind the name up front so the final return cannot raise UnboundLocalError
    # when DataFrame construction fails inside the try block.
    df = None
    try:
        # Creating the DataFrame
        df = pd.DataFrame(paths, columns=['paths'])
        print("DataFrame created successfully.")

        # Saving the DataFrame to the specified output path
        df.to_csv(output_path, index=False)
        print(f"DataFrame saved successfully to {output_path}.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return df

Define the Root Directory and Target String of Files to Add

In [None]:
# Root of the search. The 'sub-*' segment is a glob wildcard, so it matches every
# directory whose name starts with 'sub-' at that level.
path = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-*/ses-01/thresholded_tissue_segment_z_scores'
# find_files already wraps the target in '*', so the asterisks here are redundant but harmless.
target = '*grey_matter_generated*'

In [None]:
# Glob recursively for the target under the root; the bare name on the last line
# displays the resulting list as the cell output.
file_list = find_files(path, target)
file_list

Edit the Files Above to Select the Specific Files you Want to Combine

In [None]:
# Hand-edited selection that replaces the list produced by the search cell.
# NOTE(review): every entry is a '*_ct_generated_nifti.nii' file, whereas the search
# target defined earlier is '*grey_matter_generated*' — confirm this is intended.
file_list = [
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-030/ses-01/thresholded_tissue_segment_z_scores/sub-0030_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-023/ses-01/thresholded_tissue_segment_z_scores/sub-0023_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-021/ses-01/thresholded_tissue_segment_z_scores/sub-0021_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-041/ses-01/thresholded_tissue_segment_z_scores/sub-0041_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-013/ses-01/thresholded_tissue_segment_z_scores/sub-0013_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-032/ses-01/thresholded_tissue_segment_z_scores/sub-0032_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-033/ses-01/thresholded_tissue_segment_z_scores/sub-0033_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-000/ses-01/thresholded_tissue_segment_z_scores/sub-0000_ct_generated_nifti.nii',
 '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-020/ses-01/thresholded_tissue_segment_z_scores/sub-0020_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-027/ses-01/thresholded_tissue_segment_z_scores/sub-0027_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-015/ses-01/thresholded_tissue_segment_z_scores/sub-0015_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-009/ses-01/thresholded_tissue_segment_z_scores/sub-0009_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-014/ses-01/thresholded_tissue_segment_z_scores/sub-0014_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-038/ses-01/thresholded_tissue_segment_z_scores/sub-0038_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-019/ses-01/thresholded_tissue_segment_z_scores/sub-0019_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-017/ses-01/thresholded_tissue_segment_z_scores/sub-0017_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-037/ses-01/thresholded_tissue_segment_z_scores/sub-0037_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-031/ses-01/thresholded_tissue_segment_z_scores/sub-0031_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-040/ses-01/thresholded_tissue_segment_z_scores/sub-0040_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-010/ses-01/thresholded_tissue_segment_z_scores/sub-0010_ct_generated_nifti.nii',
'/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/atrophy_seeds/sub-001/ses-01/thresholded_tissue_segment_z_scores/sub-0001_ct_generated_nifti.nii']

Define CSV Output Path

In [None]:
# Destination of the one-column ('paths') CSV written by create_dataframe_and_save.
csv_path = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/combined_atrophy_seeds/combined_ctrl_seeds/ct_ctrl_seeds.csv'

Run it

In [None]:
# Write the selected paths to csv_path and keep the DataFrame; the bare name on the
# last line displays it as the cell output.
df = create_dataframe_and_save(paths=file_list, output_path=csv_path)
df

# Import Files

To import from CSV:
- import_path = 'files_to_import.csv'
- file_column = 'Column with the paths in it'
- file_pattern = None

To import from a folder:
- import_path = 'path/to/my/folder'
- file_column = None
- file_pattern = '/*shared_naming_architecture*'

In [1]:
# Folder-import settings (see the instructions above): no CSV column is used, and
# file_pattern is a glob expression evaluated relative to `path`.
# Note that `path` is rebound here and no longer refers to the search root used earlier.
path = '/Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/smoothed_atrophy_seeds'
file_column = None
file_pattern = '*/*/unthresholded_tissue_segment_z_scores/*'

In [2]:
from calvin_utils.file_utils.import_functions import GiiNiiFileImport

# Forward all three settings from the configuration cell. Passing the `file_column`
# variable (rather than a literal None) means switching to CSV-based import only
# requires editing the configuration cell.
importer = GiiNiiFileImport(import_path=path, file_column=file_column, file_pattern=file_pattern)
# run() returns the imported data; the output below shows a table with one column per
# imported file. The bare name on the last line displays it.
matrix_df1 = importer.run()
matrix_df1

Attempting to import from: /Users/cu135/Dropbox (Partners HealthCare)/studies/atrophy_seeds_2023/shared_analysis/niftis_for_elmira/smoothed_atrophy_seeds/*/*/unthresholded_tissue_segment_z_scores/*


Unnamed: 0,sub-0015_white_matter_generated_nifti_no_nan_no_nan.nii,sub-0015_ct_generated_nifti.nii,sub-0015_ct_generated_nifti_no_nan.nii,sub-0015_grey_matter_generated_nifti_no_nan.nii,sub-0015_grey_matter_generated_nifti_no_nan_no_nan.nii,sub-0015_grey_matter_generated_nifti.nii,sub-0015_ct_generated_nifti_no_nan_no_nan_no_nan.nii,sub-0015_ct_generated_nifti_no_nan_no_nan.nii,sub-0015_white_matter_generated_nifti.nii,sub-0015_white_matter_generated_nifti_no_nan.nii,...,sub-0034_ct_generated_nifti_no_nan.nii,sub-0034_grey_matter_generated_nifti_no_nan.nii,sub-0033_white_matter_generated_nifti_no_nan.nii,sub-0033_cerebrospinal_fluid_generated_nifti_no_nan.nii,sub-0033_grey_matter_generated_nifti_no_nan.nii,sub-0033_white_matter_generated_nifti.nii,sub-0033_cerebrospinal_fluid_generated_nifti.nii,sub-0033_ct_generated_nifti.nii,sub-0033_ct_generated_nifti_no_nan.nii,sub-0033_grey_matter_generated_nifti.nii
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902626,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Identify Unstable Values (NAN and Inf)

In [3]:
import numpy as np

# Count non-finite entries before cleaning. Both counts use chained DataFrame
# reductions (per column, then across columns) instead of np.sum(DataFrame), which
# goes through the deprecated DataFrame.sum(axis=None) code path in recent pandas.
print(f"Identified {matrix_df1.isna().sum().sum()} NaNs across the dataframe.")
print(f"Identified {np.isinf(matrix_df1).sum().sum()} infinite values across the dataframe.")

Identified 0 NaNs across the dataframe.
Identified 24362 infinite values across the dataframe.


# Remove Unstable Values
- NaNs will be set to 0.
- posinf will be set to a value of your choice.
- neginf will be set to a value of your choice.

In [4]:
# Replacement values substituted for +inf and -inf entries by remove_unstable_values.
posinf_val, neginf_val = 20, -20

In [5]:
import numpy as np
from calvin_utils.nifti_utils.matrix_utilities import remove_unstable_values

# Replace unstable values using the substitutes chosen above, then re-count to confirm
# that none remain.
data_df = remove_unstable_values(matrix_df1, posinf_val=posinf_val, neginf_val=neginf_val)
# Chained DataFrame reductions are used for both counts instead of np.sum(DataFrame),
# which goes through the deprecated DataFrame.sum(axis=None) code path in recent pandas.
print(f"Identified {data_df.isna().sum().sum()} NaNs across the dataframe.")
print(f"Identified {np.isinf(data_df).sum().sum()} infinite values across the dataframe.")


Identified 0 NaNs across the dataframe.
Identified 0 infinite values across the dataframe.


# Save Your Results
- This saves the processed files back to the locations they were imported from.
- `file_suffix` is appended to the name of each new file. If no suffix is given, the original files are overwritten.

In [6]:
# Write the cleaned data back using the file paths recorded on the importer.
# WARNING: with file_suffix=None the original files are overwritten (see the note above
# this cell). Pass a suffix to keep the originals.
# NOTE(review): dry_run=False presumably performs the real write — confirm against
# GiiNiiFileImport.save_files before running on data that has no backup.
importer.save_files(data_df, file_paths=importer.file_paths, file_suffix=None, dry_run=False)

Saving files: 406it [00:07, 52.87it/s]
