## Changing the structure of data
Changing the structure and substructure of my sorted data in cohorts. I want to get rid of all substructure and move all .nii and .json files into the first (top-level) folder of each scan.

In [101]:
import os 
import re
import shutil
import itertools

In [102]:
# All cohorts live side by side under one sorted_cohorts directory
_cohort_root = '../../NACC_data/sorted_cohorts/'

# Relative paths of the original, nested cohort folders
ncPath = _cohort_root + 'NC/'
mciPath = _cohort_root + 'MCI/'
alzdPath = _cohort_root + 'ALZD/'
transPath = _cohort_root + 'TRANS/'

# Relative paths of the new, simply structured cohort folders (s = simple)
s_ncPath = _cohort_root + 'NC_simple/'
s_mciPath = _cohort_root + 'MCI_simple/'
s_alzdPath = _cohort_root + 'ALZD_simple/'
s_transPath = _cohort_root + 'TRANS_simple/'

In [103]:
# Prefix that enables long path support on Windows
_LONG_PATH_PREFIX = '\\\\?\\'


def _with_long_path_prefix(path):
    """Return `path` with the Windows long-path prefix, adding it at most once.

    Guarding against an existing prefix keeps this cell safe to re-run:
    prepending the prefix a second time would produce an invalid path.
    """
    if path.startswith(_LONG_PATH_PREFIX):
        return path
    return _LONG_PATH_PREFIX + path


# Convert the relative paths to absolute paths
ncPath = os.path.abspath(ncPath)
mciPath = os.path.abspath(mciPath)
alzdPath = os.path.abspath(alzdPath)
transPath = os.path.abspath(transPath)

s_ncPath = os.path.abspath(s_ncPath)
s_mciPath = os.path.abspath(s_mciPath)
s_alzdPath = os.path.abspath(s_alzdPath)
s_transPath = os.path.abspath(s_transPath)

# Modify the absolute paths for long path support on Windows
if os.name == 'nt':                     # Check if the operating system is Windows
    ncPath = _with_long_path_prefix(ncPath)
    mciPath = _with_long_path_prefix(mciPath)
    alzdPath = _with_long_path_prefix(alzdPath)
    transPath = _with_long_path_prefix(transPath)

    s_ncPath = _with_long_path_prefix(s_ncPath)
    s_mciPath = _with_long_path_prefix(s_mciPath)
    s_alzdPath = _with_long_path_prefix(s_alzdPath)
    s_transPath = _with_long_path_prefix(s_transPath)

Different possible patterns/naming conventions of my folders (look at "mri_data_sequencing.ipynb" for clarification).

In [104]:
# One regular expression per folder naming convention:
#   pattern1 - four digits followed by an underscore at the start of the name
#   pattern2 - the literal text 'mri'
#   pattern3 - the literal text 'NACC'
pattern1, pattern2, pattern3 = r'^\d{4}_', 'mri', 'NACC'

In [105]:
# One (initially empty) list of matching folders per naming pattern
matching_folders1, matching_folders2, matching_folders3 = [], [], []

In [106]:
# Pair every naming pattern with the list that collects its folders;
# the order matters because the first matching pattern wins.
_pattern_buckets = (
    (pattern1, matching_folders1),
    (pattern2, matching_folders2),
    (pattern3, matching_folders3),
)

# Sort each sub-folder of the ALZD cohort into the list of its naming pattern
for item in os.listdir(alzdPath):
    item_path = os.path.join(alzdPath, item)

    # Only folders are of interest; plain files are ignored
    if not os.path.isdir(item_path):
        continue

    for _pattern, _bucket in _pattern_buckets:
        # re.match anchors at the start of the folder name
        if re.match(_pattern, item):
            _bucket.append(item_path)
            break

Now we move the .nii and .json files. We cover all three naming conventions by iterating separately through each of the three lists of matching folders. We take into account the substructure of each type of folder.

In [107]:
# for folder in matching_folders1:

#     # getting only the name of the folder
#     folder_name = folder.replace('\\?\\c:\\Users\\Crt\\Desktop\\WIMR\\asymmetryAD\\NACC_data\\sorted_cohorts\\ALZD\\', '')
#     folder_name = folder_name.replace('\\', '')

#     # Get the path to the subfolder 
#     subfolder_path = os.path.join(folder, os.listdir(folder)[0])       

#     # copy and rename to original folder name
#     shutil.copytree(subfolder_path, s_alzdPath, dirs_exist_ok=True)
#     os.rename(s_alzdPath + '\\' + os.listdir(subfolder_path)[0], s_alzdPath + '\\' + folder_name)

Better way of doing this, all in one go.

In [108]:
# One (initially empty) list of top-level folder paths per cohort
nc_folders, mci_folders, alz_folders, trans_folders = [], [], [], []

In [110]:
# Pair each cohort directory with the list that receives its entries
_cohorts = (
    (ncPath, nc_folders),
    (mciPath, mci_folders),
    (alzdPath, alz_folders),
    (transPath, trans_folders),
)

# List all four directories up front, so a missing directory raises
# before any of the lists has been modified.
_listings = [os.listdir(_cohort_path) for _cohort_path, _ in _cohorts]

# Append the full path of every directory entry to its cohort's list
for (_cohort_path, _cohort_folders), _entries in zip(_cohorts, _listings):
    _cohort_folders.extend(
        os.path.join(_cohort_path, _entry) for _entry in _entries
    )

This function finds all files (here, the .nii and .json files), no matter the structure and substructure of the folder, and copies them to a new location under the same parent folder name as before, without all the unnecessary substructure.

In [112]:
def copy_deepest_files_to_new_location(source_folders, target_base_folder):
    """Flatten each source folder into a same-named folder under the target.

    For every folder in `source_folders`, a folder with the same name is
    created inside `target_base_folder`, and every file found anywhere below
    the source folder is copied directly into it (sub-folders are dropped).
    The source files are left untouched.

    If a file name is already taken in the target folder, the copy is named
    ``<base>_copy<ext>``; further clashes get ``<base>_copy2<ext>``,
    ``<base>_copy3<ext>``, ... so an existing file is never overwritten.

    Args:
        source_folders: Iterable of paths to the top-level folders to flatten.
            Entries that are not existing directories are skipped.
        target_base_folder: Directory that receives one flattened folder per
            source folder; it is created if it does not exist.

    Returns:
        None. One "Copied: ..." line is printed per copied file.
    """
    for folder in source_folders:
        # The input usually comes from os.listdir, which also returns plain
        # files; those have nothing to flatten and must not create a folder.
        if not os.path.isdir(folder):
            continue

        # Extract the original folder name (top-level parent folder)
        parent_folder_name = os.path.basename(folder.rstrip(os.sep))

        # Create a new target folder based on the parent folder name
        target_folder = os.path.join(target_base_folder, parent_folder_name)
        os.makedirs(target_folder, exist_ok=True)

        # Walk through the whole folder structure to find all files
        for root, dirs, files in os.walk(folder):
            # Sorting in place fixes the traversal order, so which duplicate
            # receives which "_copy" suffix is the same on every run.
            dirs.sort()

            for file in sorted(files):
                file_path = os.path.join(root, file)

                # Pick a free name in the target folder to avoid overwriting
                new_file_path = os.path.join(target_folder, file)
                if os.path.exists(new_file_path):
                    base, extension = os.path.splitext(file)
                    new_file_path = os.path.join(target_folder, f"{base}_copy{extension}")
                    counter = 2
                    while os.path.exists(new_file_path):
                        new_file_path = os.path.join(
                            target_folder, f"{base}_copy{counter}{extension}"
                        )
                        counter += 1

                shutil.copy2(file_path, new_file_path)  # Copy instead of move
                print(f"Copied: {file_path} to {new_file_path}")

In [113]:
#copy_deepest_files_to_new_location(alz_folders, s_alzdPath)

Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\ALZD\1018_NACC862393_20161122ni\1018_NACC862393_20161122\MPRAGE_GRAPPA2_2_1312211075219452552016112212142985622332873000\1.3.12.2.1107.5.2.19.45255.2016112212142985622332873.0.0.0.json to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\ALZD_simple\1018_NACC862393_20161122ni\1.3.12.2.1107.5.2.19.45255.2016112212142985622332873.0.0.0.json
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\ALZD\1018_NACC862393_20161122ni\1018_NACC862393_20161122\MPRAGE_GRAPPA2_2_1312211075219452552016112212142985622332873000\1.3.12.2.1107.5.2.19.45255.2016112212142985622332873.0.0.0.nii to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\ALZD_simple\1018_NACC862393_20161122ni\1.3.12.2.1107.5.2.19.45255.2016112212142985622332873.0.0.0.nii
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\ALZD\1018_NACC862393_20191003ni\1018_NACC862393_20191003\MPRA

In [114]:
#copy_deepest_files_to_new_location(mci_folders, s_mciPath)

Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\MCI\mri141ni\007_T1\007\1.2.840.113619.2.260.6945.2378838.4401.1271019391.431.json to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\MCI_simple\mri141ni\1.2.840.113619.2.260.6945.2378838.4401.1271019391.431.json
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\MCI\mri141ni\007_T1\007\1.2.840.113619.2.260.6945.2378838.4401.1271019391.431.nii to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\MCI_simple\mri141ni\1.2.840.113619.2.260.6945.2378838.4401.1271019391.431.nii
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\MCI\mri4012ni\004_T1_Volumetric\004\1.2.840.113619.2.374.6574342.5279736.19305.1448055864.69.json to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\MCI_simple\mri4012ni\1.2.840.113619.2.374.6574342.5279736.19305.1448055864.69.json
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\so

In [115]:
#copy_deepest_files_to_new_location(trans_folders, s_transPath)

Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\TRANS\1018_NACC356689_20171019ni\1018_NACC356689_20171019\MPRAGE_GRAPPA2_6_1312211075219452552017101913341047583564152000\1.3.12.2.1107.5.2.19.45255.2017101913341047583564152.0.0.0.json to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\TRANS_simple\1018_NACC356689_20171019ni\1.3.12.2.1107.5.2.19.45255.2017101913341047583564152.0.0.0.json
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\TRANS\1018_NACC356689_20171019ni\1018_NACC356689_20171019\MPRAGE_GRAPPA2_6_1312211075219452552017101913341047583564152000\1.3.12.2.1107.5.2.19.45255.2017101913341047583564152.0.0.0.nii to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\TRANS_simple\1018_NACC356689_20171019ni\1.3.12.2.1107.5.2.19.45255.2017101913341047583564152.0.0.0.nii
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\TRANS\1018_NACC356689_20201102ni\1018_NACC356689_20201102

In [116]:
#copy_deepest_files_to_new_location(nc_folders, s_ncPath)

Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\NC\1018_NACC282203_20170908ni\1018_NACC282203_20170908\MPRAGE_GRAPPA2_6_1312211075219452552017090814552268696468275000\1.3.12.2.1107.5.2.19.45255.2017090814552268696468275.0.0.0.json to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\NC_simple\1018_NACC282203_20170908ni\1.3.12.2.1107.5.2.19.45255.2017090814552268696468275.0.0.0.json
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\NC\1018_NACC282203_20170908ni\1018_NACC282203_20170908\MPRAGE_GRAPPA2_6_1312211075219452552017090814552268696468275000\1.3.12.2.1107.5.2.19.45255.2017090814552268696468275.0.0.0.nii to \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\NC_simple\1018_NACC282203_20170908ni\1.3.12.2.1107.5.2.19.45255.2017090814552268696468275.0.0.0.nii
Copied: \\?\c:\Users\Crt\Desktop\WIMR\asymmetryAD\NACC_data\sorted_cohorts\NC\1018_NACC282203_20201106ni\1018_NACC282203_20201106\MPRAGE_GRAPPA2