In [1]:
import os
import shutil
from pathlib import Path
import tarfile

In [4]:
def copy_gif_files_from_subdirs(main_directory, dest_directory):
    """Collect the ``.gif`` files of each first-level subdirectory into one flat folder.

    For every directory ``main_directory/<name>`` a folder
    ``dest_directory/<name>`` is created (even when no ``.gif`` is found), and
    every file whose name ends in ``.gif`` anywhere below
    ``main_directory/<name>`` is copied straight into it with ``shutil.copy2``.
    The folder name is used unchanged.

    The nested layout is flattened, so two source files with the same name
    land on the same destination path. The later one still overwrites the
    earlier one, but a warning is printed so the loss is not silent.

    Args:
        main_directory: Directory whose immediate subdirectories are scanned.
            Plain files directly inside it are ignored.
        dest_directory: Root under which the per-subdirectory folders are
            created; missing parent directories are created as needed.

    Raises:
        OSError: Propagated from ``os.listdir``, ``os.makedirs`` or
            ``shutil.copy2`` (e.g. ``FileNotFoundError`` when
            ``main_directory`` does not exist).
    """
    # Traverse the first level of subdirectories in the main directory
    for first_level_subdir in os.listdir(main_directory):
        first_level_subdir_path = os.path.join(main_directory, first_level_subdir)

        # Skip files that sit directly in the main directory
        if not os.path.isdir(first_level_subdir_path):
            continue

        # Create a corresponding directory in the destination directory;
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        subdir_gif_dest = os.path.join(dest_directory, first_level_subdir)
        os.makedirs(subdir_gif_dest, exist_ok=True)

        # Maps destination path -> source path already copied there during
        # this call, used only to report name collisions caused by flattening.
        copied_from = {}

        # Walk through the current first-level subdirectory and its subdirectories
        for root, _dirs, files in os.walk(first_level_subdir_path):
            for file in files:
                if not file.endswith('.gif'):
                    continue
                src_path = os.path.join(root, file)  # Source .gif file
                dest_path = os.path.join(subdir_gif_dest, file)  # Destination path for the .gif
                if dest_path in copied_from:
                    print(
                        f"Warning: {src_path} overwrites {dest_path} "
                        f"(previously copied from {copied_from[dest_path]})"
                    )
                shutil.copy2(src_path, dest_path)  # Copy the .gif file
                copied_from[dest_path] = src_path
                print(f"Copied: {src_path} to {dest_path}")




In [5]:
def process_tarball(tarball_path, extract_temp_dir, destination_directory):
    """Extract one ``.tar.gz`` archive, copy its ``.gif`` files, then delete the extraction.

    The archive is unpacked into ``extract_temp_dir``. The top-level folder it
    is expected to produce is derived from the archive's file name: the text
    after the last ``_`` with ``.tar.gz`` removed (e.g. ``..._disc9.tar.gz``
    -> ``disc9``). Every file ending in ``.gif`` below that folder is copied
    with ``shutil.copy2`` to ``destination_directory``, keeping its path
    relative to ``extract_temp_dir`` (folder names are not altered). The
    extracted folder is removed afterwards, also when copying fails.

    Args:
        tarball_path: Path to the gzip-compressed tar archive.
        extract_temp_dir: Directory the archive is unpacked into.
        destination_directory: Root directory that receives the ``.gif`` files.

    Raises:
        FileNotFoundError: If the archive did not produce the folder derived
            from its file name.
        tarfile.TarError: If the archive cannot be read, or (when extraction
            filters are available) contains members rejected as unsafe.
        OSError: Propagated from the copy or cleanup steps.
    """
    # Unzip the tarball into a temporary directory. When the interpreter
    # supports extraction filters, use the 'data' filter so archive members
    # cannot escape extract_temp_dir via absolute paths, '..' or links.
    with tarfile.open(tarball_path, 'r:gz') as tar:
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(path=extract_temp_dir, filter='data')
        else:
            tar.extractall(path=extract_temp_dir)

    # Extracted folder is based on the "discX" part of the filename
    extracted_dir_name = os.path.basename(tarball_path).split('_')[-1].replace('.tar.gz', '')
    extracted_dir_path = os.path.join(extract_temp_dir, extracted_dir_name)

    # Fail early with a clear message instead of silently copying nothing and
    # then erroring inside rmtree.
    if not os.path.isdir(extracted_dir_path):
        raise FileNotFoundError(
            f"Expected extracted directory {extracted_dir_path} was not created by {tarball_path}"
        )

    try:
        # Traverse through the unzipped files and copy .gif files
        for root, _dirs, files in os.walk(extracted_dir_path):
            gif_files = [name for name in files if name.endswith('.gif')]
            if not gif_files:
                continue  # Only create destination folders that receive files

            # Mirror the folder layout relative to the extraction root
            relative_path = os.path.relpath(root, extract_temp_dir)
            dest_dir = os.path.join(destination_directory, relative_path)

            # Create the destination folder if it doesn't exist
            os.makedirs(dest_dir, exist_ok=True)

            # Copy the .gif files to the destination
            for file in gif_files:
                src_file = os.path.join(root, file)
                dest_file = os.path.join(dest_dir, file)
                shutil.copy2(src_file, dest_file)
                print(f"Copied {src_file} to {dest_file}")
    finally:
        # Delete the unzipped directory after processing, even on failure,
        # so a broken run does not leave extracted data behind.
        shutil.rmtree(extracted_dir_path)
        print(f"Deleted extracted files from {extracted_dir_path}")

def process_all_tarballs(source_directory, destination_directory):
    """Extract every ``.tar.gz`` in a directory and collect its ``.gif`` files.

    Each archive in ``source_directory`` (processed in sorted name order) is
    unpacked into ``source_directory`` itself. The folder it is expected to
    produce is derived from the archive name: the text after the last ``_``
    with ``.tar.gz`` removed (e.g. ``..._disc9.tar.gz`` -> ``disc9``). That
    folder is handed to ``copy_gif_files_from_subdirs`` with
    ``destination_directory/<discX>`` as the target, and is deleted afterwards
    so only one archive is unpacked on disk at a time.

    Args:
        source_directory: Directory containing the ``.tar.gz`` archives; also
            used as the temporary extraction location.
        destination_directory: Root directory that receives one folder per
            archive.

    Raises:
        tarfile.TarError: If an archive cannot be read, or (when extraction
            filters are available) contains members rejected as unsafe.
        OSError: Propagated from extraction, copying or cleanup (e.g.
            ``FileNotFoundError`` when the expected folder was not produced).
    """
    # Process each tarball one by one; sorted() makes the order reproducible
    # because os.listdir() returns entries in arbitrary order.
    for tarball in sorted(os.listdir(source_directory)):
        if not tarball.endswith('.tar.gz'):
            continue

        tarball_path = os.path.join(source_directory, tarball)
        print(f"Processing {tarball_path}...")

        # Unzip the tarball next to the archives. When the interpreter
        # supports extraction filters, use the 'data' filter so archive
        # members cannot escape source_directory.
        with tarfile.open(tarball_path, 'r:gz') as tar:
            if hasattr(tarfile, 'data_filter'):
                tar.extractall(path=source_directory, filter='data')
            else:
                tar.extractall(path=source_directory)

        # Extracted folder is based on the "discX" part of the filename
        extracted_dir_name = os.path.basename(tarball_path).split('_')[-1].replace('.tar.gz', '')
        disk_path = os.path.join(source_directory, extracted_dir_name)
        processed_path = os.path.join(destination_directory, extracted_dir_name)

        try:
            copy_gif_files_from_subdirs(disk_path, processed_path)
        finally:
            # Delete the unzipped directory after processing, even when the
            # copy step failed, so the raw-data folder is not left polluted.
            if os.path.isdir(disk_path):
                shutil.rmtree(disk_path)
                print(f"Deleted extracted files from {disk_path}")

In [6]:
# Paths
# The dataset root defaults to the original location; set the OASIS_ROOT
# environment variable to run this notebook on another machine without
# editing the cell.
OASIS_ROOT = Path(os.environ.get("OASIS_ROOT", "/home/rohitb/projects/ml/Project/assets/OASIS-1"))
source_directory = str(OASIS_ROOT / "raw_data")
destination_directory = str(OASIS_ROOT / "processed")

# Call the function to process all tarballs
process_all_tarballs(source_directory, destination_directory)

Processing /home/rohitb/projects/ml/Project/assets/OASIS-1/raw_data/oasis_cross-sectional_disc9.tar.gz...
Copied: /home/rohitb/projects/ml/Project/assets/OASIS-1/raw_data/disc9/OAS1_0348_MR1/PROCESSED/MPRAGE/T88_111/OAS1_0348_MR1_mpr_n4_anon_111_t88_gfc_sag_95.gif to /home/rohitb/projects/ml/Project/assets/OASIS-1/processed/disc9/OAS1_0348_MR1/OAS1_0348_MR1_mpr_n4_anon_111_t88_gfc_sag_95.gif
Copied: /home/rohitb/projects/ml/Project/assets/OASIS-1/raw_data/disc9/OAS1_0348_MR1/PROCESSED/MPRAGE/T88_111/OAS1_0348_MR1_mpr_n4_anon_111_t88_masked_gfc_tra_90.gif to /home/rohitb/projects/ml/Project/assets/OASIS-1/processed/disc9/OAS1_0348_MR1/OAS1_0348_MR1_mpr_n4_anon_111_t88_masked_gfc_tra_90.gif
Copied: /home/rohitb/projects/ml/Project/assets/OASIS-1/raw_data/disc9/OAS1_0348_MR1/PROCESSED/MPRAGE/T88_111/OAS1_0348_MR1_mpr_n4_anon_111_t88_gfc_cor_110.gif to /home/rohitb/projects/ml/Project/assets/OASIS-1/processed/disc9/OAS1_0348_MR1/OAS1_0348_MR1_mpr_n4_anon_111_t88_gfc_cor_110.gif
Copied: /ho