In [2]:
import os

import numpy as np
import pydicom
from collections import defaultdict
import pandas as pd

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"


In [3]:
def load_pet_series(modality_path):
    series_dict = defaultdict(list)
    metadata_dict = {}

    if not os.path.exists(modality_path):
        print(f"❌ ERROR: Folder '{modality_path}' does not exist!")
        return {}

    files = os.listdir(modality_path)
    print(f"✅ Found {len(files)} total files in {modality_path}")

    for file in files:
        file_path = os.path.join(modality_path, file)
        
        print(f"🔍 Checking file: {file_path}")
        
        if not os.path.exists(file_path):
            print(f"❌ ERROR: File does not exist: {file_path}")
        else:
            mode = os.stat(file_path).st_mode
            print(f"File mode: {mode}")

        if os.path.isfile(file_path):
            print("✅ This is a valid file!")

            print("lol")  # This should now print if the condition is correct

            print(f"🔍 Checking file: {file_path}")

            try:
                ds = pydicom.dcmread(file_path)
                print(f"✅ Successfully read DICOM file: {file}")

                # Extract SeriesInstanceUID
                series_id = ds.get("SeriesInstanceUID", None)
                if not series_id:
                    print(f"⚠️ Warning: Missing SeriesInstanceUID in {file_path}")
                    series_id = "UnknownSeries"

                print(f"🆔 Found SeriesInstanceUID: {series_id}")
                
                series_dict[series_id].append(ds)

                # Save metadata from the first slice
                if series_id not in metadata_dict:
                    metadata_dict[series_id] = ds

            except Exception as e:
                print(f"❌ ERROR reading {file_path}: {e}")

    # Convert series_dict to stacked NumPy arrays
    series_volumes = {}
    for series_id, ds_list in series_dict.items():
        ds_list.sort(key=lambda s: int(s.get("InstanceNumber", 0)))

        try:
            volume = np.stack([s.pixel_array for s in ds_list])
            series_volumes[series_id] = {"volume": volume, "metadata": metadata_dict[series_id]}
            print(f"✅ Successfully stacked {len(ds_list)} slices for Series ID: {series_id}")

        except Exception as e:
            print(f"❌ ERROR stacking slices for series {series_id}: {e}")

    return series_volumes


In [4]:
# ---------------------------
# 5. Full PET Preprocessing Pipeline on a single PET Folder
# ---------------------------
# Specify the path to your PET modality folder.
import os

pet_modality_path = r"C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000"


# Load the PET series (grouped by SeriesInstanceUID)
pet_series = load_pet_series(pet_modality_path)
print("Available PET series:")
for series_id in pet_series:
    vol_shape = pet_series[series_id]["volume"].shape
    print(f"  Series UID: {series_id} with volume shape: {vol_shape}")

# # For this example, select the first series available.
# selected_series_id = list(pet_series.keys())[0]
# raw_volume = pet_series[selected_series_id]["volume"]
# ref_ds = pet_series[selected_series_id]["metadata"]



✅ Found 669 total files in C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000
🔍 Checking file: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__100_.dcm
File mode: 33206
✅ This is a valid file!
lol
🔍 Checking file: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__100_.dcm
✅ Successfully read DICOM file: PET__100_.dcm
🆔 Found SeriesInstanceUID: 2.16.840.1.114362.1.12192385.26916398793.697132191.975.827
🔍 Checking file: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^J

In [5]:
import os
import unicodedata

for file in os.listdir(pet_modality_path):
    normalized_file = unicodedata.normalize("NFC", file)  # Normalize encoding
    print(f"Original: {repr(file)} | Normalized: {repr(normalized_file)} | Length: {len(file)}")


Original: 'PET__100_.dcm' | Normalized: 'PET__100_.dcm' | Length: 13
Original: 'PET__101_.dcm' | Normalized: 'PET__101_.dcm' | Length: 13
Original: 'PET__102_.dcm' | Normalized: 'PET__102_.dcm' | Length: 13
Original: 'PET__103_.dcm' | Normalized: 'PET__103_.dcm' | Length: 13
Original: 'PET__104_.dcm' | Normalized: 'PET__104_.dcm' | Length: 13
Original: 'PET__105_.dcm' | Normalized: 'PET__105_.dcm' | Length: 13
Original: 'PET__106_.dcm' | Normalized: 'PET__106_.dcm' | Length: 13
Original: 'PET__107_.dcm' | Normalized: 'PET__107_.dcm' | Length: 13
Original: 'PET__108_.dcm' | Normalized: 'PET__108_.dcm' | Length: 13
Original: 'PET__109_.dcm' | Normalized: 'PET__109_.dcm' | Length: 13
Original: 'PET__10_.dcm' | Normalized: 'PET__10_.dcm' | Length: 12
Original: 'PET__110_.dcm' | Normalized: 'PET__110_.dcm' | Length: 13
Original: 'PET__111_.dcm' | Normalized: 'PET__111_.dcm' | Length: 13
Original: 'PET__112_.dcm' | Normalized: 'PET__112_.dcm' | Length: 13
Original: 'PET__113_.dcm' | Normaliz

In [6]:
import os
import re
import unicodedata

def sanitize_filename(filename):
    """Remove or replace problematic characters in filenames."""
    # Normalize encoding (fixes UTF issues)
    filename = unicodedata.normalize("NFC", filename)
    
    # Remove or replace problematic characters (e.g., spaces, special symbols)
    filename = re.sub(r'[^\w\-.]', '_', filename)  # Keep letters, numbers, "_", ".", "-"
    
    return filename

for file in os.listdir(pet_modality_path):
    old_path = os.path.join(pet_modality_path, file)
    new_name = sanitize_filename(file)
    new_path = os.path.join(pet_modality_path, new_name)
    
    try:
        os.rename(old_path, new_path)
        print(f"✅ Renamed: {old_path} → {new_path}")
    except Exception as e:
        print(f"❌ ERROR renaming {old_path}: {e}")


✅ Renamed: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__100_.dcm → C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__100_.dcm
✅ Renamed: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__101_.dcm → C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__101_.dcm
✅ Renamed: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Stu

In [7]:
import os
import stat

for file in os.listdir(pet_modality_path):
    file_path = os.path.join(pet_modality_path, file)
    
    if os.path.exists(file_path):
        print(f"✅ File exists: {file_path}")
        mode = os.stat(file_path).st_file_attributes
        print(f"File attributes: {mode}")
    else:
        print(f"❌ ERROR: File does not exist: {file_path}")


✅ File exists: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__100_.dcm
File attributes: 32
✅ File exists: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__101_.dcm
File attributes: 32
✅ File exists: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__102_.dcm
File attributes: 32
✅ File exists: C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__103_.dcm
File attributes: 32
✅ Fi

In [13]:
import pydicom

pet_modality_path = r"C:\Users\alime\Dropbox (AMC)\Mon PC (DESKTOP-RG9FHVT)\Desktop\Deauville\Deauville_DeepLearning\2025-02__Studies\DOE^JANE_ANON40815_PT_2025-02-12_091445_PET.FDG.corps.entier_PETAC.QC800.TF_n669__00000\PET__1_.dcm"
ds = pydicom.dcmread(pet_modality_path)

print("DICOM metadata keys:", ds.dir())  # List all available metadata


DICOM metadata keys: ['AccessionNumber', 'AcquisitionContextSequence', 'AcquisitionDate', 'AcquisitionStartCondition', 'AcquisitionStartConditionData', 'AcquisitionTerminationCondition', 'AcquisitionTerminationConditionData', 'AcquisitionTime', 'ActualFrameDuration', 'AttenuationCorrectionMethod', 'AxialMash', 'BitsAllocated', 'BitsStored', 'CoincidenceWindowWidth', 'CollimatorType', 'Columns', 'ConfidentialityCode', 'ContentDate', 'ContentQualification', 'ContentTime', 'CorrectedImage', 'CountsSource', 'DeadTimeFactor', 'DecayCorrection', 'DecayFactor', 'DeidentificationMethod', 'DetectorLinesOfResponseUsed', 'DeviceSerialNumber', 'DoseCalibrationFactor', 'EnergyWindowRangeSequence', 'FieldOfViewDimensions', 'FieldOfViewShape', 'FrameOfReferenceUID', 'FrameReferenceTime', 'FrameTime', 'GantryDetectorTilt', 'HighBit', 'ImageIndex', 'ImageOrientationPatient', 'ImagePositionPatient', 'ImageType', 'InstanceCreationDate', 'InstanceCreationTime', 'InstanceCreatorUID', 'InstanceNumber', 'Ins