### Preprocessing for the additional dataset

1. We extracted the necessary 3T images from a PPMI control dataset to supplement our existing dataset.

2. Rician noise was applied to these 3T images to simulate lower-quality 1.5T images, creating a paired dataset. The noisy 3T images serve as the 1.5T equivalents, while the original 3T images act as the ground truth, consistent with the approach used in our initial dataset.


In [78]:
import os
import glob
import json
import nibabel as nib
import numpy as np
import shutil
import monai as mn
from collections import defaultdict, Counter

In [24]:
def get_sagittal_and_3T_slices(input_dir, output_dir):
    """Copy thin-slice 3T scans that pass the orientation filter into one folder.

    Every ``*.json`` sidecar found recursively under ``input_dir`` is read. A
    scan is selected when its sidecar reports ``MagneticFieldStrength == 3.0``,
    ``SliceThickness <= 1.5`` and an ``ImageOrientationPatientDICOM`` whose
    element 1 is > 0.9 and whose element 5 has an absolute value > 0.9.
    For each selected scan the sidecar and the ``.nii.gz`` file with the same
    base name are copied directly into ``output_dir`` (no sub-folders).

    Files are copied with ``shutil.copy`` rather than a shell ``cp`` so that
    paths containing spaces or quotes are handled correctly and copy failures
    are reported instead of being silently ignored.

    Args:
        input_dir: Root directory searched recursively for JSON sidecars.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None. Problems with individual files are printed and the file skipped.
    """
    os.makedirs(output_dir, exist_ok=True)

    for json_file in glob.glob(f'{input_dir}/**/*.json', recursive=True):
        try:
            with open(json_file) as f:
                data = json.load(f)

            # Check for 3T magnetic field strength
            if 'MagneticFieldStrength' not in data or data['MagneticFieldStrength'] != 3.0:
                continue

            # Keep thin slices only
            if 'SliceThickness' not in data or data['SliceThickness'] > 1.5:
                continue

            if 'ImageOrientationPatientDICOM' not in data:
                continue
            orientation = data['ImageOrientationPatientDICOM']
            # NOTE(review): element 1 is compared without abs() while element 5
            # uses abs() - confirm the asymmetry is intended.
            if not (orientation[1] > 0.9 and abs(orientation[5]) > 0.9):
                continue

            # Swap only the trailing extension; str.replace would also rewrite
            # any earlier '.json' occurring inside the directory part of the path.
            nii_file = json_file[:-len('.json')] + '.nii.gz'

            shutil.copy(json_file, output_dir)
            if os.path.exists(nii_file):
                shutil.copy(nii_file, output_dir)
            else:
                print(f"Missing NIfTI file for {json_file}: {nii_file}")

        except json.JSONDecodeError as e:
            print(f"Error decoding JSON in {json_file}: {e}")
        except Exception as e:
            print(f"Error processing {json_file}: {e}")



In [25]:
# Source tree to scan and the folder that receives the selected 3T scans.
input_dir = '/scratch/Data/PPMI/PPMI_Control_nifti'
output_dir = '/scratch/Costanza/PPMI'
get_sagittal_and_3T_slices(input_dir, output_dir)

In [26]:
def extract_patient_id(filename):
    """Return the patient ID encoded in a scan's file name.

    The ID is taken to be the third underscore-separated field of the base
    name (directories are ignored). Raises IndexError when the base name has
    fewer than three such fields.
    """
    _, leaf = os.path.split(filename)
    return leaf.split('_')[2]

In [27]:
def find_all_patients(input_dir):
    """Map every patient ID to whether at least one of their scans is 3T.

    All ``*.json`` sidecars under ``input_dir`` are read (recursively). The
    patient ID comes from ``extract_patient_id`` and a scan counts as 3T when
    its sidecar has ``MagneticFieldStrength == 3.0``.

    Args:
        input_dir: Root directory searched recursively for JSON sidecars.

    Returns:
        dict: patient ID -> bool, True when any sidecar of that patient
        reports 3T and False otherwise. Sidecars that cannot be processed are
        reported on stdout and skipped.
    """
    all_patients = {}

    for json_file in glob.glob(f'{input_dir}/**/*.json', recursive=True):
        try:
            with open(json_file) as f:
                data = json.load(f)

            patient_id = extract_patient_id(json_file)

            # Check for 3T magnetic field strength
            is_3t = 'MagneticFieldStrength' in data and data['MagneticFieldStrength'] == 3.0

            # OR with the stored flag so the result does not depend on the
            # order in which a patient's files are visited: one 3T scan is
            # enough, and a later non-3T scan must not reset the flag.
            all_patients[patient_id] = all_patients.get(patient_id, False) or is_3t

        except Exception as e:
            print(f"Error processing {json_file}: {e}")

    return all_patients

# Inspect the folder that holds the filtered scans.
input_dir = '/scratch/Costanza/PPMI'
all_patients = find_all_patients(input_dir)

# Count the number of patients with and without 3T scans.
# The dict values are booleans, so summing them counts the True entries.
total_patients = len(all_patients)
patients_with_3T = sum(has_3T for has_3T in all_patients.values())
patients_without_3T = total_patients - patients_with_3T

print(f'The total number of patients in this dataset directory {input_dir} is {total_patients}')
print(f'Number of patients with 3T scans: {patients_with_3T}')
print(f'Number of patients without 3T scans: {patients_without_3T}')

The total number of patients in this dataset directory /scratch/Costanza/PPMI is 137
Number of patients with 3T scans: 137
Number of patients without 3T scans: 0


In [85]:
def find_all_patients(input_dir):
    """Count the 3T NIfTI scans available for each patient.

    Every ``*.nii.gz`` file under ``input_dir`` (recursive) is paired with the
    JSON sidecar of the same name; files without a sidecar are ignored. A scan
    is counted when the sidecar's ``MagneticFieldStrength`` equals 3.0.

    Args:
        input_dir: Root directory searched recursively for NIfTI files.

    Returns:
        dict: patient ID -> number of 3T scans found. Patients with no 3T
        scan do not appear. Files that fail to process are reported on stdout.
    """
    scans_per_patient = Counter()

    for nii_path in glob.glob(f'{input_dir}/**/*.nii.gz', recursive=True):
        try:
            # The sidecar shares the scan's name, with a .json extension.
            sidecar_path = nii_path.replace(".nii.gz", ".json")
            if os.path.exists(sidecar_path):
                with open(sidecar_path, 'r') as sidecar:
                    metadata = json.load(sidecar)

                patient_id = extract_patient_id(nii_path)
                if metadata.get('MagneticFieldStrength', None) == 3.0:
                    scans_per_patient[patient_id] += 1

        except Exception as e:
            print(f"Error processing {nii_path}: {e}")

    # Hand back a plain dict keyed in first-seen order.
    return dict(scans_per_patient)

def extract_patient_id(filename):
    """Return the third '_'-separated token of the file's base name (the patient ID)."""
    name_tokens = os.path.basename(filename).split('_')
    return name_tokens[2]

# Tally the 3T scans of every patient in the filtered dataset.
input_dir = '/scratch/Costanza/PPMI'
scan_counts = find_all_patients(input_dir)

# Maps "number of scans" -> "number of patients with that many scans".
scan_distribution = Counter(scan_counts.values())

# One bucket each for 1-4 scans and a shared bucket for 5 or more.
categories = dict.fromkeys([1, 2, 3, 4, 'more_than_4'], 0)

for count, num_patients in scan_distribution.items():
    if count < 5:
        categories[count] = num_patients
        continue
    categories['more_than_4'] += num_patients

# Print the distribution
print(f'Number of patients with 1 scan: {categories[1]}')
print(f'Number of patients with 2 scans: {categories[2]}')
print(f'Number of patients with 3 scans: {categories[3]}')
print(f'Number of patients with 4 scans: {categories[4]}')
print(f'Number of patients with more than 4 scans: {categories["more_than_4"]}')

# List the individual patients that fall in the last bucket.
print("\nPatients with more than 4 scans:")
for patient_id, scan_count in scan_counts.items():
    if scan_count <= 4:
        continue
    print(f'Patient {patient_id} has {scan_count} scans.')


Number of patients with 1 scan: 67
Number of patients with 2 scans: 50
Number of patients with 3 scans: 13
Number of patients with 4 scans: 6
Number of patients with more than 4 scans: 1

Patients with more than 4 scans:
Patient 4085 has 7 scans.


In [22]:
def find_all_patients(input_dir):
    """Collect, per patient, the paths of every NIfTI scan acquired at 3T.

    Each ``*.nii.gz`` file under ``input_dir`` (recursive) is matched with the
    JSON sidecar of the same name; scans without a sidecar are ignored. A scan
    is kept when the sidecar's ``MagneticFieldStrength`` equals 3.0.

    Args:
        input_dir: Root directory searched recursively for NIfTI files.

    Returns:
        defaultdict(dict): patient ID -> ``{'3T': [nifti paths]}``. Only
        patients with at least one 3T scan get an entry. Files that fail to
        process are reported on stdout.
    """
    scans_by_patient = defaultdict(dict)
    candidates = glob.glob(f'{input_dir}/**/*.nii.gz', recursive=True)

    for nii_path in candidates:
        sidecar_path = nii_path.replace(".nii.gz", ".json")
        try:
            if not os.path.exists(sidecar_path):
                continue

            with open(sidecar_path, 'r') as sidecar:
                metadata = json.load(sidecar)

            patient_id = extract_patient_id(nii_path)
            field_strength = metadata.get('MagneticFieldStrength', None)
            if field_strength != 3.0:
                continue

            # Create the patient's '3T' list on first use, then append.
            scans_by_patient[patient_id].setdefault('3T', []).append(nii_path)

        except Exception as e:
            print(f"Error processing {nii_path}: {e}")

    return scans_by_patient

def extract_patient_id(filename):
    """Pull the patient ID (third underscore-delimited field) out of a scan's base name."""
    base_name = os.path.basename(filename)
    fields = base_name.split('_')
    return fields[2]

# Gather the 3T scan paths of every patient in the filtered dataset.
input_dir = '/scratch/Costanza/PPMI'
all_patients = find_all_patients(input_dir)

# An entry is only created when a 3T scan is found, so the number of keys is
# the number of patients with at least one 3T scan.
patients_with_3T = len(all_patients)
# Patients whose '3T' list holds more than one file.
patients_with_multiple_3T = sum(1 for scans in all_patients.values() if len(scans['3T']) > 1)

print(f'The total number of patients with 3T scans: {patients_with_3T}')
print(f'Number of patients with multiple 3T scans: {patients_with_multiple_3T}')

The total number of patients with 3T scans: 137
Number of patients with multiple 3T scans: 70


### HD-BET Tool

'hd-bet -i /scratch/Costanza/PPMI -o /scratch/Costanza/PPMI_NO_SKULL'

Next, we remove the files we do not need from the output directory, i.e. the mask files (`*_mask.nii.gz`).

In [16]:
def remove_mask_files(directory):
    """Delete every ``*_mask.nii.gz`` file found anywhere under ``directory``.

    Args:
        directory: Root of the tree to walk.

    Returns:
        int: Number of mask files that were removed. Each removal is logged
        to stdout before the file is deleted.
    """
    removed = 0
    for folder, _subfolders, names in os.walk(directory):
        for name in names:
            if not name.endswith('_mask.nii.gz'):
                continue
            removed += 1
            target = os.path.join(folder, name)
            print(f"Removing: {target}")
            os.remove(target)
    return removed

# Clean the HD-BET output folder and report how many mask files were deleted.
directory = '/scratch/Costanza/PPMI_NO_SKULL'
num_files = remove_mask_files(directory)
print(f"Total number of mask files removed: {num_files}")


Total number of mask files removed: 0


In [23]:
# Sort the skull-stripped volumes into one sub-folder per patient.
input_dir = "/scratch/Costanza/PPMI_NO_SKULL"
output_dir = "/scratch/Costanza/PPMI_SkullStripping"

os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    if not filename.endswith(".nii.gz"):
        continue

    # The patient ID is the third underscore-separated token of the file name.
    patient_id = filename.split("_")[2]
    patient_dir = os.path.join(output_dir, patient_id)
    os.makedirs(patient_dir, exist_ok=True)

    source_path = os.path.join(input_dir, filename)
    shutil.move(source_path, os.path.join(patient_dir, filename))

print("Dataset successfully organized.")


Dataset successfully organized.


### Adding Rician Noise

Next, we create a paired dataset: each original 3T image serves as the ground truth, and a copy of it with added Rician noise serves as its simulated lower-quality counterpart.

In [None]:
def add_rician_noise(image, noise_level):
    """
    Adds Rician noise to a given image and returns it as 8-bit intensities.

    The image is divided by its maximum when that maximum exceeds 1.0, MONAI's
    ``RandRicianNoise`` is applied with probability 1, and the result is
    scaled by 255 and converted to ``uint8``.

    Args:
    - image (numpy.ndarray): The input image to which Rician noise will be added.
    - noise_level (float): Passed as ``std`` to ``RandRicianNoise``.
      NOTE(review): with MONAI's default ``sample_std=True`` the standard
      deviation actually applied is presumably drawn at random between 0 and
      this value on every call - confirm that this is intended.

    Returns:
    - noisy_image (numpy.ndarray): ``uint8`` image with added Rician noise,
      with values saturated to the range [0, 255].
    """
    if image.max() > 1.0:
        image = image / np.max(image)  # Normalize so the brightest voxel is 1.0

    rician_noise = mn.transforms.RandRicianNoise(prob=1.0, mean=0.0, std=noise_level)
    noisy_image = rician_noise(image)

    # Rician noise can push intensities above 1.0, so the scaled values may
    # exceed 255. Casting those straight to uint8 overflows (bright voxels
    # wrap around to dark ones), so saturate to [0, 255] before the cast.
    scaled = (noisy_image * 255).astype(np.float64)
    noisy_image = np.clip(scaled, 0, 255).astype(np.uint8)
    return noisy_image

def save_nii_image(image_data, reference_nii, output_path):
    """Write ``image_data`` to ``output_path`` as a NIfTI image.

    The new image reuses the affine and the header of ``reference_nii``.
    """
    affine, header = reference_nii.affine, reference_nii.header
    nib.save(nib.Nifti1Image(image_data, affine=affine, header=header), output_path)

def process_patient_rician_noise(dataset_dir, noise_level):
    """Create a Rician-noise copy of every NIfTI volume in each patient folder.

    ``dataset_dir`` is expected to contain one sub-directory per patient. For
    every ``<name>.nii.gz`` inside a patient folder, the volume is loaded,
    passed through ``add_rician_noise`` and written next to the original as
    ``<name>_rician_noise.nii.gz`` via ``save_nii_image``.

    Files that already end in ``_rician_noise.nii.gz`` are skipped, so running
    the function again does not add noise on top of earlier outputs.

    Args:
        dataset_dir: Directory holding the per-patient sub-directories.
        noise_level: Forwarded to ``add_rician_noise``.

    Returns:
        None. The path of every file written is printed.
    """
    nifti_suffix = '.nii.gz'
    noisy_suffix = '_rician_noise.nii.gz'

    for patient_id in sorted(os.listdir(dataset_dir)):
        patient_dir = os.path.join(dataset_dir, patient_id)
        if not os.path.isdir(patient_dir):
            continue

        for file_name in sorted(os.listdir(patient_dir)):
            if not file_name.endswith(nifti_suffix):
                continue
            # Outputs of a previous run are not inputs.
            if file_name.endswith(noisy_suffix):
                continue

            # Strip only the trailing extension. Splitting on the first '.'
            # would truncate names that contain dots, letting distinct scans
            # collapse to the same name.
            unique_name = file_name[:-len(nifti_suffix)]

            file_path = os.path.join(patient_dir, file_name)
            nii_image = nib.load(file_path)
            image_data = nii_image.get_fdata()

            noisy_image_data = add_rician_noise(image_data, noise_level=noise_level)
            noisy_file_path = os.path.join(patient_dir, unique_name + noisy_suffix)

            save_nii_image(noisy_image_data, nii_image, noisy_file_path)

            print(f"Saved noisy image for patient {patient_id} at {noisy_file_path}")
                    
# Generate a noisy counterpart for every volume in the per-patient folders.
# noise_level is forwarded to add_rician_noise, which uses it as the noise std.
dataset_dir = "/scratch/Costanza/PPMI_SkullStripping"
process_patient_rician_noise(dataset_dir, noise_level=0.2)


In [None]:
# Base directory containing all the patient folders with images
# Base directory containing all the patient folders with images
base_dir = "/scratch/Costanza/PPMI_SkullStripping"

# Iterate over all patient folders
for patient_folder in os.listdir(base_dir):
    patient_dir = os.path.join(base_dir, patient_folder)

    # Skip anything that is not a folder: calling os.listdir on a plain file
    # would raise and abort the clean-up halfway through.
    if not os.path.isdir(patient_dir):
        continue

    # Iterate over all the files in the patient folder
    for filename in os.listdir(patient_dir):
        if filename.endswith("_rician_noise.nii.gz"):
            # Construct the full path of the file
            noisy_img_path = os.path.join(patient_dir, filename)

            # Remove the noisy image
            os.remove(noisy_img_path)

print("All noisy images have been removed successfully.")
