In [17]:
import os
import numpy as np
import nibabel as nib
from nilearn import image, input_data,datasets
from nilearn.datasets import fetch_atlas_aal
from sklearn.model_selection import train_test_split
import pandas as pd
from scipy.stats import skew, kurtosis
from skimage.feature import graycomatrix, graycoprops
import torch
import torch.nn as nn
import pennylane as qml
import torch.optim as optim
from sklearn.metrics import accuracy_score
import json
import logging
import warnings 
from sklearn.preprocessing import StandardScaler



In [18]:
# Paths to your data
mri_pet_data_path = r"C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives"
mri_data_path = r"C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip"


In [19]:
# Configure logging
logging.basicConfig(filename='processing_log.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Capture warnings using logging
def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    log = file if hasattr(file, 'write') else logging
    log.warning(warnings.formatwarning(message, category, filename, lineno))
warnings.showwarning = warn_with_traceback

# Fetch the AAL atlas
aal_atlas = datasets.fetch_atlas_aal()
atlas_filename = aal_atlas.maps

def extract_roi_features(image_path, atlas_filename, surviving_labels=None):
    try:
        img = nib.load(image_path)
        masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
        roi_data = masker.fit_transform(img)

        all_roi_features = []
        if surviving_labels is None:
            surviving_labels = range(roi_data.shape[1])

        for i, roi_values in enumerate(roi_data.T):
            if i in surviving_labels:
                mean_val = np.mean(roi_values)
                std_val = np.std(roi_values)
                skew_val = skew(roi_values)
                kurt_val = kurtosis(roi_values)

                if roi_values.max() == roi_values.min():
                    roi_features = [mean_val, std_val, skew_val, kurt_val, 0, 0, 0, 0]
                else:
                    roi_values_int = (roi_values - roi_values.min()) / (roi_values.max() - roi_values.min()) * 255
                    roi_values_int = roi_values_int.astype(np.uint8)

                    size = int(np.sqrt(len(roi_values_int)))
                    if size * size != len(roi_values_int):
                        size += 1
                    reshaped_roi_values = np.pad(roi_values_int, (0, size * size - len(roi_values_int)), 'constant').reshape(size, size)

                    glcm = graycomatrix(reshaped_roi_values, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
                    contrast = graycoprops(glcm, 'contrast').mean()
                    correlation = graycoprops(glcm, 'correlation').mean()
                    energy = graycoprops(glcm, 'energy').mean()
                    homogeneity = graycoprops(glcm, 'homogeneity').mean()

                    roi_features = [mean_val, std_val, skew_val, kurt_val, contrast, correlation, energy, homogeneity]
                all_roi_features.extend(roi_features)

        return np.array(all_roi_features), surviving_labels

    except Exception as e:
        logging.error(f"Error processing image {image_path}: {e}")
        return None, None

In [20]:
# Find the reference MRI image for MRI-only data
reference_mri_only_path = None
max_rois_mri_only = 0

for subject_folder in os.listdir(mri_data_path):
    mri_path = os.path.join(mri_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
    img = nib.load(mri_path)
    masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
    roi_data = masker.fit_transform(img)
    num_rois = roi_data.shape[1]

    if num_rois > max_rois_mri_only:
        max_rois_mri_only = num_rois
        reference_mri_only_path = mri_path

_, surviving_mri_only_labels = extract_roi_features(reference_mri_only_path, atlas_filename)


In [21]:
# Process MRI-only data
mri_only_features = []
mri_only_labels = []

expected_feature_length = len(surviving_mri_only_labels) * 8 if surviving_mri_only_labels is not None else 0

for subject_folder in os.listdir(mri_data_path):
    mri_path = os.path.join(mri_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
    mri_features, _ = extract_roi_features(mri_path, atlas_filename, surviving_mri_only_labels)
    if mri_features is not None:
        if expected_feature_length > 0 and len(mri_features) < expected_feature_length:
            padding = np.full(expected_feature_length - len(mri_features), np.nan)
            padded_features = np.concatenate((mri_features, padding))
            mri_only_features.append(padded_features)
        else:
            mri_only_features.append(mri_features)
        subject_id = int(subject_folder.replace("sub-", ""))
        mri_only_labels.append(0 if subject_id > 4000 else 1)
    else:
        # If feature extraction failed, append a NaN array of the expected length
        if expected_feature_length > 0:
            mri_only_features.append(np.full(expected_feature_length, np.nan))
            subject_id = int(subject_folder.replace("sub-", "")) # Still try to get subject ID for label consistency
            mri_only_labels.append(np.nan) # Or handle the label appropriately

mri_only_features = np.array(mri_only_features, dtype=object) # Use object dtype to handle potential NaNs
mri_only_labels = np.array(mri_only_labels)

In [22]:
# Find the reference MRI image for MRI-PET data
reference_mri_pet_path = None
max_rois_mri_pet = 0

for subject_folder in os.listdir(os.path.join(mri_pet_data_path, "cleaned_skullstrip")):
    mri_path = os.path.join(mri_pet_data_path, "cleaned_skullstrip", subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
    img = nib.load(mri_path)
    masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
    roi_data = masker.fit_transform(img)
    num_rois = roi_data.shape[1]

    if num_rois > max_rois_mri_pet:
        max_rois_mri_pet = num_rois
        reference_mri_pet_path = mri_path

_, surviving_mri_pet_labels = extract_roi_features(reference_mri_pet_path, atlas_filename)


In [23]:
# Process MRI-PET data
mri_pet_features = []
mri_pet_labels = []
surviving_mri_pet_labels = None

for subject_folder in os.listdir(os.path.join(mri_pet_data_path, "cleaned_skullstrip")):
    mri_path = os.path.join(mri_pet_data_path, "cleaned_skullstrip", subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
    pet_path = os.path.join(mri_pet_data_path, "preprocessed_pet", subject_folder, "pet", f"{subject_folder}_space-MNI152NLin2009aPET.nii.gz")

    mri_features, surviving_mri_pet_labels = extract_roi_features(mri_path, atlas_filename, surviving_mri_pet_labels)
    pet_features, surviving_mri_pet_labels = extract_roi_features(pet_path, atlas_filename, surviving_mri_pet_labels)

    if mri_features is not None and pet_features is not None:
        combined_features = np.concatenate((mri_features, pet_features))
        mri_pet_features.append(combined_features)
        mri_pet_labels.append(1 if "patient" in subject_folder else 0)

mri_pet_features = np.array(mri_pet_features)
mri_pet_labels = np.array(mri_pet_labels)

In [24]:
# Feature Scaling
scaler = StandardScaler()
mri_only_features_scaled = scaler.fit_transform(mri_only_features)
mri_pet_features_scaled = scaler.fit_transform(mri_pet_features)

#Example of printing the shapes of the dataframes.
print(f"Shape of MRI only features scaled: {mri_only_features_scaled.shape}")
print(f"Shape of MRI only labels: {mri_only_labels.shape}")
print(f"Shape of MRI PET features scaled: {mri_pet_features_scaled.shape}")
print(f"Shape of MRI PET labels: {mri_pet_labels.shape}")

Shape of MRI only features scaled: (10, 928)
Shape of MRI only labels: (10,)
Shape of MRI PET features scaled: (5, 352)
Shape of MRI PET labels: (5,)


In [31]:
import numpy as np
import pandas as pd

# Assuming you have the following NumPy arrays:
# mri_only_features_scaled (shape: (10, 928))
# mri_only_labels (shape: (10,))
# mri_pet_features_scaled (shape: (5, 352))
# mri_pet_labels (shape: (5,))

# Create Pandas DataFrames
df_mri_only = pd.DataFrame(mri_only_features_scaled)
df_mri_only['label'] = mri_only_labels

df_mri_pet = pd.DataFrame(mri_pet_features_scaled)
df_mri_pet['label'] = mri_pet_labels

print("---------------------- MRI-Only NaN Column Analysis ----------------------")
nan_cols_mri_only = df_mri_only.isnull().sum()
total_cols_mri_only = df_mri_only.shape[1]
nan_cols_count_mri_only = nan_cols_mri_only[nan_cols_mri_only > 0].count()

print(f"Number of columns with NaN values in MRI-Only: {nan_cols_count_mri_only}")
print(f"Total number of columns in MRI-Only: {total_cols_mri_only}")

print("\n---------------------- MRI-PET NaN Column Analysis ----------------------")
nan_cols_mri_pet = df_mri_pet.isnull().sum()
total_cols_mri_pet = df_mri_pet.shape[1]
nan_cols_count_mri_pet = nan_cols_mri_pet[nan_cols_mri_pet > 0].count()

print(f"Number of columns with NaN values in MRI-PET: {nan_cols_count_mri_pet}")
print(f"Total number of columns in MRI-PET: {total_cols_mri_pet}")

---------------------- MRI-Only NaN Column Analysis ----------------------
Number of columns with NaN values in MRI-Only: 238
Total number of columns in MRI-Only: 929

---------------------- MRI-PET NaN Column Analysis ----------------------
Number of columns with NaN values in MRI-PET: 88
Total number of columns in MRI-PET: 353


In [32]:
import numpy as np
import pandas as pd

# Assuming you have the following NumPy arrays:
# mri_only_features_scaled (shape: (10, 928))
# mri_only_labels (shape: (10,))
# mri_pet_features_scaled (shape: (5, 352))
# mri_pet_labels (shape: (5,))

# Create Pandas DataFrames
df_mri_only = pd.DataFrame(mri_only_features_scaled)
df_mri_only['label'] = mri_only_labels

df_mri_pet = pd.DataFrame(mri_pet_features_scaled)
df_mri_pet['label'] = mri_pet_labels

print("---------------------- MRI-Only Column Data Types ----------------------")
print(df_mri_only.dtypes)

print("\n---------------------- MRI-PET Column Data Types ----------------------")
print(df_mri_pet.dtypes)


---------------------- MRI-Only Column Data Types ----------------------
0        float64
1        float64
2        float64
3        float64
4        float64
          ...   
924      float64
925      float64
926      float64
927      float64
label      int32
Length: 929, dtype: object

---------------------- MRI-PET Column Data Types ----------------------
0        float64
1        float64
2        float64
3        float64
4        float64
          ...   
348      float64
349      float64
350      float64
351      float64
label      int32
Length: 353, dtype: object


In [34]:
print(df_mri_only.label)

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: label, dtype: int32


In [47]:
import os
import numpy as np
import nibabel as nib
from nilearn import input_data, datasets
import logging
from scipy.stats import skew, kurtosis
from skimage.feature import graycomatrix, graycoprops
from sklearn.preprocessing import StandardScaler
import warnings

# Configure logging
logging.basicConfig(filename='processing_log.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Capture warnings using logging
def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    log = file if hasattr(file, 'write') else logging
    log.warning(warnings.formatwarning(message, category, filename, lineno))
warnings.showwarning = warn_with_traceback

# Fetch the AAL atlas
aal_atlas = datasets.fetch_atlas_aal()
atlas_filename = aal_atlas.maps

def extract_roi_features(image_path, atlas_filename, surviving_labels=None):
    try:
        img = nib.load(image_path)
        masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
        roi_data = masker.fit_transform(img)

        all_roi_features = []
        if surviving_labels is None:
            surviving_labels = range(roi_data.shape[1])

        for i, roi_values in enumerate(roi_data.T):
            if i in surviving_labels:
                mean_val = np.mean(roi_values)
                std_val = np.std(roi_values)
                skew_val = skew(roi_values)
                kurt_val = kurtosis(roi_values)

                if roi_values.max() == roi_values.min():
                    roi_features = [mean_val, std_val, skew_val, kurt_val, 0, 0, 0, 0]
                else:
                    roi_values_int = (roi_values - roi_values.min()) / (roi_values.max() - roi_values.min()) * 255
                    roi_values_int = roi_values_int.astype(np.uint8)

                    size = int(np.sqrt(len(roi_values_int)))
                    if size * size != len(roi_values_int):
                        size += 1
                    reshaped_roi_values = np.pad(roi_values_int, (0, size * size - len(roi_values_int)), 'constant').reshape(size, size)

                    glcm = graycomatrix(reshaped_roi_values, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
                    contrast = graycoprops(glcm, 'contrast').mean()
                    correlation = graycoprops(glcm, 'correlation').mean()
                    energy = graycoprops(glcm, 'energy').mean()
                    homogeneity = graycoprops(glcm, 'homogeneity').mean()

                    roi_features = [mean_val, std_val, skew_val, kurt_val, contrast, correlation, energy, homogeneity]
                all_roi_features.extend(roi_features)

        return np.array(all_roi_features), surviving_labels

    except Exception as e:
        logging.error(f"Error processing image {image_path}: {e}")
        return None, None

# Paths to your data
mri_pet_data_path = r"C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\cleaned_skullstrip"
mri_data_path = r"C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip"

# Find the reference MRI image for MRI-only data
reference_mri_only_path = None
max_rois_mri_only = 0

for subject_folder in os.listdir(mri_data_path):
    mri_path = os.path.join(mri_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
    img = nib.load(mri_path)
    masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
    roi_data = masker.fit_transform(img)
    num_rois = roi_data.shape[1]

    if num_rois > max_rois_mri_only:
        max_rois_mri_only = num_rois
        reference_mri_only_path = mri_path

_, surviving_mri_only_labels = extract_roi_features(reference_mri_only_path, atlas_filename)


# Process MRI-only data
mri_only_processed_data = []

expected_feature_length = len(surviving_mri_only_labels) * 8 if surviving_mri_only_labels is not None else 0

for subject_folder in os.listdir(mri_data_path):
    if subject_folder.startswith("sub-"):
        mri_path = os.path.join(mri_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
        mri_features, _ = extract_roi_features(mri_path, atlas_filename, surviving_mri_only_labels)
        if mri_features is not None:
            if expected_feature_length > 0 and len(mri_features) < expected_feature_length:
                padding = np.full(expected_feature_length - len(mri_features), np.nan)
                padded_features = np.concatenate((mri_features, padding))
                subject_id_str = subject_folder.replace("sub-", "")
                try:
                    subject_id = int(subject_id_str)
                    label = 0 if subject_id < 4000 else 1
                    mri_only_processed_data.append({"patient_id": subject_id, "features": padded_features, "label": label})
                except ValueError:
                    logging.warning(f"Could not parse subject ID for labeling: {subject_folder}")
            else:
                subject_id_str = subject_folder.replace("sub-", "")
                try:
                    subject_id = int(subject_id_str)
                    label = 0 if subject_id < 4000 else 1
                    mri_only_processed_data.append({"patient_id": subject_id, "features": mri_features, "label": label})
                except ValueError:
                    logging.warning(f"Could not parse subject ID for labeling: {subject_folder}")
        else:
            if expected_feature_length > 0:
                subject_id_str = subject_folder.replace("sub-", "")
                try:
                    subject_id = int(subject_id_str)
                    mri_only_processed_data.append({"patient_id": subject_id, "features": np.full(expected_feature_length, np.nan), "label": np.nan})
                except ValueError:
                    logging.warning(f"Could not parse subject ID for labeling (NaN features): {subject_folder}")

# Find the reference MRI image for MRI-PET data
reference_mri_pet_path = None
max_rois_mri_pet = 0

for subject_folder in os.listdir(mri_pet_data_path):
    mri_path = os.path.join(mri_pet_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
    try:
        img = nib.load(mri_path)
        masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
        roi_data = masker.fit_transform(img)
        num_rois = roi_data.shape[1]

        if num_rois > max_rois_mri_pet:
            max_rois_mri_pet = num_rois
            reference_mri_pet_path = mri_path
        print(f"(Reference) Subject: {subject_folder}, MRI Path: {mri_path}, ROI Data Shape: {roi_data.shape}") # DEBUG
    except FileNotFoundError:
        logging.warning(f"MRI file not found for subject: {subject_folder} in {mri_path}")
    except Exception as e:
        logging.error(f"Error loading or processing MRI image {mri_path}: {e}")

_, surviving_mri_pet_labels = extract_roi_features(reference_mri_pet_path, atlas_filename)
print(f"Surviving MRI-PET labels (after reference): {surviving_mri_pet_labels}") # DEBUG

# Process MRI-PET data (Corrected PET Path Finding)
mri_pet_processed_data = []

for subject_folder in os.listdir(mri_pet_data_path):
    if subject_folder.startswith("sub-"):
        mri_path = os.path.join(mri_pet_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
        pet_folder = os.path.join(mri_pet_data_path, "preprocessed_pet", subject_folder, "pet")
        pet_path = None

        try:
            for filename in os.listdir(pet_folder):
                if filename.endswith("_space-MNI152NLin2009aPET.nii.gz"):
                    pet_path = os.path.join(pet_folder, filename)
                    break  # Assuming only one matching PET file per subject
            if pet_path is None:
                logging.warning(f"PET image not found for subject: {subject_folder} in {pet_folder}")
                continue # Skip to the next subject if no PET found
        except FileNotFoundError:
            logging.warning(f"PET folder not found for subject: {subject_folder} at {pet_folder}")
            continue # Skip to the next subject if PET folder is missing

        print(f"(Processing) Subject: {subject_folder}, MRI Path: {mri_path}, PET Path: {pet_path}") # DEBUG

        mri_features, surviving_mri_labels = extract_roi_features(mri_path, atlas_filename)
        print(f"(Processing) MRI Features Shape: {mri_features.shape if mri_features is not None else None}, Surviving Labels: {surviving_mri_labels}") # DEBUG

        pet_features, surviving_pet_labels = extract_roi_features(pet_path, atlas_filename)
        print(f"(Processing) PET Features Shape: {pet_features.shape if pet_features is not None else None}, Surviving Labels: {surviving_pet_labels}") # DEBUG

        if surviving_mri_labels is not None and surviving_pet_labels is not None:
            common_labels = sorted(list(set(surviving_mri_labels) & set(surviving_pet_labels)))
            print(f"(Processing) Common Labels: {common_labels}") # DEBUG

            if common_labels:
                # Extract features again, but only for the common labels
                mri_features_common, _ = extract_roi_features(mri_path, atlas_filename, surviving_labels=common_labels)
                pet_features_common, _ = extract_roi_features(pet_path, atlas_filename, surviving_labels=common_labels)

                if mri_features_common is not None and pet_features_common is not None:
                    combined_features = np.concatenate((mri_features_common, pet_features_common))
                    patient_id_str = subject_folder.replace("sub-", "")
                    label = 1 if "patient" in subject_folder else 0
                    mri_pet_processed_data.append({"patient_id": patient_id_str, "features": combined_features, "label": label})
                else:
                    patient_id_str = subject_folder.replace("sub-", "")
                    label = 1 if "patient" in subject_folder else 0
                    combined_features_length = len(common_labels) * 16
                    mri_pet_processed_data.append({"patient_id": patient_id_str, "features": np.full(combined_features_length, np.nan), "label": label})
            else:
                print(f"(Processing) No common ROIs found for {subject_folder}")
                patient_id_str = subject_folder.replace("sub-", "")
                label = 1 if "patient" in subject_folder else 0
                mri_pet_processed_data.append({"patient_id": patient_id_str, "features": np.array([]), "label": label}) # Append empty features
        else:
            print(f"(Processing) Could not get surviving labels for {subject_folder}")
            patient_id_str = subject_folder.replace("sub-", "")
            label = 1 if "patient" in subject_folder else 0
            mri_pet_processed_data.append({"patient_id": patient_id_str, "features": np.array([]), "label": label}) # Append empty features

# Feature Scaling
scaler_mri_only = StandardScaler()
mri_only_features_to_scale = [item["features"] for item in mri_only_processed_data if isinstance(item["features"], np.ndarray) and item["features"].size > 0]
if mri_only_features_to_scale:
    mri_only_features_scaled = scaler_mri_only.fit_transform(np.array(mri_only_features_to_scale))
    mri_only_labels_array = np.array([item["label"] for item in mri_only_processed_data if isinstance(item["label"], (int, float)) and isinstance(item["features"], np.ndarray) and item["features"].size > 0])
    mri_only_patient_ids = [item["patient_id"] for item in mri_only_processed_data if isinstance(item["features"], np.ndarray) and item["features"].size > 0]
else:
    mri_only_features_scaled = np.array([])
    mri_only_labels_array = np.array([])
    mri_only_patient_ids = []

scaler_mri_pet = StandardScaler()
mri_pet_features_to_scale = [item["features"] for item in mri_pet_processed_data if isinstance(item["features"], np.ndarray) and item["features"].size > 0]
if mri_pet_features_to_scale:
    mri_pet_features_scaled = scaler_mri_pet.fit_transform(np.array(mri_pet_features_to_scale))
    mri_pet_labels_array = np.array([item["label"] for item in mri_pet_processed_data if isinstance(item["label"], int) and isinstance(item["features"], np.ndarray) and item["features"].size > 0])
    mri_pet_patient_ids = [item["patient_id"] for item in mri_pet_processed_data if isinstance(item["features"], np.ndarray) and item["features"].size > 0]
else:
    mri_pet_features_scaled = np.array([])
    mri_pet_labels_array = np.array([])
    mri_pet_patient_ids = []

# Example of printing the processed data
print("\nProcessed MRI-Only Data (Alternative):")
for item in mri_only_processed_data:
    print(f"Patient ID: {item['patient_id']}, Label: {item['label']}, Features Shape: {item['features'].shape if isinstance(item['features'], np.ndarray) else None}")

print("\nProcessed MRI-PET Data (Alternative):")
for item in mri_pet_processed_data:
    print(f"Patient ID: {item['patient_id']}, Label: {item['label']}, Features Shape: {item['features'].shape if isinstance(item['features'], np.ndarray) else None}")

print(f"\nShape of Scaled MRI only features (Alternative): {mri_only_features_scaled.shape}")
print(f"Shape of MRI only labels (Alternative): {mri_only_labels_array.shape}")
print(f"Patient IDs (MRI-Only) (Alternative): {mri_only_patient_ids}")

print(f"\nShape of Scaled MRI PET features (Alternative): {mri_pet_features_scaled.shape}")
print(f"Shape of MRI PET labels (Alternative): {mri_pet_labels_array.shape}")
print(f"Patient IDs (MRI-PET) (Alternative): {mri_pet_patient_ids}")

(Reference) Subject: sub-control01, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\cleaned_skullstrip\sub-control01\anat\sub-control01_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 22)
(Reference) Subject: sub-control02, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\cleaned_skullstrip\sub-control02\anat\sub-control02_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 22)
(Reference) Subject: sub-control03, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\cleaned_skullstrip\sub-control03\anat\sub-control03_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 22)
(Reference) Subject: sub-control04, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\cleaned_skullstrip\sub-control04\anat\sub-control04_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 22)
(Reference) Subject: sub-control05, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\cleaned_skullstrip\sub

In [43]:
print(mri_only_labels_array)

[0 0 0 0 0 0 0 0 0 0]


In [72]:
import os
import numpy as np
import nibabel as nib
from nilearn import input_data, datasets
import logging
from scipy.stats import skew, kurtosis
from skimage.feature import graycomatrix, graycoprops
from sklearn.preprocessing import StandardScaler
import warnings
import pandas as pd  # Import Pandas

# Setup logging
logging.basicConfig(filename='processing_log.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Define data paths
mri_skullstrip_data_path = r"C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip"
pet_processed_data_path = r"C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\preprocessed_pet"
mri_pet_anat_data_path = r"C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri_pet\derivatives\cleaned_skullstrip"
atlas_filename = datasets.fetch_atlas_aal().maps

def extract_roi_features(image_path, atlas_filename, surviving_labels=None):
    try:
        img = nib.load(image_path)
        masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
        roi_data = masker.fit_transform(img)

        all_roi_features = []
        if surviving_labels is None:
            surviving_labels = range(roi_data.shape[1])

        for i, roi_values in enumerate(roi_data.T):
            if i in surviving_labels:
                mean_val = np.mean(roi_values)
                std_val = np.std(roi_values)
                skew_val = skew(roi_values)
                kurt_val = kurtosis(roi_values)

                if roi_values.max() == roi_values.min():
                    roi_features = [mean_val, std_val, skew_val, kurt_val, 0, 0, 0, 0]
                else:
                    roi_values_int = (roi_values - roi_values.min()) / (roi_values.max() - roi_values.min()) * 255
                    roi_values_int = roi_values_int.astype(np.uint8)

                    size = int(np.sqrt(len(roi_values_int)))
                    if size * size != len(roi_values_int):
                        size += 1
                    reshaped_roi_values = np.pad(roi_values_int, (0, size * size - len(roi_values_int)), 'constant').reshape(size, size)

                    glcm = graycomatrix(reshaped_roi_values, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
                    contrast = graycoprops(glcm, 'contrast').mean()
                    correlation = graycoprops(glcm, 'correlation').mean()
                    energy = graycoprops(glcm, 'energy').mean()
                    homogeneity = graycoprops(glcm, 'homogeneity').mean()

                    roi_features = [mean_val, std_val, skew_val, kurt_val, contrast, correlation, energy, homogeneity]
                all_roi_features.extend(roi_features)

        return np.array(all_roi_features), surviving_labels

    except FileNotFoundError:
        logging.error(f"Error: File not found at {image_path}")
        return None, None
    except Exception as e:
        logging.error(f"Error processing image {image_path}: {e}")
        return None, None

# Process MRI (Skull-stripped) data
mri_only_data = []
reference_mri_skullstrip_path = None
max_rois_mri_skullstrip = 0

for subject_folder in os.listdir(mri_skullstrip_data_path):
    if subject_folder.startswith("sub-"):
        mri_path = os.path.join(mri_skullstrip_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
        try:
            img = nib.load(mri_path)
            masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
            roi_data = masker.fit_transform(img)
            num_rois = roi_data.shape[1]

            if num_rois > max_rois_mri_skullstrip:
                max_rois_mri_skullstrip = num_rois
                reference_mri_skullstrip_path = mri_path
            print(f"(Reference MRI-Only) Subject: {subject_folder}, MRI Path: {mri_path}, ROI Data Shape: {roi_data.shape}") # DEBUG
        except FileNotFoundError:
            logging.warning(f"MRI file not found for subject: {subject_folder} in {mri_path}")
        except Exception as e:
            logging.error(f"Error loading or processing MRI image {mri_path}: {e}")

if reference_mri_skullstrip_path:
    _, surviving_mri_skullstrip_labels = extract_roi_features(reference_mri_skullstrip_path, atlas_filename)
    print(f"Surviving MRI-Only labels (after reference): {surviving_mri_skullstrip_labels}") # DEBUG

    for subject_folder in os.listdir(mri_skullstrip_data_path):
        if subject_folder.startswith("sub-"):
            mri_path = os.path.join(mri_skullstrip_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
            features, _ = extract_roi_features(mri_path, atlas_filename, surviving_mri_skullstrip_labels)
            if features is not None:
                subject_id = subject_folder
                try:
                    numeric_id = int(subject_id.replace("sub-", ""))
                    label = 1 if numeric_id < 4000 else 0
                except ValueError:
                    logging.warning(f"Could not extract numeric ID from {subject_folder}. Assigning default label 0.")
                    label = 0
                feature_dict = {f'M{i+1}': val for i, val in enumerate(features.flatten())}
                mri_only_data.append({'subject_id': subject_id, **feature_dict, 'label': label})
else:
    logging.error("No reference MRI found for MRI-only data. Skipping MRI-only processing.")

print(f"MRI-PET (Anat) Data Path: {mri_pet_anat_data_path}") # DEBUG
print(f"PET (Processed) Data Path: {pet_processed_data_path}") # DEBUG

# Process MRI and PET (from MRI-PET) data
mri_pet_data = {}
surviving_mri_pet_labels = None
reference_mri_pet_path = None
max_rois_mri_pet = 0

for subject_folder in os.listdir(mri_pet_anat_data_path):
    if subject_folder.startswith("sub-control"):
        mri_path = os.path.join(mri_pet_anat_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
        try:
            img = nib.load(mri_path)
            masker = input_data.NiftiLabelsMasker(labels_img=atlas_filename, standardize=False)
            roi_data = masker.fit_transform(img)
            num_rois = roi_data.shape[1]

            if num_rois > max_rois_mri_pet:
                max_rois_mri_pet = num_rois
                reference_mri_pet_path = mri_path
            print(f"(Reference MRI-PET MRI) Subject: {subject_folder}, MRI Path: {mri_path}, ROI Data Shape: {roi_data.shape}") # DEBUG
        except FileNotFoundError:
            logging.warning(f"MRI file not found for subject: {subject_folder} in {mri_path}")
        except Exception as e:
            logging.error(f"Error loading or processing MRI image {mri_path}: {e}")

if reference_mri_pet_path:
    _, surviving_mri_pet_labels = extract_roi_features(reference_mri_pet_path, atlas_filename)
    print(f"Surviving MRI-PET labels (after reference): {surviving_mri_pet_labels}") # DEBUG
else:
    logging.error("No reference MRI found for MRI-PET data. Skipping MRI-PET processing.")

if surviving_mri_pet_labels is not None:
    for subject_folder in os.listdir(mri_pet_anat_data_path):
        if subject_folder.startswith("sub-control"):
            subject_id = subject_folder
            mri_path = os.path.join(mri_pet_anat_data_path, subject_folder, "anat", f"{subject_folder}_T1w_skullstripped.nii.gz")
            print(f"(Processing MRI-PET MRI) Subject: {subject_folder}, MRI Path: {mri_path}") # DEBUG
            mri_features, _ = extract_roi_features(mri_path, atlas_filename, surviving_mri_pet_labels)

            pet_folder = os.path.join(pet_processed_data_path, subject_folder, "pet")
            pet_path = None
            try:
                for filename in os.listdir(pet_folder):
                    if filename.endswith("_space-MNI152NLin2009aPET.nii.gz"):
                        pet_path = os.path.join(pet_folder, filename)
                        break
                if pet_path:
                    print(f"(Processing PET) Subject: {subject_folder}, PET Path: {pet_path}") # DEBUG
                    pet_features, _ = extract_roi_features(pet_path, atlas_filename, surviving_mri_pet_labels)
                else:
                    pet_features = None
                    logging.warning(f"PET image not found for subject: {subject_folder} in {pet_folder}")
            except FileNotFoundError:
                pet_features = None
                logging.warning(f"PET folder not found for subject: {subject_folder} at {pet_folder}")

            label = 0 if "control" in subject_folder else 1
            mri_pet_data[subject_id] = {
                **({f'M{i+1}': val for i, val in enumerate(mri_features.flatten())} if mri_features is not None else {}),
                **({f'P{i+1}': val for i, val in enumerate(pet_features.flatten())} if pet_features is not None else {}),
                'label': label
            }

# --- Save to CSV files ---
def save_list_of_dicts_to_csv(data, filename):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"\nData saved to {filename}")

def save_combined_mri_pet_data_to_csv(data, filename):
    output_data = []
    for subject_id, features in data.items():
        output_data.append({'subject_id': subject_id, **features})
    df = pd.DataFrame(output_data)
    df.to_csv(filename, index=False)
    print(f"\nCombined MRI-PET data saved to {filename}")

# Save MRI-only data
save_list_of_dicts_to_csv(mri_only_data, 'mri_only_data_wide.csv')

# Save combined MRI-PET data
save_combined_mri_pet_data_to_csv(mri_pet_data, 'mri_pet_combined_wide.csv')

print("\nProcessing complete. Data saved to wide format CSV files.")

import pandas as pd

# Load and print the head of the new CSV files
try:
    df_mri_only = pd.read_csv('mri_only_data_wide.csv')
    print("\nHead of MRI-Only Data (Wide Format):")
    print(df_mri_only.head())
except FileNotFoundError:
    print("\nError: mri_only_data_wide.csv not found.")

try:
    df_mri_pet = pd.read_csv('mri_pet_combined_wide.csv')
    print("\nHead of Combined MRI-PET Data (Wide Format):")
    print(df_mri_pet.head())
except FileNotFoundError:
    print("\nError: mri_pet_combined_wide.csv not found.")

(Reference MRI-Only) Subject: sub-1, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip\sub-1\anat\sub-1_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 116)
(Reference MRI-Only) Subject: sub-10, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip\sub-10\anat\sub-10_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 115)
(Reference MRI-Only) Subject: sub-2, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip\sub-2\anat\sub-2_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 116)
(Reference MRI-Only) Subject: sub-3, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip\sub-3\anat\sub-3_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 116)
(Reference MRI-Only) Subject: sub-4, MRI Path: C:\Users\ishsh\OneDrive\Desktop\tests\processed\mri\derivatives\cleaned_skullstrip\sub-4\anat\sub-4_T1w_skullstripped.nii.gz, ROI Data Shape: (1, 115)
(Refere

In [6]:
import pandas as pd

# Load the MRI-only data
try:
    df_mri_only = pd.read_csv('mri_only_data_wide.csv')
    print("\n--- MRI-Only Data ---")
    total_rows_mri_only = len(df_mri_only)
    columns_to_drop_mri_only = []
    for col in df_mri_only.columns:
        nan_percentage = df_mri_only[col].isnull().sum() / total_rows_mri_only * 100
        if nan_percentage > 50:
            columns_to_drop_mri_only.append(col)

    print(f"Number of columns with > 50% NaN values: {len(columns_to_drop_mri_only)}")
    if columns_to_drop_mri_only:
        print("Columns with > 50% NaN values:")
        print(columns_to_drop_mri_only)
        # You can then drop these columns if you decide to:
        # df_mri_only_cleaned = df_mri_only.drop(columns=columns_to_drop_mri_only)
        # print("\nShape of MRI-Only data after dropping columns:", df_mri_only_cleaned.shape)
    else:
        print("No columns found with > 50% NaN values.")

except FileNotFoundError:
    print("\nError: mri_only_data_wide.csv not found.")
    df_mri_only = None

# Load the combined MRI-PET data
try:
    df_mri_pet = pd.read_csv('mri_pet_combined_wide.csv')
    print("\n--- Combined MRI-PET Data ---")
    total_rows_mri_pet = len(df_mri_pet)
    columns_to_drop_mri_pet = []
    for col in df_mri_pet.columns:
        nan_percentage = df_mri_pet[col].isnull().sum() / total_rows_mri_pet * 100
        if nan_percentage > 50:
            columns_to_drop_mri_pet.append(col)

    print(f"Number of columns with > 50% NaN values: {len(columns_to_drop_mri_pet)}")
    if columns_to_drop_mri_pet:
        print("Columns with > 50% NaN values:")
        print(columns_to_drop_mri_pet)
        # You can then drop these columns if you decide to:
        # df_mri_pet_cleaned = df_mri_pet.drop(columns=columns_to_drop_mri_pet)
        # print("\nShape of Combined MRI-PET data after dropping columns:", df_mri_pet_cleaned.shape)
    else:
        print("No columns found with > 50% NaN values.")

except FileNotFoundError:
    print("\nError: mri_pet_combined_wide.csv not found.")
    df_mri_pet = None


--- MRI-Only Data ---
Number of columns with > 50% NaN values: 232
Columns with > 50% NaN values:
['M3', 'M4', 'M11', 'M12', 'M19', 'M20', 'M27', 'M28', 'M35', 'M36', 'M43', 'M44', 'M51', 'M52', 'M59', 'M60', 'M67', 'M68', 'M75', 'M76', 'M83', 'M84', 'M91', 'M92', 'M99', 'M100', 'M107', 'M108', 'M115', 'M116', 'M123', 'M124', 'M131', 'M132', 'M139', 'M140', 'M147', 'M148', 'M155', 'M156', 'M163', 'M164', 'M171', 'M172', 'M179', 'M180', 'M187', 'M188', 'M195', 'M196', 'M203', 'M204', 'M211', 'M212', 'M219', 'M220', 'M227', 'M228', 'M235', 'M236', 'M243', 'M244', 'M251', 'M252', 'M259', 'M260', 'M267', 'M268', 'M275', 'M276', 'M283', 'M284', 'M291', 'M292', 'M299', 'M300', 'M307', 'M308', 'M315', 'M316', 'M323', 'M324', 'M331', 'M332', 'M339', 'M340', 'M347', 'M348', 'M355', 'M356', 'M363', 'M364', 'M371', 'M372', 'M379', 'M380', 'M387', 'M388', 'M395', 'M396', 'M403', 'M404', 'M411', 'M412', 'M419', 'M420', 'M427', 'M428', 'M435', 'M436', 'M443', 'M444', 'M451', 'M452', 'M459', 'M460',

In [79]:
import pandas as pd

# Assuming you have run the previous code block to create df_mri_only_cleaned and df_mri_pet_cleaned

# Check NaN values in the cleaned MRI-only DataFrame
if 'df_mri_only_cleaned' in locals() and df_mri_only_cleaned is not None:
    print("\n--- Cleaned MRI-Only Data ---")
    print(f"Total number of columns: {df_mri_only_cleaned.shape[1]}")
    nan_counts_cleaned_mri_only = df_mri_only_cleaned.isnull().sum()
    columns_with_nan_cleaned_mri_only = nan_counts_cleaned_mri_only[nan_counts_cleaned_mri_only > 0]
    num_cols_with_nan_cleaned_mri_only = len(columns_with_nan_cleaned_mri_only)
    print(f"Number of columns with NaN values: {num_cols_with_nan_cleaned_mri_only}")
    if num_cols_with_nan_cleaned_mri_only > 0:
        print("\nNaN values per column:")
        print(nan_counts_cleaned_mri_only[nan_counts_cleaned_mri_only > 0])
    else:
        print("\nNo columns with NaN values found.")
else:
    print("\nError: df_mri_only_cleaned not found. Make sure the previous step ran successfully.")

# Check NaN values in the cleaned combined MRI-PET DataFrame
if 'df_mri_pet_cleaned' in locals() and df_mri_pet_cleaned is not None:
    print("\n--- Cleaned Combined MRI-PET Data ---")
    print(f"Total number of columns: {df_mri_pet_cleaned.shape[1]}")
    nan_counts_cleaned_mri_pet = df_mri_pet_cleaned.isnull().sum()
    columns_with_nan_cleaned_mri_pet = nan_counts_cleaned_mri_pet[nan_counts_cleaned_mri_pet > 0]
    num_cols_with_nan_cleaned_mri_pet = len(columns_with_nan_cleaned_mri_pet)
    print(f"Number of columns with NaN values: {num_cols_with_nan_cleaned_mri_pet}")
    if num_cols_with_nan_cleaned_mri_pet > 0:
        print("\nNaN values per column:")
        print(nan_counts_cleaned_mri_pet[nan_counts_cleaned_mri_pet > 0])
    else:
        print("\nNo columns with NaN values found.")
else:
    print("\nError: df_mri_pet_cleaned not found. Make sure the previous step ran successfully.")


--- Cleaned MRI-Only Data ---
Total number of columns: 698
Number of columns with NaN values: 6

NaN values per column:
M921    2
M922    2
M925    2
M926    2
M927    2
M928    2
dtype: int64

--- Cleaned Combined MRI-PET Data ---
Total number of columns: 266
Number of columns with NaN values: 0

No columns with NaN values found.
