In [1]:
import pandas as pd

In [2]:
# import data files
young_train_df = pd.read_csv("young_train_df.csv")
young_test_df = pd.read_csv("young_test_df.csv")
old_train_df = pd.read_csv("old_train_df.csv")
old_test_df = pd.read_csv("old_test_df.csv")

In [3]:
# Assuming young_train_df and old_train_df are already loaded DataFrames

# Function to randomly select a matching row from old_train_df based on constraints
def select_matching_row(row):
    if row['Diagnosis'] == 'CN':
        # Filter old_train_df based on constraints
        filtered_old_df = old_train_df[(old_train_df['Diagnosis'] == 'CN') | (old_train_df['Diagnosis'] == 'MCI') | (old_train_df['Diagnosis'] == 'AD')]
    elif row['Diagnosis'] == 'MCI':
        # Filter old_train_df based on constraints
        filtered_old_df = old_train_df[(old_train_df['Diagnosis'] == 'MCI') | (old_train_df['Diagnosis'] == 'AD')]
    else:
        # Filter old_train_df based on constraints
        filtered_old_df = old_train_df[old_train_df['Diagnosis'] == 'AD']
    
    # Randomly select a row from filtered_old_df
    selected_row = filtered_old_df.sample(n=1)
    
    # Return the MRID, Age, and Diagnosis of the selected row
    return selected_row['MRID'].values[0], selected_row['Age-rounded'].values[0], selected_row['Diagnosis'].values[0]

# Apply the function to each row in young_train_df and expand the returned results into multiple columns
result = young_train_df.apply(select_matching_row, axis=1, result_type='expand')
young_train_df['MRID_pair'], young_train_df['OldAgeRounded'], young_train_df['OldDiagnosis'] = result[0], result[1], result[2]


In [4]:
young_train_df

Unnamed: 0,MRID,Age,Diagnosis,Sex,sth,B,Patient ID,Date,Age-rounded,Age_Group,dataset,MRID_pair,OldAgeRounded,OldDiagnosis
0,023_S_0058_2005-12-12,70.100000,CN,M,23,S,58,2005-12-12,70.0,young,train,024_S_4280_2011-10-18,80.0,AD
1,073_S_0089_2010-10-04,69.773511,CN,M,73,S,89,2010-10-04,70.0,young,train,041_S_6401_2019-05-14,77.0,CN
2,082_S_0304_2006-04-06,70.800000,CN,F,82,S,304,2006-04-06,71.0,young,train,033_S_5017_2012-11-08,84.0,AD
3,023_S_0331_2006-04-12,64.616427,MCI,F,23,S,331,2006-04-12,65.0,young,train,051_S_5005_2012-11-06,78.0,AD
4,021_S_0332_2006-04-19,69.900000,AD,M,21,S,332,2006-04-19,70.0,young,train,023_S_1289_2007-02-20,77.0,AD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280,033_S_6572_2019-09-26,68.959548,CN,F,33,S,6572,2019-09-26,69.0,young,train,033_S_5259_2013-07-25,79.0,CN
281,137_S_6685_2019-04-01,70.500000,MCI,M,137,S,6685,2019-04-01,70.0,young,train,116_S_4209_2011-08-29,78.0,AD
282,023_S_6723_2019-04-19,64.000000,MCI,F,23,S,6723,2019-04-19,64.0,young,train,037_S_0150_2014-05-07,82.0,MCI
283,016_S_6802_2019-08-21,63.223340,CN,F,16,S,6802,2019-08-21,63.0,young,train,041_S_4876_2012-08-14,74.0,MCI


In [5]:
old_train_df[old_train_df["MRID"]=="073_S_0746_2012-02-13"]

Unnamed: 0,MRID,Age,Diagnosis,Sex,sth,B,Patient ID,Date,Age-rounded,Age_Group,dataset
40,073_S_0746_2012-02-13,78.851335,MCI,F,73,S,746,2012-02-13,79.0,old,train


In [6]:
import os
import nibabel as nib
import numpy as np
import torch

# Assuming 'young_train_df' is already loaded and contains columns 'MRID' and 'MRID_pair'
# Path to the directory where folders named by MRID are stored
base_image_path = 'adni_data'

# Function to load a NIfTI image from a specific folder
def load_nifti_image(mrid):
    folder_path = os.path.join(base_image_path, mrid, mrid+"_MNI152_registered.nii.gz")
    try:
        # We assume the file follows a naming convention such as 'image.nii.gz'
        image_path = os.path.join(folder_path, 'image.nii.gz')  # Adjust the file name as needed
        image = nib.load(image_path)
        return image
    except FileNotFoundError:
        print(f"No image found for MRID {mrid} in {folder_path}")
        return None
    except Exception as e:
        print(f"An error occurred while loading NIfTI image for MRID {mrid}: {str(e)}")
        return None

# Function to encode the diagnosis based on predefined category mapping
def encode_diagnosis(diagnosis):
    category_mapping = {"CN": [0, 0], "MCI": [0, 1], "AD": [1, 1]}
    # Fetch the list from the mapping or default to [None, None] if diagnosis is not found
    encoded_list = category_mapping.get(diagnosis, [None, None])
    # Convert list to a 2x1 NumPy array
    return np.array(encoded_list).reshape(2, 1)

def age_vector(age):
    vector = np.zeros(100, dtype=int)
    if age < 100:  # If age is 100 or more, the vector will be all 1s
        vector[100-age:] = 1
    else:
        vector[:] = 1
    return vector

# Iterate through each row in the DataFrame
for index, row in young_train_df.iterrows():
    young_image = load_nifti_image(row['MRID'])
    old_image = load_nifti_image(row['MRID_pair'])
    young_age = row["Age-rounded"]
    old_age = row["OldAgeRounded"]
    young_condition = encode_diagnosis(row["Diagnosis"])
    old_condition = encode_diagnosis(row["OldDiagnosis"])

    age_difference = abs(old_age - young_age)  # Calculate absolute age difference

    old_age_vector = age_vector(old_age)  # Encode old age
    age_difference_vector = age_vector(age_difference)  # Encode age difference
    
    # Here you can process or display the images as needed
    # For example, you might visualize the images using matplotlib or another library
    # This is just a placeholder for whatever processing you want to do
    print(f"Loaded NIfTI images for young MRID {row['MRID']} and old MRID {row['MRID_pair']}")

    # Compute the 99.5th percentile intensity value for old_image and young_image
    old_image_percentile = np.percentile(old_image, 99.5)
    young_image_percentile = np.percentile(young_image, 99.5)
    
    # Rescale the intensities of old_image and young_image
    old_image = np.clip(old_image, 0, old_image_percentile)
    young_image = np.clip(young_image, 0, young_image_percentile)

    # Assuming old_image and young_image are torch tensors

    # Normalize pixel values to range [-1, 1]
    old_image_normalized = (old_image / old_image.abs().max()) * 2 - 1
    young_image_normalized = (young_image / young_image.abs().max()) * 2 - 1
    
    # Reshape images to 208x160 using torch
    resize = torch.nn.functional.interpolate
    old_image_reshaped = resize(old_image_normalized.unsqueeze(0).unsqueeze(0), size=(208, 160), mode='bilinear', align_corners=False).squeeze(0).squeeze(0)
    young_image_reshaped = resize(young_image_normalized.unsqueeze(0).unsqueeze(0), size=(208, 160), mode='bilinear', align_corners=False).squeeze(0).squeeze(0)
    
    # Now old_image_reshaped and young_image_reshaped are resized to 208x160 and normalized to range [-1, 1]
    xi = young_image_reshaped
    yo = old_image_reshaped
    ad = age_difference_vector
    ao = old_age_vector
    ho = old_condition

    # Define the file name
    output_file = f"{MRID}_{MRID_pair}.npz"
    print(output_file)
    
    # Save the variables into the .npz file
    # np.savez(output_file, xi=xi, yo=yo, ad=ad, ao=ao, ho=ho)
    
    print("Data saved to", output_file)
    

    


No image found for MRID 023_S_0058_2005-12-12 in adni_data/023_S_0058_2005-12-12/023_S_0058_2005-12-12_MNI152_registered.nii.gz
No image found for MRID 024_S_4280_2011-10-18 in adni_data/024_S_4280_2011-10-18/024_S_4280_2011-10-18_MNI152_registered.nii.gz


TypeError: 'DataFrame' object is not callable