In [1]:
import torch
import torch.nn as nn
import torchio as tio 
import numpy as np
import pandas as pd
import nibabel as nib
import json

import os

# global settings
# Every root_* directory lives below <base_path>/data; the shared prefixes are
# built once and the individual roots are derived from them.
base_path = '/mnt/data_lab513/vqtran_data'
_data_dir = os.path.join(base_path, "data")
_clean_dir = os.path.join(_data_dir, "clean_data")
_train_dec_dir = os.path.join(_data_dir, "data_train_dec")

# Raw input images.
root_data = os.path.join(_data_dir, "raw_data", "ADNI_NIfTI")

# One directory per preprocessing stage.
root_bias_correction = os.path.join(_clean_dir, "mri_bias_correction")
root_bet = os.path.join(_clean_dir, "mri_brain_extraction")
root_reg = os.path.join(_clean_dir, "mri_registration")

# Metadata and training data.
root_meta = os.path.join(_data_dir, "meta_data")#, "Pre-Thesis_metadata", "ADNI")
root_train = os.path.join(_data_dir, "train_data")
root_train_dec = os.path.join(_train_dec_dir, "origin")
root_train_unique = os.path.join(_train_dec_dir, "unique")
root_train_unique_tensor = os.path.join(_train_dec_dir, "tensor")



# Work

0. Read the data in (subject metadata via pandas/JSON, images loaded by filename with nibabel)
1. Compute the mean image of each class
2. Compute the mean squared distance between each image and each class-mean image
3. Save the data as a dataframe in unique_subject_with_prediction_mean_filter.csv
4. Cut off images that have a high mean squared distance
5. Create torch tensors for the dataset


# 1. Read data in & compute_mean

In [2]:
# Subject table loaded from JSON. Each entry is indexed positionally below:
# entry[1] is the diagnosis label and entry[2] is the image file name.
# The `with` block closes the file handle as soon as the JSON is parsed.
with open('../investigate/unique_dataset_dict.json', 'r') as subject_file:
    subject_dict = json.load(subject_file)

# Per-class running sum of preprocessed volumes and number of volumes summed.
_mean_labels = ("CN", "EMCI", "LMCI", "AD")
_image_sum = {_label: torch.zeros(110, 110, 110) for _label in _mean_labels}
_image_count = dict.fromkeys(_mean_labels, 0)

for key in subject_dict.keys():
    label = subject_dict[key][1]
    if label not in _image_sum:
        # Any other diagnosis does not contribute to a class mean.
        continue

    filename = subject_dict[key][2]
    image_absolute_path = os.path.join(root_train_unique, filename)

    image_array = nib.load(image_absolute_path).get_fdata()

    # Add a leading axis so the volume is 4-D before the torchio transforms.
    image_tensor = torch.unsqueeze(torch.Tensor(image_array), 0)

    # NOTE(review): the pipeline is intentionally rebuilt for every image.
    # Some torchio releases appear to cache the input range on the
    # RescaleIntensity instance after its first call; confirm against the
    # installed version before hoisting this out of the loop.
    image_transformation_tio = tio.transforms.Compose(
            [
                tio.transforms.Resize((110,110,110)),
                tio.ZNormalization(),
                tio.RescaleIntensity(out_min_max=(0, 1)) #), in_min_max=(0., 8957.8574))
            ]
    )
    image_tensor = image_transformation_tio(image_tensor)

    # The zero tensor is 3-D and the image is 4-D, so the first addition
    # broadcasts and the running sum carries the leading axis from then on
    # (presumably shape (1, 110, 110, 110) -- TODO confirm).
    _image_sum[label] = torch.add(_image_sum[label], image_tensor)
    _image_count[label] += 1

# Names consumed by the following cells.
mean_image_CN, count_image_CN = _image_sum["CN"], _image_count["CN"]
mean_image_EMCI, count_image_EMCI = _image_sum["EMCI"], _image_count["EMCI"]
mean_image_LMCI, count_image_LMCI = _image_sum["LMCI"], _image_count["LMCI"]
mean_image_AD, count_image_AD = _image_sum["AD"], _image_count["AD"]


def _print_class_stats(class_images):
    """Print the mean, then the max, then the min of every tensor in class_images."""
    for _reduce in (torch.Tensor.mean, torch.Tensor.max, torch.Tensor.min):
        for _class_image in class_images:
            print(_reduce(_class_image))


_class_images = (mean_image_CN, mean_image_EMCI, mean_image_LMCI, mean_image_AD)

# Statistics of the raw sums, followed by the number of images per class.
_print_class_stats(_class_images)
print(count_image_CN)
print(count_image_EMCI)
print(count_image_LMCI)
print(count_image_AD)

# Turn the sums into means. The division is in place, so `_class_images`
# still refers to the same tensors. A class with zero images yields NaNs.
mean_image_CN /= count_image_CN
mean_image_EMCI /= count_image_EMCI
mean_image_LMCI /= count_image_LMCI
mean_image_AD /= count_image_AD

# Statistics of the class-mean images.
_print_class_stats(_class_images)


tensor(45.3884)
tensor(27.1659)
tensor(16.3325)
tensor(35.3058)
tensor(274.1535)
tensor(170.8169)
tensor(101.4349)
tensor(216.0457)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
349
238
139
278
tensor(0.1301)
tensor(0.1141)
tensor(0.1175)
tensor(0.1270)
tensor(0.7855)
tensor(0.7177)
tensor(0.7297)
tensor(0.7771)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)


In [3]:
# Persist each class-mean tensor to the current working directory.
for _file_name, _class_mean in (
    ("mean_image_CN.pt", mean_image_CN),
    ("mean_image_EMCI.pt", mean_image_EMCI),
    ("mean_image_LMCI.pt", mean_image_LMCI),
    ("mean_image_AD.pt", mean_image_AD),
):
    torch.save(_class_mean, _file_name)

# 2. Calculate the mean squared distance between each image and each class-mean image

In [7]:
# Maps subject ID -> record holding the image identifiers, the label and the
# mean squared error between the preprocessed image and each class mean.
subject_dict_with_filter_distance = {}
loss = nn.MSELoss()

for key in subject_dict.keys():
    # Reset on every iteration, so a skipped subject leaves an empty dict.
    mean_square_distance = {}

    # for 3 class NC-EMCI-AD problem # need to change for generalization
    if subject_dict[key][1] not in ("CN", "EMCI", "AD"):
        continue

    image_absolute_path = os.path.join(root_train_unique, subject_dict[key][2])
    label = subject_dict[key][1]

    image_array = nib.load(image_absolute_path).get_fdata()

    # Same preprocessing as used for the class means: add a leading axis,
    # resize, z-normalise, then rescale intensities to [0, 1].
    image_tensor = torch.Tensor(image_array).unsqueeze(0)
    image_transformation_tio = tio.transforms.Compose(
            [
                tio.transforms.Resize((110,110,110)),
                tio.ZNormalization(),
                tio.RescaleIntensity(out_min_max=(0, 1)) #), in_min_max=(0., 8957.8574))
            ]
    )
    image_tensor = image_transformation_tio(image_tensor)

    distance_CN = loss(image_tensor, mean_image_CN)
    distance_EMCI = loss(image_tensor, mean_image_EMCI)
    distance_AD = loss(image_tensor, mean_image_AD)

    # .item() converts each scalar tensor into a plain Python float.
    mean_square_distance = dict(
        zip(
            ("DISTANCE CN", "DISTANCE EMCI", "DISTANCE AD"),
            (distance_CN.item(), distance_EMCI.item(), distance_AD.item()),
        )
    )

    subject_record = {
        "Subject ID": key,
        "Image ID": subject_dict[key][0],
        "Image Path": image_absolute_path,
        "Image Target": label,
    }
    subject_record.update(mean_square_distance)
    subject_dict_with_filter_distance[key] = subject_record

print(len(subject_dict_with_filter_distance))

865


In [8]:
# Build the frame in one call from the per-subject records (one row per
# subject, columns in record key order). Growing a frame row by row with
# DataFrame.append is quadratic, and the method was removed in pandas 2.0.
filter_distance_dataframe = pd.DataFrame(list(subject_dict_with_filter_distance.values()))

# index=False: the row index is not written to the CSV.
filter_distance_dataframe.to_csv("../investigate/unique_subject_with_filter_distance_CN_EMCI_AD.csv", index=False)

In [9]:
# Reload the distances from the CSV so the following cells work from the saved file.
filter_distance_dataframe = pd.read_csv(
    "../investigate/unique_subject_with_filter_distance_CN_EMCI_AD.csv"
)
filter_distance_dataframe.head(n=30)

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD
0,002_S_0295,I13722,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.003989,0.008199,0.004507
1,002_S_0413,I14437,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007575,0.013994,0.008511
2,002_S_0559,I15948,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.005903,0.002665,0.005204
3,002_S_0619,I16392,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.012839,0.007077,0.011432
4,002_S_0685,I18211,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.006689,0.012382,0.007408
5,005_S_0814,I23573,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004964,0.00803,0.005108
6,005_S_0929,I25645,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006088,0.010617,0.006548
7,005_S_1341,I43188,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004963,0.009204,0.005419
8,006_S_0484,I17377,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007942,0.014387,0.008971
9,006_S_0498,I17505,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.002531,0.004697,0.002767


In [10]:
# Derive, for every subject, the distance to its own class mean and the
# pairwise inter-class gaps. Computed column-wise, so the result does not
# depend on the frame having a default 0..n-1 index.
#
# Raises ValueError if any row carries a label other than AD, EMCI or CN.
_target = filter_distance_dataframe["Image Target"]
if not _target.isin(["AD", "EMCI", "CN"]).all():
    raise ValueError("This dataframe only allow 3 labels: AD, EMCI and CN")

_distance = {
    "CN": filter_distance_dataframe["DISTANCE CN"].to_numpy(),
    "EMCI": filter_distance_dataframe["DISTANCE EMCI"].to_numpy(),
    "AD": filter_distance_dataframe["DISTANCE AD"].to_numpy(),
}

# Distance of each subject to the mean image of its own class.
_own_distance = np.select(
    [_target.eq(_label).to_numpy() for _label in _distance],
    list(_distance.values()),
)

# Gap to each class mean: positive when the subject is closer to its own
# class mean than to that class. The gap to the subject's own class is 0.
_gap = {_label: _values - _own_distance for _label, _values in _distance.items()}

# INTER_CLASS_DISTANCE_A_B is gap[A] + gap[B]. When the subject belongs to A
# or B, one term is 0 and the column is the single gap to the other class;
# otherwise it is the sum of the gaps to both other classes.
intra_class_distance = _own_distance.tolist()
inter_class_distance_CN_EMCI = (_gap["CN"] + _gap["EMCI"]).tolist()
inter_class_distance_CN_AD = (_gap["CN"] + _gap["AD"]).tolist()
inter_class_distance_EMCI_AD = (_gap["EMCI"] + _gap["AD"]).tolist()

# Values are attached positionally; assign() returns a new frame and leaves
# filter_distance_dataframe untouched.
intra_inter_distance_dataframe = filter_distance_dataframe.assign(
    INTRA_CLASS_DISTANCE=np.array(intra_class_distance),
    INTER_CLASS_DISTANCE_CN_EMCI=np.array(inter_class_distance_CN_EMCI),
    INTER_CLASS_DISTANCE_CN_AD=np.array(inter_class_distance_CN_AD),
    INTER_CLASS_DISTANCE_EMCI_AD=np.array(inter_class_distance_EMCI_AD),
)
intra_inter_distance_dataframe.head(10)

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
0,002_S_0295,I13722,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.003989,0.008199,0.004507,0.003989,0.00421,0.000518,0.004729
1,002_S_0413,I14437,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007575,0.013994,0.008511,0.007575,0.006419,0.000935,0.007354
2,002_S_0559,I15948,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.005903,0.002665,0.005204,0.005903,-0.003238,-0.0007,-0.003938
3,002_S_0619,I16392,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.012839,0.007077,0.011432,0.011432,-0.002949,0.001407,-0.004355
4,002_S_0685,I18211,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.006689,0.012382,0.007408,0.006689,0.005692,0.000718,0.006411
5,005_S_0814,I23573,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004964,0.00803,0.005108,0.005108,0.002778,-0.000144,0.002922
6,005_S_0929,I25645,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006088,0.010617,0.006548,0.006548,0.003609,-0.00046,0.004069
7,005_S_1341,I43188,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004963,0.009204,0.005419,0.005419,0.00333,-0.000456,0.003785
8,006_S_0484,I17377,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007942,0.014387,0.008971,0.007942,0.006444,0.001029,0.007473
9,006_S_0498,I17505,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.002531,0.004697,0.002767,0.002531,0.002166,0.000236,0.002402


# 4. Cut off images that have a high mean squared distance

In [11]:
# positive_inter_class_distance_filter_CN_EMCI = intra_inter_distance_dataframe["INTER_CLASS_DISTANCE_CN_EMCI"] > -0.001
# positive_inter_class_distance_filter_CN_AD = intra_inter_distance_dataframe["INTER_CLASS_DISTANCE_CN_AD"] > -0.001
# positive_inter_class_distance_filter_EMCI_AD = intra_inter_distance_dataframe["INTER_CLASS_DISTANCE_EMCI_AD"] > -0.001

In [12]:
# No global inter-class threshold is applied here: every subject is kept, and
# the class-specific filters in the later cells work on this copy.
all_inter_class_distance_dataframe = intra_inter_distance_dataframe.copy(deep=True)
print(all_inter_class_distance_dataframe.shape[0])
all_inter_class_distance_dataframe.head(n=10)

865


Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
0,002_S_0295,I13722,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.003989,0.008199,0.004507,0.003989,0.00421,0.000518,0.004729
1,002_S_0413,I14437,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007575,0.013994,0.008511,0.007575,0.006419,0.000935,0.007354
2,002_S_0559,I15948,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.005903,0.002665,0.005204,0.005903,-0.003238,-0.0007,-0.003938
3,002_S_0619,I16392,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.012839,0.007077,0.011432,0.011432,-0.002949,0.001407,-0.004355
4,002_S_0685,I18211,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.006689,0.012382,0.007408,0.006689,0.005692,0.000718,0.006411
5,005_S_0814,I23573,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004964,0.00803,0.005108,0.005108,0.002778,-0.000144,0.002922
6,005_S_0929,I25645,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006088,0.010617,0.006548,0.006548,0.003609,-0.00046,0.004069
7,005_S_1341,I43188,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004963,0.009204,0.005419,0.005419,0.00333,-0.000456,0.003785
8,006_S_0484,I17377,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007942,0.014387,0.008971,0.007942,0.006444,0.001029,0.007473
9,006_S_0498,I17505,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.002531,0.004697,0.002767,0.002531,0.002166,0.000236,0.002402


In [13]:
# Number of subjects per diagnosis label in all_inter_class_distance_dataframe.
print(all_inter_class_distance_dataframe.loc[:, "Image Target"].value_counts())

CN      349
AD      278
EMCI    238
Name: Image Target, dtype: int64


In [14]:
# Boolean row masks, one per diagnosis label.
CN_filter = all_inter_class_distance_dataframe["Image Target"].eq("CN")
EMCI_filter = all_inter_class_distance_dataframe["Image Target"].eq("EMCI")
AD_filter = all_inter_class_distance_dataframe["Image Target"].eq("AD")

# One sub-frame per class; rows keep their original index labels.
all_inter_class_distance_CN_dataframe = all_inter_class_distance_dataframe.loc[CN_filter]
all_inter_class_distance_EMCI_dataframe = all_inter_class_distance_dataframe.loc[EMCI_filter]
all_inter_class_distance_AD_dataframe = all_inter_class_distance_dataframe.loc[AD_filter]

print(all_inter_class_distance_CN_dataframe.shape[0])
all_inter_class_distance_CN_dataframe.head(n=5)

349


Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
0,002_S_0295,I13722,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.003989,0.008199,0.004507,0.003989,0.00421,0.000518,0.004729
1,002_S_0413,I14437,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007575,0.013994,0.008511,0.007575,0.006419,0.000935,0.007354
2,002_S_0559,I15948,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.005903,0.002665,0.005204,0.005903,-0.003238,-0.0007,-0.003938
4,002_S_0685,I18211,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.006689,0.012382,0.007408,0.006689,0.005692,0.000718,0.006411
8,006_S_0484,I17377,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007942,0.014387,0.008971,0.007942,0.006444,0.001029,0.007473


In [15]:
# Row count and first rows of the EMCI sub-frame.
print(all_inter_class_distance_EMCI_dataframe.shape[0])
all_inter_class_distance_EMCI_dataframe.head(n=5)


238


Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
14,009_S_4958,I338115,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.007011,0.003206,0.006109,0.003206,0.003806,0.00671,0.002904
15,009_S_5000,I342850,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.00496,0.003604,0.004397,0.003604,0.001357,0.00215,0.000794
20,073_S_4825,I312909,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.004983,0.009111,0.005708,0.009111,-0.004128,-0.007531,-0.003403
22,073_S_4986,I338196,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.024165,0.015493,0.02266,0.015493,0.008672,0.015838,0.007166
25,002_S_4447,I278815,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.003232,0.001451,0.002941,0.001451,0.001781,0.00327,0.00149


In [16]:
# Row count and first rows of the AD sub-frame.
print(all_inter_class_distance_AD_dataframe.shape[0])
all_inter_class_distance_AD_dataframe.head(n=5)


278


Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
3,002_S_0619,I16392,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.012839,0.007077,0.011432,0.011432,-0.002949,0.001407,-0.004355
5,005_S_0814,I23573,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004964,0.00803,0.005108,0.005108,0.002778,-0.000144,0.002922
6,005_S_0929,I25645,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006088,0.010617,0.006548,0.006548,0.003609,-0.00046,0.004069
7,005_S_1341,I43188,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004963,0.009204,0.005419,0.005419,0.00333,-0.000456,0.003785
10,006_S_0547,I17794,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.00667,0.012383,0.007458,0.007458,0.004138,-0.000788,0.004926


Filter positive for CN class

In [17]:
# Keep the CN subjects whose gap to the EMCI mean and gap to the AD mean are
# both strictly positive, i.e. that are closer to the CN mean than to either.
positive_inter_class_distance_filter_CN_EMCI_for_CN = all_inter_class_distance_CN_dataframe["INTER_CLASS_DISTANCE_CN_EMCI"].gt(0)
positive_inter_class_distance_filter_CN_AD_for_CN = all_inter_class_distance_CN_dataframe["INTER_CLASS_DISTANCE_CN_AD"].gt(0)
positive_inter_class_distance_CN_dataframe = all_inter_class_distance_CN_dataframe.loc[
    positive_inter_class_distance_filter_CN_EMCI_for_CN
    & positive_inter_class_distance_filter_CN_AD_for_CN
]
print(positive_inter_class_distance_CN_dataframe.shape[0])

211


Sort the values of the CN class based on INTER_CLASS_DISTANCE_EMCI_AD or INTRA_CLASS_DISTANCE

In [18]:
# Largest summed gap to the EMCI and AD means first. Sorting by
# INTRA_CLASS_DISTANCE in ascending order is the alternative ranking.
positive_inter_class_distance_CN_sorted_dataframe = positive_inter_class_distance_CN_dataframe.sort_values(
    by='INTER_CLASS_DISTANCE_EMCI_AD',
    ascending=False,
)
positive_inter_class_distance_CN_sorted_dataframe.head(n=10)


Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
310,062_S_1099,I30171,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008518,0.015217,0.009556,0.008518,0.006699,0.001038,0.007737
211,005_S_0602,I17681,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008672,0.015349,0.009723,0.008672,0.006677,0.00105,0.007727
130,051_S_1123,I33309,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008669,0.015305,0.009691,0.008669,0.006636,0.001022,0.007658
569,011_S_0021,I7679,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008052,0.014634,0.009088,0.008052,0.006582,0.001035,0.007617
79,133_S_0488,I14838,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008088,0.014588,0.009147,0.008088,0.006501,0.001059,0.00756
61,033_S_0741,I19258,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008451,0.015023,0.009439,0.008451,0.006571,0.000988,0.00756
730,057_S_0818,I23927,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007724,0.014211,0.008776,0.007724,0.006487,0.001052,0.007539
584,073_S_0089,I10365,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007788,0.014242,0.008861,0.007788,0.006455,0.001073,0.007528
8,006_S_0484,I17377,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.007942,0.014387,0.008971,0.007942,0.006444,0.001029,0.007473
236,127_S_0684,I18896,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008013,0.014429,0.008992,0.008013,0.006416,0.000979,0.007395


Filter positive for EMCI class

In [19]:
# Keep the EMCI subjects whose gap to the CN mean and gap to the AD mean are
# both strictly positive, i.e. that are closer to the EMCI mean than to either.
positive_inter_class_distance_filter_CN_EMCI_for_EMCI = all_inter_class_distance_EMCI_dataframe["INTER_CLASS_DISTANCE_CN_EMCI"].gt(0)
positive_inter_class_distance_filter_EMCI_AD_for_EMCI = all_inter_class_distance_EMCI_dataframe["INTER_CLASS_DISTANCE_EMCI_AD"].gt(0)
positive_inter_class_distance_EMCI_dataframe = all_inter_class_distance_EMCI_dataframe.loc[
    positive_inter_class_distance_filter_CN_EMCI_for_EMCI
    & positive_inter_class_distance_filter_EMCI_AD_for_EMCI
]
print(positive_inter_class_distance_EMCI_dataframe.shape[0])

150


Sort the values of the EMCI class based on INTER_CLASS_DISTANCE_CN_AD or INTRA_CLASS_DISTANCE

In [20]:
# Largest summed gap to the CN and AD means first. Sorting by
# INTRA_CLASS_DISTANCE in ascending order is the alternative ranking.
positive_inter_class_distance_EMCI_sorted_dataframe = positive_inter_class_distance_EMCI_dataframe.sort_values(
    by='INTER_CLASS_DISTANCE_CN_AD',
    ascending=False,
)
positive_inter_class_distance_EMCI_sorted_dataframe.head(n=10)


Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
813,099_S_4022,I228827,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.029031,0.01885,0.027229,0.01885,0.010181,0.01856,0.008378
22,073_S_4986,I338196,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.024165,0.015493,0.02266,0.015493,0.008672,0.015838,0.007166
392,941_S_2060,I262653,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.024288,0.015818,0.022778,0.015818,0.00847,0.01543,0.00696
582,072_S_4871,I322971,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.026086,0.017756,0.024577,0.017756,0.00833,0.015151,0.006821
638,014_S_4068,I239633,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.022422,0.014498,0.021013,0.014498,0.007925,0.01444,0.006515
656,099_S_4480,I285329,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.017206,0.009353,0.015828,0.009353,0.007854,0.014329,0.006475
356,128_S_2220,I279601,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.022817,0.015093,0.021427,0.015093,0.007724,0.014059,0.006334
253,072_S_4610,I294881,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.020875,0.01326,0.019512,0.01326,0.007615,0.013867,0.006252
171,068_S_2187,I274620,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.016221,0.008647,0.014898,0.008647,0.007574,0.013825,0.006251
577,072_S_2093,I262039,/mnt/data_lab513/vqtran_data/data/data_train_d...,EMCI,0.015902,0.008443,0.014567,0.008443,0.007459,0.013583,0.006124


Filter positive for AD class

In [21]:
# Keep the AD subjects with a strictly positive gap to the EMCI mean and a
# gap to the CN mean above -0.0004.
# NOTE(review): the CN and EMCI filters require a gap > 0, while the CN gap
# here may be slightly negative. Presumably relaxed so that enough AD subjects
# remain -- confirm.
positive_inter_class_distance_filter_CN_AD_for_AD = all_inter_class_distance_AD_dataframe["INTER_CLASS_DISTANCE_CN_AD"].gt(-0.0004)
positive_inter_class_distance_filter_EMCI_AD_for_AD = all_inter_class_distance_AD_dataframe["INTER_CLASS_DISTANCE_EMCI_AD"].gt(0)
positive_inter_class_distance_AD_dataframe = all_inter_class_distance_AD_dataframe.loc[
    positive_inter_class_distance_filter_CN_AD_for_AD
    & positive_inter_class_distance_filter_EMCI_AD_for_AD
]
print(positive_inter_class_distance_AD_dataframe.shape[0])

109


Sort the values of the AD class based on INTER_CLASS_DISTANCE_EMCI_AD or INTRA_CLASS_DISTANCE

In [22]:
# Largest INTER_CLASS_DISTANCE_EMCI_AD first. Sorting by INTRA_CLASS_DISTANCE
# in ascending order is the alternative ranking.
# NOTE(review): for AD rows this column is only the gap to the EMCI mean. The
# summed gap to both other classes (the quantity the CN and EMCI cells sort
# by) is in INTER_CLASS_DISTANCE_CN_EMCI for AD rows. Confirm which ranking
# is intended.
positive_inter_class_distance_AD_sorted_dataframe = positive_inter_class_distance_AD_dataframe.sort_values(
    by='INTER_CLASS_DISTANCE_EMCI_AD',
    ascending=False,
)
positive_inter_class_distance_AD_sorted_dataframe.head(n=10)


Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
294,011_S_0053,I8669,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.008338,0.013512,0.008686,0.008686,0.004479,-0.000347,0.004826
295,011_S_0183,I11673,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.007511,0.012129,0.00765,0.00765,0.004341,-0.000139,0.004479
95,099_S_0372,I14077,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006439,0.010762,0.006728,0.006728,0.003744,-0.00029,0.004033
188,057_S_0474,I14584,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.005188,0.009567,0.005576,0.005576,0.003604,-0.000387,0.003991
374,136_S_0299,I14339,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.00821,0.012309,0.008399,0.008399,0.003722,-0.000188,0.00391
533,131_S_0691,I19658,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006874,0.010793,0.006913,0.006913,0.00384,-3.9e-05,0.00388
757,013_S_1161,I33986,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.005777,0.010007,0.006134,0.006134,0.003517,-0.000357,0.003873
418,013_S_0996,I29147,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.005866,0.010115,0.006243,0.006243,0.003496,-0.000376,0.003873
289,127_S_0431,I15497,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006003,0.010093,0.006263,0.006263,0.00357,-0.00026,0.00383
531,131_S_0457,I14561,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.005322,0.009449,0.005688,0.005688,0.003395,-0.000366,0.003761


In [23]:
# Mean gap of the retained AD subjects to the EMCI mean, then to the CN mean.
# The original leading `.head(30)` was not the cell's last expression, so its
# result was discarded and never displayed; it has been removed.
print(positive_inter_class_distance_AD_dataframe["INTER_CLASS_DISTANCE_EMCI_AD"].mean())
print(positive_inter_class_distance_AD_dataframe["INTER_CLASS_DISTANCE_CN_AD"].mean())

0.0020796649078966656
-1.552857569666979e-05


Divide the filtered dataframe into CN, EMCI and AD dataframes -> apply the positive inter-class distance filter within each dataframe -> sort each of the 3 dataframes (CN, EMCI and AD) by the sum of distances to the other classes (descending, i.e. ascending=False) -> choose the first 100 subjects in each of the CN, EMCI and AD dataframes -> combine the 3 dataframes CN, EMCI and AD -> PyTorch tensor

# CONCAT DATAFRAME OF 3 CLASS

CN_EMCI_AD problem

In [24]:
# Top 100 rows of each sorted per-class frame, stacked in CN, EMCI, AD order.
# Rows keep their original index labels.
frames_to_concate_CN_EMCI_AD = [
    class_frame.head(100)
    for class_frame in (
        positive_inter_class_distance_CN_sorted_dataframe,
        positive_inter_class_distance_EMCI_sorted_dataframe,
        positive_inter_class_distance_AD_sorted_dataframe,
    )
]
positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD = pd.concat(frames_to_concate_CN_EMCI_AD, axis=0)

positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD.head(n=5)

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
310,062_S_1099,I30171,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008518,0.015217,0.009556,0.008518,0.006699,0.001038,0.007737
211,005_S_0602,I17681,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008672,0.015349,0.009723,0.008672,0.006677,0.00105,0.007727
130,051_S_1123,I33309,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008669,0.015305,0.009691,0.008669,0.006636,0.001022,0.007658
569,011_S_0021,I7679,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008052,0.014634,0.009088,0.008052,0.006582,0.001035,0.007617
79,133_S_0488,I14838,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008088,0.014588,0.009147,0.008088,0.006501,0.001059,0.00756


In [25]:
# Number of rows in the combined CN/EMCI/AD frame.
print(positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD.shape[0])

300


In [26]:
# Replace the inherited index labels with a fresh 0..n-1 index; drop=True
# discards the old labels instead of keeping them as a column.
positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD = (
    positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD.reset_index(drop=True)
)
positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD.head(n=5)

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
0,062_S_1099,I30171,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008518,0.015217,0.009556,0.008518,0.006699,0.001038,0.007737
1,005_S_0602,I17681,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008672,0.015349,0.009723,0.008672,0.006677,0.00105,0.007727
2,051_S_1123,I33309,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008669,0.015305,0.009691,0.008669,0.006636,0.001022,0.007658
3,011_S_0021,I7679,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008052,0.014634,0.009088,0.008052,0.006582,0.001035,0.007617
4,133_S_0488,I14838,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008088,0.014588,0.009147,0.008088,0.006501,0.001059,0.00756


CN_AD problem

In [27]:
# Top 100 rows of the sorted CN and AD frames, stacked in that order.
# Rows keep their original index labels.
frames_to_concate_CN_AD = [
    class_frame.head(100)
    for class_frame in (
        positive_inter_class_distance_CN_sorted_dataframe,
        positive_inter_class_distance_AD_sorted_dataframe,
    )
]
positive_inter_class_distance_sorted_dataframe_CN_AD = pd.concat(frames_to_concate_CN_AD, axis=0)

positive_inter_class_distance_sorted_dataframe_CN_AD.head(n=5)

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
310,062_S_1099,I30171,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008518,0.015217,0.009556,0.008518,0.006699,0.001038,0.007737
211,005_S_0602,I17681,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008672,0.015349,0.009723,0.008672,0.006677,0.00105,0.007727
130,051_S_1123,I33309,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008669,0.015305,0.009691,0.008669,0.006636,0.001022,0.007658
569,011_S_0021,I7679,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008052,0.014634,0.009088,0.008052,0.006582,0.001035,0.007617
79,133_S_0488,I14838,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008088,0.014588,0.009147,0.008088,0.006501,0.001059,0.00756


In [28]:
# Row count of the CN-vs-AD subset.
print(positive_inter_class_distance_sorted_dataframe_CN_AD.shape[0])

200


In [29]:
# Replace the index inherited from the per-class frames with 0..n-1.
# A later cell in this notebook reads rows with `.loc[ii, ...]` for ii in range(len(df)),
# which only works when the index labels are exactly these positions.
positive_inter_class_distance_sorted_dataframe_CN_AD = positive_inter_class_distance_sorted_dataframe_CN_AD.reset_index(drop=True)
positive_inter_class_distance_sorted_dataframe_CN_AD.head()

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
0,062_S_1099,I30171,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008518,0.015217,0.009556,0.008518,0.006699,0.001038,0.007737
1,005_S_0602,I17681,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008672,0.015349,0.009723,0.008672,0.006677,0.00105,0.007727
2,051_S_1123,I33309,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008669,0.015305,0.009691,0.008669,0.006636,0.001022,0.007658
3,011_S_0021,I7679,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008052,0.014634,0.009088,0.008052,0.006582,0.001035,0.007617
4,133_S_0488,I14838,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008088,0.014588,0.009147,0.008088,0.006501,0.001059,0.00756


CN_EMCI problem

In [30]:
# CN-vs-EMCI subset: the first 100 rows of the CN frame followed by the first 100 rows of the EMCI frame.
# The original row labels are kept here (no index reset in this cell).
_first_100_CN = positive_inter_class_distance_CN_sorted_dataframe.iloc[:100]
_first_100_EMCI = positive_inter_class_distance_EMCI_sorted_dataframe.iloc[:100]
frames_to_concate_CN_EMCI = [_first_100_CN, _first_100_EMCI]
positive_inter_class_distance_sorted_dataframe_CN_EMCI = pd.concat(frames_to_concate_CN_EMCI)

positive_inter_class_distance_sorted_dataframe_CN_EMCI.head()

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
310,062_S_1099,I30171,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008518,0.015217,0.009556,0.008518,0.006699,0.001038,0.007737
211,005_S_0602,I17681,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008672,0.015349,0.009723,0.008672,0.006677,0.00105,0.007727
130,051_S_1123,I33309,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008669,0.015305,0.009691,0.008669,0.006636,0.001022,0.007658
569,011_S_0021,I7679,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008052,0.014634,0.009088,0.008052,0.006582,0.001035,0.007617
79,133_S_0488,I14838,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008088,0.014588,0.009147,0.008088,0.006501,0.001059,0.00756


In [31]:
# Row count of the CN-vs-EMCI subset.
print(positive_inter_class_distance_sorted_dataframe_CN_EMCI.shape[0])

200


In [32]:
# Replace the index inherited from the per-class frames with 0..n-1.
# A later cell in this notebook reads rows with `.loc[ii, ...]` for ii in range(len(df)),
# which only works when the index labels are exactly these positions.
positive_inter_class_distance_sorted_dataframe_CN_EMCI = positive_inter_class_distance_sorted_dataframe_CN_EMCI.reset_index(drop=True)
positive_inter_class_distance_sorted_dataframe_CN_EMCI.head()

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
0,062_S_1099,I30171,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008518,0.015217,0.009556,0.008518,0.006699,0.001038,0.007737
1,005_S_0602,I17681,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008672,0.015349,0.009723,0.008672,0.006677,0.00105,0.007727
2,051_S_1123,I33309,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008669,0.015305,0.009691,0.008669,0.006636,0.001022,0.007658
3,011_S_0021,I7679,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008052,0.014634,0.009088,0.008052,0.006582,0.001035,0.007617
4,133_S_0488,I14838,/mnt/data_lab513/vqtran_data/data/data_train_d...,CN,0.008088,0.014588,0.009147,0.008088,0.006501,0.001059,0.00756


Write the selected subsets to CSV files

In [33]:
# Write each problem-specific subset to ../investigate/, one CSV per subset, without the index column.
for _problem, _subset in (
    ("CN_EMCI_AD", positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD),
    ("CN_AD", positive_inter_class_distance_sorted_dataframe_CN_AD),
    ("CN_EMCI", positive_inter_class_distance_sorted_dataframe_CN_EMCI),
):
    _subset.to_csv(
        f"../investigate/positive_inter_class_distance_sorted_dataframe_{_problem}.csv",
        index=False,
    )

# 4. Get tensor from dataframe

4.1. CN_EMCI_AD problem

In [30]:
# Show the column labels of the CN/EMCI/AD subset before reading rows from it.
print(positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD.columns)

Index(['Subject ID', 'Image ID', 'Image Path', 'Image Target', 'DISTANCE CN',
       'DISTANCE EMCI', 'DISTANCE AD', 'INTRA_CLASS_DISTANCE',
       'INTER_CLASS_DISTANCE_CN_EMCI', 'INTER_CLASS_DISTANCE_CN_AD',
       'INTER_CLASS_DISTANCE_EMCI_AD'],
      dtype='object')


In [31]:
# Print the 'Image Path' value stored under index label 0.
print(positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD.at[0, 'Image Path'])

/mnt/data_lab513/vqtran_data/data/data_train_dec/unique/ADNI_062_S_1099_MR_MPRAGE_br_raw_20061120153545673_98_S22713_I30171.nii.gz


In [32]:
# Number of distinct subject IDs, then the number of rows per class label.
_subset = positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD
print(_subset["Subject ID"].nunique(dropna=False))
print(_subset["Image Target"].value_counts())

300
CN      100
EMCI    100
AD      100
Name: Image Target, dtype: int64


In [33]:
# Build the image tensor X and label tensor Y for the three-class CN / EMCI / AD problem.
#
# Result:
#   X_tensor_cross_val: float32, one [1, 110, 110, 110] volume per kept row, stacked on dim 0.
#   Y_tensor_cross_val: float32, one class index per kept row (CN=0, EMCI=1, AD=2).
#     Kept as float32 so the saved files keep their existing dtype; cast with .long()
#     where integer class indices are required.

# Class name -> class index. Rows with any other class name are skipped.
label_to_index = {"CN": 0, "EMCI": 1, "AD": 2}

# The preprocessing pipeline does not depend on the row, so it is built once
# instead of once per image.
image_transformation_tio = tio.transforms.Compose(
    [
        tio.transforms.Resize((110, 110, 110)),
        tio.ZNormalization(),
        tio.RescaleIntensity(out_min_max=(0, 1)),
    ]
)

X_tensor_cross_val = []
Y_tensor_cross_val = []

# Walk the two columns positionally so the loop does not depend on the frame
# having a 0..n-1 index (i.e. on the reset_index cell having been run).
for image_absolute_path, label in zip(
    positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD["Image Path"],
    positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD["Image Target"],
):
    if label not in label_to_index:
        continue

    image_array = nib.load(image_absolute_path).get_fdata()

    # Add a leading channel dimension: the transforms are applied to a 4D (C, X, Y, Z) tensor.
    image_tensor = torch.tensor(image_array, dtype=torch.float32).unsqueeze(0)

    X_tensor_cross_val.append(image_transformation_tio(image_tensor))
    Y_tensor_cross_val.append(label_to_index[label])

if not X_tensor_cross_val:
    raise ValueError(
        "no CN, EMCI or AD rows found in positive_inter_class_distance_sorted_dataframe_CN_EMCI_AD"
    )

X_tensor_cross_val = torch.stack(X_tensor_cross_val)
Y_tensor_cross_val = torch.tensor(Y_tensor_cross_val, dtype=torch.float32)

print(X_tensor_cross_val.shape)
print(Y_tensor_cross_val.shape)

torch.Size([300, 1, 110, 110, 110])
torch.Size([300])


In [34]:
# Save the current X_tensor_cross_val / Y_tensor_cross_val under root_train_unique_tensor.
# Both names are reassigned by every problem section of this notebook, so run this cell
# directly after the CN/EMCI/AD build cell.
for _prefix, _tensor in (("x", X_tensor_cross_val), ("y", Y_tensor_cross_val)):
    _target_path = os.path.join(
        root_train_unique_tensor, f"{_prefix}_tensor_NC_EMCI_AD_cv_data_filter.pt"
    )
    torch.save(_tensor, _target_path)

In [35]:
# Global intensity statistics of the stacked images and the class balance of the labels.
# Recorded run (printed by the next cell): mean 0.1277, std 0.2443, max 1., min 0.,
# labels [0., 1., 2.] with counts [100, 100, 100].
tensor_mean = X_tensor_cross_val.mean()
tensor_std = X_tensor_cross_val.std()
tensor_max = X_tensor_cross_val.max()
tensor_min = X_tensor_cross_val.min()
tensor_unique, tensor_count = Y_tensor_cross_val.unique(return_counts=True)

In [36]:
# Print the six summary values, one per line.
print(
    tensor_mean,
    tensor_std,
    tensor_max,
    tensor_min,
    tensor_unique,
    tensor_count,
    sep="\n",
)

tensor(0.1277)
tensor(0.2443)
tensor(1.)
tensor(0.)
tensor([0., 1., 2.])
tensor([100, 100, 100])


4.2. CN_AD problem

In [49]:
# Show the column labels of the CN/AD subset before reading rows from it.
print(positive_inter_class_distance_sorted_dataframe_CN_AD.columns)

Index(['Subject ID', 'Image ID', 'Image Path', 'Image Target', 'DISTANCE CN',
       'DISTANCE EMCI', 'DISTANCE AD', 'INTRA_CLASS_DISTANCE',
       'INTER_CLASS_DISTANCE_CN_EMCI', 'INTER_CLASS_DISTANCE_CN_AD',
       'INTER_CLASS_DISTANCE_EMCI_AD'],
      dtype='object')


In [50]:
# Print the 'Image Path' value stored under index label 0.
print(positive_inter_class_distance_sorted_dataframe_CN_AD.at[0, 'Image Path'])

/mnt/data_lab513/vqtran_data/data/data_train_dec/unique/ADNI_062_S_1099_MR_MPRAGE_br_raw_20061120153545673_98_S22713_I30171.nii.gz


In [51]:
# Number of distinct subject IDs, then the number of rows per class label.
_subset = positive_inter_class_distance_sorted_dataframe_CN_AD
print(_subset["Subject ID"].nunique(dropna=False))
print(_subset["Image Target"].value_counts())

200
CN    100
AD    100
Name: Image Target, dtype: int64


In [56]:
# Build the image tensor X and label tensor Y for the two-class CN / AD problem.
#
# Result:
#   X_tensor_cross_val: float32, one [1, 110, 110, 110] volume per kept row, stacked on dim 0.
#   Y_tensor_cross_val: float32, one class index per kept row (CN=0, AD=1).
#     Kept as float32 so the saved files keep their existing dtype; cast with .long()
#     where integer class indices are required.

# Class name -> class index. Rows with any other class name are skipped.
label_to_index = {"CN": 0, "AD": 1}

# The preprocessing pipeline does not depend on the row, so it is built once
# instead of once per image.
image_transformation_tio = tio.transforms.Compose(
    [
        tio.transforms.Resize((110, 110, 110)),
        tio.ZNormalization(),
        tio.RescaleIntensity(out_min_max=(0, 1)),
    ]
)

X_tensor_cross_val = []
Y_tensor_cross_val = []

# Walk the two columns positionally so the loop does not depend on the frame
# having a 0..n-1 index (i.e. on the reset_index cell having been run).
for image_absolute_path, label in zip(
    positive_inter_class_distance_sorted_dataframe_CN_AD["Image Path"],
    positive_inter_class_distance_sorted_dataframe_CN_AD["Image Target"],
):
    if label not in label_to_index:
        continue

    image_array = nib.load(image_absolute_path).get_fdata()

    # Add a leading channel dimension: the transforms are applied to a 4D (C, X, Y, Z) tensor.
    image_tensor = torch.tensor(image_array, dtype=torch.float32).unsqueeze(0)

    X_tensor_cross_val.append(image_transformation_tio(image_tensor))
    Y_tensor_cross_val.append(label_to_index[label])

if not X_tensor_cross_val:
    raise ValueError(
        "no CN or AD rows found in positive_inter_class_distance_sorted_dataframe_CN_AD"
    )

X_tensor_cross_val = torch.stack(X_tensor_cross_val)
Y_tensor_cross_val = torch.tensor(Y_tensor_cross_val, dtype=torch.float32)

print(X_tensor_cross_val.shape)
print(Y_tensor_cross_val.shape)

torch.Size([200, 1, 110, 110, 110])
torch.Size([200])


In [57]:
# Save the current X_tensor_cross_val / Y_tensor_cross_val under root_train_unique_tensor.
# Both names are reassigned by every problem section of this notebook, so run this cell
# directly after the CN/AD build cell.
for _prefix, _tensor in (("x", X_tensor_cross_val), ("y", Y_tensor_cross_val)):
    _target_path = os.path.join(
        root_train_unique_tensor, f"{_prefix}_tensor_NC_AD_cv_data_filter_three_classes.pt"
    )
    torch.save(_tensor, _target_path)

In [58]:
# Global intensity statistics of the stacked images and the class balance of the labels.
# Recorded run (printed by the next cell): mean 0.1461, std 0.2721, max 1., min 0.,
# labels [0., 1.] with counts [100, 100].
tensor_mean = X_tensor_cross_val.mean()
tensor_std = X_tensor_cross_val.std()
tensor_max = X_tensor_cross_val.max()
tensor_min = X_tensor_cross_val.min()
tensor_unique, tensor_count = Y_tensor_cross_val.unique(return_counts=True)

In [59]:
# Print the six summary values, one per line.
print(
    tensor_mean,
    tensor_std,
    tensor_max,
    tensor_min,
    tensor_unique,
    tensor_count,
    sep="\n",
)

tensor(0.1461)
tensor(0.2721)
tensor(1.)
tensor(0.)
tensor([0., 1.])
tensor([100, 100])


4.3. CN_EMCI problem

In [60]:
# Show the column labels of the CN/EMCI subset before reading rows from it.
print(positive_inter_class_distance_sorted_dataframe_CN_EMCI.columns)

Index(['Subject ID', 'Image ID', 'Image Path', 'Image Target', 'DISTANCE CN',
       'DISTANCE EMCI', 'DISTANCE AD', 'INTRA_CLASS_DISTANCE',
       'INTER_CLASS_DISTANCE_CN_EMCI', 'INTER_CLASS_DISTANCE_CN_AD',
       'INTER_CLASS_DISTANCE_EMCI_AD'],
      dtype='object')


In [61]:
# Print the 'Image Path' value stored under index label 0.
print(positive_inter_class_distance_sorted_dataframe_CN_EMCI.at[0, 'Image Path'])

/mnt/data_lab513/vqtran_data/data/data_train_dec/unique/ADNI_062_S_1099_MR_MPRAGE_br_raw_20061120153545673_98_S22713_I30171.nii.gz


In [62]:
# Number of distinct subject IDs, then the number of rows per class label.
_subset = positive_inter_class_distance_sorted_dataframe_CN_EMCI
print(_subset["Subject ID"].nunique(dropna=False))
print(_subset["Image Target"].value_counts())

200
CN      100
EMCI    100
Name: Image Target, dtype: int64


In [63]:
# Build the image tensor X and label tensor Y for the two-class CN / EMCI problem.
#
# Result:
#   X_tensor_cross_val: float32, one [1, 110, 110, 110] volume per kept row, stacked on dim 0.
#   Y_tensor_cross_val: float32, one class index per kept row (CN=0, EMCI=1).
#     Kept as float32 so the saved files keep their existing dtype; cast with .long()
#     where integer class indices are required.

# Class name -> class index. Rows with any other class name are skipped.
label_to_index = {"CN": 0, "EMCI": 1}

# The preprocessing pipeline does not depend on the row, so it is built once
# instead of once per image.
image_transformation_tio = tio.transforms.Compose(
    [
        tio.transforms.Resize((110, 110, 110)),
        tio.ZNormalization(),
        tio.RescaleIntensity(out_min_max=(0, 1)),
    ]
)

X_tensor_cross_val = []
Y_tensor_cross_val = []

# Walk the two columns positionally so the loop does not depend on the frame
# having a 0..n-1 index (i.e. on the reset_index cell having been run).
for image_absolute_path, label in zip(
    positive_inter_class_distance_sorted_dataframe_CN_EMCI["Image Path"],
    positive_inter_class_distance_sorted_dataframe_CN_EMCI["Image Target"],
):
    if label not in label_to_index:
        continue

    image_array = nib.load(image_absolute_path).get_fdata()

    # Add a leading channel dimension: the transforms are applied to a 4D (C, X, Y, Z) tensor.
    image_tensor = torch.tensor(image_array, dtype=torch.float32).unsqueeze(0)

    X_tensor_cross_val.append(image_transformation_tio(image_tensor))
    Y_tensor_cross_val.append(label_to_index[label])

if not X_tensor_cross_val:
    raise ValueError(
        "no CN or EMCI rows found in positive_inter_class_distance_sorted_dataframe_CN_EMCI"
    )

X_tensor_cross_val = torch.stack(X_tensor_cross_val)
Y_tensor_cross_val = torch.tensor(Y_tensor_cross_val, dtype=torch.float32)

print(X_tensor_cross_val.shape)
print(Y_tensor_cross_val.shape)

torch.Size([200, 1, 110, 110, 110])
torch.Size([200])


In [64]:
# Save the current X_tensor_cross_val / Y_tensor_cross_val under root_train_unique_tensor.
# Both names are reassigned by every problem section of this notebook, so run this cell
# directly after the CN/EMCI build cell.
for _prefix, _tensor in (("x", X_tensor_cross_val), ("y", Y_tensor_cross_val)):
    _target_path = os.path.join(
        root_train_unique_tensor, f"{_prefix}_tensor_NC_EMCI_cv_data_filter_three_classes.pt"
    )
    torch.save(_tensor, _target_path)

In [65]:
# Global intensity statistics of the stacked images and the class balance of the labels.
# Recorded run (printed by the next cell): mean 0.1236, std 0.2386, max 1., min 0.,
# labels [0., 1.] with counts [100, 100].
tensor_mean = X_tensor_cross_val.mean()
tensor_std = X_tensor_cross_val.std()
tensor_max = X_tensor_cross_val.max()
tensor_min = X_tensor_cross_val.min()
tensor_unique, tensor_count = Y_tensor_cross_val.unique(return_counts=True)

In [67]:
# Print the six summary values, one per line.
print(
    tensor_mean,
    tensor_std,
    tensor_max,
    tensor_min,
    tensor_unique,
    tensor_count,
    sep="\n",
)

tensor(0.1236)
tensor(0.2386)
tensor(1.)
tensor(0.)
tensor([0., 1.])
tensor([100, 100])


## END HERE

In [None]:
# positive_inter_class_distance_CN_dataframe['newcol'] = positive_inter_class_distance_CN_dataframe.apply(lambda row: row['firstcolval'] * row['secondcolval'], axis=1)

In [None]:
# Scratch cell located after the "END HERE" marker; not part of the tensor-building flow above.
# Class distribution in positive_inter_class_distance_dataframe (disabled check):
# print(positive_inter_class_distance_dataframe["Image Target"].value_counts()) # noted counts: "CD 211, AD 148" - NOTE(review): "CD" is presumably a typo for CN; confirm

# NOTE(review): the recorded output under this cell contains a "..." row and more than
# five data rows, which .head() does not produce - the output appears stale; re-run to confirm.
positive_inter_class_distance_AD_dataframe.head() # author's open question ("???")

Unnamed: 0,Subject ID,Image ID,Image Path,Image Target,DISTANCE CN,DISTANCE EMCI,DISTANCE AD,INTRA_CLASS_DISTANCE,INTER_CLASS_DISTANCE_CN_EMCI,INTER_CLASS_DISTANCE_CN_AD,INTER_CLASS_DISTANCE_EMCI_AD
3,002_S_0619,I16392,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.012839,0.007077,0.011432,0.011432,inf,0.001407,-0.004355
5,005_S_0814,I23573,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004964,0.008030,0.005108,0.005108,inf,-0.000144,0.002922
6,005_S_0929,I25645,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006088,0.010617,0.006548,0.006548,inf,-0.000460,0.004069
7,005_S_1341,I43188,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004963,0.009204,0.005419,0.005419,inf,-0.000456,0.003785
10,006_S_0547,I17794,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006670,0.012383,0.007458,0.007458,inf,-0.000788,0.004926
...,...,...,...,...,...,...,...,...,...,...,...
282,011_S_4906,I346967,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.004376,0.007000,0.004536,0.004536,inf,-0.000160,0.002463
289,127_S_0431,I15497,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.006003,0.010093,0.006263,0.006263,inf,-0.000260,0.003830
294,011_S_0053,I8669,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.008338,0.013512,0.008686,0.008686,inf,-0.000347,0.004826
295,011_S_0183,I11673,/mnt/data_lab513/vqtran_data/data/data_train_d...,AD,0.007511,0.012129,0.007650,0.007650,inf,-0.000139,0.004479
