In [147]:
import os
import cv2
import glob
import shutil
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import csv
import re
import nilearn as nl
import nibabel as nib
import nilearn.plotting as nlplt

import keras
import keras.backend as K
from keras.callbacks import CSVLogger
from sklearn.metrics import log_loss, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_regression as MIR
import tensorflow as tf

# Make numpy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)


In [148]:
def save_numpy_array(array, target_directory, file_name):
   
    # Ensure the target directory exists, and if not, create it
    os.makedirs(target_directory, exist_ok=True)
    
    # Save the numpy array to the file in the target directory
    file_path = os.path.join(target_directory, file_name)
    np.save(file_path, array)
    print(f"Array saved to '{file_path}'")

In [149]:
# DEFINE seg-areas  
SEGMENT_CLASSES = {
    0 : 'NOT tumor',
    1 : 'NECROTIC/CORE', 
    2 : 'EDEMA',
    3 : 'ENHANCING' # original 4 -> converted into 3 later
}

 

# there are 155 slices per volume
# to start at 5 and use 145 slices means we will skip the first 5 and last 5 
VOLUME_SLICES = 100 
VOLUME_START_AT = 22 # first slice of volume that we will include
IMG_SIZE=128
TRAIN_DATASET_PATH='../dataset/MICCAI_BraTS2020_TrainingData/'
VAL_DATASET_PATH='../dataset/MICCAI_BraTS2020_ValidationData/'

In [150]:
# lists of directories with studies
train_and_val_directories = [f.path for f in os.scandir(TRAIN_DATASET_PATH) if f.is_dir()]
train_and_val_directories+= [f.path for f in os.scandir(VAL_DATASET_PATH) if f.is_dir()]

# file BraTS20_Training_355 has ill formatted name for for seg.nii file
# train_and_val_directories.remove(TRAIN_DATASET_PATH+'BraTS20_Training_355')


def pathListIntoIds(dirList):
    x = []
    for i in range(0,len(dirList)):
        x.append(dirList[i][dirList[i].rfind('/')+1:])
    return x

train_and_test_ids = pathListIntoIds(train_and_val_directories)

In [151]:
print(train_and_val_directories)

['../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_352', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_223', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_172', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_274', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_257', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_198', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_097', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_119', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_315', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_066', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_307', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_327', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_040', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_239', '../dataset/MICCAI_BraTS2020_TrainingData/BraTS20_Training_176', '../dataset/MICCAI_BraTS

In [152]:

csvs = [
    r'../dataset/MICCAI_BraTS2020_TrainingData/survival_info.csv', 
    r'../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv'
]

age_dict = {}
days_dict = {}
category_short = 0
category_medium = 0
category_long = 0
max_days = 0

for csv_path in csvs:
    with open(csv_path, mode='r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        # Skip the header
        next(csv_reader)
        for row in csv_reader:
            # print(row)
            key = row[0]
            age = row[1]
            days = row[2]
            try:
                age_dict[key] = float(age)
            except ValueError:
                continue
            
            if not days.isnumeric():
                continue
            days_dict[key] = int(days)
            max_days = max(max_days, int(days))
            
            if int(days) < 250:
                category_short += 1
            elif 250 <= int(days) <= 450:
                category_medium += 1
            else:
                category_long += 1

    print(f'Processed {len(age_dict)} entries for {csv_path}')
    print(f'Category Short: {category_short}, Medium: {category_medium}, Long: {category_long}')
    print(f'Max Days: {max_days}')


Processed 236 entries for ../dataset/MICCAI_BraTS2020_TrainingData/survival_info.csv
Category Short: 74, Medium: 74, Long: 87
Max Days: 1767
Processed 265 entries for ../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv
Category Short: 74, Medium: 74, Long: 87
Max Days: 1767


In [153]:

csvs = [
    r'../dataset/MICCAI_BraTS2020_TrainingData/survival_info.csv', 
    r'../dataset/MICCAI_BraTS2020_ValidationData/survival_evaluation.csv'
]

 
train_csv = {}
train_age = {}
train_days = {}
with open(csvs[0], mode='r') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # Skip the header
    next(csv_reader)
    for row in csv_reader:
        if row[0] not in train_csv:
            train_csv[row[0]] = 1
            train_age[row[0]] = float(row[1])
            try:
                train_days[row[0]] = int(row[2])
            except ValueError:
                match = re.findall(r'\d+', row[2])
                if match:
                    # Join all found numbers into a single integer
                    train_days[row[0]] = int(''.join(match))
        else:
            train_csv[row[0]] = train_csv[row[0]] + 1

validate_csv = {}
validate_age = {}
# validate_days = {}
with open(csvs[1], mode='r') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # Skip the header
    next(csv_reader)
    for row in csv_reader:
        if row[0] not in validate_csv:
            validate_csv[row[0]] = 1
            validate_age[row[0]] = float(row[1])
            # validate_days[row[0]] = int(row[2])
        else:
            validate_csv[row[0]] = validate_csv[row[0]] + 1

In [154]:
print("train", len(train_csv), len(train_age))
print("train", len(validate_csv), len(validate_age))

train 236 236
train 29 29


In [155]:

 
csv_filenames = [r'./results.csv',r'./results_val.csv']
 
train_p_ids = {}
with open(csv_filenames[0]) as f:
    reader = csv.DictReader(f)
    for row in reader:#going through rows of csv
        if row['Patient_ID'] in train_csv:
            if row['Patient_ID'] not in train_p_ids:
                train_p_ids[row['Patient_ID']] =   1
            else:
                train_p_ids[row['Patient_ID']] = train_p_ids[row['Patient_ID']]+ 1
          
validation_p_ids = {}
with open(csv_filenames[1]) as f:
    reader = csv.DictReader(f)
    for row in reader:#going through rows of csv
        if row['Patient_ID'] in validate_csv:
            if row['Patient_ID'] not in validation_p_ids:
                validation_p_ids[row['Patient_ID']] =   1
            else:
                validation_p_ids[row['Patient_ID']] = validation_p_ids[row['Patient_ID']]+ 1
          


In [156]:
print("test_csv =", len(train_csv))
print("validate_csv =", len(validate_csv))

test_csv = 236
validate_csv = 29


In [157]:
print("train_p_ids =",len(train_p_ids))
print("validation_p_ids =",len(validation_p_ids))

train_p_ids = 235
validation_p_ids = 28


In [194]:
print("train missings",[item for item in train_csv if item not in train_p_ids])

train missings ['BraTS20_Training_099']


In [193]:
print("validate missings",[item for item in validate_csv if item not in validation_p_ids])


validate missings ['BraTS20_Validation_116']


In [160]:
def collect_and_evaluate_features(csv_filename, id_list):
    data = []
    feature_keys = []
    selected_features = []
    
    with open(csv_filename) as file:
        reader = csv.DictReader(file)
        row_count = 0
        current_row = {}
        current_patient_id = None

        for row in reader:
            patient_id = row['Patient_ID']
            if patient_id not in id_list:
                continue

            if current_patient_id is None:
                current_patient_id = patient_id

            if current_patient_id != patient_id:
                data.append(current_row)
                current_row = {}
                row_count = 0
                current_patient_id = patient_id

            if not current_row:
                for key in row:
                    if key and 'diagnostic' not in key and 'Modality' not in key and 'Patient_ID' not in key:
                        value = eval(row[key])
                        if isinstance(value, tuple):
                            for idx, item in enumerate(value):
                                new_key = f"{key}{idx}{row['Modality'][row['Modality'].find('_', 19, -1):row['Modality'].find('.')]}"
                                if new_key in selected_features or not selected_features:
                                    feature_keys.append(new_key)
                            continue
                        new_key = f"{key}{row['Modality'][row['Modality'].find('_', 19, -1):row['Modality'].find('.')]}"
                        if new_key in selected_features or not selected_features:
                            feature_keys.append(new_key)

            for key in row:
                if key and 'diagnostic' not in key and 'Modality' not in key and 'Patient_ID' not in key:
                    value = eval(str(row[key]))
                    new_key = f"{key}{row['Modality'][row['Modality'].find('_', 19, -1):row['Modality'].find('.')]}"
                    if isinstance(value, tuple):
                        for idx, item in enumerate(value):
                            new_key = f"{key}{idx}{row['Modality'][row['Modality'].find('_', 19, -1):row['Modality'].find('.')]}"
                            current_row[new_key] = item
                        continue
                    current_row[new_key] = value

            current_row['Patient_ID'] = patient_id
            row_count += 1

        if current_row:
            data.append(current_row)

    return data, feature_keys


In [161]:
data_train, feature_keys_train = collect_and_evaluate_features('./results.csv', train_p_ids)
data_validation, feature_keys_validation = collect_and_evaluate_features('./results_val.csv', validation_p_ids)

In [175]:
# feature_keys_train

['original_shape_Elongation_flair',
 'original_shape_Flatness_flair',
 'original_shape_LeastAxisLength_flair',
 'original_shape_MajorAxisLength_flair',
 'original_shape_Maximum2DDiameterColumn_flair',
 'original_shape_Maximum2DDiameterRow_flair',
 'original_shape_Maximum2DDiameterSlice_flair',
 'original_shape_Maximum3DDiameter_flair',
 'original_shape_MeshVolume_flair',
 'original_shape_MinorAxisLength_flair',
 'original_shape_Sphericity_flair',
 'original_shape_SurfaceArea_flair',
 'original_shape_SurfaceVolumeRatio_flair',
 'original_shape_VoxelVolume_flair',
 'original_firstorder_10Percentile_flair',
 'original_firstorder_90Percentile_flair',
 'original_firstorder_Energy_flair',
 'original_firstorder_Entropy_flair',
 'original_firstorder_InterquartileRange_flair',
 'original_firstorder_Kurtosis_flair',
 'original_firstorder_Maximum_flair',
 'original_firstorder_MeanAbsoluteDeviation_flair',
 'original_firstorder_Mean_flair',
 'original_firstorder_Median_flair',
 'original_firstorde

In [162]:

def col_and_eval(ch_ft = []):
    id_list2 = []
    csv_filenames = [r'./results.csv',r'./results_val.csv']
    dt =[]
    jj = [] #key_list
    id_tp = 0
    row_cc = 0
    row2 = {}
    id_check = {}
    for csv_filename in csv_filenames:
        with open(csv_filename) as f:
            reader = csv.DictReader(f)
            for row in reader:#going through rows of csv
                dt_tp = row
                if row['Patient_ID'] not in id_check:#adding patient id if not in id chek list
                    id_check[row['Patient_ID']] = 0
                id_check[row['Patient_ID']] += 1
                if len(id_list2) != 0:
                    if id_list2[-1] == row["Patient_ID"]:
                        id_tp += 1
                    else:
                        dt.append(row2)
                        row2 = {}
                        id_tp = 0
                if id_tp == 0:
                    id_list2.append(row["Patient_ID"])
                if len(id_list2) == 1:
                    for keysj in row:
                        if keysj != None:
                            if 'diagnostic' in keysj or 'Modality' in keysj or 'Patient_ID' in keysj:
                                continue
                            # print(type(keysj),keysj, type(row[keysj]),row[keysj])
                            row[keysj] = eval(row[keysj])
                            if isinstance(row[keysj],tuple):
                                ct = 0
                                for it in row[keysj]:
                                    keysjn = keysj + str(ct)
                                    keysjn += row["Modality"][row["Modality"].find('_',19,-1):row["Modality"].find('.')]
                                    if (keysjn in ch_ft) or (len(ch_ft) == 0):
                                        jj.append(keysjn)
                                    ct += 1
                                continue
                            keysj += row["Modality"][row["Modality"].find('_',19,-1):row["Modality"].find('.')]
                            if (keysj in ch_ft) or (len(ch_ft) == 0):
                                jj.append(keysj)
                for keysj in row:
                    if keysj != None:
                        if 'diagnostic' in keysj or 'Modality' in keysj or 'Patient_ID' in keysj:
                            continue
                        # print(type(keysj),keysj, type(row[keysj]),row[keysj])
                        row[keysj] = eval(str(row[keysj]))
                        keys2j = keysj + row["Modality"][row["Modality"].find('_',19,-1):row["Modality"].find('.')]
                        if isinstance(row[keysj],tuple):
                            ct = 0
                            for it in row[keysj]:
                                keys2j =  keysj + str(ct) + row["Modality"][row["Modality"].find('_',19,-1):row["Modality"].find('.')]
                                row2[keys2j] = it
                                ct += 1
                            continue
                        row2[keys2j] = row[keysj]
                row2['Patient_ID'] = row['Patient_ID']
    dt.append(row2)    
    return dt, jj


In [173]:
# len(jj)

10700

In [178]:
# id: age, categories
def new_getListAgeDays(data, age_dict, days_dict = {}, ch_ft = []):
    x_data = []
    y_data = []
    for i in data:
        if ((i["Patient_ID"] not in days_dict) and ("Validation" not in i["Patient_ID"])):
            # print("skipping-",i["Patient_ID"])
            continue
      
        if (i["Patient_ID"] not in age_dict):
            # print("not found",i["Patient_ID"] )
            continue
        j = [age_dict[i["Patient_ID"]]]
        for keysj in i:
            if keysj != None:
                if 'diagnostic' in keysj or 'Modality' in keysj or 'Patient_ID' in keysj:
                    # print("not taking key", keysj)
                    continue
                if (keysj not in ch_ft) and (len(ch_ft) != 0):
                    # print("not taking ch_ft", ch_ft)
                    continue
               
                j.append(i[keysj])
        # print(j)
        if "Validation" in i["Patient_ID"]:
            x_data.append(j)
        else:
            x_data.append(j) 
            y_data.append(days_dict[i["Patient_ID"]])
      
            
    return np.array(x_data), np.array(y_data) 



In [164]:
# id: age, categories
def getListAgeDays(id_list, dt, jj, ch_ft = []):
    x_val = []
    y_val = []
    x_test = []
    for i in dt:
        if ((i["Patient_ID"] not in days_dict) and ("Validation" not in i["Patient_ID"])):
            print("skipping-",i["Patient_ID"])
            continue
      
        if (i["Patient_ID"] not in age_dict):
            print("not found",i["Patient_ID"] )
            continue
        j = [age_dict[i["Patient_ID"]]]
        for keysj in i:
            if keysj != None:
                if 'diagnostic' in keysj or 'Modality' in keysj or 'Patient_ID' in keysj:
                    print("not taking key", keysj)
                    continue
                if (keysj not in ch_ft) and (len(ch_ft) != 0):
                    print("not taking ch_ft", ch_ft)
                    continue
               
                j.append(i[keysj])
        # print(j)
        if "Validation" in i["Patient_ID"]:
            # print("vvv"+i["Patient_ID"])
            x_test.append(j)
        else:
            x_val.append(j) 
            y_val.append(days_dict[i["Patient_ID"]])
      
            
    return np.array(x_val), np.array(y_val), np.array(x_test)



In [165]:
dt, jj = col_and_eval()

In [171]:
print("data_train =", len(data_train),  "actual", len(train_p_ids), "CSV_ID =", len(train_csv))
print("data_validation =", len(data_validation), "actual", len(validation_p_ids), "CSV_ID =", len(validate_csv))
print("feature_keys_validation =", len(feature_keys_train))
print("dt=", len(dt), "jj =", len(jj))

data_train = 235 actual 235 CSV_ID = 236
data_validation = 28 actual 28 CSV_ID = 29
feature_keys_validation = 628625
dt= 492 jj = 10700


In [179]:
X_train_t, y_train_t = new_getListAgeDays(data_train, train_age, train_days)
X_validation_t, temp = new_getListAgeDays(data_validation, validate_age)


In [181]:
print(len(X_train_t))
print(len(X_validation_t))

235
28


In [182]:

X_all, y_all, x_val = X_train_t, y_train_t , X_validation_t
# X_all, y_all, x_val = getListAgeDays(train_and_test_ids,dt,jj)

In [183]:
print(len(X_all))
print(len(x_val))

235
28


In [184]:

print("total train_and_test_ids",len(train_and_test_ids))
print("X_all",X_all.shape)
print("y_all",y_all.shape)
print("x_val",x_val.shape)
print(f'X_all: {X_all.shape}')
columns = ["age"]+jj+["prediction"]
print(columns)
dfn = np.concatenate((X_all, y_all[:,None]), axis=1)
print(dfn.shape,len(columns))
df = pd.DataFrame(dfn, columns = columns)
df2 = pd.DataFrame(x_val,columns = ["age"]+jj)
df2.head()

total train_and_test_ids 494
X_all (235, 10701)
y_all (235,)
x_val (28, 10701)
X_all: (235, 10701)
['age', 'original_shape_Elongation_flair', 'original_shape_Flatness_flair', 'original_shape_LeastAxisLength_flair', 'original_shape_MajorAxisLength_flair', 'original_shape_Maximum2DDiameterColumn_flair', 'original_shape_Maximum2DDiameterRow_flair', 'original_shape_Maximum2DDiameterSlice_flair', 'original_shape_Maximum3DDiameter_flair', 'original_shape_MeshVolume_flair', 'original_shape_MinorAxisLength_flair', 'original_shape_Sphericity_flair', 'original_shape_SurfaceArea_flair', 'original_shape_SurfaceVolumeRatio_flair', 'original_shape_VoxelVolume_flair', 'original_firstorder_10Percentile_flair', 'original_firstorder_90Percentile_flair', 'original_firstorder_Energy_flair', 'original_firstorder_Entropy_flair', 'original_firstorder_InterquartileRange_flair', 'original_firstorder_Kurtosis_flair', 'original_firstorder_Maximum_flair', 'original_firstorder_MeanAbsoluteDeviation_flair', 'origin

Unnamed: 0,age,original_shape_Elongation_flair,original_shape_Flatness_flair,original_shape_LeastAxisLength_flair,original_shape_MajorAxisLength_flair,original_shape_Maximum2DDiameterColumn_flair,original_shape_Maximum2DDiameterRow_flair,original_shape_Maximum2DDiameterSlice_flair,original_shape_Maximum3DDiameter_flair,original_shape_MeshVolume_flair,...,getLBP2DImage_0_original_glszm_SmallAreaHighGrayLevelEmphasis_t2,getLBP2DImage_0_original_glszm_SmallAreaLowGrayLevelEmphasis_t2,getLBP2DImage_0_original_glszm_ZoneEntropy_t2,getLBP2DImage_0_original_glszm_ZonePercentage_t2,getLBP2DImage_0_original_glszm_ZoneVariance_t2,getLBP2DImage_0_original_ngtdm_Busyness_t2,getLBP2DImage_0_original_ngtdm_Coarseness_t2,getLBP2DImage_0_original_ngtdm_Complexity_t2,getLBP2DImage_0_original_ngtdm_Contrast_t2,getLBP2DImage_0_original_ngtdm_Strength_t2
0,68.17,0.708109,0.549494,44.532841,81.043346,77.491935,89.944427,82.540899,91.181138,90802.875,...,0.01388889,0.01388889,1.0,2.2e-05,2063521000.0,0.0,1000000.0,0.0,0.0,0.0
1,50.153,0.713522,0.581695,28.423128,48.862599,48.6621,50.606324,42.011903,51.662365,19210.95833,...,2.7e-09,2.7e-09,-3.2e-16,5.2e-05,0.0,0.0,1000000.0,0.0,0.0,0.0
2,21.726,0.572874,0.488275,47.082522,96.426173,70.830784,110.367568,99.609237,110.742946,59501.04167,...,0.02812822,0.02812822,2.0,6.7e-05,665427700.0,0.0,1000000.0,0.0,0.0,0.0
3,55.482,0.647488,0.498546,45.320963,90.906282,70.5762,94.339811,94.085068,94.620294,99705.66667,...,0.1676474,0.1676474,2.584963,6e-05,1381898000.0,0.0,1000000.0,0.0,0.0,0.0
4,64.753,0.729476,0.51604,43.413481,84.128141,80.0,84.403791,80.05623,87.464278,57203.79167,...,0.1718748,0.1718748,2.405639,0.00014,357215800.0,0.0,1000000.0,0.0,0.0,0.0


In [185]:
scaler = MinMaxScaler()
v = X_all
v_scaled = scaler.fit_transform(v)
X_all = v_scaled
X_val = scaler.fit_transform(x_val)

df = pd.DataFrame(X_all, columns = ["age"]+jj)
# display(df)
df2 = pd.DataFrame(X_val,columns = ["age"]+jj)
df2.head()

Unnamed: 0,age,original_shape_Elongation_flair,original_shape_Flatness_flair,original_shape_LeastAxisLength_flair,original_shape_MajorAxisLength_flair,original_shape_Maximum2DDiameterColumn_flair,original_shape_Maximum2DDiameterRow_flair,original_shape_Maximum2DDiameterSlice_flair,original_shape_Maximum3DDiameter_flair,original_shape_MeshVolume_flair,...,getLBP2DImage_0_original_glszm_SmallAreaHighGrayLevelEmphasis_t2,getLBP2DImage_0_original_glszm_SmallAreaLowGrayLevelEmphasis_t2,getLBP2DImage_0_original_glszm_ZoneEntropy_t2,getLBP2DImage_0_original_glszm_ZonePercentage_t2,getLBP2DImage_0_original_glszm_ZoneVariance_t2,getLBP2DImage_0_original_ngtdm_Busyness_t2,getLBP2DImage_0_original_ngtdm_Coarseness_t2,getLBP2DImage_0_original_ngtdm_Complexity_t2,getLBP2DImage_0_original_ngtdm_Contrast_t2,getLBP2DImage_0_original_ngtdm_Strength_t2
0,0.726244,0.414664,0.554768,0.618613,0.703363,0.864464,0.717478,0.746378,0.730597,0.678942,...,0.02777778,0.02777778,0.356207,0.007783,0.804518,0.0,0.0,0.0,0.0,0.0
1,0.444512,0.427461,0.639311,0.213227,0.207936,0.257954,0.173298,0.144148,0.186348,0.103571,...,5.2832e-09,5.2832e-09,0.0,0.024042,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.094908,0.39404,0.682773,0.940184,0.724329,1.0,1.0,1.0,0.427375,...,0.05625643,0.05625643,0.712414,0.032172,0.259434,0.0,0.0,0.0,0.0,0.0
3,0.527842,0.271328,0.421005,0.638445,0.855204,0.718974,0.778281,0.917915,0.77796,0.750492,...,0.3352948,0.3352948,0.920782,0.028378,0.538769,0.0,0.0,0.0,0.0,0.0
4,0.672812,0.465184,0.466935,0.590445,0.750854,0.917228,0.640832,0.709457,0.679408,0.408912,...,0.3437496,0.3437496,0.856906,0.071481,0.13927,0.0,0.0,0.0,0.0,0.0


In [186]:
# X_train, X_test, y_train, y_test = train_test_split(X_all,y_all,test_size = 0.2, random_state = 42, shuffle = True)
X_train = X_all
y_train = y_all
print("x_train shape:",X_train.shape)
# print("x_test shape:", X_test.shape)
print("y_train shape:",y_train.shape)
# print("y_test shape:", y_test.shape)

x_train shape: (235, 10701)
y_train shape: (235,)


In [187]:
from skfeature.function.similarity_based import fisher_score

print(X_train.shape,y_train.shape)
_ranks = fisher_score.fisher_score(X_train,y_train)

(235, 10701) (235,)


  score = 1.0 / lap_score - 1


In [188]:
columns = ["age"]+jj
colnew = []
def prune2(X):
    xx = []
    for row in X:
        iter = 0
        xxtp = []
        for col in row:
            if _ranks[iter]<400:
                xxtp.append(col)
                if columns[iter] not in colnew:
                    colnew.append(columns[iter])
            iter += 1
        xx.append(xxtp)
    return xx

x_train = np.array(prune2(X_train))
x_test = np.array(prune2(X_val))

print(x_train.shape)
print(x_test.shape)
df = pd.DataFrame(x_train, columns = colnew)
df2 = pd.DataFrame(x_test,columns = colnew)
# df2.head()
df.head()

(235, 400)
(28, 400)


Unnamed: 0,age,original_shape_Maximum2DDiameterSlice_flair,getLoGImage_4.5_original_glcm_JointEnergy_flair,getLoGImage_4.5_original_glcm_SumEntropy_flair,getLoGImage_4.5_original_glszm_GrayLevelVariance_flair,getLoGImage_4.0_original_glcm_Imc2_flair,getLoGImage_4.0_original_gldm_GrayLevelVariance_flair,getLoGImage_4.0_original_glszm_LargeAreaLowGrayLevelEmphasis_flair,getLoGImage_3.5_original_firstorder_Energy_flair,getLoGImage_3.5_original_gldm_LargeDependenceHighGrayLevelEmphasis_flair,...,getLBP2DImage_0_original_shape_MinorAxisLength_t2,getLBP2DImage_0_original_shape_Sphericity_t2,getLBP2DImage_0_original_shape_SurfaceArea_t2,getLBP2DImage_0_original_shape_SurfaceVolumeRatio_t2,getLBP2DImage_0_original_shape_VoxelVolume_t2,getLBP2DImage_0_original_firstorder_10Percentile_t2,getLBP2DImage_0_original_firstorder_90Percentile_t2,getLBP2DImage_0_original_firstorder_Energy_t2,getLBP2DImage_0_original_firstorder_Entropy_t2,getLBP2DImage_0_original_firstorder_InterquartileRange_t2
0,0.61303,0.405377,0.17041,0.506126,0.051512,0.952194,0.03766,0.000348,0.015988,0.13219,...,0.635505,0.322697,0.18563,0.040678,0.16872,0.333333,1.0,0.164775,0.0,0.421053
1,0.491866,0.228974,0.028966,0.796914,0.342793,0.963007,0.208471,3.9e-05,0.048565,0.297971,...,0.379174,0.862405,0.05908,0.012996,0.099866,0.333333,1.0,0.101971,0.0,0.842105
2,0.521979,0.11819,0.132636,0.513724,0.084872,0.922963,0.046727,8e-06,0.002284,0.073913,...,0.138853,0.421221,0.01798,0.1201,0.007518,0.333333,1.0,0.007328,0.0,0.842105
3,0.296896,0.346661,0.082135,0.648677,0.097101,0.96218,0.081906,3.1e-05,0.024502,0.147212,...,0.581933,0.203356,0.197999,0.072792,0.118956,0.333333,1.0,0.113733,0.0,0.631579
4,0.731681,0.385504,0.183492,0.465526,0.042801,0.875691,0.031168,8.6e-05,0.023304,0.013958,...,0.3397,0.142153,0.108563,0.149056,0.039199,0.333333,1.0,0.035159,0.0,0.631579


In [189]:
scaler = MinMaxScaler()
v = x_train
v_scaled = scaler.fit_transform(v)
x_train = v_scaled
v = x_test
v_scaled = scaler.fit_transform(v)
x_test = v_scaled

df = pd.DataFrame(x_train, columns = colnew)
display(df)

Unnamed: 0,age,original_shape_Maximum2DDiameterSlice_flair,getLoGImage_4.5_original_glcm_JointEnergy_flair,getLoGImage_4.5_original_glcm_SumEntropy_flair,getLoGImage_4.5_original_glszm_GrayLevelVariance_flair,getLoGImage_4.0_original_glcm_Imc2_flair,getLoGImage_4.0_original_gldm_GrayLevelVariance_flair,getLoGImage_4.0_original_glszm_LargeAreaLowGrayLevelEmphasis_flair,getLoGImage_3.5_original_firstorder_Energy_flair,getLoGImage_3.5_original_gldm_LargeDependenceHighGrayLevelEmphasis_flair,...,getLBP2DImage_0_original_shape_MinorAxisLength_t2,getLBP2DImage_0_original_shape_Sphericity_t2,getLBP2DImage_0_original_shape_SurfaceArea_t2,getLBP2DImage_0_original_shape_SurfaceVolumeRatio_t2,getLBP2DImage_0_original_shape_VoxelVolume_t2,getLBP2DImage_0_original_firstorder_10Percentile_t2,getLBP2DImage_0_original_firstorder_90Percentile_t2,getLBP2DImage_0_original_firstorder_Energy_t2,getLBP2DImage_0_original_firstorder_Entropy_t2,getLBP2DImage_0_original_firstorder_InterquartileRange_t2
0,0.613030,0.405377,0.170410,0.506126,0.051512,0.952194,0.037660,3.480496e-04,0.015988,0.132190,...,0.635505,0.322697,0.185630,0.040678,0.168720,0.333333,1.00,0.164775,0.0,0.421053
1,0.491866,0.228974,0.028966,0.796914,0.342793,0.963007,0.208471,3.877019e-05,0.048565,0.297971,...,0.379174,0.862405,0.059080,0.012996,0.099866,0.333333,1.00,0.101971,0.0,0.842105
2,0.521979,0.118190,0.132636,0.513724,0.084872,0.922963,0.046727,7.569299e-06,0.002284,0.073913,...,0.138853,0.421221,0.017980,0.120100,0.007518,0.333333,1.00,0.007328,0.0,0.842105
3,0.296896,0.346661,0.082135,0.648677,0.097101,0.962180,0.081906,3.094312e-05,0.024502,0.147212,...,0.581933,0.203356,0.197999,0.072792,0.118956,0.333333,1.00,0.113733,0.0,0.631579
4,0.731681,0.385504,0.183492,0.465526,0.042801,0.875691,0.031168,8.649484e-05,0.023304,0.013958,...,0.339700,0.142153,0.108563,0.149056,0.039199,0.333333,1.00,0.035159,0.0,0.631579
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,0.561860,0.433281,0.129286,0.575307,0.097481,0.961656,0.053613,8.440236e-04,0.060233,0.154612,...,0.540197,0.483136,0.184269,0.018722,0.261255,0.333333,0.75,0.223376,0.0,0.210526
231,0.783501,0.344855,0.215810,0.413746,0.026619,0.865110,0.021112,6.937882e-04,0.003540,0.040871,...,0.263555,0.640503,0.042524,0.036474,0.042399,0.333333,0.50,0.031541,0.0,0.210526
232,0.606188,0.286854,0.077462,0.663537,0.314664,0.954517,0.121867,3.128671e-06,0.019551,0.053699,...,0.193421,0.380783,0.039654,0.087976,0.021139,0.666667,0.75,0.019041,0.0,0.421053
233,0.443651,0.208680,0.021889,0.903908,0.786296,0.985242,1.000000,2.964243e-08,0.038810,0.227391,...,0.305453,0.332143,0.032854,0.117252,0.014071,0.333333,0.75,0.011677,0.0,0.210526


In [190]:
 
print("x_train shape:",x_train.shape)
print("x_test shape:",x_test.shape)

x_train shape: (235, 400)
x_test shape: (28, 400)


In [191]:
 # Save the array
save_numpy_array(x_train, "../radiomics features/all", "radiomics_train.npy")
save_numpy_array(x_test, "../radiomics features/all", "radiomics_validate.npy")

Array saved to '../radiomics features/all/radiomics_train.npy'
Array saved to '../radiomics features/all/radiomics_validate.npy'
