In [None]:
import os
import re
import cv2
import glob
import shutil
import numpy as np
np.set_printoptions(precision=3, suppress=True)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Dataset locations. Every default below is the path the notebook was written
# against; set the matching environment variable to run it on another machine
# without editing this cell.
TRAIN_DATASET_PATH = os.environ.get(
    'BRATS_TRAIN_DATASET_PATH',
    'D:/brats/nii/archive (1)/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData/',
)
surInfoFile = os.environ.get(
    'BRATS_SURVIVAL_INFO_CSV',
    "D:/brats/1/BraTS2020_training_data/content/data/survival_info.csv",
)
MetaDataFile = os.environ.get(
    'BRATS_META_DATA_CSV',
    "D:/brats/1/BraTS2020_training_data/content/data/meta_data.csv",
)

# Column names imposed on survival_info.csv. skiprows=1 discards the first
# line of each file (presumably its own header row) so these names replace it.
date_column = ['Brats20ID', 'Age', 'Survival_days', 'Extent_of_Resection']
df_surInfo = pd.read_csv(surInfoFile, names=date_column, skiprows=1)
df_metaData = pd.read_csv(
    MetaDataFile,
    names=['slice_path', 'target', 'volume', 'slice'],
    skiprows=1,
)


def modify_value(value):
    """Return the last three characters of ``str(value)`` parsed as an int.

    For an identifier ending in a zero-padded number (e.g. ``'..._084'``)
    this yields the bare patient number (``84``).

    Raises:
        ValueError: propagated from ``int`` when the trailing characters do
            not form an integer literal.
    """
    trailing_digits = str(value)[-3:]
    return int(trailing_digits)

def to_int(value):
    """Convert ``value`` with the built-in ``int`` and return the result.

    Any exception raised by ``int`` (e.g. ``ValueError`` for a non-numeric
    string) is propagated unchanged.
    """
    as_integer = int(value)
    return as_integer

# Replace the textual identifier with its integer patient number.
df_surInfo['Brats20ID'] = df_surInfo['Brats20ID'].apply(modify_value)
print(df_surInfo[df_surInfo['Brats20ID'] == 84])

# Remove every row whose survival value is not a plain number instead of
# dropping a hard-coded index label: the filter keeps working if the CSV is
# reordered and also catches any other unparsable entry before int() fails.
survival_numeric = pd.to_numeric(df_surInfo['Survival_days'], errors='coerce')
df_surInfo = df_surInfo[survival_numeric.notna()].copy()
df_surInfo['Survival_days'] = df_surInfo['Survival_days'].apply(to_int)

#
# split ids
#

from sklearn.model_selection import train_test_split
all_ids = df_surInfo['Brats20ID'].unique()

# Split the patient IDs into train and test sets. test_size=0.1489 holds out
# roughly 15% of the patients; random_state fixes the shuffle.
train_ids, test_ids = train_test_split(all_ids, test_size=0.1489, random_state=42)

print(f"Number of train IDs: {len(train_ids)}")
print(f"Number of test IDs: {len(test_ids)}")
print(train_ids)
print(test_ids)

# Accumulators filled by the feature-extraction cells further down.
X_train = []
y_train = []
X_test = []
y_test = []




In [None]:

from radiomics import featureextractor
import SimpleITK as sitk



# PyRadiomics extractor with its default configuration; the message below
# lists the image types it will process.
extractor = featureextractor.RadiomicsFeatureExtractor()
print('Enabled filters:\n\t', extractor.enabledImagetypes)

# Shape descriptors copied out of the extractor result, in feature-vector
# order (14 names). Each is looked up under the key 'original_shape_<name>'.
featurelist = ['VoxelVolume',
               'MeshVolume', 'SurfaceArea', 'SurfaceVolumeRatio',
               'Sphericity',
               'Maximum3DDiameter', 'Maximum2DDiameterSlice', 'Maximum2DDiameterColumn', 'Maximum2DDiameterRow',
               'MajorAxisLength', 'MinorAxisLength', 'LeastAxisLength', 'Elongation', 'Flatness']

# Patients never added to the training set even when the split selects them.
# NOTE(review): the notebook does not record why 99 and 355 are excluded
# (presumably unusable files) - confirm and document the reason.
excluded_train_ids = {99, 355}

# Plain-int set for fast, unambiguous membership tests.
train_id_lookup = {int(pid) for pid in train_ids}

# Start from empty lists so re-running this cell does not append every
# patient a second time.
X_train = []
y_train = []

count = 0
for patient_dir in os.listdir(TRAIN_DATASET_PATH):
    # Entries that are not named "...Training_<number>" (stray files, other
    # folders) are skipped; calling .group() on a failed match would raise.
    id_match = re.search(r"Training_(\d+)", patient_dir)
    if id_match is None:
        continue
    patient_id = int(id_match.group(1))

    if patient_id not in train_id_lookup or patient_id in excluded_train_ids:
        continue

    # Only the T1 volume and the segmentation are read: the other modalities
    # were loaded but never used by the analysis.
    T1_path = os.path.join(TRAIN_DATASET_PATH, patient_dir, patient_dir + '_t1.nii')
    mask_path = os.path.join(TRAIN_DATASET_PATH, patient_dir, patient_dir + '_seg.nii')

    print(str(count) + '/' + str(len(train_ids)) + T1_path)

    T1_image = sitk.ReadImage(T1_path)
    mask = sitk.ReadImage(mask_path)

    # The feature vector is [age, <shape descriptors in featurelist order>].
    age = df_surInfo.loc[df_surInfo['Brats20ID'] == patient_id, 'Age'].iloc[0]
    feature = [age]

    # NOTE(review): with default settings PyRadiomics evaluates a single mask
    # label (documented default: 1). If the segmentation holds several labels,
    # confirm that restricting the shape features to that label is intended.
    T1_feature_values = extractor.execute(T1_image, mask)

    for ft in featurelist:
        key = 'original_shape_' + ft
        if key not in T1_feature_values:
            # A silently skipped feature would shorten this row and shift
            # every later column, so fail loudly instead.
            raise KeyError(f"{key} missing from extractor output for patient {patient_id}")
        # Results may be 0-d numpy arrays; float() normalises them to scalars.
        feature.append(float(T1_feature_values[key]))

    days = df_surInfo.loc[df_surInfo["Brats20ID"] == patient_id, 'Survival_days'].iloc[0]
    print(feature)
    X_train.append(feature)
    y_train.append(days)
    count += 1

# X_train now holds one [age, shape features...] list per training patient and
# y_train the matching survival days, in the same order.


In [None]:
# Builds the held-out feature matrix with the same steps as the training
# extraction cell. NOTE(review): the two loops are near-identical; consider
# factoring them into one helper parameterised by the ID set.

# Shape descriptors copied out of the extractor result, in feature-vector
# order (14 names). Each is looked up under the key 'original_shape_<name>'.
featurelist = ['VoxelVolume',
               'MeshVolume', 'SurfaceArea', 'SurfaceVolumeRatio',
               'Sphericity',
               'Maximum3DDiameter', 'Maximum2DDiameterSlice', 'Maximum2DDiameterColumn', 'Maximum2DDiameterRow',
               'MajorAxisLength', 'MinorAxisLength', 'LeastAxisLength', 'Elongation', 'Flatness']

# Plain-int set for fast, unambiguous membership tests.
test_id_lookup = {int(pid) for pid in test_ids}

# Start from empty lists so re-running this cell does not append every
# patient a second time.
X_test = []
y_test = []

count = 0
for patient_dir in os.listdir(TRAIN_DATASET_PATH):
    # Entries that are not named "...Training_<number>" (stray files, other
    # folders) are skipped; calling .group() on a failed match would raise.
    id_match = re.search(r"Training_(\d+)", patient_dir)
    if id_match is None:
        continue
    patient_id = int(id_match.group(1))

    # NOTE(review): the training cell additionally skips patients 99 and 355;
    # no exclusion is applied here - confirm neither can land in test_ids.
    if patient_id not in test_id_lookup:
        continue

    # Only the T1 volume and the segmentation are read: the other modalities
    # were loaded but never used by the analysis.
    T1_path = os.path.join(TRAIN_DATASET_PATH, patient_dir, patient_dir + '_t1.nii')
    mask_path = os.path.join(TRAIN_DATASET_PATH, patient_dir, patient_dir + '_seg.nii')

    print(str(count) + '/' + str(len(test_ids)) + T1_path)

    T1_image = sitk.ReadImage(T1_path)
    mask = sitk.ReadImage(mask_path)

    # The feature vector is [age, <shape descriptors in featurelist order>].
    age = df_surInfo.loc[df_surInfo['Brats20ID'] == patient_id, 'Age'].iloc[0]
    feature = [age]

    T1_feature_values = extractor.execute(T1_image, mask)

    for ft in featurelist:
        key = 'original_shape_' + ft
        if key not in T1_feature_values:
            # A silently skipped feature would shorten this row and shift
            # every later column, so fail loudly instead.
            raise KeyError(f"{key} missing from extractor output for patient {patient_id}")
        # Results may be 0-d numpy arrays; float() normalises them to scalars.
        feature.append(float(T1_feature_values[key]))

    days = int(df_surInfo.loc[df_surInfo["Brats20ID"] == patient_id, 'Survival_days'].iloc[0])
    print(feature)
    X_test.append(feature)
    y_test.append(days)
    print(days)
    count += 1

# X_test now holds one [age, shape features...] list per held-out patient and
# y_test the matching survival days, in the same order.


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


# Test-set mean squared error for each seed; the box-plot cell reads `m`.
m = []

# Fit one 10-tree forest per seed in 30..49 and score it on the held-out set.
for run_index, State in enumerate(range(30, 50)):
    random_forest = RandomForestRegressor(n_estimators=10, random_state=State)
    random_forest.fit(X_train, y_train)

    # Predictions are converted to whole days before scoring.
    # NOTE(review): astype(int) truncates toward zero rather than rounding -
    # confirm that is the intended conversion.
    y_pred = random_forest.predict(X_test).astype(int)
    mse = mean_squared_error(y_test, y_pred)

    # run_index counts from 0, i.e. the seed minus the first seed (30).
    print(f"{run_index} Mean Squared Error (MSE): {mse}")
    m.append(mse)

average_mse = sum(m) / len(m)
print(f"Average Mean Squared Error (MSE): {average_mse}")


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt






# Column labels for the training matrix: age first, then the shape descriptors
# in the order the extraction cell appends them.
feature_columns = ['age', 'VoxelVolume',
                   'MeshVolume', 'SurfaceArea', 'SurfaceVolumeRatio',
                   'Sphericity',
                   'Maximum3DDiameter', 'Maximum2DDiameterSlice', 'Maximum2DDiameterColumn', 'Maximum2DDiameterRow',
                   'MajorAxisLength', 'MinorAxisLength', 'LeastAxisLength', 'Elongation', 'Flatness']
x_train_df = pd.DataFrame(X_train, columns=feature_columns)
y_train_df = pd.DataFrame({'Target': y_train})

# Preview the first rows only instead of dumping the whole frame.
print(x_train_df.head())

# Features and target side by side; both frames share the default row index.
data = pd.concat([x_train_df, y_train_df], axis=1)

# Pairwise correlation of every column (pandas default method).
correlation_matrix = data.corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f',
            annot_kws={"size": 10}, ax=ax)
# A non-empty title lets the saved figure stand on its own.
ax.set_title('Correlation of age and shape features with survival days (training set)')
fig.savefig('correlation_matrix.png', bbox_inches='tight')
plt.show()



In [None]:
# Rank every column by the magnitude of its correlation with 'Target',
# strongest first ('Target' itself is included in the listing).
target_column = data.corr()['Target']
correlations = target_column.abs().sort_values(ascending=False)
print(correlations)

In [None]:

# Box plot of the per-seed test MSE values collected in `m` by the
# random-forest cell. (The unused random placeholder data that used to be
# generated here was never plotted and has been removed.)
fig, ax = plt.subplots(figsize=(8, 6))
ax.boxplot(m)
ax.set_title('Random Forest_10(MSE)')
ax.set_ylabel('MSE Values')
ax.grid(True)
plt.show()

