In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation and dtype warnings from the
# libraries imported here — confirm that blanket suppression is intended.
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

# Seed NumPy's global random state so any later np.random call is repeatable.
np.random.seed(123)

In [2]:
# Backbone whose extracted features are processed in this run. The value is
# interpolated into the input and output CSV file names in the next cell.
# Alternative values kept from earlier runs (swap one in to process it;
# presumably a matching feature CSV must exist — verify before switching):
#   'InceptionResNetV2', 'InceptionV3', 'Resnet', 'VGG16', 'VGG19', 'Xception',
#   'DenseNet201', 'EfficientNetB1', 'EfficientNetB4', 'EfficientNetB7',
#   'NASNetLarge'
NETWORK = 'ConvNeXtBase'

# Tag of the feature-extraction variant; it selects the ExtractedFeatures_* and
# ProcessedFeatures_* sub-directories built in the next cell.
METHOD = "AVG-Median"

In [3]:
# Root of the experiment tree; input and output folders are derived from it.
WORKING_DIRECTORY = '/home/amirhosein/HECKTOR2022/WHOLEIMAGE_MAMIP'
PET_features_filename = f"Features_{NETWORK}_MA-MIP_WHOLE-IMAGE.csv"

# Input folder (extracted features) and output folder (processed features),
# both keyed by METHOD.
features_path = os.path.join(WORKING_DIRECTORY, f"EXTRACTED_FEATURES/ExtractedFeatures_{METHOD}")
processed_features_path = os.path.join(WORKING_DIRECTORY, f"PROCESSED_FEATURES/ProcessedFeatures_{METHOD}")
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without the check-then-create race of a separate os.path.exists() test.
os.makedirs(processed_features_path, exist_ok=True)

# Destination of the merged features + outcomes table written at the end.
processed_features_filename = f"Processed_Features_{NETWORK}_MA-MIP_WHOLE-IMAGE.csv"
processed_features_full_filename = os.path.join(processed_features_path, processed_features_filename)

PET_featurs_full_path = os.path.join(features_path, PET_features_filename)

# File name without its extension. splitext strips only the final ".csv", so a
# backbone name that itself contains a dot is not truncated.
PET_filename = os.path.splitext(PET_features_filename)[0]

# Feature table: one row per patient, one column per extracted feature.
PET_outcome_data = pd.read_csv(PET_featurs_full_path)

# CSV holding the per-patient outcome labels; it is read in a later cell.
outcome_file = "/home/amirhosein/HECKTOR2022/hecktor2022_patient_endpoint_training.csv"

In [4]:
# Preview the first rows of the feature table exactly as loaded from the CSV.
PET_outcome_data.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,CHUM-001,0.12936,0.288655,0.141157,0.24364,0.39243,0.243072,-0.170066,0.123305,-0.500381,...,-0.331825,0.279184,0.06036,0.4019,-0.469592,-0.279056,0.190906,0.301617,0.022881,-0.137185
1,CHUM-002,0.058118,0.287627,0.160652,0.504217,0.045921,0.25197,-0.385267,0.080156,-0.411928,...,-0.159075,0.241122,0.229397,0.517126,-0.301295,-0.391696,0.289187,0.121577,0.074949,-0.093768
2,CHUM-006,-0.535642,0.42068,0.071722,0.90466,0.156141,-0.009664,-0.403704,-0.305214,-0.237391,...,-0.434198,0.40197,0.138389,0.540217,-0.43819,-0.30166,0.121133,-0.162058,-0.700108,-0.232615
3,CHUM-007,-0.374285,0.324282,0.091341,1.00608,-0.094911,0.072483,-0.387957,-0.307615,-0.17051,...,-0.365541,0.497799,0.231057,0.573797,-0.540778,-0.448617,0.092297,-0.323527,-0.481696,-0.155474
4,CHUM-008,-0.190812,0.502399,0.030509,0.942705,0.008396,0.160513,-0.329345,-0.196814,-0.348275,...,-0.38877,0.540209,-0.058694,0.550546,-0.544725,-0.17742,-0.023516,-0.401771,-0.907764,-0.421566


# Up to here

Read the outcome CSV

In [5]:
# Load the per-patient outcome table and preview its first rows.
OD = pd.read_csv(outcome_file)
OD.head()

Unnamed: 0,PatientID,Relapse,RFS
0,CHUM-001,0,1704
1,CHUM-002,1,439
2,CHUM-006,0,1186
3,CHUM-007,0,1702
4,CHUM-008,0,1499


In [6]:
# Give the outcome table's ID column the name that the feature table's ID
# column is also given, so both tables share one key name for the merge.
OD = OD.rename({'PatientID': 'Patient_ID'}, axis='columns')
OD.head()

Unnamed: 0,Patient_ID,Relapse,RFS
0,CHUM-001,0,1704
1,CHUM-002,1,439
2,CHUM-006,0,1186
3,CHUM-007,0,1702
4,CHUM-008,0,1499


In [7]:
# Check the name of the outcome table's first column after the rename.
OD.columns[0]

'Patient_ID'

In [8]:
# The feature CSV stores patient IDs in its first column; relabel that column
# so it matches the key name used by the outcome table.
first_column = PET_outcome_data.columns[0]
PET_outcome_data = PET_outcome_data.rename({first_column: 'Patient_ID'}, axis='columns')

In [9]:
# Re-check the outcome table's first column name before merging.
OD.columns[0]


'Patient_ID'

In [10]:
# Confirm the feature table's first column carries the same key name.
PET_outcome_data.columns[0]

'Patient_ID'

In [11]:
# Preview the feature table with its ID column relabelled.
PET_outcome_data.head()

Unnamed: 0,Patient_ID,0,1,2,3,4,5,6,7,8,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,CHUM-001,0.12936,0.288655,0.141157,0.24364,0.39243,0.243072,-0.170066,0.123305,-0.500381,...,-0.331825,0.279184,0.06036,0.4019,-0.469592,-0.279056,0.190906,0.301617,0.022881,-0.137185
1,CHUM-002,0.058118,0.287627,0.160652,0.504217,0.045921,0.25197,-0.385267,0.080156,-0.411928,...,-0.159075,0.241122,0.229397,0.517126,-0.301295,-0.391696,0.289187,0.121577,0.074949,-0.093768
2,CHUM-006,-0.535642,0.42068,0.071722,0.90466,0.156141,-0.009664,-0.403704,-0.305214,-0.237391,...,-0.434198,0.40197,0.138389,0.540217,-0.43819,-0.30166,0.121133,-0.162058,-0.700108,-0.232615
3,CHUM-007,-0.374285,0.324282,0.091341,1.00608,-0.094911,0.072483,-0.387957,-0.307615,-0.17051,...,-0.365541,0.497799,0.231057,0.573797,-0.540778,-0.448617,0.092297,-0.323527,-0.481696,-0.155474
4,CHUM-008,-0.190812,0.502399,0.030509,0.942705,0.008396,0.160513,-0.329345,-0.196814,-0.348275,...,-0.38877,0.540209,-0.058694,0.550546,-0.544725,-0.17742,-0.023516,-0.401771,-0.907764,-0.421566


In [12]:
# Disabled diagnostic (uncomment to run): for every column after the first,
# print its value range (max - min), then print the largest range found.
# ranges_list = list()
# for col in PET_outcome_data.iloc[:, 1:]:
#     col_range = PET_outcome_data[col].max() - PET_outcome_data[col].min()
#     ranges_list.append(col_range)
#     print(f"Range of column {col}: {col_range}")
# print(f"maximum range: {max(ranges_list)}")

In [13]:
# Join each patient's feature row with its outcome row on the shared ID column.
# The key is named explicitly instead of being read from OD.columns[0], so a
# change in the outcome file's column order cannot silently switch the join key.
# validate='one_to_one' makes pandas raise MergeError when a patient ID is
# duplicated in either table, instead of emitting one row per matching pair.
full_features_df = pd.merge(
    PET_outcome_data,
    OD,
    on='Patient_ID',
    how='inner',
    validate='one_to_one',
)

# An inner join drops patients that appear in only one table; report the loss.
# print() is used because warnings are silenced globally at the top of the notebook.
features_without_outcome = len(PET_outcome_data) - len(full_features_df)
outcomes_without_features = len(OD) - len(full_features_df)
if features_without_outcome or outcomes_without_features:
    print(
        f"Merge dropped {features_without_outcome} feature row(s) without an outcome "
        f"and {outcomes_without_features} outcome row(s) without features."
    )

In [14]:
# Preview the merged table: feature columns followed by the outcome columns.
full_features_df.head()

Unnamed: 0,Patient_ID,0,1,2,3,4,5,6,7,8,...,1016,1017,1018,1019,1020,1021,1022,1023,Relapse,RFS
0,CHUM-001,0.12936,0.288655,0.141157,0.24364,0.39243,0.243072,-0.170066,0.123305,-0.500381,...,0.06036,0.4019,-0.469592,-0.279056,0.190906,0.301617,0.022881,-0.137185,0,1704
1,CHUM-002,0.058118,0.287627,0.160652,0.504217,0.045921,0.25197,-0.385267,0.080156,-0.411928,...,0.229397,0.517126,-0.301295,-0.391696,0.289187,0.121577,0.074949,-0.093768,1,439
2,CHUM-006,-0.535642,0.42068,0.071722,0.90466,0.156141,-0.009664,-0.403704,-0.305214,-0.237391,...,0.138389,0.540217,-0.43819,-0.30166,0.121133,-0.162058,-0.700108,-0.232615,0,1186
3,CHUM-007,-0.374285,0.324282,0.091341,1.00608,-0.094911,0.072483,-0.387957,-0.307615,-0.17051,...,0.231057,0.573797,-0.540778,-0.448617,0.092297,-0.323527,-0.481696,-0.155474,0,1702
4,CHUM-008,-0.190812,0.502399,0.030509,0.942705,0.008396,0.160513,-0.329345,-0.196814,-0.348275,...,-0.058694,0.550546,-0.544725,-0.17742,-0.023516,-0.401771,-0.907764,-0.421566,0,1499


In [15]:
# Persist the merged features + outcomes table; the positional row index is
# left out of the file.
full_features_df.to_csv(
    processed_features_full_filename,
    index=False,
    encoding='utf-8',
)