## Part1) Feature Extraction

In [None]:
# The tumor in the Lung PET-CT data set was segmented and will be quantified in the following steps.
# The purpose is to quantify the tumor mass into mineable data by extracting features from the tumor.
# The definitions of some of the features were provided in the supplementary document.

In [None]:
# Importing required libraries

from sklearn.model_selection import train_test_split
from radiomics import firstorder, glcm
from sklearn import metrics
from sklearn.svm import SVC
import SimpleITK as sitk
import pandas as pd
import numpy as np
import six

In [None]:
# Loading every scan together with the segmentation mask that outlines the tumor.
# Each pair is read in the order (image, mask); CT is loaded before PET.

CT_Image, CT_Mask = (
    sitk.ReadImage('Data/CT_Data.nii.gz'),
    sitk.ReadImage('Data/CT_Mask.nii.gz'),
)

PET_Image, PET_Mask = (
    sitk.ReadImage('Data/PET_Data.nii.gz'),
    sitk.ReadImage('Data/PET_Mask.nii.gz'),
)

In [None]:
# First-order statistics of the masked region in the CT image

# Build the extractor on the CT volume and its mask, switch on every feature
# of the first-order class, then compute them.
FOS_CT = firstorder.RadiomicsFirstOrder(CT_Image, CT_Mask)
FOS_CT.enableAllFeatures()
FOS_CT_Features = FOS_CT.execute()

# Report each computed feature as a "name -> value" line.
for feature_name, feature_value in FOS_CT_Features.items():
    print('The value of the feature  ', feature_name, ' is equal to:', feature_value)

In [None]:
# First-order statistics of the masked region in the PET image

# Build the extractor on the PET volume and its mask, switch on every feature
# of the first-order class, then compute them.
FOS_PET = firstorder.RadiomicsFirstOrder(PET_Image, PET_Mask)
FOS_PET.enableAllFeatures()
FOS_PET_Features = FOS_PET.execute()

# Report each computed feature as a "name -> value" line.
for feature_name, feature_value in FOS_PET_Features.items():
    print('The value of the feature  ', feature_name, ' is equal to:', feature_value)

In [None]:
# TODO: Can you interpret these results?
# The "Mean" first-order feature of each modality is printed side by side for comparison.

print(
    "The average intensity of the tumor in CT image is:",
    FOS_CT_Features["Mean"],
)
print(
    "The average intensity of the tumor in PET image is:",
    FOS_PET_Features["Mean"],
)

In [None]:
# Textural features (Gray Level Co-occurrence Matrix) of the masked region in the CT image

# Build the GLCM extractor on the CT volume and its mask, switch on every
# feature of the GLCM class, then compute them.
CT_GLCM = glcm.RadiomicsGLCM(CT_Image, CT_Mask)
CT_GLCM.enableAllFeatures()
CT_GLCM_Features = CT_GLCM.execute()

# Report each computed feature as a "name -> value" line.
for feature_name, feature_value in CT_GLCM_Features.items():
    print('The value of the feature  ', feature_name, ' is equal to:', feature_value)

In [None]:
# Textural features (Gray Level Co-occurrence Matrix) of the masked region in the PET image

# Build the GLCM extractor on the PET volume and its mask, switch on every
# feature of the GLCM class, then compute them.
PET_GLCM = glcm.RadiomicsGLCM(PET_Image, PET_Mask)
PET_GLCM.enableAllFeatures()
PET_GLCM_Features = PET_GLCM.execute()

# Report each computed feature as a "name -> value" line.
for feature_name, feature_value in PET_GLCM_Features.items():
    print('The value of the feature  ', feature_name, ' is equal to:', feature_value)

## Part2) Classification

#### The tumors from 31 PET-CT images were already segmented, and first-order as well as textural features were already extracted and saved in CSV files. We already know which patients survived and which did not. From the machine learning perspective, that means we know the outcome labels. Now, with the extracted features and the outcome labels, we can train a classifier to predict the survival status. For this task, we will use a Support Vector Machine (SVM) as the learning algorithm.

In [None]:
# Loading the pre-extracted feature tables and the outcome labels.
# header=None makes pandas treat the first row of each file as data, not as column names.
# NOTE(review): the file names differ in casing ('CT_All' vs 'PET_ALL'); presumably this
# matches the files on disk - verify on a case-sensitive file system.

ALL_CT, ALL_PET, Labels = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ('Data/CT_All.csv', 'Data/PET_ALL.csv', 'Data/Labels.csv')
)

In [None]:
# Normalizing the feature set (z-score, computed separately for every feature column)

def _standardize_columns(features):
    """Return `features` with every column scaled to zero mean and unit variance.

    Parameters
    ----------
    features : array-like, 2-D
        One row per sample, one column per feature.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape. Columns whose standard deviation is zero
        are only centred (they become all zeros) instead of producing NaN/inf.
    """
    features = np.asarray(features, dtype=float)
    # axis=0 gives one mean/std per feature. Without it NumPy reduces over the whole
    # matrix, so features living on different scales would stay on different scales
    # and the large-valued ones would dominate the RBF kernel distance.
    column_mean = features.mean(axis=0)
    column_std = features.std(axis=0)
    # A constant feature has std == 0; divide by 1 instead to avoid 0/0.
    column_std[column_std == 0] = 1.0
    return (features - column_mean) / column_std


# NOTE(review): the scaling statistics are computed on all samples, i.e. before the
# train/test split. For a leakage-free evaluation they should be estimated on the
# training split only and then applied to the test split.
X_ALL_CT = _standardize_columns(ALL_CT.values)

X_ALL_PET = _standardize_columns(ALL_PET.values)

y_ALL = Labels.values  # Outcome labels, kept 2-D; flattened with .ravel() when fitting

In [None]:
# Splitting the CT features and labels: 30 % of the samples are held out for testing.
# random_state is fixed so the same split is produced on every run.

X_train_CT, X_test_CT, y_train_CT, y_test_CT = train_test_split(
    X_ALL_CT,
    y_ALL,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Training an RBF-kernel SVM on the CT features and scoring it on the held-out samples

Model_CT = SVC(kernel='rbf', gamma='auto')
# The label array is 2-D; ravel() flattens it to the 1-D vector passed to fit().
Model_CT.fit(X_train_CT, y_train_CT.ravel())

y_pred_CT = Model_CT.predict(X_test_CT)

# Fraction of held-out samples whose predicted label matches the true one.
FOS_CT_Acc = metrics.accuracy_score(y_test_CT, y_pred_CT)
print('Accuracy Score:', FOS_CT_Acc)

In [None]:
# Splitting the PET features and labels: 30 % of the samples are held out for testing.
# random_state is fixed so the same split is produced on every run.

X_train_PET, X_test_PET, y_train_PET, y_test_PET = train_test_split(
    X_ALL_PET,
    y_ALL,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Training an RBF-kernel SVM on the PET features and scoring it on the held-out samples

Model_PET = SVC(kernel='rbf', gamma='auto')
# The label array is 2-D; ravel() flattens it to the 1-D vector passed to fit().
Model_PET.fit(X_train_PET, y_train_PET.ravel())

y_pred_PET = Model_PET.predict(X_test_PET)

# Fraction of held-out samples whose predicted label matches the true one.
FOS_PET_Acc = metrics.accuracy_score(y_test_PET, y_pred_PET)
print('Accuracy Score:', FOS_PET_Acc)

In [None]:
# TODO

# Which feature set has more predictive power? Can you explain why?