In [1]:
#Add repo path to the system path
from pathlib import Path
import os, sys
repo_path = Path.cwd().resolve()
# Climb from the current working directory towards the filesystem root until a
# directory containing '.gitignore' (used as the repository-root marker) is found.
while '.gitignore' not in os.listdir(repo_path):
    if repo_path.parent == repo_path:
        # The parent of the filesystem root is the root itself; without this guard
        # the loop would spin forever when no '.gitignore' exists in any ancestor.
        raise FileNotFoundError(
            "No '.gitignore' found in the current working directory or any of its "
            "parents; cannot locate the repository root."
        )
    repo_path = repo_path.parent  # go up one level
# Put the repository root at the front of sys.path (only once) so that the
# project-local imports that follow can be resolved.
if str(repo_path) not in sys.path:
    sys.path.insert(0, str(repo_path))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import SimpleITK as sitk
import cv2 as cv
from radiomics import featureextractor
from tqdm import tqdm
from IPython.display import clear_output

#Import paths and patients classes
from notebooks.info import path_label, patient
import notebooks.utils as utils

# Functions

In [2]:
def extractor_settings(param_path, show=False):
    """Create a pyradiomics feature extractor from a parameter file.

    Args:
        param_path (str): path of the parameter file; it is joined onto ``repo_path``.
        show (bool, optional): when True, print the extractor's settings, enabled
            image types and enabled features. Defaults to False.

    Returns:
        obj: the pyradiomics extractor built from the parameter file
    """
    settings_file = str(repo_path / param_path)
    extractor = featureextractor.RadiomicsFeatureExtractor(settings_file)
    if not show:
        return extractor
    # (heading, extractor attribute) pairs, printed in this order
    report = (
        ('Extraction parameters:\n\t', 'settings'),
        ('Enabled filters:\n\t', 'enabledImagetypes'),
        ('Enabled features:\n\t', 'enabledFeatures'),
    )
    for heading, attribute in report:
        print(heading, getattr(extractor, attribute))
    return extractor

In [3]:
def feature_extraction(df: pd.DataFrame, pat: object, rad: str, time: int, stype: str, i: int,
                       param_path: str = 'data/param_files/Param_64bin_all_radiomics.json',
                       fv_len: int = 102, extractor=None) -> pd.DataFrame:
    """Extract the radiomics feature vector of one patient and store it as row ``i`` of ``df``.

    Args:
        df (pd.DataFrame): frame collecting the feature vectors, or None. When None, a
            new frame is created whose columns are 'pat_num' followed by the feature
            names (with the 'original_' substring removed).
        pat (object): patient object; ``im_sitk``, ``seg_path`` and ``pat_num`` are used.
        rad (str): forwarded to ``pat.seg_path`` to select the segmentation.
        time (int): forwarded to ``pat.seg_path`` to select the segmentation.
        stype (str): forwarded to ``pat.seg_path`` to select the segmentation.
        i (int): index label of the row written in ``df``.
        param_path (str, optional): parameter file, relative to the repository root.
            Only used when ``extractor`` is None.
        fv_len (int, optional): number of trailing entries of the extraction result
            that form the feature vector (must be positive). Defaults to 102.
        extractor (obj, optional): ready-made pyradiomics extractor. Passing one avoids
            rebuilding it from ``param_path`` on every call. Defaults to None.

    Returns:
        pd.DataFrame: ``df`` (or the newly created frame) with row ``i`` set.
    """
    # Add a dimension to be able to use pyradiomics: image (a,b) turns into (a,b,1)
    im = sitk.JoinSeries(pat.im_sitk('SET'))
    if extractor is None:
        extractor = extractor_settings(param_path, show=False)
    # the first path returned by seg_path is used as the mask
    mask_path = str(repo_path / pat.seg_path(rad, time, stype)[0])
    result = extractor.execute(im, mask_path)

    # Keep only the last fv_len entries of the result; the entries before them are
    # presumably pyradiomics diagnostics rather than features -- TODO confirm.
    feature_names = list(result.keys())[-fv_len:]
    feature_values = list(result.values())[-fv_len:]

    # create df if it is not yet defined
    if df is None:
        column_names = ['pat_num'] + [name.replace('original_', '') for name in feature_names]
        df = pd.DataFrame(columns=column_names)

    # the row is assigned positionally: patient number first, then the features
    df.loc[i] = [pat.pat_num] + feature_values
    return df

# IMP

# Feature extraction

In [13]:
# Read the budget spreadsheet; its first column becomes the index
budget_file = repo_path / 'data' / 'budget' / 'budget.xlsx'
budget = pd.read_excel(budget_file, index_col=0)
# Give the remaining column a fixed name
budget.columns = ['budget']
# Index labels whose budget value is greater than 1
excluded = budget.loc[budget['budget'].gt(1)].index
# Index labels whose budget value is at most 1
included = budget.loc[budget['budget'].le(1)].index

In [18]:
# Number of the patient to load
num = 0
# Build the info object first, then the patient `num` from it
info = path_label()
pat = patient(info, num)

In [27]:
# segmentation type, shared by every run below
stype = 'G'
# Create the output folder up front so that writing the CSV files cannot fail
# on a missing directory after the extraction has already been done.
features_dir = repo_path / 'data' / 'features'
features_dir.mkdir(parents=True, exist_ok=True)
for rad in ['L', 'V', 'M']:
    for time in [1, 2]:
        df = None  # df to store all feature vectors of this (rad, time) combination
        for i in tqdm(range(info.len)):  # go through all patients
            pat = patient(info, num=i)
            df = feature_extraction(df, pat, rad, time, stype, i)
            clear_output(wait=True)
        if df is None:
            # no patient was processed, so there is nothing to save
            continue
        df.to_csv(features_dir / f'features_{rad}_{time}_{stype}.csv')

100%|██████████| 33/33 [00:10<00:00,  3.28it/s]


# Predictions

In [2]:
# Number of the patient to load
num = 0
# Rebuild the info object and the patient `num` for this section
info = path_label()
pat = patient(info, num)