In [None]:
# Create some of the needed objects and import required packages
import WORC
import fastr
import glob
import os

# Note: set this to the name of your account on Ubuntu, such that the home folder can be correctly found
username = 'worc'

# Local home folder prefix; it is swapped for 'vfs://home' to obtain the VFS sources below
home_dir = ('/home/{}').format(username)

# Create a network with the name "Advanced", which will be used upon execution
network = WORC.WORC('Advanced')

# Make sure you do add the images, segmentations, label sources and a configuration as described in the tutorial.
# We will again use the example data, but you are again free to use your own files!

# Images: one entry per patient folder, keyed by the name of that folder
image_sources = {}
for image_path in glob.glob(('/home/{}/Documents/Data/STWStrategyMMD/*/image.nii.gz').format(username)):
    vfs_image = image_path.replace(home_dir, 'vfs://home')
    image_sources[os.path.basename(os.path.dirname(vfs_image))] = vfs_image
network.images_train.append(image_sources)

# Segmentations: same layout as the images, but pointing at the mask files
segmentation_sources = {}
for mask_path in glob.glob(('/home/{}/Documents/Data/STWStrategyMMD/*/mask.nii.gz').format(username)):
    vfs_mask = mask_path.replace(home_dir, 'vfs://home')
    segmentation_sources[os.path.basename(os.path.dirname(vfs_mask))] = vfs_mask
network.segmentations_train.append(segmentation_sources)

# Labels: a single patient info file
network.labels_train.append('vfs://home/Documents/WORCTutorial/Data/StrategyMMD/pinfo.txt')

# Start from the default configuration and override a few fields.
# All values are given as strings, as in the default config.
config = network.defaultconfig()
config_overrides = (
    ('SampleProcessing', 'SMOTE', 'False'),
    ('CrossValidation', 'N_iterations', '5'),
    ('Genetics', 'label_names', 'imaginary_label_1'),
    ('HyperOptimization', 'test_size', '0.3'),
)
for section, option, value in config_overrides:
    config[section][option] = value

network.fastr_plugin = 'LinearExecution'
network.configs.append(config)


# Advanced WORCflows

After you have gone through the WORCTutorial, you can use this notebook to gain some information on more advanced WORCflows. The following topics will be discussed:

1. Adding metadata and semantic features
2. Instead of using images and segmentations, use features. (WIP)
3. Using the XNAT plugin. (WIP)
4. Adding nodes to WORC: example makes use of the XNAT plugin. (WIP)
5. Using Elastix for registration when using multiple modalities. (WIP)
6. Using a separate training and test dataset. (WIP)



## 1. Adding metadata and semantic features
There are numerous types of sources supported by WORC. In the tutorial, we only added images and segmentations to the workflow. Here, we will discuss two other types of data you can add and show how to add them.

Metadata on the imaging protocols and the patients can be added through DICOM files when using PREDICT as the feature extraction toolbox. When adding metadata, multiple DICOM tags will be extracted and used as features. These are called "patient features" in PREDICT. By default, the following are extracted when turned on in PREDICT (see also PREDICT):

- [0x10, 0x1010]: Patient Age
- [0x10, 0x40]: Patient Sex

Other features can of course be added by editing the PREDICT function.

As we already saved the DICOMs in the tutorial, we can simply add these to the network in a similar manner to the images and segmentations:


In [None]:
# Locate the metadata DICOM of every patient folder and convert the paths to VFS sources
metadata_sources = glob.glob(('/home/{}/Documents/Data/STWStrategyMMD/*/metadata.dcm').format(username))
metadata_sources = [i.replace(('/home/{}').format(username), 'vfs://home') for i in metadata_sources]

# Key each source by the name of the patient folder it was found in
metadata_sources = {os.path.basename(os.path.dirname(i)): i for i in metadata_sources}

# Use the function-call form of print so this cell runs on both Python 2 and Python 3
print(metadata_sources)

# Add to network
network.metadata_train.append(metadata_sources)

Semantic features in radiomics are defined as features which are not extracted from an image, but determined in advance. These can for example be a score by a radiologist or the age and gender of a patient. The metadata features are in theory also semantic features. We separate them in WORC as the semantic features are extracted from a CSV file.

Let's take a look at the semantic feature file we provided with this repository. We are using exactly the same function that WORC/PREDICT is using. Change the paths according to where you placed the repository.

In [None]:
import csv

# Change this path to where the semantics csv file is located
semantics_file = ('/home/{}/Documents/WORCTutorial/Data/StrategyMMD/semantics.csv').format(username)

# Load the semantic file into a dictionary that maps every column header
# to the list of values found in that column.
semantics = dict()

# Open in text mode: on Python 3 the csv module requires strings, not bytes
with open(semantics_file, 'r') as f:
    reader = csv.reader(f)
    for num, row in enumerate(reader):
        print(row)
        if num == 0:
            # The first row is the header; its first column must be the patient ID
            header = row
            if not header or header[0] != 'Patient':
                raise ValueError('First column of the semantics file should be patient ID!')

            # Remember the column order and start an empty value list per column
            keys = list(header)
            for key in keys:
                semantics[key] = list()
        else:
            for column, value in enumerate(row):
                if column > 0:
                    # Semantic feature values are stored as floats
                    semantics[keys[column]].append(float(value))
                else:
                    # The first column holds the patient ID and is kept as text
                    semantics[keys[column]].append(value)

print(semantics)

From the print, you can see the structure of the CSV file WORC expects. The first column should always have the header 'Patient' and should contain the patient labels used for matching against the images. The headers of the other columns are used as the names of the semantic features. The values in those columns should correspond to the patients.

**Note:** Currently, only numerical semantic features are supported.

We can again add it simply to the network as follows:

In [None]:
# Convert the local path of the semantics file to a VFS source and add it to the network
home_prefix = ('/home/{}').format(username)
semantics_source = semantics_file.replace(home_prefix, 'vfs://home')
network.semantics_train.append(semantics_source)

## WIP

### XNAT Plugin

Navigate to the CT session of patient Interobs005 on the XNAT of the Multidelination project, which will lead you [here](https://xnat.bmia.nl/app/action/DisplayItemAction/search_element/xnat%3ActSessionData/search_field/xnat%3ActSessionData.ID/search_value/BMIAXNAT_E32058/popup/false/project/stwstrategymmd). Instead of having to download the images from XNAT ourselves, we can directly refer to them in WORC/fastr by using the XNAT plugin:


xnat://xnat.example.com/search?projects=sandbox&subjects=subject[0-9][0-9][0-9]&experiments=*_BRAIN&scans=T1&resources=DICOM

source_image_patient05 = 'xnat://xnat.bmia.nl/data/archive/projects/stwstrategymmd/subjects/subject001/experiments/experiment001/scans/T1/resources/DICOM'