# Structure Relations


## Setup

### Imports

In [7]:
# Type imports
from typing import Any, Dict, Tuple

# Standard Libraries
from pathlib import Path
from math import sqrt, pi
from statistics import mean
from itertools import zip_longest

# Shared Packages
import numpy as np
import pandas as pd
import xlwings as xw

import pydicom
from shapely.geometry import Polygon

import RS_DICOM_Utilities


### Global Settings

In [8]:
# Notebook-wide precision setting; no cell visible here references it.
# NOTE(review): presumably the number of decimal places used when rounding
# computed values -- confirm before relying on it.
PRECISION = 3


### Utility Functions

In [9]:
# Load Table
def load_table(file_name: Path, sheet_name: str, options: Dict[str, Any])->Any:
    '''Read a block of cells from an Excel worksheet.

    The block is anchored at cell A1 and grown with the xlwings ``expand()``
    method.  The cell values are converted according to the supplied
    converter options.

    Args:
        file_name (Path): Location of the workbook to open.
        sheet_name (str): Worksheet within the workbook to read from.
        options (Dict[str, Any]): Keyword options forwarded to the xlwings
            range ``options()`` call (for example ``{'convert': dict}``).

    Returns:
        Any: The worksheet data, in whatever form the converter options
            produce.
    '''
    # The Excel application is managed by the with block, so it does not
    # need to be closed explicitly here.
    with xw.App() as excel:
        workbook = excel.books.open(file_name)
        worksheet = workbook.sheets[sheet_name]
        table_range = worksheet.range('A1').expand()
        converted = table_range.options(**options)
        table = converted.value
    return table

### File Paths

In [10]:
# Every test file is located relative to the current working directory.
base_path = Path.cwd()
data_path = base_path.joinpath('Test Data')
dicom_path = data_path.joinpath('StructureVolumeTests', 'GJS_Struct_Tests')

In [11]:
# Workbook holding the expected values that the tests load with load_table.
struct_nm_file = data_path.joinpath('StructureNames Expected Info.xlsx')

In [12]:
# A file that is not a DICOM structure set; used by the invalid-file test.
non_dicom_file = data_path.joinpath('ClinicalLung.dvh')

In [13]:

# DICOM structure-set file read by the tests below.
structure_names_file = dicom_path.joinpath('RS.GJS_Struct_Tests.StructureNames.dcm')

## Tests

### Test invalid file

In [8]:
# Negative test: pass a file that is not a DICOM structure set (a .dvh file).
# The recorded output for this call is an empty dict.
RS_DICOM_Utilities.get_structure_file_info(non_dicom_file)

{}

### 1. Select and load a DICOM structure file  (Use Structures from DICOM)


In [9]:
# Read the header information for the structure-set file and display it.
# The returned dict includes a 'File' entry, which a later cell passes to
# pydicom.dcmread.
structure_set_info = RS_DICOM_Utilities.get_structure_file_info(structure_names_file)
structure_set_info

{'PatientName': 'StructureVolumes^Test',
 'PatientLastName': 'StructureVolumes',
 'PatientID': 'GJS_Struct_Tests',
 'StructureSet': 'StructureNames',
 'StudyID': 'Phantom1',
 'SeriesNumber': '7',
 'File': WindowsPath("d:/OneDrive - Queen's University/Python/Projects/StructureRelations/Test Data/StructureVolumeTests/GJS_Struct_Tests/RS.GJS_Struct_Tests.StructureNames.dcm")}

In [10]:
# Load the expected header values from the 'structure_set_info' sheet as a dict.
# NOTE(review): the recorded outputs show SeriesNumber as 7.0 here but as '7'
# in structure_set_info, and the expected dict has no 'PatientName' or 'File'
# keys, so a direct equality check between the two would fail without
# normalization.
structure_set_info_expected = load_table(struct_nm_file, 'structure_set_info',
                                         {'convert': dict})
structure_set_info_expected

{'PatientID': 'GJS_Struct_Tests',
 'PatientLastName': 'StructureVolumes',
 'StructureSet': 'StructureNames',
 'StudyID': 'Phantom1',
 'SeriesNumber': 7.0}

In [11]:
# Read the DICOM dataset from the path stored under the 'File' key.
dataset = pydicom.dcmread(structure_set_info['File'])

### 2. Collect information on all structures
   - Structure Id
   - Code Meaning 
   - Volume type
   - Structure class (Target, OAR, External, Utility)


In [12]:
# Get the structure IDs from the dataset and summarize the result.
# The recorded output is a Series named 'StructureID' with 95 entries.
# NOTE(review): the index (26 to 98) is presumably the ROI number -- confirm
# in RS_DICOM_Utilities.
roi_id = RS_DICOM_Utilities.get_names_nums(dataset)
roi_id.info()

<class 'pandas.core.series.Series'>
Index: 95 entries, 26 to 98
Series name: StructureID
Non-Null Count  Dtype 
--------------  ----- 
95 non-null     object
dtypes: object(1)
memory usage: 1.5+ KB


In [13]:
# Get the generation algorithm for each structure and summarize the values.
# The recorded output shows 2 distinct values, with 'MANUAL' the most common.
roi_gen = RS_DICOM_Utilities.get_gen_alg(dataset)
roi_gen.describe()

count         95
unique         2
top       MANUAL
freq          94
Name: GenerationAlgorithm, dtype: object

In [14]:
# Build the table of labels for every structure and summarize each column.
# In the recorded output 'StructureID' and 'GenerationAlgorithm' have 95
# values while the other columns have 93, i.e. two structures lack label data.
roi_labels = RS_DICOM_Utilities.get_roi_labels(dataset)
roi_labels.describe()

Unnamed: 0,StructureID,StructureName,DICOM_Type,Code,CodeScheme,CodeMeaning,GenerationAlgorithm
count,95,93,93,93,93,93,95
unique,95,93,10,48,4,48,2
top,Mandible,Mandible,ORGAN,PTV_Low,99VMS_STRUCTCODE,Planning Target Volume Low Risk,MANUAL
freq,1,1,24,10,64,10,94


In [15]:
# Show the column names of the label table read from the DICOM file.
roi_labels.columns

Index(['StructureID', 'StructureName', 'DICOM_Type', 'Code', 'CodeScheme',
       'CodeMeaning', 'GenerationAlgorithm'],
      dtype='object')

In [16]:
# Load the expected structure labels from the 'StructureNames' sheet as a
# DataFrame and summarize each column.
# NOTE(review): the recorded column listings show 'VolumeName' here where
# roi_labels has 'StructureName', and no 'GenerationAlgorithm' column.
structure_names_expected = load_table(struct_nm_file, 'StructureNames',
                                      {'convert': pd.DataFrame})
structure_names_expected.describe()

Unnamed: 0,StructureID,VolumeName,DICOM_Type,Code,CodeScheme,CodeMeaning
count,95,92,86,93,93,93
unique,95,92,10,48,4,48
top,BODY,BODY,ORGAN,PTV_Low,99VMS_STRUCTCODE,Planning Target Volume Low Risk
freq,1,1,24,10,64,10


In [17]:
# Show the column names of the expected label table.
structure_names_expected.columns

Index(['StructureID', 'VolumeName', 'DICOM_Type', 'Code', 'CodeScheme',
       'CodeMeaning'],
      dtype='object')

In [18]:
# Columns that appear under the same name in both roi_labels and
# structure_names_expected (per the recorded column listings).
# NOTE(review): not used in any visible cell; presumably intended for a
# comparison of the two tables that has not been written yet -- confirm.
compare_columns = ['StructureID', 'DICOM_Type', 'Code', 'CodeScheme']

### 3. Drop non-standard structures:
   - Matchplane, Baseline
   - Z, X structures
   - Dose structures -- no, keep these!
   - Avoid structures -- no, keep these!
   - `$` structures? (undecided)


In [19]:
# Rules for removing non-standard structures.  Both rules are applied to the
# lower-cased StructureID: 'startswith' lists ID prefixes and 'isin' lists
# complete IDs.
drop_conditions = dict(
    startswith=('x', 'z'),
    isin=['dpv', 'baseline', 'matchplane', 'field'],
)

In [20]:

# Flag structures whose lower-cased ID begins with one of the listed prefixes.
cnd_startswith = drop_conditions['startswith']
idx_startswith = roi_labels['StructureID'].str.lower().str.startswith(cnd_startswith)


In [21]:

# Flag structures whose lower-cased ID exactly matches one of the listed names.
cnd_isin = drop_conditions['isin']
idx_isin = roi_labels['StructureID'].str.lower().isin(cnd_isin)


In [22]:
# A structure is dropped when either condition matches.
drop_idx = (idx_isin | idx_startswith)

In [23]:
# Show the IDs of the structures selected for removal.
roi_labels.loc[drop_idx, 'StructureID']

13           DPV
67            X1
68          xCTV
59            Z1
60            Z2
61            Z3
64      Baseline
65    Matchplane
77         Field
81         X PRV
Name: StructureID, dtype: object

In [24]:
# Keep every row that was not flagged for removal (all columns retained).
keep_structures = roi_labels.loc[~drop_idx]


In [26]:
# Summarize the structures that remain after the drop.
# For manual inspection in Excel, run instead: xw.view(keep_structures)
keep_structures.info()

<class 'pandas.core.frame.DataFrame'>
Index: 85 entries, 26 to 98
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   StructureID          85 non-null     object
 1   StructureName        83 non-null     object
 2   DICOM_Type           83 non-null     object
 3   Code                 83 non-null     object
 4   CodeScheme           83 non-null     object
 5   CodeMeaning          83 non-null     object
 6   GenerationAlgorithm  85 non-null     object
dtypes: object(7)
memory usage: 5.3+ KB


# Done To Here

### 4. Interpolate structures with missing slices
- How to tell the difference between structures with missing slices and multi-region structures:
1. Distance between slices with contours
2. Multiple gaps
3. Intersection between the projection of the contour from the previous slice and the contour in the current slice


### 5. Collect structure geometry information
   - Volume
   - Resolution 
   - Single / Multi volume 
   - Shell


### 6. Identify relationships between all structures
   - Structures are nodes relationships are directional edges
   - Calculate relationship metrics 
   - Flag Logical relations?
     - Logical relations are identified by multiple paths between two nodes
     - The shorter path is a logical relationship 

In [14]:

# Read the DICOM dataset directly from structure_names_file.
# NOTE(review): an earlier cell already assigns dataset from
# structure_set_info['File'], whose recorded value ends in the same file name;
# this second assignment looks redundant in a top-to-bottom run -- confirm and
# consider keeping only one.
dataset = pydicom.dcmread(structure_names_file)

## Structure Set File Parameters Available

|Element      |Description                         |Data Type|
|-------------|------------------------------------|---------|
|(0010, 0020) |Patient ID                          |LO|
|(0010, 0010) |Patient's Name                      |PN|
|(0010, 0030) |Patient's Birth Date                |DA|
|(0010, 0040) |Patient's Sex                       |CS|
|-------------|------------------------------------|---------|
|(0008, 0060) |Modality                            |CS|
|(0020, 0010) |Study ID                            |SH|
|(0020, 0011) |Series Number                       |IS|
|(0008, 1030) |Study Description                   |LO|
|(0008, 103e) |Series Description                  |LO|
|(3006, 0002) |Structure Set Label                 |SH|
|(300e, 0002) |Approval Status                     |CS|
|-------------|------------------------------------|---------|
|(0008, 0020) |Study Date                          |DA|
|(0008, 0030) |Study Time                          |TM|
|(3006, 0008) |Structure Set Date                  |DA|
|(3006, 0009) |Structure Set Time                  |TM|
|(0008, 0012) |Instance Creation Date              |DA|
|(0008, 0013) |Instance Creation Time              |TM|
|-------------|------------------------------------|---------|
|(0008, 0070) |Manufacturer                        |LO|
|(0008, 1090) |Manufacturer's Model Name           |LO|
|(0018, 1000) |Device Serial Number                |LO|
|(0018, 1020) |Software Versions                   |LO|
|(0008, 0090) |Referring Physician's Name          |PN|
|(0008, 1070) |Operators' Name                     |PN|
|(0008, 1010) |Station Name                        |SH|
|(0008, 0050) |Accession Number                    |SH|
|-------------|------------------------------------|---------|
|(0008, 0005) |Specific Character Set              |CS|
|(0002, 0013) |Implementation Version Name         |SH|
|(0002, 0000) |File Meta Information Group Length  |UL|
|(0002, 0001) |File Meta Information Version       |OB|
|-------------|------------------------------------|---------|
|(0002, 0002) |Media Storage SOP Class UID         |UI|
|(0002, 0003) |Media Storage SOP Instance UID      |UI|
|(0002, 0010) |Transfer Syntax UID                 |UI|
|(0002, 0012) |Implementation Class UID            |UI|
|(0008, 0016) |SOP Class UID                       |UI|
|(0008, 0018) |SOP Instance UID                    |UI|
|(0020, 000d) |Study Instance UID                  |UI|
|(0020, 000e) |Series Instance UID                 |UI|



## (3006, 0020)  Structure Set ROI Sequence
|Element      |Description                         |Data Type|
|-------------|------------------------------------|---------|
|(3006, 0022)|ROI Number                          |IS|
|(3006, 0024)|Referenced Frame of Reference UID   |UI|
|(3006, 0026)|ROI Name                            |LO|
|(3006, 0036)|ROI Generation Algorithm            |CS|


## (3006, 0039)  ROI Contour Sequence

|Element      |Description                         |Data Type|
|-------------|------------------------------------|---------|
|(3006, 002a) |ROI Display Color                   |IS|
|(3006, 0084) |Referenced ROI Number               |IS|


## (3006, 0040)  Contour Sequence

|Element      |Description                         |Data Type|
|-------------|------------------------------------|---------|
|(3006, 0042) |Contour Geometric Type              |CS|
|(3006, 0046) |Number of Contour Points            |IS|
|(3006, 0050) |Contour Data                        |DS|


## (3006, 0080)  RT ROI Observations Sequence
|Element      |Description                         |Data Type|
|-------------|------------------------------------|---------|
|(3006, 0082) |Observation Number                  |IS|
|(3006, 0084) |Referenced ROI Number               |IS|
|(3006, 0085) |ROI Observation Label               |SH|
|(3006, 00a4) |RT ROI Interpreted Type             |CS|
|(3006, 00a6) |ROI Interpreter                     |PN|

### (3006, 00b0)  ROI Physical Properties Sequence
|Element      |Description                  |Data Type|
|-------------|-----------------------------|---------|
|(3006, 00b2) |ROI Physical Property        |CS|
|(3006, 00b4) |ROI Physical Property Value  |DS|

### (3006, 0086)  RT ROI Identification Code Sequence
|Element      |Description                 |Data Type|
|-------------|----------------------------|---------|
|(0008, 0100) |Code Value                  |SH|
|(0008, 0102) |Coding Scheme Designator    |SH|
|(0008, 0103) |Coding Scheme Version       |SH|
|(0008, 0104) |Code Meaning                |LO|
|(0008, 0105) |Mapping Resource            |CS|
|(0008, 0106) |Context Group Version       |DT|
|(0008, 010f) |Context Identifier          |CS|
|(0008, 0117) |Context UID                 |UI|
|(0008, 0118) |Mapping Resource UID        |UI|
|(0008, 0122) |Mapping Resource Name       |LO|


In [None]:
# Display the parsed dataset (this cell has no recorded execution).
# NOTE(review): showing the whole dataset is likely to produce very long
# output; consider displaying selected elements instead.
dataset