In [None]:
%load_ext autoreload
%autoreload 2
#%matplotlib widget
#%matplotlib ipympl

#%reload_ext tensorboard
#%matplotlib qt

In [None]:
import os
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import SimpleITK as sitk
from tqdm.auto import tqdm
import pickle, subprocess
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
import torch
import sklearn
import csv
import gc

from radiomics import featureextractor
import radiomics

import glob

from hnc_project import data_prep as dp
from hnc_project import myshow
from hnc_project import graph_making as gm
from hnc_project.pytorch import dataset_class as dc
from hnc_project.pytorch.run_model_torch import RunModel
#%matplotlib notebook
%matplotlib widget
plt.ion()
#import initial_ml as iml

In [None]:
# Input/output locations, all relative to the notebook's working directory.
# NOTE(review): the first four entries live under LIDC-IDRI while the rest live
# under HNSCC — confirm this mix of dataset roots is intentional.
data_directory = '../../data/LIDC-IDRI'
nii_directory = '../../data/LIDC-IDRI/Nii'
resample_directory = '../../data/LIDC-IDRI/Nii_resample_111'
graph_directory = '../../data/LIDC-IDRI/graph_staging'
edge_directory = '../../data/HNSCC/edge_staging'
patch_directory = '../../data/HNSCC/HNSCC_Nii_222_50_50_60_Crop_v2'
location_pickle = '../../data/HNSCC/HNSCC_Nii_222_50_50_60_Crop_v2/locations.pkl'
plot_directory = '../../data/HNSCC/plots'
radiomics_directory = '../../data/HNSCC/radiomics_wavelets'

# pathlib views of the strings above.
data_path = Path(data_directory)
nii_path = Path(nii_directory)
resample_path = Path(resample_directory)
graph_path = Path(graph_directory)
edge_path = Path(edge_directory)
patch_path = Path(patch_directory)
location_pickle_path = Path(location_pickle)
plot_path = Path(plot_directory)
radiomics_path = Path(radiomics_directory)

# Create every working directory up front (data_path and nii_path are not created).
# NOTE(review): patch_path is read from by later cells; creating it when missing
# can mask an absent dataset — confirm this is wanted.
for working_dir in (resample_path, patch_path, plot_path, graph_path, edge_path, radiomics_path):
    working_dir.mkdir(parents=True, exist_ok=True)


In [None]:
# Re-express every patient's tumour locations relative to a reference tumour:
# the primary ('GTVp') when present, otherwise the tumour with the largest
# third coordinate. A patient with a single tumour gets one 'GTVp' entry at the origin.
#
# Only directories are kept: on Python versions before 3.11 the trailing '/' of
# a pathlib glob pattern is ignored, so '*/' also yields plain files (e.g. the
# locations.pkl stored in this folder) and the lookup below raises KeyError.
patient_patch_paths = [entry for entry in patch_path.iterdir() if entry.is_dir()]
# NOTE(review): unpickling executes arbitrary code — only load trusted files.
tumor_locations = pd.read_pickle(location_pickle_path)
centered_locations = {}
for pat in patient_patch_paths:
    pat_str = pat.name
    pat_locations = tumor_locations[pat_str]
    centered_locations[pat_str] = {}
    if len(pat_locations) == 1:
        # The only tumour is stored as the primary at the origin, whatever its name.
        centered_locations[pat_str]['GTVp'] = np.array([0., 0., 0.])
        continue
    if 'GTVp' in pat_locations:
        translation_factor = pat_locations['GTVp']
    else:
        array_locs = np.array(list(pat_locations.values()))
        # Row with the largest third coordinate. Comparing the whole array with
        # that maximum (as before) could select a row whose first or second
        # coordinate merely happens to equal it.
        origin_idx = np.argmax(array_locs[:, 2])
        translation_factor = array_locs[origin_idx]
    for tumor, location in pat_locations.items():
        centered_locations[pat_str][tumor.replace('.nii.gz', '')] = location - translation_factor

# The with-block closes the file on exit; no explicit close() is needed.
with open(edge_path.joinpath('centered_locations_010424.pkl'), 'wb') as f:
    pickle.dump(centered_locations, f)

In [None]:
# Convert each centred Cartesian offset (x, y, z) into [radius, theta, phi]:
#   radius = sqrt(x^2 + y^2 + z^2)
#   theta  = atan2(sqrt(x^2 + y^2), z)
#   phi    = atan2(y, x)
# atan2 (not atan) is used so the signs of both arguments pick the quadrant.

spherical_locations = {}
for patient_id, tumor_offsets in centered_locations.items():
    patient_spherical = spherical_locations[patient_id] = {}
    for gtv_name, offset in tumor_offsets.items():
        x, y, z = offset[0], offset[1], offset[2]
        if not (x or y or z):
            # The reference tumour sits exactly at the origin; angles are set to zero.
            patient_spherical[gtv_name] = np.array([0., 0., 0.])
            continue
        planar_sq = x**2 + y**2
        radius = np.sqrt(planar_sq + z**2)
        theta = np.arctan2(np.sqrt(planar_sq), z)
        phi = np.arctan2(y, x)
        # Only azimuths below -pi/2 are shifted by a full turn, so phi ends up in
        # [-pi/2, 3*pi/2]. NOTE(review): confirm this partial wrap is intended.
        if phi < -np.pi/2:
            phi = phi + 2*np.pi
        patient_spherical[gtv_name] = np.array([radius, theta, phi])

**TODO** (look into once training starts):
Turn each CT into an object containing vertex objects that store position/volume information. Within this object, loop through all nodes and find possible nearby connections.

In [None]:
# Build the edge list of every patient's tumour graph and save a 3-D plot of
# the nodes and edges to <plot_path>/<patient>/connections_3D.pdf.
dict_edges = {}
for pat in spherical_locations:
    patient_plot_path = plot_path.joinpath(pat)
    patient_plot_path.mkdir(parents=True, exist_ok=True)
    print(f"Processing patient: {pat}")
    pat_locs = spherical_locations[pat]

    # One tumour: nothing to connect.
    if len(pat_locs) == 1:
        print("    one node, empty edge array")
        dict_edges[pat] = []
        continue

    df_pat, primary = gm.make_loc_df(pat_locs)

    # Two tumours: the only possible edge joins the primary to the other node.
    if len(pat_locs) == 2 and len(df_pat) == 1:
        print("    two nodes, single edge entry")
        print(f"edge: [{primary.index[0]}, {df_pat.index[0]}]")
        dict_edges[pat] = [[primary.index[0], df_pat.index[0]]]
        continue

    # distance_threshold=0 with n_clusters=None makes the clustering build the
    # complete merge tree, which the gm helpers then turn into connections.
    clustering = AgglomerativeClustering(distance_threshold=0, n_clusters=None).fit(df_pat[['x', 'y', 'z']])
    node_tree = gm.create_node_tree(clustering.children_, df_pat)
    connections = gm.create_connection_tree(node_tree)
    print(connections)
    edges = gm.make_edges(connections, df_pat, primary.index[0])
    dict_edges[pat] = edges
    print(edges)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')

    # Collect names and coordinates: rows of df_pat first, then the primary.
    node_names = []
    node_xyz = []
    for frame in (df_pat, primary):
        for gtv in frame.index:
            row = frame.loc[gtv]
            node_xyz.append([row['x'], row['y'], row['z']])
            node_names.append(gtv)
    node_xyz = np.array(node_xyz)

    ax.scatter(node_xyz[:, 0], node_xyz[:, 1], node_xyz[:, 2])
    for name, (px, py, pz) in zip(node_names, node_xyz):
        ax.text(px, py, pz, name)

    # Draw every edge as a red arrow from its first node to its second node.
    for edge in edges:
        start = node_xyz[node_names.index(edge[0])]
        end = node_xyz[node_names.index(edge[1])]
        dx, dy, dz = end - start
        ax.quiver(start[0], start[1], start[2], dx, dy, dz, color='r')

    plt.savefig(patient_plot_path.joinpath('connections_3D.pdf'))
    # Close the figure and collect garbage so memory does not grow per patient.
    plt.close(fig)
    gc.collect()


In [None]:
# Persist the per-patient edge lists; the with-block closes the file on exit,
# so no explicit close() is needed.
with open(edge_path.joinpath('edges_122823.pkl'), 'wb') as f:
    pickle.dump(dict_edges, f)

### Basic radiomic feature extraction

In [None]:
# Set PyRadiomics logging to level 20, create a feature extractor, and
# enable the 'Wavelet' image type on it.
radiomics.setVerbosity(20)
extractor = featureextractor.RadiomicsFeatureExtractor()
extractor.enableImageTypeByName('Wavelet')

# Echo the resulting configuration so the run is self-documenting.
for configured in (extractor.settings, extractor.enabledImagetypes, extractor.enabledFeatures):
    print(configured)

In [None]:
# Run the radiomics extractor on every tumour patch of every patient and write
# one pickle per patient: {tumour name: {feature name: value}}.
#
# Only directories are kept: on Python versions before 3.11 the trailing '/' of
# a pathlib glob pattern is ignored, so '*/' also yields plain files (e.g.
# locations.pkl) and would produce a spurious, empty features_locations.pkl.pkl.
patient_patch_paths = [entry for entry in patch_path.iterdir() if entry.is_dir()]
for pat in patient_patch_paths:
    pat_str = pat.name
    print(pat_str)

    features_to_keep = {}
    for p in pat.glob('image*.nii.gz'):
        # Tumour name = text after the last '_' of the file name (not of the
        # whole path, whose directories also contain underscores).
        p_name = p.name.split('_')[-1].replace('.nii.gz', '')
        print(f"    {p_name}")
        image = p.as_posix()
        # The mask sits next to the image with the 'image' prefix swapped for
        # 'Struct'; replace in the file name only so directory names are untouched.
        mask = p.with_name(p.name.replace('image', 'Struct', 1)).as_posix()
        features = extractor.execute(image, mask)
        # NOTE(review): only keys starting with 'original' are kept, so any
        # features from the Wavelet image type enabled on the extractor are
        # dropped although results go to 'radiomics_wavelets' — confirm intent.
        features_to_keep[p_name] = {key: value for key, value in features.items() if key.startswith('original')}

    # The with-block closes the file on exit; no explicit close() is needed.
    with open(radiomics_path.joinpath(f"features_{pat_str}.pkl"), 'wb') as f:
        pickle.dump(features_to_keep, f)
      
 