In [None]:
%load_ext autoreload
%autoreload 2
#%matplotlib widget
#%matplotlib ipympl

#%reload_ext tensorboard
#%matplotlib qt

In [None]:
import os
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import SimpleITK as sitk
from tqdm.notebook import tqdm
import pickle, subprocess
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
from scipy.ndimage import label as scipy_label
import torch
import sklearn
import csv
import gc
import pydicom
import networkx as nx
#from radiomics import featureextractor
#import radiomics

import glob
from platipy.imaging import ImageVisualiser
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct, read_dicom_image

from hnc_project import data_prep as dp
#from hnc_project import myshow
from hnc_project import graph_making as gm
from hnc_project.pytorch import dataset_class_prototype as dc
from hnc_project.pytorch.run_model_lightning import RunModel
#%matplotlib notebook
%matplotlib widget
plt.ion()
#import initial_ml as iml

In [None]:
# Raw location strings for every stage of the RADCURE pipeline.
data_directory = '../../data/RADCURE/'
nii_directory = '../../data/RADCURE/Nii'
resample_directory = '../../data/RADCURE/Nii_resample_111'
graph_directory = '../../data/RADCURE/graph_staging'
edge_directory = '../../data/RADCURE/edge_staging'
patch_directory = '../../data/RADCURE/Nii_111_80_80_80_Crop'
location_pickle = '../../data/RADCURE/Nii_111_80_80_80_Crop/locations.pkl'
plot_directory = '../../data/RADCURE/plots'
vis_dir = '../../data/RADCURE/RADCURE_vis_snapshots'

# Path objects used by the rest of the notebook.
vis_path = Path(vis_dir)
data_path = Path(data_directory)
nii_path = Path(nii_directory)
resample_path = Path(resample_directory)
patch_path = Path(patch_directory)
location_pickle_path = Path(location_pickle)
plot_path = Path(plot_directory)
graph_path = Path(graph_directory)
edge_path = Path(edge_directory)

# Create every output directory up front; the data root and the pickle file
# themselves are not created here.
for out_dir in (vis_path, nii_path, resample_path, patch_path, plot_path, graph_path, edge_path):
    out_dir.mkdir(exist_ok=True, parents=True)

# Series-level metadata table shipped with the dataset.
meta_df = pd.read_csv(data_path / 'metadata.csv')

## Match RTSTRUCT to CTs
This uses the metadata file to match each RTSTRUCT to its CT and drops patients without a corresponding CT or RTSTRUCT.

## Convert selected patients to NIfTI

In [None]:
# Build the list of metadata rows to keep: for every patient, one RTSTRUCT
# series and the CT series that RTSTRUCT references.
selected_rows = []
for pat, df_group in tqdm(meta_df.groupby("Subject ID")):

    # Select the structure set with the most ROIs (ties keep the first one seen).
    longest_len = None
    latest_file = None
    linked_ct_uid = None
    for idx, rtstruct_row in df_group[df_group.Modality == "RTSTRUCT"].iterrows():
        rtstruct_dir = data_path.joinpath(rtstruct_row["File Location"].replace('\\','/'))
        rtstruct_files = list(rtstruct_dir.glob("*"))
        if not rtstruct_files:
            # An empty series directory cannot be read; try the next RTSTRUCT.
            print(f'{pat}, RTStruct directory is empty: {rtstruct_dir}')
            continue
        # dcmread replaces the deprecated pydicom.read_file alias.
        rtstruct = pydicom.dcmread(rtstruct_files[0])
        try:
            rtstruct_len = len(rtstruct.StructureSetROISequence)
        except Exception:
            print(f'{pat}, RTStruct ROI Sequence is empty')
            rtstruct_len = 0
        if longest_len is None or rtstruct_len > longest_len:
            longest_len = rtstruct_len
            if longest_len != 0:
                latest_file = idx
                linked_ct_uid = rtstruct.ReferencedFrameOfReferenceSequence[0].RTReferencedStudySequence[0].RTReferencedSeriesSequence[0].SeriesInstanceUID

    # Skip patients for which no RTSTRUCT with at least one ROI was found.
    if latest_file is None:
        print(f"{pat} has no relevant RTStruct")
        continue

    # Find the CT series linked to the chosen RTSTRUCT. The patient is only
    # kept when both rows exist, so downstream cells never see an RTSTRUCT
    # without its CT.
    ct_matches = meta_df[meta_df["Series UID"] == linked_ct_uid]
    if ct_matches.empty:
        print(f"{pat} does not have linked CT or RTStruct")
        continue
    selected_rows.append(latest_file)
    selected_rows.append(ct_matches.index[0])
meta_df_clean = meta_df.loc[selected_rows]

In [None]:
# Substrings (lower case) that identify the structures of interest.
checks = [
          'gtv',
         ]
for patient, pat_df in tqdm(meta_df_clean.groupby("Subject ID")):

    # Both a CT row and an RTSTRUCT row are required for the conversion.
    ct_rows = pat_df[pat_df["Modality"] == "CT"]
    rtstruct_rows = pat_df[pat_df["Modality"] == "RTSTRUCT"]
    if ct_rows.empty or rtstruct_rows.empty:
        print(f"failed: {patient} (missing CT or RTSTRUCT row)")
        continue

    patient_nii_path = nii_path.joinpath(patient)
    patient_nii_path.mkdir(exist_ok=True, parents=True)

    # Convert the CT image
    ct_directory = data_path.joinpath(ct_rows.iloc[0]["File Location"].replace('\\','/'))
    ct_image = read_dicom_image(ct_directory)
    sitk.WriteImage(ct_image, str(patient_nii_path.joinpath("image.nii.gz")))

    # Convert the structures
    rtstruct_dir = data_path.joinpath(rtstruct_rows.iloc[0]["File Location"].replace('\\','/'))
    rtstruct_file = list(rtstruct_dir.glob("*"))[0]
    try:
        convert_rtstruct(ct_directory, rtstruct_file, output_dir=patient_nii_path)
    except Exception as err:
        # Keep going with the remaining patients, but report why this one failed.
        # (Exception rather than a bare except so the loop can still be interrupted.)
        print(f"failed: {patient} ({err})")
        continue

    # Report patients whose converted structures contain none of the names in `checks`.
    struct_names = [s.name.lower() for s in patient_nii_path.glob("Struct_*.nii.gz")]
    if not any(check in name for name in struct_names for check in checks):
        print(f"no {'/'.join(checks)} structure found: {patient}")

    # Optional snapshot of the CT with the selected contours (currently disabled).
    #vis = ImageVisualiser(ct_image)
    #
    #contours = {s.name.split(".")[0].replace("Struct_", ""): sitk.ReadImage(str(s)) for s in patient_nii_path.glob("Struct_*.nii.gz") if np.any([n in str(s).lower() for n in checks])}
    #vis.add_contour(contours)
    #try:
    #    fig = vis.show()
    #except:
    #    print(f"failed to visualize: {patient}")
    #output_file_path = vis_path.joinpath(f"{patient}_vis.png")
    #fig.savefig(output_file_path, dpi=fig.dpi)
    #plt.close()

## List of patients to remove

In [None]:
# Patient IDs whose NIfTI directories are deleted by the cleanup loop further
# down, which reports them as missing GTVs.
patients_to_remove = [
'RADCURE-0012',
'RADCURE-0046',
'RADCURE-0055',
'RADCURE-0082',
'RADCURE-0087',
'RADCURE-0430',
'RADCURE-0757',
'RADCURE-0776',
'RADCURE-0781',
'RADCURE-0821',
'RADCURE-0923',
'RADCURE-1011',
'RADCURE-1084',
'RADCURE-1206',
'RADCURE-1230',
'RADCURE-1246',
'RADCURE-1330',
'RADCURE-1364',
'RADCURE-1432',
'RADCURE-1463',
'RADCURE-1493',
'RADCURE-1532',
'RADCURE-1576',
'RADCURE-1582',
'RADCURE-1810',
'RADCURE-1814',
'RADCURE-1873',
'RADCURE-1983',
'RADCURE-1985',
'RADCURE-2006',
'RADCURE-2011',
'RADCURE-2069',
'RADCURE-2070',
'RADCURE-2071',
'RADCURE-2124',
'RADCURE-2214',
'RADCURE-2216',
'RADCURE-2258',
'RADCURE-2273',
'RADCURE-2275',
'RADCURE-2288',
'RADCURE-2295',
'RADCURE-2306',
'RADCURE-2372',
'RADCURE-2695',
'RADCURE-2789',
'RADCURE-2790',
'RADCURE-2809',
'RADCURE-2854',
'RADCURE-2860',
'RADCURE-2869',
'RADCURE-2995',
'RADCURE-3077',
'RADCURE-3418',
'RADCURE-3476',
'RADCURE-3528',
'RADCURE-3585',
'RADCURE-3636',
'RADCURE-3710',
'RADCURE-3733',
'RADCURE-3747',
'RADCURE-3834',
'RADCURE-3849',
'RADCURE-3916',
'RADCURE-3926',
'RADCURE-3933',
'RADCURE-4090',
'RADCURE-4117',
'RADCURE-4130',
]

## Remove extraneous masks and patients

In [None]:
def rmdir(directory):
    """Recursively delete ``directory`` and everything inside it.

    Parameters
    ----------
    directory : str or Path
        Directory to remove.

    Notes
    -----
    Symbolic links found inside the tree are unlinked, never followed, so the
    contents of a link's target are left untouched.
    """
    directory = Path(directory)
    for item in directory.iterdir():
        # is_dir() follows symlinks, so a link to a directory must be excluded
        # explicitly; otherwise the target's contents would be deleted and the
        # final rmdir() on the link itself would fail.
        if item.is_dir() and not item.is_symlink():
            rmdir(item)
        else:
            item.unlink()
    directory.rmdir()

In [None]:
# Delete the directories of excluded patients and, for everyone else, drop
# every converted structure that is not a GTV.
for pat in tqdm(list(nii_path.glob('*'))):
    pat_str = pat.name
    print(pat_str)
    if pat_str in patients_to_remove:
        print(f'    {pat} is missing GTVs, removing corresponding directory')
        rmdir(pat)
        continue
    print(f'    GTV structures present:')
    # Snapshot the listing once so that unlinking files cannot disturb the iteration.
    for m in list(pat.glob('*.nii.gz')):
        # Decide on the file name only, so directory names cannot influence the match.
        if 'image' in m.name:
            continue
        elif 'gtv' not in m.name.lower():
            m.unlink()
        else:
            print(f'        {m.name.replace(".nii.gz","")}')
                

## Resampling code

In [None]:
# Output voxel spacing applied to every resampled volume (isotropic 1 x 1 x 1).
resampling = [1,1,1]

# Shared resampling filter; the output direction is fixed to the identity matrix.
# Origin, size and interpolator are set per patient before each use.
resampler = sitk.ResampleImageFilter()
resampler.SetOutputSpacing(resampling)
resampler.SetOutputDirection([1, 0, 0, 0, 1, 0, 0, 0, 1])

In [None]:
def get_bouding_boxes(ct, pt):
    """
    Return the physical-space box shared by two images.

    Both arguments only need ``GetOrigin()``, ``GetSize()`` and ``GetSpacing()``.
    The result is a length-6 array: the element-wise maximum of the two origins
    followed by the element-wise minimum of the two far corners
    (origin + size * spacing). This is only meaningful when both images have
    the same direction.
    """
    near_corners = []
    far_corners = []
    for img in (ct, pt):
        origin = np.array(img.GetOrigin())
        extent = np.array(img.GetSize()) * np.array(img.GetSpacing())
        near_corners.append(origin)
        far_corners.append(origin + extent)

    overlap_start = np.maximum(near_corners[0], near_corners[1])
    overlap_end = np.minimum(far_corners[0], far_corners[1])
    return np.concatenate([overlap_start, overlap_end], axis=0)

In [None]:
def resample_one_patient(p, save=False):
    """Resample one patient's masks onto the shared grid and measure them.

    The module-level ``resampler`` is reconfigured (origin, size, interpolator)
    for this patient's CT bounding box. Every ``*.nii.gz`` file in ``p`` whose
    path does not contain ``'image'`` is treated as a mask and resampled with
    nearest-neighbour interpolation.

    Parameters
    ----------
    p : Path
        Patient directory containing ``image.nii.gz`` and the mask files.
    save : bool, optional
        When True, the CT (BSpline interpolation) and every resampled mask are
        written to ``resample_path/<patient>``. The default (False) writes
        nothing and skips the CT resampling, whose result would be unused.

    Returns
    -------
    list of numpy.ndarray
        One length-3 array per mask: ``max - min`` of the non-zero voxel
        indices along each array axis (z, y, x order of the numpy view).
        A mask with no non-zero voxel yields ``[0, 0, 0]``. An empty list is
        returned when the CT cannot be read.
    """
    pat_str = p.name
    patient_resample_path = resample_path.joinpath(pat_str)
    patient_resample_path.mkdir(exist_ok=True, parents=True)
    try:
        ct = sitk.ReadImage(p.joinpath('image.nii.gz').as_posix())
    except Exception:
        print(f"    unable to read image file for {pat_str}")
        # Always return a list so callers can extend() with the result.
        return []

    # The bounding box of the CT against itself is simply the CT extent.
    bb = get_bouding_boxes(ct, ct)
    size = np.round((bb[3:] - bb[:3]) / resampling).astype(int)
    resampler.SetOutputOrigin(bb[:3])
    resampler.SetSize([int(k) for k in size])  # SimpleITK needs plain Python ints

    if save:
        resampler.SetInterpolator(sitk.sitkBSpline)
        ct = resampler.Execute(ct)
        sitk.WriteImage(ct, patient_resample_path.joinpath('image.nii.gz').as_posix())

    # Masks are label images: nearest neighbour keeps the label values intact.
    resampler.SetInterpolator(sitk.sitkNearestNeighbor)

    mask_sizes = []
    for m in p.glob('*.nii.gz'):
        if 'image' in str(m): continue
        label = sitk.ReadImage(m.as_posix())
        label = resampler.Execute(label)
        if save:
            sitk.WriteImage(label, patient_resample_path.joinpath(m.name).as_posix())

        label_array = sitk.GetArrayViewFromImage(label)
        label_locations = np.where(label_array > 0)
        if label_locations[0].size == 0:
            # np.max on an empty selection would raise; record a zero extent
            # so the result still has one entry per mask.
            print(f"    empty mask after resampling: {m.name} ({pat_str})")
            mask_sizes.append(np.zeros(3, dtype=np.int64))
            continue
        mask_sizes.append(np.max(label_locations, axis=1) - np.min(label_locations, axis=1))
    return mask_sizes

In [None]:
# Collect the per-mask extents of every patient.
# The list is seeded with a [0, 0, 0] placeholder row, so real entries start at index 1.
tumor_sizes = [[0,0,0]]
for pat in tqdm(list(nii_path.glob('*'))):
    # Stray files in the NIfTI root are not patients.
    if not pat.is_dir():
        continue
    t_size = resample_one_patient(pat)
    # The helper yields nothing for a patient whose CT cannot be read.
    if t_size:
        tumor_sizes.extend(t_size)
    

In [None]:
# Persist the collected sizes; the context manager closes the file on exit.
with open(data_path / 'tumor_sizes.pkl', 'wb') as f:
    pickle.dump(tumor_sizes, f)

In [None]:
# Number of collected size rows (this count includes the [0, 0, 0] row the list was seeded with).
len(tumor_sizes)

In [None]:
# Reload the pickled sizes as a 2-D array (one row per entry).
tumor_sizes = np.array(pd.read_pickle(data_path / 'tumor_sizes.pkl'))
# Optional: drop the first row (disabled).
#tumor_sizes = np.delete(tumor_sizes, (0), axis=0)

In [None]:
# Map "<patient>_<mask>" to its size row.
# Masks are enumerated in the same order, and with the same 'image' filter,
# as the loop that produced tumor_sizes, so the rows line up by position.
tumor_sizes_dict = {}
mask_keys = []
for pat in tqdm(list(nii_path.glob('*'))):
    pat_str = pat.name
    for m in pat.glob('*.nii.gz'):
        if 'image' in str(m):
            continue
        # str.strip() removes a *set of characters*, not a prefix/suffix
        # (it turned 'Struct_GTVn.nii.gz' into 'GTV'), so cut the fixed parts explicitly.
        m_str = m.name.replace('.nii.gz', '').replace('Struct_', '')
        mask_keys.append(f"{pat_str}_{m_str}")

# tumor_sizes may still start with the [0, 0, 0] placeholder row it was seeded
# with; skip it when present so each mask gets its own row, not its predecessor's.
offset = len(tumor_sizes) - len(mask_keys)
if offset not in (0, 1):
    raise ValueError(
        f"tumor_sizes has {len(tumor_sizes)} rows but {len(mask_keys)} masks were found; "
        "the size list and the mask files are out of sync"
    )
for key, size in zip(mask_keys, tumor_sizes[offset:]):
    tumor_sizes_dict[key] = size

In [None]:
# Tabulate the sizes: one row per "<patient>_<mask>" key, one column per array axis.
tumor_sizes_df = pd.DataFrame(
    list(tumor_sizes_dict.values()),
    index=list(tumor_sizes_dict.keys()),
    columns=['z', 'y', 'x'],
)
print(tumor_sizes_df)

In [None]:
# Number of masks with a recorded size.
len(tumor_sizes_df)

In [None]:
# Among rows whose key contains 'GTVp', show which key has the largest value in each column.
tumor_sizes_df[['GTVp' in pat for pat in tumor_sizes_df.index]].idxmax()

In [None]:
# Inspect the rows whose key contains '3746'.
tumor_sizes_df[['3746' in pat for pat in tumor_sizes_df.index]] 

## 4. Cropping

In [None]:
def find_centroid(mask, p):
    """Locate the centroid of label 1 in ``mask``.

    Parameters
    ----------
    mask : SimpleITK.Image
        Label image; the centroid of the region with label value 1 is used.
    p : str
        Patient identifier, only used in the error message.

    Returns
    -------
    tuple of numpy.ndarray
        ``(centroid_index, centroid_physical)``, both float64 arrays: the
        centroid as a voxel index of ``mask`` and as a physical point.

    Raises
    ------
    RuntimeError
        If the centroid of label 1 cannot be computed (for example when the
        mask contains no voxel with that label).
    """
    stats = sitk.LabelShapeStatisticsImageFilter()
    stats.Execute(mask)
    try:
        centroid_coords = stats.GetCentroid(1)
    except Exception as err:
        # Fail here with the patient named, instead of printing and then
        # hitting an UnboundLocalError on the next line.
        raise RuntimeError(
            f'Something is wrong with centroid calculation for patient: {p}'
        ) from err
    centroid_idx = mask.TransformPhysicalPointToIndex(centroid_coords)

    return np.asarray(centroid_idx, dtype=np.float64), np.asarray(centroid_coords, dtype=np.float64)

In [None]:
def tune_range(min_d, max_d, d, size_d, p):
    """Clip a 1-D crop range to ``[0, d]`` and report the padding that was cut off.

    ``size_d`` and ``p`` are accepted but not used.

    Returns ``(start, stop, pad_before, pad_after)``: the clipped range and the
    number of elements lost below 0 and above ``d`` (both as ints).
    """
    # Part of the range that falls below index 0.
    pad_before = abs(min_d) if min_d < 0 else 0
    start = 0 if min_d < 0 else min_d

    # Part of the range that extends past the axis length.
    pad_after = max_d - d if max_d > d else 0
    stop = d if max_d > d else max_d

    return start, stop, int(pad_before), int(pad_after)
# Patient IDs for a selective re-run (only referenced by the disabled filter below).
patients_to_retry = [
    'HNSCC-01-0225',
    'HNSCC-01-0259',
    'HNSCC-01-0272',
    'HNSCC-01-0434',
]

# Crop an 80 x 80 x 80 patch around the centroid of every mask and record the
# physical centroid position of each mask per patient.
patch_size = np.array([80,80,80])
physical_locations = {}
for p_dir in tqdm(list(resample_path.glob('*'))):
    # Stray files in the resample root are not patients.
    if not p_dir.is_dir():
        continue
    p_str = p_dir.name
    print(p_str)
    #if p_str not in patients_to_retry: continue
    patient_patch_path = patch_path.joinpath(p_str)
    patient_patch_path.mkdir(exist_ok=True, parents=True)
    physical_locations[p_str] = {}
    ct_image = None  # read once per patient, on the first mask that needs it
    for m in p_dir.glob('*.nii.gz'):
        print('-----------------')
        m_str = m.name
        if 'image' in m_str: continue
        if ct_image is None:
            ct_image = sitk.ReadImage(p_dir.joinpath('image.nii.gz').as_posix())
        mask = sitk.ReadImage(m.as_posix())
        print(m_str)

        # Requested patch bounds, centred on the mask centroid (voxel indices).
        tumour_center, center_location = find_centroid(mask, p_str)
        min_coords = np.floor(tumour_center - patch_size / 2).astype(np.int64)
        max_coords = np.floor(tumour_center + patch_size / 2).astype(np.int64)
        min_x, min_y, min_z = min_coords
        max_x, max_y, max_z = max_coords

        # Clip the bounds to the image and remember how much padding each side needs.
        (img_x, img_y, img_z) = ct_image.GetSize()
        min_x, max_x, min_pad_x, max_pad_x = tune_range(min_x, max_x, img_x, patch_size[0], p_str)
        min_y, max_y, min_pad_y, max_pad_y = tune_range(min_y, max_y, img_y, patch_size[1], p_str)
        min_z, max_z, min_pad_z, max_pad_z = tune_range(min_z, max_z, img_z, patch_size[2], p_str)

        min_pad = int(max([min_pad_x, min_pad_y, min_pad_z]))
        max_pad = int(max([max_pad_x, max_pad_y, max_pad_z]))
        lpad = list([min_pad_x, min_pad_y, min_pad_z])
        upad = list([max_pad_x, max_pad_y, max_pad_z])
        print(ct_image.GetSize())
        print(min_coords, max_coords)
        print(min_pad, max_pad)

        image = ct_image[min_x:max_x, min_y:max_y, min_z:max_z]
        # Clamp intensities to [-500, 500] and cast to float32.
        image = sitk.Clamp(image, sitk.sitkFloat32, -500, 500)
        mask = mask[min_x:max_x, min_y:max_y, min_z:max_z]
        print(image.GetSize())

        # Zero-pad whatever was clipped so every patch has the full patch size.
        image = sitk.ConstantPad(image, lpad, upad, 0.0)
        mask = sitk.ConstantPad(mask, lpad, upad, 0.0)
        print(image.GetSize())

        sitk.WriteImage(image, patient_patch_path.joinpath(f"image_{m_str.replace('Struct_','')}").as_posix())
        sitk.WriteImage(mask, patient_patch_path.joinpath(m_str).as_posix())
        physical_locations[p_str][m_str.replace('Struct_','').replace('.nii.gz','')] = center_location
        del image, mask

with open(patch_path.joinpath('locations.pkl'), 'wb') as f:
    pickle.dump(physical_locations, f)

## Graph Making

In [None]:
# Express every tumour position relative to a per-patient origin: the GTVp
# when present, otherwise the tumour with the largest z coordinate.
patient_patch_paths = patch_path.glob('*/')
tumor_locations = pd.read_pickle(location_pickle_path)
centered_locations = {}
no_gtvp = []
for pat in tqdm(patient_patch_paths):
    pat_str = pat.name
    # Skip the pickle files stored next to the patient directories.
    if 'locations' in pat_str: continue
    if 'no_gtvp' in pat_str: continue
    print(pat_str)

    pat_locations = tumor_locations.get(pat_str, {})
    if len(pat_locations) == 0:
        # No recorded tumour: there is nothing to centre.
        print('    no tumour locations recorded, skipping')
        continue
    centered_locations[pat_str] = {}

    if len(pat_locations) == 1:
        # A single tumour is its own origin.
        only_tumor = next(iter(pat_locations))
        centered_locations[pat_str][only_tumor] = np.array([0., 0., 0.])
        if only_tumor != 'GTVp':
            no_gtvp.append(pat_str)
        continue

    print(f"    {pat_locations.keys()}")
    if 'GTVp' in pat_locations:
        translation_factor = pat_locations['GTVp']
    else:
        no_gtvp.append(pat_str)
        print('    no GTVp, choosing highest GTVn in Z')
        array_locs = np.array(list(pat_locations.values()))
        # Look at the z column only; comparing the maximum z value against the
        # whole array could also match an x or y coordinate of another tumour.
        translation_factor = array_locs[np.argmax(array_locs[:, 2])]
    for tumor, location in pat_locations.items():
        centered_locations[pat_str][tumor.replace('.nii.gz','')] = location - translation_factor

with open(edge_path.joinpath('centered_locations_radcure_100324.pkl'), 'wb') as f:
    pickle.dump(centered_locations, f)

In [None]:
# Persist the IDs of patients without a GTVp; the context manager closes the file.
with open(patch_path / 'patients_with_no_gtvp.pkl', 'wb') as f:
    pickle.dump(no_gtvp, f)

In [None]:
# Convert every centred position (x, y, z) to spherical coordinates:
#   r     = sqrt(x^2 + y^2 + z^2)
#   theta = atan2(sqrt(x^2 + y^2), z)
#   phi   = atan2(y, x), shifted by 2*pi when it is below -pi/2
# A position exactly at the origin is stored as [0, 0, 0].

spherical_locations = {}
for pat, locs in centered_locations.items():
    pat_spherical = {}
    for gtv, position in locs.items():
        x, y, z = position[0], position[1], position[2]
        if x == 0 and y == 0 and z == 0:
            pat_spherical[gtv] = np.array([0.,0.,0.])
            continue
        radius = np.sqrt(x**2+y**2+z**2)
        theta = np.arctan2(np.sqrt(x**2+y**2), z)
        phi = np.arctan2(y, x)
        if phi < -np.pi/2:
            phi = phi + 2*np.pi
        pat_spherical[gtv] = np.array([radius, theta, phi])
    spherical_locations[pat] = pat_spherical

**TODO** (look into once training starts):
Turn each CT into an object that contains vertex objects storing position/volume information. Within this object, loop through all nodes and find possible nearby connections.

In [None]:
# Derive an edge list per patient from a hierarchical clustering of the
# non-primary tumour positions, and save a 3-D plot of the result.
dict_edges = {}
for pat in tqdm(spherical_locations.keys()):
    print(f"Processing patient: {pat}")
    pat_locs = spherical_locations[pat]

    # A single node has no edges.
    if len(pat_locs) == 1:
        print("    one node, empty edge array")
        dict_edges[pat] = []
        continue

    df_pat, primary = gm.make_loc_df(pat_locs)

    # Two nodes: connect the primary to the only other node directly.
    if len(pat_locs) == 2 and len(df_pat) == 1:
        print("    two nodes, single edge entry")
        print(f"edge: [{primary.index[0]}, {df_pat.index[0]}]")
        dict_edges[pat] = [[primary.index[0], df_pat.index[0]]]
        continue

    # Full merge tree (no distance cut-off) over the x/y/z columns.
    clust_model = AgglomerativeClustering(distance_threshold=0, n_clusters=None).fit(df_pat[['x', 'y', 'z']])
    node_tree = gm.create_node_tree(clust_model.children_, df_pat)
    connections = gm.create_connection_tree(node_tree)
    print(connections)
    edges = gm.make_edges(connections, df_pat, primary.index[0])
    dict_edges[pat] = edges
    print(edges)

    # Gather node positions: clustered nodes first, then the primary.
    node_names = [gtv for frame in (df_pat, primary) for gtv in frame.index]
    points = np.array([
        [frame.loc[gtv]['x'], frame.loc[gtv]['y'], frame.loc[gtv]['z']]
        for frame in (df_pat, primary)
        for gtv in frame.index
    ])

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(points[:,0], points[:,1], points[:,2])
    for coords, name in zip(points, node_names):
        ax.text(coords[0], coords[1], coords[2], name)

    # Resolve every edge to its end points before drawing any arrow.
    edge_points = [
        (points[node_names.index(e[0])], points[node_names.index(e[1])])
        for e in edges
    ]
    for start, end in edge_points:
        ax.quiver(start[0], start[1], start[2],
                  end[0] - start[0], end[1] - start[1], end[2] - start[2],
                  color='r')

    plt.savefig(plot_path.joinpath(f'connections_3D_{pat}.pdf'))
    plt.close()
    gc.collect()


In [None]:
# Persist the per-patient edge lists; the context manager closes the file.
with open(edge_path / 'edges_radcure_053024.pkl', 'wb') as f:
    pickle.dump(dict_edges, f)

In [None]:
# Names of the entries found in the patch directory, printed one per line.
patient_list = [entry.name for entry in patch_path.glob('*/')]
for p in patient_list:
    print(p)

In [None]:
# Number of entries found in the patch directory.
print(len(patient_list))

## New Edge creation

In [None]:
# Recompute the centred tumour positions for the new edge creation: the origin
# is the GTVp when present, otherwise the tumour with the largest z coordinate.
patient_patch_paths = patch_path.glob('*/')
tumor_locations = pd.read_pickle(location_pickle_path)
centered_locations = {}
no_gtvp = []
for pat in tqdm(patient_patch_paths):
    pat_str = pat.name
    # Skip the pickle files stored next to the patient directories.
    if 'locations' in pat_str: continue
    if 'no_gtvp' in pat_str: continue
    print(pat_str)

    pat_locations = tumor_locations.get(pat_str, {})
    if len(pat_locations) == 0:
        # No recorded tumour: there is nothing to centre.
        print('    no tumour locations recorded, skipping')
        continue
    centered_locations[pat_str] = {}

    if len(pat_locations) == 1:
        # A single tumour is its own origin.
        only_tumor = next(iter(pat_locations))
        centered_locations[pat_str][only_tumor] = np.array([0., 0., 0.])
        if only_tumor != 'GTVp':
            no_gtvp.append(pat_str)
        continue

    print(f"    {pat_locations.keys()}")
    if 'GTVp' in pat_locations:
        translation_factor = pat_locations['GTVp']
    else:
        no_gtvp.append(pat_str)
        print('    no GTVp, choosing highest GTVn in Z')
        array_locs = np.array(list(pat_locations.values()))
        # Look at the z column only; comparing the maximum z value against the
        # whole array could also match an x or y coordinate of another tumour.
        translation_factor = array_locs[np.argmax(array_locs[:, 2])]
    for tumor, location in pat_locations.items():
        centered_locations[pat_str][tumor.replace('.nii.gz','')] = location - translation_factor

# Saving is disabled here; the result is only used in memory by the next cell.
#with open(edge_path.joinpath('centered_locations_radcure_100324.pkl'), 'wb') as f:
#    pickle.dump(centered_locations, f)

In [None]:
# Build one directed graph per patient over its tumour nodes.
# By default every node is connected to every node, itself included (a single
# node therefore gets one self-loop). Set use_knn_edges to True to keep only
# the edges to each node's nearest neighbours instead.
use_knn_edges = False
max_neighbors = 3

patient_graphs = {}
edge_dict = {}
for pat, locations in centered_locations.items():
    graph = nx.DiGraph(directed=True)
    nodes = list(locations.keys())

    if use_knn_edges and len(nodes) >= 2:
        # The adjacency matrix is only computed when it is actually used.
        from sklearn.neighbors import kneighbors_graph
        n_neighbors = min(len(nodes) - 1, max_neighbors)
        adjacency = kneighbors_graph(list(locations.values()), n_neighbors).toarray()
        edges_for_nx = [
            (nodes[i], nodes[j])
            for i in range(len(nodes))
            for j in range(len(nodes))
            if adjacency[i][j]
        ]
    else:
        # Complete graph with self-loops; an empty node list yields no edges.
        edges_for_nx = [(src, dst) for src in nodes for dst in nodes]

    graph.add_edges_from(edges_for_nx)
    patient_graphs[pat] = graph

        

In [None]:
# Spot-check the edges generated for one patient.
patient_graphs['RADCURE-0006'].edges

In [None]:
# Persist the per-patient graphs; the context manager closes the file.
with open(edge_path / 'proto_complete_graphs_100424.pkl', 'wb') as f:
    pickle.dump(patient_graphs, f)

## Basic radiomic feature extraction

In [None]:
# Build the image/mask pairs for radiomics: one entry per mask file, keyed by
# "<patient>__<mask>", pointing at the mask and at the patient's image.nii.gz.
rad_dict = {}
for pat in tqdm(list(nii_path.glob('*'))):
    pat_str = pat.name
    for m in pat.glob('*.nii.gz'):
        if 'image' in m.name:
            continue
        # str.strip() removes a *set of characters*, not a prefix/suffix
        # (it turned 'Struct_GTVn.nii.gz' into 'GTV'), so cut the fixed parts explicitly.
        m_str = m.name.replace('.nii.gz', '').replace('Struct_', '')
        rad_dict[f"{pat_str}__{m_str}"] = {
            # Swap only the file name; a substring replace on the whole path
            # could also hit a directory component.
            'Image': m.with_name('image.nii.gz').as_posix(),
            'Mask': m.as_posix(),
        }

In [None]:
# One row per "<patient>__<mask>" key, with 'Image' and 'Mask' path columns.
rad_df = pd.DataFrame.from_dict(rad_dict, orient='index')

In [None]:
# Row offsets from 0 to 15000 in steps of 1000.
print(list(range(0,16000, 1000)))

In [None]:
# Inspect rows 15000-15999 of the image/mask table.
rad_df.iloc[15000:16000]

In [None]:
# Show the configured data root.
data_path

In [None]:
# Run the pyradiomics command-line tool on the image/mask table in chunks.
# Each chunk is written to a temporary batch CSV and produces its own
# radiomics_part_<start>.csv; adjust chunk_starts to choose the chunks to run.
chunk_size = 1000
chunk_starts = range(8000, 9000, chunk_size)
batch_csv = data_path.joinpath('proto_radiomics.csv')

for idx in chunk_starts:
    chunk = rad_df.iloc[idx:idx+chunk_size]
    if chunk.empty:
        # Past the end of the table: nothing to extract.
        print(f"no rows in chunk starting at {idx}, skipping")
        continue
    chunk.to_csv(batch_csv)
    command = [
        "pyradiomics",
        batch_csv.as_posix(),
        "-o", data_path.joinpath(f"radiomics_part_{idx}.csv").as_posix(),
        "-f", "csv",
        "--param", './hnc_project/radiomics/pyradiomics_param.yaml',
    ]
    result = subprocess.run(command)
    if result.returncode != 0:
        # Report the failure instead of silently moving on to the next chunk.
        print(f"pyradiomics failed for chunk starting at {idx} (exit code {result.returncode})")

In [None]:
# Show the "<patient>__<mask>" keys of the image/mask table.
rad_df.index

In [None]:
# NOTE(review): `radiomics` and `featureextractor` are only imported in
# commented-out lines of the import cell, so this cell raises NameError
# unless those imports are restored.
# Create a feature extractor with the 'Wavelet' image type enabled and print its configuration.
radiomics.setVerbosity(20)
extractor = featureextractor.RadiomicsFeatureExtractor()
extractor.enableImageTypeByName('Wavelet')
print(extractor.settings)
print(extractor.enabledImagetypes)
print(extractor.enabledFeatures)

In [None]:
# Extract radiomic features for every cropped patch and store, per patient,
# the features whose name starts with 'original'.
# NOTE(review): `radiomics_path` is not defined in any visible cell, and
# `extractor` comes from the extractor-setup cell; both must exist before
# this cell can run.
patient_patch_paths = patch_path.glob('*/')
for pat in patient_patch_paths:
    # The patch directory also holds pickle files; only patient directories have patches.
    if not pat.is_dir():
        continue
    pat_str = pat.name
    print(pat_str)

    features_to_keep = {}
    for p in pat.glob('image*.nii.gz'):
        # Derive the mask name from the file name alone, so underscores in the
        # mask name (or anywhere in the directory path) cannot truncate the key.
        p_name = p.name.replace('.nii.gz', '').replace('image_', '', 1)
        print(f"    {p_name}")
        image = p.as_posix()
        # The mask sits next to the patch: image_<name> -> Struct_<name>.
        mask = p.with_name(p.name.replace('image', 'Struct', 1)).as_posix()
        features = extractor.execute(image, mask)
        features_to_keep[p_name] = {key: value for key, value in features.items() if key.startswith('original')}

    with open(radiomics_path.joinpath(f"features_{pat_str}.pkl"), 'wb') as f:
        pickle.dump(features_to_keep, f)
      
 

## testing dataset class

In [None]:
# Show the kernel's working directory; the relative paths used in this notebook resolve against it.
os.getcwd()

In [None]:
# Instantiate RunModel with the project's model_config.
from hnc_project.pytorch.gen_params_torch_cfg import model_config
test_model = RunModel(model_config)

In [None]:
# Build the image dataset from the configuration held by the model wrapper.
test_dataset = dc.DatasetGeneratorImage(test_model.config)