# Delta Radiomics

In [10]:
import pandas as pd
import numpy as np
import os
import yaml

In [15]:
def calculate_delta_radiomics(data_folder_path):
    """
    Reads radiomics data from subfolders (Time A and Time B), filters for 'suv2.5' 
    segmentation, calculates the delta (B - A) for numeric features, and stores
    the results in a dictionary per patient.

    Args:
        data_folder_path (str): The path to the main folder containing patient subfolders.

    Returns:
        (pd.DataFrame, pd.DataFrame, pd.DataFrame):
            delta_df: Delta radiomics (B - A), patients as index, features as columns.
            A_df: Radiomics at time A, same shape.
            B_df: Radiomics at time B, same shape.
    """
    all_delta_radiomics = {}
    A_radiomics, B_radiomics = {}, {}

    # 1. Iterate through all items in the main data folder
    for patient_folder_name in os.listdir(data_folder_path):
        patient_path = os.path.join(data_folder_path, patient_folder_name)
        
        # Ensure it is actually a directory (a patient folder)
        if os.path.isdir(patient_path):
            print(f"--- Processing {patient_folder_name} ---")
            
            # Initialize paths for Time A and Time B files
            file_A_path = None
            file_B_path = None
            
            # 2. Find the radiomics files for Time A and Time B in the patient folder
            for filename in os.listdir(patient_path):
                path_excel = os.path.join(patient_path, filename)

                # Assuming filenames contain '_A' or '_B' (case-insensitive) + .xlsx
                upper_name = path_excel.upper()
                if '_A' in upper_name and path_excel.endswith('.xlsx'):
                    file_A_path = path_excel
                elif '_B' in upper_name and path_excel.endswith('.xlsx'):
                    file_B_path = path_excel

            if file_A_path and file_B_path:
                try:
                    # 3. Read and preprocess the data
                    df_A = pd.read_excel(file_A_path)
                    df_B = pd.read_excel(file_B_path)
                    
                    # 4. Filter for the 'suv2.5' segmentation row, take columns from 23 onwards
                    row_A = df_A[df_A['Segmentation'].str.contains('suv2.5')].iloc[0, 23:]
                    row_B = df_B[df_B['Segmentation'].str.contains('suv2.5')].iloc[0, 23:]

                    # 5. Convert to numeric, coercing errors to NaN
                    numeric_A = pd.to_numeric(row_A, errors='coerce')
                    numeric_B = pd.to_numeric(row_B, errors='coerce')

                    # 6. Calculate Delta Radiomics (Time B - Time A)
                    delta_radiomics = numeric_B - numeric_A
                    
                    # Store as dicts, dropping NaNs
                    all_delta_radiomics[patient_folder_name] = delta_radiomics.dropna().to_dict()
                    A_radiomics[patient_folder_name] = numeric_A.dropna().to_dict()
                    B_radiomics[patient_folder_name] = numeric_B.dropna().to_dict()

                    print(f"Successfully calculated radiomics and delta radiomics for {patient_folder_name}.")

                except Exception as e:
                    print(f"Error processing files for {patient_folder_name}: {e}")
            else:
                print(f"Could not find both A and B files in {patient_folder_name}.")

    # Convert dicts to DataFrames (patients = rows, features = columns)
    A_df = pd.DataFrame.from_dict(A_radiomics, orient='index')
    B_df = pd.DataFrame.from_dict(B_radiomics, orient='index')
    delta_df = pd.DataFrame.from_dict(all_delta_radiomics, orient='index')

    return delta_df, A_df, B_df

In [19]:
with open("config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

# Extract the actual path STRING from the config
data_folder_path = cfg["paths"]["data_folder"]

# Run the function with a string path, NOT the whole dict
delta_radiomics_results, a_radiomics, b_radiomics = calculate_delta_radiomics(data_folder_path)

# ---- PRINT SUMMARY ----
print("\n--- Final Results Summary ---")
for patient, row in delta_radiomics_results.iterrows():
    # row is a Series of feature values for that patient
    non_na = row.dropna()
    print(f"\n{patient} Delta Radiomics ({len(non_na)} features):")
    print(non_na.head().to_dict())  # first 5 features

--- Processing 015 ---
Successfully calculated radiomics and delta radiomics for 015.
--- Processing 046 ---
Successfully calculated radiomics and delta radiomics for 046.
--- Processing 048 ---
Successfully calculated radiomics and delta radiomics for 048.
--- Processing 077 ---
Successfully calculated radiomics and delta radiomics for 077.
--- Processing 070 ---
Successfully calculated radiomics and delta radiomics for 070.
--- Processing 013 ---
Successfully calculated radiomics and delta radiomics for 013.
--- Processing 014 ---
Successfully calculated radiomics and delta radiomics for 014.
--- Processing 022 ---
Successfully calculated radiomics and delta radiomics for 022.
--- Processing 047 ---
Successfully calculated radiomics and delta radiomics for 047.
--- Processing 007 ---
Successfully calculated radiomics and delta radiomics for 007.
--- Processing 009 ---
Successfully calculated radiomics and delta radiomics for 009.
--- Processing 031 ---
Successfully calculated radiomi

In [20]:
delta_radiomics_results

Unnamed: 0,MeshVolume (cc),Volume (cc),Compactness1,Compactness2,Elongation,Flatness,LeastAxisLength,MajorAxisLength,Maximum2DDiameterColumn,Maximum2DDiameterRow,...,glrlm_LongRunLowGrayLevelEmphasis,glrlm_LowGrayLevelRunEmphasis,glrlm_RunEntropy,glrlm_RunLengthNonUniformity,glrlm_RunLengthNonUniformityNormalized,glrlm_RunPercentage,glrlm_RunVariance,glrlm_ShortRunEmphasis,glrlm_ShortRunHighGrayLevelEmphasis,glrlm_ShortRunLowGrayLevelEmphasis
15,49.867963,49.214353,0.016168,0.256642,0.266914,0.405557,-0.581837,-299.663486,-166.887134,-267.583392,...,,,,,,,,,,
46,81.359002,81.531557,0.00445,0.05048,0.116233,0.059981,-53.057164,-718.377143,-106.43608,-167.299933,...,,,,,,,,,,
48,132.95964,134.90532,-0.003025,-0.029388,-0.000437,0.012482,16.392367,101.549356,0.102906,24.886956,...,,,,,,,,,,
77,62.073205,61.188505,0.012091,0.27319,0.263203,0.349453,19.774698,-36.960332,-28.05052,-31.14318,...,91.455757,0.0,1.217388,26.828424,-0.119355,-0.197984,13.049563,-0.193916,-0.193916,-0.193916
70,-1857.982951,-1865.534346,-0.00259,-0.022411,0.178294,0.071478,18.907248,-127.680835,-720.751501,-693.996017,...,-137.743053,-0.012458,-0.989757,-8.304674,0.060624,0.093988,-50.938508,0.045758,0.081561,0.036807
13,282.473562,285.526503,-0.015736,-0.255738,0.442808,-0.060073,26.527685,124.898852,64.273602,104.376945,...,-7.80773,0.042759,-0.268261,153.580232,0.007379,0.014061,-1.636574,-0.009532,-0.101302,0.013411
14,-618.581947,-619.5321,-0.001067,-0.007992,0.153009,-0.070289,-34.663123,144.560198,68.070835,63.707567,...,,,,,,,,,,
22,-1760.492863,-1745.137872,-0.000683,-0.003,0.113102,0.155583,-7.834522,-259.682419,-251.945347,-282.016102,...,,,,,,,,,,
47,124.20569,126.635013,-0.011014,-0.145086,0.269972,0.144158,33.857741,24.010052,270.764063,288.724811,...,,,,,,,,,,
7,-338.856476,-53.558539,-0.000507,-0.00037,0.071425,0.016399,20.010696,30.697467,-81.079674,-33.35876,...,-11.045203,-0.001383,-0.419821,5728.933845,0.064305,0.09378,-5.936345,0.076732,0.083133,0.07546


In [21]:
# Clean and prepare dataframes
# by dropping columns with any NaN values and resetting index
# to keep only the complete cases (some patients have 99 columns with NaNs, but 43 are always present)
# we'll work with those 43.
for df in [delta_radiomics_results, a_radiomics, b_radiomics]:
    df.dropna(axis=1, how='any', inplace=True)
    df.reset_index(inplace=True)
    df.rename(columns={'index': 'id'}, inplace=True)
    df['id'] = df['id'].astype(int)

In [22]:
# to differentiate the columns of A and B datasets
a_radiomics = a_radiomics.add_suffix('_a')

In [23]:
a_radiomics.head()

Unnamed: 0,id_a,MeshVolume (cc)_a,Volume (cc)_a,Compactness1_a,Compactness2_a,Elongation_a,Flatness_a,LeastAxisLength_a,MajorAxisLength_a,Maximum2DDiameterColumn_a,...,SUV_StandardDeviation_a,SUV_TotalEnergy_a,SUV_Uniformity_a,SUV_Variance_a,TLG_a,Number of lesions_a,Dmax Patient (mm)_a,Spread Patient (mm)_a,Dmax Bulk (mm)_a,Spread Bulk (mm)_a
0,15,221.214992,222.556487,0.014253,0.072181,0.473212,0.140572,56.523435,402.096359,273.123144,...,0.670973,2523821.0,1.0,0.450205,734.434052,4.0,530.927813,1122.497606,530.927813,1122.497606
1,46,16.306867,17.72892,0.013737,0.067051,0.120633,0.108154,122.121675,1129.145457,582.706796,...,2.084291,365293.3,1.0,4.344271,71.489771,11.0,1091.820444,7430.718599,1091.820444,7430.718599
2,48,110.32296,112.73328,0.015185,0.08193,0.187392,0.09539,41.559987,435.685464,552.831991,...,2.631478,3952316.0,1.0,6.924674,597.957639,13.0,1063.58772,9799.746563,1063.58772,4069.22703
3,77,33.362503,34.751743,0.02575,0.235598,0.309269,0.188114,21.34048,113.444101,100.33121,...,0.854167,509565.5,1.0,0.729602,129.71955,1.0,0.0,0.0,0.0,0.0
4,70,2089.485878,2101.476888,0.013472,0.064485,0.210901,0.152324,101.187908,664.292216,1470.812246,...,5.636118,232879200.0,0.999858,31.765827,18684.376646,21.0,1964.433198,24724.466747,1346.149563,14381.983054


In [24]:
b_radiomics = b_radiomics.add_suffix('_b')

In [25]:
b_radiomics.head()

Unnamed: 0,id_b,MeshVolume (cc)_b,Volume (cc)_b,Compactness1_b,Compactness2_b,Elongation_b,Flatness_b,LeastAxisLength_b,MajorAxisLength_b,Maximum2DDiameterColumn_b,...,SUV_StandardDeviation_b,SUV_TotalEnergy_b,SUV_Uniformity_b,SUV_Variance_b,TLG_b,Number of lesions_b,Dmax Patient (mm)_b,Spread Patient (mm)_b,Dmax Bulk (mm)_b,Spread Bulk (mm)_b
0,15,271.082955,271.77084,0.030421,0.328823,0.740126,0.546129,55.941599,102.432873,106.236011,...,0.762871,4260973.0,1.0,0.581973,1055.946947,1.0,0.0,0.0,0.0,0.0
1,46,97.66587,99.260477,0.018188,0.11753,0.236866,0.168135,69.06451,410.768314,476.270716,...,9.492439,25502770.0,0.734631,90.106402,1282.041848,7.0,741.104645,2741.335065,395.708306,1335.079507
2,48,243.2826,247.6386,0.012161,0.052543,0.186955,0.107872,57.952354,537.23482,552.934897,...,2.792084,11997230.0,1.0,7.79573,1578.893332,13.0,1060.465092,8903.999307,1060.465092,4926.875652
3,77,95.435707,95.940248,0.037841,0.508788,0.572472,0.537567,41.115178,76.483769,72.28069,...,2.407237,4011490.0,1.0,5.79479,575.782092,1.0,0.0,0.0,0.0,0.0
4,70,231.502927,235.942542,0.010882,0.042074,0.389195,0.223803,120.095156,536.611381,750.060745,...,6.056244,25785460.0,0.989585,36.678095,2010.487193,21.0,1207.871635,15972.127667,810.596971,5661.262295


In [26]:
for patient, delta_data in delta_radiomics_results.items():
    if len(delta_data) == 99:
        print(patient)

In [27]:
filtered_results = {patient: data for patient, data in delta_radiomics_results.items() if len(data) != 99}

In [28]:
len(filtered_results)

44

In [29]:
for patient, delta_data in delta_radiomics_results.items():
        print(patient)

id
MeshVolume (cc)
Volume (cc)
Compactness1
Compactness2
Elongation
Flatness
LeastAxisLength
MajorAxisLength
Maximum2DDiameterColumn
Maximum2DDiameterRow
Maximum2DDiameterSlice
Maximum3DDiameter
MinorAxisLength
SphericalDisproportion
Sphericity
SurfaceArea
SurfaceVolumeRatio (cc)
SUV_10Percentile
SUV_90Percentile
SUV_Energy
SUV_Entropy
SUV_InterquartileRange
SUV_Kurtosis
SUV_Maximum
SUV_MeanAbsoluteDeviation
SUV_Mean
SUV_Median
SUV_Peak
SUV_Minimum
SUV_Range
SUV_RobustMeanAbsoluteDeviation
SUV_RootMeanSquared
SUV_Skewness
SUV_StandardDeviation
SUV_TotalEnergy
SUV_Uniformity
SUV_Variance
TLG
Number of lesions
Dmax Patient (mm)
Spread Patient (mm)
Dmax Bulk (mm)
Spread Bulk (mm)


# Clinical Data

In [31]:
with open("config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

data_folder_path = cfg["paths"]["data_folder"]
clinical_path = cfg["paths"]["clinical_data"]

clinic_data = pd.read_excel(clinical_path)