In [None]:
# DICOM Viewing, Anonymization, and Format Conversion
import os
import pydicom
import SimpleITK as sitk

# --- Step 1: Anonymization of DICOM Files ---
# Replaces PatientName and PatientID in every DICOM file of the chosen folder.
# NOTE: only these two attributes are rewritten. Other identifying attributes
# (birth date, institution, UIDs, private tags, burned-in text) are left as
# they are, so this is not a complete de-identification.
dcm_folder = input("Enter the path to your DICOM folder: ").strip()
# Keep regular files only: os.listdir also returns sub-directories, and
# dcmread cannot open a directory.
file_list = [
    name for name in os.listdir(dcm_folder)
    if os.path.isfile(os.path.join(dcm_folder, name))
]
anonymized_folder = dcm_folder  # Overwrite in place. For safety, set a new path if desired.
# exist_ok keeps this a no-op for in-place use and creates the folder when
# anonymized_folder is pointed somewhere else.
os.makedirs(anonymized_folder, exist_ok=True)
for file in file_list:
    file_path = os.path.join(dcm_folder, file)
    # force=True accepts files without the DICOM preamble, but it also parses
    # arbitrary files, so the result has to be validated before saving.
    ds = pydicom.dcmread(file_path, force=True)
    if 'SOPClassUID' not in ds:
        # Not a DICOM image object; saving it would overwrite the file with junk.
        print(f"Skipping non-DICOM file: {file_path}")
        continue
    ds.PatientName = 'XXX'
    ds.PatientID = '0000'
    ds.save_as(os.path.join(anonymized_folder, file))
print("DICOM files anonymized.")

# --- Step 2: DICOM to NIfTI Format Conversion ---
# Reads the DICOM series from anonymized_folder and writes it as
# "<folder>.nii" next to that folder.
reader = sitk.ImageSeriesReader()
# NOTE(review): called without a series ID, so presumably only one series is
# picked up when the folder holds several - confirm against the SimpleITK docs.
dicom_names = reader.GetGDCMSeriesFileNames(anonymized_folder)
if not dicom_names:
    # Fail with a clear message instead of an opaque reader error.
    raise RuntimeError(f"No DICOM series found in {anonymized_folder!r}")
reader.SetFileNames(dicom_names)
image = reader.Execute()
# normpath strips a trailing separator; otherwise "folder/" + ".nii" would
# create a hidden ".nii" file inside the DICOM folder itself.
output_nii = os.path.normpath(anonymized_folder) + '.nii'
sitk.WriteImage(image, output_nii)
print(f"DICOM series converted to {output_nii}")

# --- Step 3: Image Metadata Extraction ---
# Query the geometry of the loaded image and report it in one go.
size, origin, spacing = image.GetSize(), image.GetOrigin(), image.GetSpacing()  # each ordered (x, y, z)
direction = image.GetDirection()  # flattened direction cosine matrix
print(
    f"Image size: {size}",
    f"Image origin: {origin}",
    f"Image spacing: {spacing}",
    f"Image direction: {direction}",
    sep="\n",
)

# --- Step 4: Convert Image to NumPy Array ---
# The array axes come back reversed relative to the image: (z, y, x).
image_arr = sitk.GetArrayFromImage(image)
print("Array shape (z, y, x):", image_arr.shape)

In [None]:
# Radiomics feature extraction
import os
import SimpleITK as sitk
import pandas as pd
from radiomics import featureextractor

# ----------- Image Preprocessing Settings -----------
# base_path is the root used by the single-image example and the batch loop.
base_path = input("Enter the base path to your image folders: ").strip()
settings = {
    'binWidth': 25,
    'resampledPixelSpacing': [1, 1, 1],  # unit: mm
    # NOTE(review): nearest-neighbour interpolation of image intensities is
    # unusual (PyRadiomics defaults to sitkBSpline) - confirm this is intended.
    'interpolator': sitk.sitkNearestNeighbor,
    'normalize': True,
    # Normalization rescales intensities to about unit standard deviation.
    # Without a scale factor a bin width of 25 puts almost every voxel in one
    # gray-level bin, which degenerates the texture features. Scaling by 100
    # keeps binWidth meaningful. For absolute-unit data such as CT, consider
    # disabling 'normalize' instead.
    'normalizeScale': 100,
}

# ----------- Single Image Feature Extraction Example -----------
# Demonstrates extraction on base_path/image.nrrd with base_path/label.nrrd.
image_file = os.path.join(base_path, 'image.nrrd')
mask_file = os.path.join(base_path, 'label.nrrd')
extractor = featureextractor.RadiomicsFeatureExtractor(**settings)
# The example pair is optional. When it is absent, skip it so the batch
# extraction further down still runs instead of the cell aborting here.
if os.path.isfile(image_file) and os.path.isfile(mask_file):
    feature_vector = extractor.execute(image_file, mask_file)
    for feature_name, feature_value in feature_vector.items():
        print(f"{feature_name}: {feature_value}")
else:
    print(f"Skipping single-image example: {image_file} and/or {mask_file} not found.")

# ----------- Batch Extraction of Radiomics Features -----------
# Each sub-folder of base_path is expected to hold one '*image.nrrd' and one
# '*label.nrrd'. The result is `df`, with one row of features per folder.
# Sorting makes the row order deterministic: os.listdir order is arbitrary, and
# the ICC analysis later matches subjects across files by row position.
folders = sorted(
    f for f in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, f))
)
print("Found folders:", folders)
extractor = featureextractor.RadiomicsFeatureExtractor(**settings)
# The enabled image types do not depend on the folder, so configure them once.
extractor.enableImageTypes(Original={}, LoG={"sigma": [4.0]}, Wavelet={})
feature_rows = []  # one single-row DataFrame per processed folder
for folder in folders:
    folder_path = os.path.join(base_path, folder)
    image_file = mask_file = None
    # Sorted so the choice is reproducible if several files match a suffix
    # (the last match wins).
    for file in sorted(os.listdir(folder_path)):
        if file.endswith('image.nrrd'):
            image_file = os.path.join(folder_path, file)
        elif file.endswith('label.nrrd'):
            mask_file = os.path.join(folder_path, file)
    if image_file and mask_file:
        print(f"Processing: {image_file}, {mask_file}")
        feature_vector = extractor.execute(image_file, mask_file)
        df_new = pd.DataFrame(feature_vector.values()).T
        df_new.columns = feature_vector.keys()
        feature_rows.append(df_new)
    else:
        print(f"Warning: Missing image or mask file in folder {folder}")
# Concatenate once at the end rather than growing the frame inside the loop.
df = pd.concat(feature_rows, ignore_index=True) if feature_rows else pd.DataFrame()

# Write the collected feature table to an Excel workbook inside base_path
output_file = os.path.join(base_path, 'portal_original.xlsx')
df.to_excel(excel_writer=output_file, index=False)
print("Radiomics features saved to", output_file)

In [None]:
# Radiomics feature selection through ICC > 0.75
import pandas as pd
import numpy as np
from pingouin import intraclass_corr

# Ask for the three Excel files, in this fixed order
prompt_labels = (
    "inter-observer Excel file",
    "intra-observer Excel file",
    "original feature Excel file",
)
icc_file_paths = [input(f"Enter path to {label}: ").strip() for label in prompt_labels]

# Load each Excel file into its own DataFrame, in prompt order
dataframes = list(map(pd.read_excel, icc_file_paths))

# The files are assumed to share the same layout and feature columns.
# Stack them under a two-level index: 'session' is the position of the file
# in the list, 'index' is the row label within that file.
session_ids = range(len(dataframes))
combined_data = pd.concat(
    dataframes,
    keys=session_ids,
    names=['session', 'index'],
)

# Calculate ICC for each feature
# Rows are the targets ('index') and the source files are the raters
# ('session'). The result is a list of (feature name, ICC2 value) tuples.
long_data = combined_data.reset_index()  # built once instead of once per feature

# ICC is only defined for numeric ratings. Text columns (for example
# diagnostics/version strings stored alongside the features) are skipped
# instead of making intraclass_corr fail.
numeric_features = combined_data.select_dtypes(include='number').columns
skipped_columns = [col for col in combined_data.columns if col not in numeric_features]
if skipped_columns:
    print(f"Skipping {len(skipped_columns)} non-numeric column(s): {skipped_columns}")

icc_results = []
for feature in numeric_features:
    # The combined frame is already in long format (one row per target and
    # rater), so it is passed directly, with the feature column renamed.
    icc_data = long_data[['index', 'session', feature]].rename(columns={feature: 'measurement'})
    icc = intraclass_corr(data=icc_data, targets='index', raters='session', ratings='measurement')
    # Select the ICC(2,1) row by its label rather than by position.
    icc_value = icc.loc[icc['Type'] == 'ICC2', 'ICC'].iloc[0]
    icc_results.append((feature, icc_value))

# Tabulate every feature with its ICC and show the full table
icc_df = pd.DataFrame(icc_results, columns=['Feature', 'ICC'])
print("All ICC values:", icc_df, sep="\n")

# Smallest and largest ICC observed
icc_column = icc_df['ICC']
icc_min, icc_max = icc_column.min(), icc_column.max()
print(f"\nICC range: {icc_min} to {icc_max}")

# Features counted as stable: ICC strictly above 0.75
stable_features = icc_df.loc[icc_column > 0.75]
num_stable_features = stable_features.shape[0]
print(f"\nNumber of features with ICC > 0.75: {num_stable_features}")

# List the stable features
print("\nFeatures with ICC > 0.75:", stable_features, sep="\n")

# Names of the features that passed the ICC threshold
stable_feature_names = stable_features['Feature'].to_list()

# Load the Excel file that should be reduced to the stable features
new_file_path = input("Enter path to the new Excel file to filter: ").strip()
new_df = pd.read_excel(new_file_path)

# Column selection: a KeyError is raised if a stable feature is absent from the file
filtered_df = new_df.loc[:, stable_feature_names]

# Write the reduced table to the location chosen by the user
output_file_path = input("Enter output Excel file path for filtered features: ").strip()
filtered_df.to_excel(excel_writer=output_file_path, index=False)

print("Filtered features saved to", output_file_path)