In [5]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import (
    RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier,
    VotingClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

In [6]:

# === Paths ===
file_path = r"D:/research/cd-ml/H/converted_nii/radiomics_features_all.xlsx"
output_path = r"D:/research/cd-ml/H/converted_nii/fusion_patient_probabilities.xlsx"

# === Selected Features ===
selected_features = [
    "log-sigma-1-0-mm-3D_firstorder_Uniformity",
    "log-sigma-1-0-mm-3D_glcm_Idm",
    "log-sigma-1-0-mm-3D_glcm_MaximumProbability",
    "log-sigma-1-0-mm-3D_glrlm_GrayLevelNonUniformityNormalized",
    "log-sigma-1-0-mm-3D_glrlm_RunLengthNonUniformityNormalized",
    "log-sigma-1-0-mm-3D_glrlm_ShortRunEmphasis",
    "log-sigma-1-0-mm-3D_glszm_SmallAreaEmphasis",
    "log-sigma-3-0-mm-3D_glszm_SmallAreaEmphasis",
    "wavelet-LL_glszm_SizeZoneNonUniformityNormalized",
    "wavelet-LL_glszm_SmallAreaEmphasis"
]

# === Models for Fusion ===
base_models = [
    ("RF", RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_split=5, min_samples_leaf=4, random_state=20)),
    ("LR", LogisticRegression(max_iter=2000, C=0.5, penalty='l2')),
    ("SVM", SVC(C=0.5, kernel='rbf', gamma='scale', probability=True)),
    ("DT", DecisionTreeClassifier(max_depth=4, min_samples_leaf=5, random_state=42)),
    ("KNN", KNeighborsClassifier(n_neighbors=7, weights='distance')),
    ("NB", GaussianNB()),
    ("GBDT", GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, subsample=0.8, random_state=20)),
    ("AdaBoost", AdaBoostClassifier(n_estimators=100, learning_rate=0.8, random_state=20)),
    ("XGBoost", XGBClassifier(use_label_encoder=False, eval_metric="logloss", n_estimators=100, max_depth=3,
                              learning_rate=0.1, subsample=0.8, colsample_bytree=0.8, reg_alpha=0.5, reg_lambda=1.0))
]

In [7]:

# === Load Excel Sheets ===
df_pet = pd.read_excel(file_path, sheet_name="PET")
df_ct = pd.read_excel(file_path, sheet_name="CT")

# === Task list: (sheet name, dataframe, label name, label column index) ===
tasks = [
    ("PET", df_pet, "progression", -2),
    ("PET", df_pet, "death", -1),
    ("CT", df_ct, "progression", -2),
    ("CT", df_ct, "death", -1),
]

In [8]:

# === Collect all results ===
all_results = []

for dataset_name, df, label_name, label_col_idx in tasks:
    print(f"🔍 Processing: {dataset_name} - {label_name}")
    
    # Extract features, label, and patient column (third from last)
    X = df[selected_features].copy()
    y = df.iloc[:, label_col_idx]
    patient_col = df.iloc[:, -3]  # third from the end

    # Filter valid data
    valid_idx = y.notnull() & X.notnull().all(axis=1)
    X = X[valid_idx]
    y = y[valid_idx]
    patients = patient_col[valid_idx].reset_index(drop=True)

    # Normalize
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Train fusion model
    fusion_model = VotingClassifier(estimators=base_models, voting="soft")
    fusion_model.fit(X_scaled, y)
    probs = fusion_model.predict_proba(X_scaled)[:, 1]  # probability of positive class

    # Save result
    result = pd.DataFrame({
        "patient": patients,
        "dataset": dataset_name,
        "label": label_name,
        "true_label": y.values,
        "fusion_probability": probs
    })
    all_results.append(result)

# === Write final output ===
final_df = pd.concat(all_results, ignore_index=True)
final_df.to_excel(output_path, index=False)
print(f"\n✅ Fusion probabilities saved to: {output_path}")


🔍 Processing: PET - progression


Parameters: { "use_label_encoder" } are not used.



🔍 Processing: PET - death


Parameters: { "use_label_encoder" } are not used.



🔍 Processing: CT - progression


Parameters: { "use_label_encoder" } are not used.



🔍 Processing: CT - death


Parameters: { "use_label_encoder" } are not used.




✅ Fusion probabilities saved to: D:/research/cd-ml/H/converted_nii/fusion_patient_probabilities.xlsx
