In [None]:
##### Feature Visualization and PCA/LDA with Optional Polynomial Fit #####

# Install libraries:
# 1 - Create a new environment (type in Terminal): python -m venv venv
# 2 - cd venv\scripts
# 3 - ./activate.ps1
# 4 - pip install pandas numpy matplotlib seaborn scikit-learn ipywidgets openpyxl ipykernel

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.feature_selection import mutual_info_classif
import ipywidgets as widgets
from IPython.display import display, clear_output
from itertools import combinations

# --- CONFIG ---
# Workbook that interactive_feature_gui() loads when no path is passed.
default_excel = "Final_Feature_Excel_File.xlsx"
# Feature columns used throughout this file: rows with a NaN in any of them
# are dropped on load, they are the options of the GUI feature selector, and
# they are the pool ranked by auto_pca_combinations().
candidate_features = [
    "areashape_maximumradius", "areashape_meanradius", "areashape_equivalentdiameter",
    "areashape_minoraxislength", "areashape_eccentricity", "intensity_integratedintensity_input_microtubule",
    "areashape_area", "areashape_minferetdiameter", "intensity_integratedintensity_input_septin",
    "texture_infomeas1_input_microtubule_3_03_256", "texture_correlation_input_microtubule_3_01_256", 
    "texture_infomeas2_input_microtubule_3_03_256", "texture_correlation_input_septin_3_03_256", 
    "texture_differenceentropy_input_septin_3_00_256", "intensity_massdisplacement_input_microtubule", 
    "texture_angularsecondmoment_input_microtubule_3_03_256", "texture_contrast_input_microtubule_3_00_256",
    "septin_delta_ef", "diama_micron", "diamm_micron", "delta_diam2", "ratio_diam", "abs_key", 
    "dx_septin", "delta_diam", "diam_septin_ring", "balance_microtubule", "balance_septin3"
    ]
# Folder that receives every saved figure; created as soon as this code runs
# (no error if it already exists).
output_dir = "PCA_feature_combos"
os.makedirs(output_dir, exist_ok=True)

# --- Load Excel ---
def load_feature_excel(file_path):
    """Read the feature workbook and keep only fully populated rows.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        A DataFrame without any row that has a missing value in one of the
        ``candidate_features`` columns or in ``stage_key``.
    """
    required_columns = candidate_features + ["stage_key"]
    return pd.read_excel(file_path).dropna(subset=required_columns)

# Color palette
# Six hex colors handed to seaborn as `palette` for the `stage_key` hue in the
# PCA and LDA scatter plots.
colors_nadja = ['#332288','#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77']


# --- PCA Plot ---
def plot_pca(df, selected_features, n_components, pc_x, pc_y, show_polyfit=False, poly_degree=3):
    """Run PCA on the selected features and scatter two of the components.

    The feature columns are z-scored, reduced with PCA, and the two requested
    components are plotted against each other with points colored by
    ``stage_key``. The figure is written to ``output_dir`` as
    ``PCA_PC{pc_x}_vs_PC{pc_y}.svg`` and then shown.

    Args:
        df: DataFrame with the feature columns and a ``stage_key`` column.
            It is modified in place: ``PCx`` and ``PCy`` columns are added.
        selected_features: Names of the columns fed into the PCA.
        n_components: Number of principal components to compute.
        pc_x: Zero-based index of the component drawn on the x axis.
        pc_y: Zero-based index of the component drawn on the y axis.
        show_polyfit: If true, overlay a polynomial fit of PCy as a function
            of PCx.
        poly_degree: Degree of that polynomial.

    Raises:
        ValueError: If no feature is selected, or an integer ``n_components``
            is not between 1 and ``min(n_samples, n_features)``.
        IndexError: If ``pc_x`` or ``pc_y`` does not address one of the
            ``n_components`` computed components.
    """
    selected_features = list(selected_features)
    if not selected_features:
        raise ValueError("Select at least one feature for the PCA.")

    X = df[selected_features].to_numpy(dtype=float)

    # Validate up front so a bad request fails with a readable message and
    # before `df` is modified. Non-integer n_components values are passed
    # through to PCA untouched.
    if isinstance(n_components, (int, np.integer)):
        n_samples, n_features = X.shape
        max_components = min(n_samples, n_features)
        if not 1 <= n_components <= max_components:
            raise ValueError(
                f"n_components={n_components} is not possible with {n_samples} samples "
                f"and {n_features} selected features; it must be between 1 and {max_components}."
            )
        for axis_name, index in (("pc_x", pc_x), ("pc_y", pc_y)):
            if not -n_components <= index < n_components:
                raise IndexError(
                    f"{axis_name}={index} is out of range for {n_components} components."
                )

    # Z-score each column. A constant column has std == 0; dividing by it
    # would fill the column with NaN and make PCA fail, so use a unit scale
    # there (the centered column is then all zeros).
    std = X.std(axis=0)
    std[std == 0] = 1.0
    X_scaled = (X - X.mean(axis=0)) / std

    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X_scaled)
    df["PCx"] = X_pca[:, pc_x]
    df["PCy"] = X_pca[:, pc_y]

    plt.figure(figsize=(7, 6))
    sns.scatterplot(data=df, x="PCx", y="PCy", hue="stage_key", palette=colors_nadja, s=25, alpha=0.8)
    plt.title(f"PCA: PC{pc_x} vs PC{pc_y}")

    if show_polyfit:
        xx = df["PCx"].values
        yy = df["PCy"].values
        fit = np.poly1d(np.polyfit(xx, yy, deg=poly_degree))
        # Evaluate the fitted curve on an even grid across the plotted x range.
        xxx = np.linspace(np.min(xx), np.max(xx), 300)
        plt.plot(xxx, fit(xxx), linestyle='--', color='black', label=f"PolyFit deg={poly_degree}")
        plt.legend()

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"PCA_PC{pc_x}_vs_PC{pc_y}.svg"))
    plt.show()


# --- LDA Plot ---
def plot_lda(df, selected_features, apply_polyfit=True, poly_degree=3):
    """Scatter the first two linear discriminants of the chosen features.

    An LDA with two components is fitted on the selected columns using
    ``stage_key`` as the class label. The projection is stored in ``df`` as
    ``LD1``/``LD2`` (``df`` is modified in place), plotted with points
    colored by ``stage_key``, saved to ``output_dir`` as
    ``LDA_projection_.svg`` and shown.

    Args:
        df: DataFrame with the feature columns and a ``stage_key`` column.
        selected_features: Names of the columns fed into the LDA.
        apply_polyfit: If true, overlay a polynomial fit of LD2 on LD1.
        poly_degree: Degree of that polynomial.
    """
    feature_matrix = df[selected_features].values
    stage_labels = df["stage_key"]
    projection = LDA(n_components=2).fit_transform(feature_matrix, stage_labels)
    df["LD1"] = projection[:, 0]
    df["LD2"] = projection[:, 1]

    plt.figure(figsize=(7, 6))
    sns.scatterplot(
        data=df, x="LD1", y="LD2", hue="stage_key",
        palette=colors_nadja, s=25, alpha=0.8,
    )
    plt.title("LDA projection")

    if apply_polyfit:
        ld1 = df["LD1"].values
        ld2 = df["LD2"].values
        coefficients = np.polyfit(ld1, ld2, deg=poly_degree)
        curve = np.poly1d(coefficients)
        # Sample the curve evenly between the smallest and largest LD1 value.
        grid = np.linspace(ld1.min(), ld1.max(), 300)
        plt.plot(grid, curve(grid), linestyle='--', color='black', label=f"PolyFit deg={poly_degree}")
        plt.legend()

    plt.tight_layout()
    target_path = os.path.join(output_dir, "LDA_projection_.svg")
    plt.savefig(target_path)
    plt.show()

# --- Feature vs Stage ---
def plot_feature_vs_stage(df, features):
    """Draw one jittered strip plot per feature, grouped by ``stage_key``.

    Each figure is saved to ``output_dir`` as ``<feature>_vs_stage_key.png``
    and then shown.

    Args:
        df: DataFrame with the feature columns and a ``stage_key`` column.
        features: Names of the columns to plot, one figure each.
    """
    strip_style = {"jitter": 0.25, "alpha": 0.6}
    for feature_name in features:
        target_path = os.path.join(output_dir, f"{feature_name}_vs_stage_key.png")
        plt.figure(figsize=(6, 4))
        sns.stripplot(data=df, x="stage_key", y=feature_name, **strip_style)
        plt.title(f"{feature_name} vs stage_key")
        plt.tight_layout()
        plt.savefig(target_path)
        plt.show()

# --- Auto PCA Combos ---
def auto_pca_combinations(df, top_n=6):
    """Plot a two-component PCA for every pair of the most informative features.

    All ``candidate_features`` are ranked by their mutual information with
    ``stage_key``; the ``top_n`` best are kept and ``plot_pca`` is called once
    for each pair of them. A pair whose plot fails is reported and skipped so
    the remaining pairs are still drawn.

    Args:
        df: DataFrame with the ``candidate_features`` columns and a
            ``stage_key`` column.
        top_n: Number of top-ranked features to combine pairwise.
    """
    X = df[candidate_features].astype(float)
    y = df["stage_key"]
    mi = mutual_info_classif(X, y, discrete_features=False)
    ranked = pd.Series(mi, index=candidate_features).sort_values(ascending=False)
    top_feats = ranked.head(top_n).index.tolist()

    # NOTE: plot_pca builds its file name from the component indices only, so
    # every pair below is saved to the same PCA_PC0_vs_PC1.svg and only the
    # last successful plot remains on disk.
    for combo in combinations(top_feats, 2):
        try:
            plot_pca(df.copy(), list(combo), n_components=2, pc_x=0, pc_y=1)
        except Exception as exc:
            # `Exception` rather than a bare except: Ctrl-C / kernel interrupt
            # must still stop the loop, and the reason for the skip is shown.
            print(f"Skipped combo: {combo} ({type(exc).__name__}: {exc})")

# --- GUI ---
def interactive_feature_gui(excel_path=default_excel):
    """Build and display the ipywidgets control panel for the feature plots.

    The workbook is loaded once; every button works on a copy of that data.
    Controls:

    * a multi-select list of ``candidate_features``,
    * a slider for the number of principal components plus two dropdowns
      choosing which components go on the x and y axes,
    * a checkbox and a degree slider for the optional polynomial fit, which
      apply to both the PCA and the LDA plot,
    * buttons running PCA, LDA, the automatic PCA feature pairs, and the
      feature-vs-stage strip plots.

    Plots and messages are rendered into a single output area that is cleared
    on every button click.

    Args:
        excel_path: Path of the Excel file to load.
    """
    df = load_feature_excel(excel_path)

    feature_selector = widgets.SelectMultiple(
        options=candidate_features,
        value=tuple(candidate_features[:3]),
        description="Features",
        layout=widgets.Layout(height="600px")
    )
    n_pc_slider = widgets.IntSlider(value=3, min=2, max=6, description="# PCs")
    pcx_dropdown = widgets.Dropdown(options=[0, 1, 2], value=0, description="PC X")
    pcy_dropdown = widgets.Dropdown(options=[0, 1, 2], value=1, description="PC Y")
    show_polyfit_checkbox = widgets.Checkbox(value=False, description="Show Polynomial Fit")
    poly_degree_slider = widgets.IntSlider(value=3, min=1, max=12, description="Degree")

    def update_pc_dropdowns(change):
        # Assigning `options` makes ipywidgets select the first entry again,
        # so capture the current choices first and restore them afterwards;
        # otherwise both axes end up on component 0 after every slider move.
        previous_x = pcx_dropdown.value
        previous_y = pcy_dropdown.value
        new_opts = list(range(change["new"]))
        pcx_dropdown.options = new_opts
        pcy_dropdown.options = new_opts
        # The slider minimum is 2, so the fallbacks 0 and 1 are always valid.
        pcx_dropdown.value = previous_x if previous_x in new_opts else 0
        pcy_dropdown.value = previous_y if previous_y in new_opts else 1

    n_pc_slider.observe(update_pc_dropdowns, names="value")

    button_pca = widgets.Button(description="Run PCA")
    button_lda = widgets.Button(description="Run LDA")
    button_auto = widgets.Button(description="Auto PCA Combos")
    button_plot_feat_vs_stage = widgets.Button(description="Plot Feature vs Stage")

    output = widgets.Output()

    def chosen_features():
        # Return the selected feature names, or None (after telling the user)
        # when the selection is empty.
        features = list(feature_selector.value)
        if not features:
            print("Select at least one feature first.")
            return None
        return features

    def on_button_pca_clicked(_):
        with output:
            clear_output(wait=True)
            features = chosen_features()
            if features is None:
                return
            plot_pca(df.copy(), features, n_pc_slider.value, pcx_dropdown.value, pcy_dropdown.value,
                     show_polyfit_checkbox.value, poly_degree_slider.value)

    def on_button_lda_clicked(_):
        with output:
            clear_output(wait=True)
            features = chosen_features()
            if features is None:
                return
            # Honor the polynomial-fit controls here as well, so the checkbox
            # and degree slider behave the same for PCA and LDA.
            plot_lda(df.copy(), features,
                     apply_polyfit=show_polyfit_checkbox.value,
                     poly_degree=poly_degree_slider.value)

    def on_button_auto_clicked(_):
        with output:
            clear_output(wait=True)
            auto_pca_combinations(df.copy())

    def on_button_fvstage_clicked(_):
        with output:
            clear_output(wait=True)
            features = chosen_features()
            if features is None:
                return
            plot_feature_vs_stage(df.copy(), features)

    button_pca.on_click(on_button_pca_clicked)
    button_lda.on_click(on_button_lda_clicked)
    button_auto.on_click(on_button_auto_clicked)
    button_plot_feat_vs_stage.on_click(on_button_fvstage_clicked)

    controls = widgets.VBox([
        feature_selector,
        widgets.HBox([n_pc_slider, pcx_dropdown, pcy_dropdown]),
        widgets.HBox([show_polyfit_checkbox, poly_degree_slider]),
        widgets.HBox([button_pca, button_lda, button_auto, button_plot_feat_vs_stage])
    ])

    display(controls, output)

# --- Run GUI ---
# Builds and displays the control panel, loading the workbook named by
# `default_excel` (no path is passed here).
interactive_feature_gui()


VBox(children=(SelectMultiple(description='Features', index=(0, 1, 2), layout=Layout(height='600px'), options=…

Output()