In [None]:
import torch
import numpy as np

import os, sys
# Make the repository root (two levels above the working directory) importable
# so that the project-level `config` module and the `FeatureSelection` package
# can be found by the cells below.
project_root_dir = os.path.join(os.getcwd(),'../..')
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)

# Import unconditionally: when the path is already registered (e.g. the cell is
# re-run after the namespace was cleared) the guard above is skipped, and
# `config` must still be bound for the cells that read `config.RESULTS_DIR`.
import config

In [None]:
from FeatureSelection.Experiments.doc import ExperimentDoc

def generate_figures(exp) -> None:
    """Save the documentation figures of a brain-cancer band-selection experiment.

    Two PDF files are written into ``<save_result_dir>/imgs`` (``save_result_dir``
    is read from ``exp.config()``):

    * ``hist_phi.pdf``      - figure returned by ``ExperimentDoc.hist_phi``
    * ``selected_band.pdf`` - figure returned by ``ExperimentDoc.plot_band_selection``,
      drawn over one spectrum per class.

    Parameters
    ----------
    exp : experiment object exposing ``config()`` and ``dataset``; it is passed
        unchanged to the ``ExperimentDoc`` helpers.
    """
    save_dir = os.path.join(exp.config()['save_result_dir'], 'imgs')
    # savefig does not create missing folders; make sure the target exists.
    os.makedirs(save_dir, exist_ok=True)

    # Histogram
    hist_fig = ExperimentDoc.hist_phi(exp)
    hist_fig.savefig(os.path.join(save_dir, 'hist_phi.pdf'), bbox_inches='tight')

    # Band Selected
    dataset = exp.dataset
    X, y = dataset[:]

    # get 1 sample per class: the first item whose label equals the class id
    sample_labels = ['Healthy', 'Tumor', 'Hypervascular']
    samples = np.zeros((len(sample_labels), 128))  # 128 spectral bands per sample
    for i in range(len(sample_labels)):
        idx = np.where(y == i)[0][0]
        samples[i] = X[idx].numpy()

    # Undo the dataset transform so the spectra are plotted in their original scale.
    samples = dataset.transform.inverse_transform(torch.tensor(samples))
    # Wavelengths rounded to whole numbers for the axis ticks.
    wv = np.round(dataset.wv, 0).astype(int)

    selected_band_fig = ExperimentDoc.plot_band_selection(exp, samples=samples, labels=sample_labels, wv=wv)
    selected_band_fig.savefig(os.path.join(save_dir, 'selected_band.pdf'), bbox_inches='tight')


In [None]:
from FeatureSelection.Experiments.BrainCancerHSIBandSelection import VNIRimagesOp8C1, VNIRimagesOp12C1, VNIRimagesOp15C1, VNIRimagesOp20C1, VNIRimagesOp12C2, VNIRimagesOp8C2
# Experiment classes to execute; each one is instantiated without arguments.
experiments = [VNIRimagesOp8C1, VNIRimagesOp12C1, VNIRimagesOp15C1, VNIRimagesOp20C1, VNIRimagesOp12C2, VNIRimagesOp8C2]

# For every experiment: run it, persist its configuration and results, then
# export its figures. `exp` stays bound to the last experiment after the loop.
for _exp in experiments:
    exp = _exp()
    print(exp.experiment)
    exp.run()
    exp.save_config()
    exp.save_results()
    generate_figures(exp)

In [None]:
from FeatureSelection.Experiments.BrainCancerHSIBandSelection import VNIRimagesOpAll_KMeans, VNIRimagesOpAll_OCSP
# Second batch of experiment classes (OCSP and KMeans variants); this rebinds
# the `experiments` list defined by the previous batch cell.
experiments = [VNIRimagesOpAll_OCSP, VNIRimagesOpAll_KMeans]

# Same pipeline as the previous batch: run, save config, save results, export
# figures. `exp` stays bound to the last experiment after the loop.
for _exp in experiments:
    exp = _exp()
    print(exp.experiment)
    exp.run()
    exp.save_config()
    exp.save_results()
    generate_figures(exp)

In [None]:
from FeatureSelection.Experiments.BrainCancerHSIBandSelection import VNIRimagesOpAll_KMeans, VNIRimagesOpAll_OCSP

# Reference band subset, presumably taken from the paper
# "Most Relevant Spectral Bands Identification for Brain Cancer Detection Using Hyperspectral Imaging"
exp = VNIRimagesOpAll_KMeans()
reference_selection = [1,2,3,5,7,8,14,17,18,19,20,33,34,36,38,41,43,44,45,46,47,49,53,55,56,57,58,59,60,61,62,63,72,74,75,76,77,78,79,81,95,101,104,117,123,124,126,128]
# The list above is 1-based (it contains 128 for a 128-band spectrum);
# convert it once to 0-based array indices.
reference_selection = np.array(reference_selection) - 1

from FeatureSelection.Experiments.doc.ExperimentDoc import _get_random_samples
from FeatureSelection.Experiments.doc.utils import plot_band_selection

samples = _get_random_samples(exp)
wv = np.round(exp.dataset.wv, 0).astype(int)
# Binary mask over the 128 bands. `reference_selection` is already 0-based, so
# it is used as-is (subtracting 1 again would shift every band and wrap index 0
# around to the last band).
activated_gates = np.zeros(128)
activated_gates[reference_selection] = 1

from matplotlib import pyplot as plt
fig = plot_band_selection(reference_selection, samples, sample_labels=None, n_features=len(activated_gates), wavelength=wv)

# Save before showing, and make sure the destination folder exists
# (savefig does not create it).
reference_path = os.path.join(config.RESULTS_DIR, 'Chapter6/BrainCancerHSIBandSelection/other/reference.pdf')
os.makedirs(os.path.dirname(reference_path), exist_ok=True)
fig.savefig(reference_path, bbox_inches='tight')
# pyplot.show() only accepts the keyword `block`; it does not take a figure.
plt.show()

In [None]:
# Display how many bands the reference selection contains.
len(reference_selection)

In [None]:
# Indices (along the first dimension) of the selector entries whose variational
# parameter is below the selector's `p_threshold`.
# NOTE(review): this rebinds `activated_gates`, which the reference-selection
# cell bound to a NumPy 0/1 mask; here it becomes a torch index tensor.
# NOTE(review): `exp` must already hold a trained model for this to be
# meaningful - confirm which cell produced it.
activated_gates = torch.where(exp.model.feature_selector.variational_parameter() < exp.model.feature_selector.p_threshold)[0]
activated_gates

In [None]:
# Display the current value of `activated_gates` again (same output as the
# previous cell unless the variable was rebound in between).
activated_gates

# Laparoscopy

In [None]:
import torch
import numpy as np

import os, sys
# Make the repository root (two levels above the working directory) importable
# so that the project-level `config` module and the `FeatureSelection` package
# can be found by the cells below.
project_root_dir = os.path.join(os.getcwd(),'../..')
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)

# Import unconditionally: when the path is already registered the guard above
# is skipped, and `config` must still be bound for later cells.
import config

In [None]:
from FeatureSelection.Experiments.LaparoscopyHSIBandSelection import ExperimentFactory, ExperimentType
from FeatureSelection.Experiments.doc import ExperimentDoc

def generate_figures(exp) -> None:
    """Save the documentation figures of a laparoscopy band-selection experiment.

    Two PDF files are written into ``<save_result_dir>/imgs`` (``save_result_dir``
    is read from ``exp.config()``):

    * ``hist_phi.pdf``      - figure returned by ``ExperimentDoc.hist_phi``
    * ``selected_band.pdf`` - figure returned by ``ExperimentDoc.plot_band_selection``,
      drawn over one spectrum per class.

    Note: this definition replaces any ``generate_figures`` defined earlier in
    the notebook (the brain-cancer variant uses 3 classes and 128 bands).

    Parameters
    ----------
    exp : experiment object exposing ``config()`` and ``dataset``; it is passed
        unchanged to the ``ExperimentDoc`` helpers.
    """
    save_dir = os.path.join(exp.config()['save_result_dir'], 'imgs')
    # savefig does not create missing folders; make sure the target exists.
    os.makedirs(save_dir, exist_ok=True)

    # Histogram
    hist_fig = ExperimentDoc.hist_phi(exp)
    hist_fig.savefig(os.path.join(save_dir, 'hist_phi.pdf'), bbox_inches='tight')

    # Band Selected
    dataset = exp.dataset
    X, y = dataset[:]

    # get 1 sample per class: the first item whose label equals the class id
    sample_labels = ['Fat', 'Muscle', 'Nerve', 'Vessel']
    samples = np.zeros((len(sample_labels), 68))  # 68 spectral bands per sample
    for i in range(len(sample_labels)):
        idx = np.where(y == i)[0][0]
        samples[i] = X[idx].numpy()

    # Undo the dataset transform so the spectra are plotted in their original scale.
    samples = dataset.transform.inverse_transform(torch.tensor(samples))
    # wv = np.round(dataset.wv, 0).astype(int)
    # Wavelengths are truncated (not rounded) to integers for the axis ticks.
    wv = np.array(exp.dataset.wv).astype(int)

    selected_band_fig = ExperimentDoc.plot_band_selection(exp, samples=samples, labels=sample_labels, wv=wv)
    selected_band_fig.savefig(os.path.join(save_dir, 'selected_band.pdf'), bbox_inches='tight')


In [None]:
# exp_type = [ExperimentType.Concrete_2, ExperimentType.Concrete_3, ExperimentType.Concrete_5, ExperimentType.Gaussian_5]
# reg_factors = [1, 1.5, 2, 3]

# for _exp_type in exp_type:
#     for _reg_factor in reg_factors:
#         factory = ExperimentFactory()
#         exp = factory.create_experiment(_exp_type, reg_factor=_reg_factor)
#         print(exp.experiment)
#         exp.run()
#         exp.save_config()
#         exp.save_results()
#         generate_figures(exp)


In [None]:
import torch
import numpy as np

import os, sys
# Make the repository root (two levels above the working directory) importable
# so that the project-level `config` module and the `FeatureSelection` package
# can be found by the cells below.
project_root_dir = os.path.join(os.getcwd(),'../..')
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)

# Import unconditionally: when the path is already registered the guard above
# is skipped, and `config` must still be bound for later cells.
import config

In [None]:
from FeatureSelection.Experiments.LaparoscopyHSIBandSelection import ExperimentFactory, ExperimentType
# Build the `Concrete_IP` laparoscopy experiment through the factory; `exp` is
# the object the following cells run, save and inspect.
factory = ExperimentFactory()
exp = factory.create_experiment(ExperimentType.Concrete_IP)

In [None]:
# Inspect the `matrix_estimator` attribute of the model's feature selector.
exp.model.feature_selector.matrix_estimator

In [None]:
# Execute the experiment created above.
exp.run()

In [None]:
# Persist the configuration and results, then export the figures with whichever
# `generate_figures` definition was executed last in this kernel.
exp.save_config()
exp.save_results()
generate_figures(exp)

In [None]:
# Display the experiment's information-plane record as a DataFrame.
exp.ip.to_df()

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import colormaps as cm
def moving_average(a, n=10, padding_size=0) :
    """Trailing moving average of ``a`` over a window of ``n`` points.

    The average is computed from a cumulative sum. When ``padding_size`` is
    non-zero, that many copies of ``a[0]`` are inserted in front of the series
    first, which lengthens the output by the same amount.

    Parameters
    ----------
    a : 1-D sequence of numbers.
    n : int, window length.
    padding_size : int, number of leading copies of ``a[0]`` to prepend.

    Returns
    -------
    np.ndarray of floats with ``len(a) + padding_size - n + 1`` entries.
    """
    if padding_size == 0:
        series = a
    else:
        # Left-pad with the first value (rather than zeros) so the first
        # windows are not dragged towards zero.
        series = np.insert(a, 0, np.ones(padding_size)*a[0])

    running_total = np.cumsum(series, dtype=float)
    # Turn the running total into a sum over the last `n` entries.
    running_total[n:] = running_total[n:] - running_total[:-n]
    windowed_sum = running_total[n - 1:]
    return windowed_sum / n

def show_ip(ip_df: pd.DataFrame, n=25, labels=[], moving_average_n=25):
    '''
        Create an Information Plane illustration

        Parameters
        ----------
        ip_df : pd.Dataframe, dataframe which contains the MI. This dataframe
            has a specific structure which is created from IPDL.InformationPlane class.
            The code reads it as two-level columns ``(layer, 'Ixt' | 'Ity')`` and
            takes every second column to enumerate the layers (assumes exactly
            two adjacent columns per layer - TODO confirm).
        n: int, Number of samples to visualize, sampling generated by a log-scale
        labels: list of str, legend entry per layer (same order as the layers in
            ``ip_df``). When empty, the layer names of ``ip_df`` are used.
            The default list is never mutated.
        moving_average_n: int, window of the moving average applied to both MI
            series before plotting.

        Returns
        -------
        matplotlib Figure with the scatter plot and one colorbar per layer.
    '''
    colors = ['Greys', 'Reds', 'Blues', 'Greens', 'Oranges',
                'Purples', 'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd',
                'RdPu', 'BuPu', 'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn',
                'BuGn', 'YlGn']

    # Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
    # later releases dropped the old names; use whichever this install provides.
    style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'

    with plt.style.context(style_name):
        fig = plt.figure(figsize=(6, 4),constrained_layout=False)
        # Left grid: the information plane itself. Right grid: one thin
        # colorbar column per layer.
        gs1 = fig.add_gridspec(nrows=1, ncols=1, left=0.08, right=0.84, wspace=0.05)
        gs2 = fig.add_gridspec(nrows=1, ncols=len(ip_df.keys()[::2]), left=0.85, right=0.95, wspace=0)
        f8_ax1 = fig.add_subplot(gs1[:, :])
        # Raw strings: "\m" is not a valid escape sequence in a normal literal.
        f8_ax1.set_xlabel(r"$\mathcal{I}(X; T)$", fontsize=14)
        f8_ax1.set_ylabel(r"$\mathcal{I}(T; Y)$", fontsize=14)

        for idx, (layer, _) in enumerate(ip_df.columns[::2]):
            Ity = moving_average(ip_df[layer]['Ity'].to_numpy(), n=moving_average_n, padding_size=int(moving_average_n*0.75))
            Ixt = moving_average(ip_df[layer]['Ixt'].to_numpy(), n=moving_average_n, padding_size=int(moving_average_n*0.75))
            
            # Cycle through the palette so more layers than colormaps still plot.
            cmap = cm[colors[idx % len(colors)]]
            # Log-spaced iteration indices: dense at the start of training.
            iterations = np.geomspace(1, len(Ity)-1, num=n, dtype=np.uint)

            # NOTE(review): the colormap is called with an integer iteration
            # index, not a 0-1 fraction - confirm this is the intended edge color.
            color = np.array([cmap(iterations[int(len(iterations)*0.95)])])
            sc = f8_ax1.scatter(Ixt[iterations], Ity[iterations], c=iterations, vmin=0, vmax=iterations.max(), cmap=cmap, edgecolor=color)
            # Empty scatter only to register a legend entry in the layer color.
            if not labels:
                f8_ax1.scatter([], [], c=color, label=layer)
            else:
                f8_ax1.scatter([], [], c=color, label=labels[idx])

            f8_ax2 = fig.add_subplot(gs2[0, idx])
            cb = fig.colorbar(sc, cax=f8_ax2, pad=0)
            cb.set_ticks([])

        f8_ax1.legend()
        # Only the last (right-most) colorbar gets ticks and the axis label.
        cb.set_ticks([0, iterations.max()])
        f8_ax2.set_yticklabels(['0', ip_df[layer]['Ixt'].size - 1])
        cb.set_label("Iterations", labelpad=-18)

    return fig

In [None]:
# Plot the information plane of the current experiment (150 log-spaced points,
# moving-average window of 20, one legend label per layer) and save it as
# `ip.pdf` in the experiment's `imgs` folder.
# NOTE(review): the `imgs` folder must already exist when savefig runs.
ip_df = exp.ip.to_df()
fig = show_ip(ip_df, 150, moving_average_n=20, labels=['$X_s$', '$L_1$', '$L_2$', '$L_3$'])
save_dir = os.path.join(exp.config()['save_result_dir'], 'imgs')
fig.savefig(os.path.join(save_dir, 'ip.pdf'), bbox_inches='tight')

# LASSO

In [None]:
from sklearn.linear_model import LogisticRegression
from FeatureSelection.Experiments.LaparoscopyHSIBandSelection import ExperimentFactory, ExperimentType

# Laparoscopy dataset
# The `Concrete_2` experiment is created only to reuse its train/test datasets
# (read from its config); the experiment itself is not run in this section.
factory = ExperimentFactory()
exp = factory.create_experiment(ExperimentType.Concrete_2)

train_dataset = exp.config()['train_dataset']
test_dataset = exp.config()['test_dataset']
# Output folder for the LASSO baseline artefacts; rebinds the global `save_dir`.
save_dir = os.path.join(config.RESULTS_DIR, 'Chapter6/LaparoscopyHSIBandSelection/other/Lasso/')

In [None]:
# L1-penalised logistic regression used as the LASSO baseline.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument - confirm against the version this project pins.
lasso_model = LogisticRegression(penalty='l1', solver='saga', max_iter=10000, tol=1e-3, C=.01, multi_class='multinomial')
# Slicing the dataset with [:] yields the full (features, labels) tensors.
X, y = train_dataset[:]

lasso_model.fit(X.numpy(), y.numpy())

# Predictions on the held-out split, reused by the following cells.
X_test, y_test = test_dataset[:]
y_pred = lasso_model.predict(X_test.numpy())

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
# Test accuracy of the LASSO baseline (uses `y_test` / `y_pred` from the fit cell).
accuracy_score(y_test, y_pred)

In [None]:
# Indices of the features the LASSO model actually uses: a feature counts as
# selected when at least one class has a non-zero coefficient for it.
# (Summing the signed coefficients over classes is not a valid check, because
# weights of opposite sign can cancel to exactly zero.)
activated_gates = np.flatnonzero(np.any(lasso_model.coef_ != 0, axis=0))
n_activated_gates = len(activated_gates)

In [None]:
from matplotlib import pyplot as plt

def plot_band_selection(features_selected:np.ndarray, samples:list, sample_labels=None, wavelength=None, n_features=-1):
    """Plot sample spectra and highlight the selected bands.

    Every sample is drawn as a line, the selected bands are marked on it with
    scatter points, and each run of consecutive selected indices is shaded with
    a vertical span.

    Note: this definition shadows the ``plot_band_selection`` imported from
    ``FeatureSelection.Experiments.doc.utils`` in an earlier cell.

    Parameters
    ----------
    features_selected : array of int, indices of the selected bands. Runs are
        detected with ``np.diff``, so indices are expected in ascending order.
        May be empty, in which case only the spectra are drawn.
    samples : list of 1-D arrays, one spectrum per entry.
    sample_labels : optional list of str, one legend label per sample; must
        have the same length as ``samples``.
    wavelength : optional array used to label the x axis (12 evenly spaced ticks).
    n_features : int, total number of bands; when different from -1 the title
        shows the sparsity ``(n_features - n_selected) / n_features``.

    Returns
    -------
    The matplotlib Figure that was drawn.
    """
    if sample_labels is not None:
        assert(len(samples) == len(sample_labels))
    else:
        sample_labels = [None] * len(samples)

    features_selected = np.asarray(features_selected)
    has_selection = features_selected.size > 0
    if has_selection:
        # Split the indices wherever two neighbours are not consecutive, giving
        # one array per contiguous run of selected bands.
        diff = np.diff(features_selected)
        features_ranges = np.split(features_selected, np.where(diff != 1)[0]+1)
    else:
        # Nothing selected (e.g. a fully sparse model): no runs to shade.
        features_ranges = []

    # Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
    # later releases dropped the old names; use whichever this install provides.
    if 'seaborn-v0_8-colorblind' in plt.style.available:
        style_name = 'seaborn-v0_8-colorblind'
    else:
        style_name = 'seaborn-colorblind'

    with plt.style.context(style_name):
        fig = plt.figure()
        for idx, sample in enumerate(samples):
            plt.plot(sample, label=sample_labels[idx])
            if has_selection:
                plt.scatter(features_selected, sample[features_selected], alpha=.5)

        for r in features_ranges:
            # Pad each run by a quarter band so single-band runs stay visible.
            plt.axvspan(r[0]-.25, r[-1]+.25, alpha=0.25)

        if wavelength is not None:
            ticks = np.linspace(0, len(wavelength)-1, 12, dtype=int)
            plt.xlabel('Wavelength (nm)', fontsize='x-large')
            plt.xticks(ticks, wavelength[ticks], rotation=45, fontsize='large')

        plt.ylabel('Reflectance', fontsize='x-large')
        plt.yticks(fontsize='large')
        
        # Legend only when labels were supplied (and there is something to label).
        if sample_labels and sample_labels[0] is not None:
            plt.legend()
            # plt.legend(loc='upper right')
    
        if n_features != -1:
            sparse_rate = (n_features - len(features_selected)) / n_features 
            title = 'Sparsity: {:.2f}'.format(sparse_rate)
            plt.title(title, fontsize='xx-large')
 
        plt.margins(x=0.01)

    return fig

# Wavelengths of the experiment dataset truncated to integers (axis ticks).
cliped_wavelength = np.array(exp.dataset.wv).astype(int)

# One spectrum per class (ids 0..3): the first item of the underlying dataset
# whose label equals the class id, moved to CPU and converted to NumPy.
# NOTE(review): `test_dataset.dataset` is presumably the full dataset wrapped by
# a subset, so the sample may not belong to the test split - confirm.
sample_per_class = []
for i in range(4):
    sample_per_class.append(test_dataset.dataset.X[np.where(test_dataset.dataset.y==i)[0][0]].cpu().numpy())

# Legend labels, in class-id order.
labels = ['Fat', 'Muscle', 'Nerve', 'Vessel']

In [None]:
# Draw the bands kept by the LASSO baseline over one spectrum per tissue class.
fig = plot_band_selection(activated_gates, sample_per_class, sample_labels=labels, 
                            wavelength=cliped_wavelength, n_features=len(cliped_wavelength))

# Save before showing, and make sure the destination folder exists
# (savefig does not create it).
os.makedirs(save_dir, exist_ok=True)
fig.savefig(os.path.join(save_dir, 'lasso_band_selection.pdf'), bbox_inches='tight')

# pyplot.show() only accepts the keyword `block`; it does not take a figure.
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def metrics_result(model, X, y):
    """Score ``model`` on ``(X, y)`` and return the metrics as a one-row table.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X : feature matrix passed to ``model.predict``.
    y : true labels.

    Returns
    -------
    pd.DataFrame with a single row and the columns ``accuracy``, ``precision``,
    ``recall`` and ``f1``; the last three are macro-averaged over the classes.
    """
    predictions = model.predict(X)

    # Accuracy has no averaging mode; the remaining scorers share `average='macro'`.
    row = {'accuracy': [accuracy_score(y, predictions)]}
    macro_scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    for column, scorer in macro_scorers:
        row[column] = [scorer(y, predictions, average='macro')]

    return pd.DataFrame(row)

def confusion_matrix_result(model, X, y, normalize=False):
    """Confusion matrix of ``model`` on ``(X, y)`` as a labelled DataFrame.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X : feature matrix passed to ``model.predict``.
    y : true labels, encoded as the class ids 0..3 in the order
        Fat, Muscle, Nerve, Vessel (the encoding the rest of this notebook uses).
    normalize : bool, when True each row is normalised over the true class
        (``normalize='true'``); otherwise raw counts are returned.

    Returns
    -------
    pd.DataFrame of shape (4, 4); rows are true classes, columns predicted ones.
    """
    y_pred = model.predict(X)

    labels = ['Fat', 'Muscle', 'Nerve', 'Vessel']
    # Pin the class ids explicitly: without `labels=` scikit-learn only keeps
    # classes that occur in `y` or `y_pred`, so a missing class would shrink
    # the matrix and break the 4x4 DataFrame below.
    class_ids = list(range(len(labels)))
    cm = confusion_matrix(y, y_pred, labels=class_ids, normalize='true' if normalize else None)
    cm_df = pd.DataFrame(cm, index=labels, columns=labels)

    return cm_df

def confusion_matrix_figure(model, X, y):
    """Draw the row-normalised confusion matrix of ``model`` as a heatmap.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X : feature matrix passed to ``model.predict``.
    y : true labels.

    Returns
    -------
    The matplotlib Figure holding the annotated heatmap. Calling this function
    also applies seaborn's default theme globally (``sns.set()``).
    """
    normalized_cm = confusion_matrix_result(model, X, y, normalize=True)

    sns.set()
    figure = plt.figure(figsize=(6,4))

    # Annotated heatmap; tick labels come from the DataFrame index/columns.
    heatmap_ax = sns.heatmap(normalized_cm, annot=True, cmap='Blues')

    # Fixed colorbar ticks over the 0-1 range of a normalised matrix.
    colorbar = heatmap_ax.collections[0].colorbar
    colorbar.set_ticks([0, 0.25, 0.5, 0.75, 1])

    heatmap_ax.set_xlabel('Predicted')
    heatmap_ax.set_ylabel('True')

    return figure

In [None]:
# Evaluate the LASSO baseline on the test split and export the metrics table,
# the raw-count confusion matrix and the normalised confusion-matrix figure.
X_test, y_test = test_dataset[:]

metrics_df = metrics_result(lasso_model, X_test.numpy(), y_test.numpy())
cm_df = confusion_matrix_result(lasso_model, X_test.numpy(), y_test.numpy())
cm_fig = confusion_matrix_figure(lasso_model, X_test.numpy(), y_test.numpy())

# NOTE(review): these files use a `lassonet_` prefix while the band-selection
# figure of the same model is saved as `lasso_band_selection.pdf` - confirm
# which naming is intended.
# NOTE(review): `save_dir` must already exist when these writes run.
metrics_df.to_csv(os.path.join(save_dir, 'lassonet_metrics.csv'))
cm_df.to_csv(os.path.join(save_dir, 'lassonet_cm.csv'))
cm_fig.savefig(os.path.join(save_dir, 'lassonet_cm.pdf'), bbox_inches='tight')
