In [1]:
import ast

import pandas as pd

# Load the feature-importance table and index it by feature name.
features = pd.read_feather(
    "../../all_data/feature_importances/scalars/Arterial_All_Scalars.feather"
).set_index("feature")

# The stored column labels are strings that spell out (algorithm, observation)
# tuples. ast.literal_eval parses such literals without executing code, so a
# tampered or malformed file cannot run arbitrary expressions the way eval would.
features.columns = pd.MultiIndex.from_tuples(
    [ast.literal_eval(column) for column in features.columns],
    names=["algorithm", "observation"],
)
best_algorithm = "light_gbm"

# Rank the features by decreasing absolute mean importance under the reference
# algorithm; changing `best_algorithm` above now changes the ranking as well.
sorted_features = features.abs().sort_values(by=[(best_algorithm, "mean")], ascending=False).index

In [2]:
from dash_website.feature_importances import BAR_PLOT_TABLE_COLUMNS

# Table with one row per feature, in ranked order. For every algorithm the
# cell holds the text "<mean> +- <std>", both rounded to 3 decimals.
table_features = pd.DataFrame(columns=BAR_PLOT_TABLE_COLUMNS.keys())
table_features["feature"] = sorted_features

for algorithm in ["correlation", "elastic_net", "light_gbm", "neural_network"]:
    mean_text = features.loc[sorted_features, (algorithm, "mean")].round(3).astype(str).values
    std_text = features.loc[sorted_features, (algorithm, "std")].round(3).astype(str).values
    table_features[f"percentage_{algorithm}"] = mean_text + " +- " + std_text

In [3]:
import plotly.graph_objects as go
from dash_website import ALGORITHMS_RENDERING

# Horizontal grouped bar chart: one trace per algorithm, one bar per feature,
# with the standard deviation shown both as an error bar and in the hover text.
fig = go.Figure()
hovertemplate = "Feature: %{y} <br>Percentage of overall feature importance: %{x:.3f} +- %{customdata:.3f}<br><extra></extra>"

for algorithm in ["correlation", "elastic_net", "light_gbm", "neural_network"]:
    # Everything is reversed relative to the ranking, presumably so that the
    # top-ranked feature is drawn at the top of the chart.
    mean_importance = features.loc[sorted_features, (algorithm, "mean")].abs().values[::-1]
    std_importance = features.loc[sorted_features, (algorithm, "std")].values[::-1]

    fig.add_bar(
        name=ALGORITHMS_RENDERING[algorithm],
        x=mean_importance,
        y=sorted_features[::-1],
        error_x=dict(array=std_importance, type="data"),
        orientation="h",
        hovertemplate=hovertemplate,
        customdata=std_importance,
    )

# The figure height grows with the number of features (25 px per feature).
fig.update_layout(
    width=800,
    height=int(25 * len(sorted_features)),
    xaxis=dict(title="Percentage of overall feature importance", showgrid=False),
    yaxis=dict(title="Features", showgrid=False),
)
fig.show()

In [8]:
from dash_website.feature_importances import FEATURES_CORRELATIONS_TABLE_COLUMNS
correlation_type = "pearson"

# One column per key of FEATURES_CORRELATIONS_TABLE_COLUMNS except "index",
# one row per feature; the loop below fills in the mean importances.
correlation_columns = pd.Index(FEATURES_CORRELATIONS_TABLE_COLUMNS.keys()).drop("index")
table_correlations_raw = pd.DataFrame(index=features.index, columns=correlation_columns)

for algorithm in ["correlation", "elastic_net", "light_gbm", "neural_network"]:
    table_correlations_raw[f"percentage_{algorithm}"] = features[(algorithm, "mean")]

# Pairwise correlation between the algorithms' mean importances, rounded to
# 3 decimals, with row labels mapped through FEATURES_CORRELATIONS_TABLE_COLUMNS
# and then moved into a regular "index" column.
correlation_matrix = table_correlations_raw.corr(method=correlation_type).round(3)
table_correlations = correlation_matrix.rename(index=FEATURES_CORRELATIONS_TABLE_COLUMNS).reset_index()
table_correlations

Unnamed: 0,index,percentage_correlation,percentage_elastic_net,percentage_light_gbm,percentage_neural_network
0,Percentage Correlation,1.0,0.356,0.369,-0.104
1,Percentage Elastic Net,0.356,1.0,0.201,0.124
2,Percentage Light GBM,0.369,0.201,1.0,0.585
3,Percentage Neural Network,-0.104,0.124,0.585,1.0
