In [1]:
# mount drive 
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# import 
import numpy as np
from scipy.stats import norm
import pandas as pd

from plotly import graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

pio.templates.default = "plotly_white"


In [3]:
# prediction
import statsmodels.api as sm 

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV

from sklearn.metrics import confusion_matrix, accuracy_score


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



In [4]:
# loading data
data = pd.read_table("/content/drive/My Drive/Colab Notebooks/project stat/very_final_table_paper.csv", sep=";", header=0)
inter_vars = pd.read_csv("/content/drive/My Drive/Colab Notebooks/project stat/interaction_vars.csv")

In [5]:
# adding intercation vars to data
for col in inter_vars.columns[1:]:
  data[col] = inter_vars[col]

In [6]:
# creating new column faller / none faller
data["faller"] = data["total_fall_count"].apply(lambda entry: 1 if entry > 1 else 0)

# transform drugs, diagnostic, gender to a binary features

# 1 : if drugs 1 otherwise 0
def create_drugs_col(entry):
  if entry == "['-1']":
    return 0

  return 1


# 1 : if diagnostic 1 otherwise 0
def create_diagnostic_col(entry):
  if entry == "['Aucun']":
    return 0

  return 1

data["take_drugs"] = data["drugs"].apply(create_drugs_col)
data["has_illness"] = data["diagnostic"].apply(create_diagnostic_col)
data["sex"] = data["gender"].map({"female": 0, "male": 1})

In [7]:

# score_function is implemented using the following signature
# parmas (model, X, Y) --> score (numerical)

# scoring the model
def compute_score(model, Y, X):
  treshold = 0.3
  prediction = [1 if prob > treshold else 0 for prob in  model.predict(X)]

  
  return accuracy_score(Y, prediction)


def Stepwise(data, target_feature, features, score_function=compute_score, max_nb_features=36):
  # in a given stage return the feat that maximaize score
  def select_max_features(actual_features, remaining_features):
    arr_features_by_score = []

    for feat in remaining_features:
      # target and predictors
      X_train = data[actual_features + [feat]]
      Y_train = data[target_feature]

      # fit model and compute score
      try:
        log_reg = sm.Logit(Y_train, X_train).fit(disp=False)
      except:
        return (feat, 0)
       
      score = score_function(log_reg, Y_train, X_train)

      arr_features_by_score.append((feat, score))

    # sort arr feature by score
    # to select the one that maximaze score
    arr_features_by_score.sort(key=lambda x: x[1], reverse=True)

    return arr_features_by_score[0]


  # dict to store features by importance
  # store feature: { rank, score }
  dict_features_by_importance = dict()


  # applying the algo
  actual = []
  remaining = features

  rank = 1
  while len(remaining) > 0 and len(actual) <= max_nb_features:
    # print(len(actual))
    feat, score = select_max_features(actual, remaining)

    actual.append(feat)
    remaining.remove(feat)

    dict_features_by_importance[feat] = dict(rank=rank, score=score)
    rank += 1

  # create dataframe and sort it by rank
  df_as_dict = dict(
      name=[k for k in dict_features_by_importance.keys()],
      rank=[v["rank"] for v in dict_features_by_importance.values()],
      score=[v["score"] for v in dict_features_by_importance.values()]
  )

  data_features_by_importance = pd.DataFrame(data=df_as_dict)
  data_features_by_importance.sort_values(by=['rank'], inplace=True)

  return data_features_by_importance

In [8]:
# features by cluster
patient = [
           "height",
           "weight",
           "sex",
           "age",
           "take_drugs",
           "has_illness"
          ]


position = [
            "FEATURE_confidence_ellipse_area_ML_AND_AP_opened_eyes",
            "FEATURE_maximum_value_ML_opened_eyes","FEATURE_maximum_value_AP_opened_eyes",
            "FEATURE_maximum_value_Radius_opened_eyes","FEATURE_mean_distance_ML_opened_eyes",
            "FEATURE_mean_distance_AP_opened_eyes",
            "FEATURE_mean_distance_Radius_opened_eyes",
            "FEATURE_RMS_ML_opened_eyes",
            "FEATURE_RMS_AP_opened_eyes",
            "FEATURE_RMS_Radius_opened_eyes",
            "FEATURE_amplitude_ML_opened_eyes",
            "FEATURE_amplitude_AP_opened_eyes",
            "FEATURE_amplitude_ML_AND_AP_opened_eyes",
            "FEATURE_sway_length_ML_opened_eyes",
            "FEATURE_sway_length_AP_opened_eyes",
            "FEATURE_sway_length_ML_AND_AP_opened_eyes",
            "FEATURE_Coefficient_sway_direction_ML_AND_AP_opened_eyes",
                      
            'FEATURE_maximum_value_AP_closed_eyes',
            'FEATURE_maximum_value_Radius_closed_eyes', 
            'FEATURE_mean_distance_ML_closed_eyes', 
            'FEATURE_mean_distance_AP_closed_eyes', 
            'FEATURE_mean_distance_Radius_closed_eyes', 
            'FEATURE_RMS_ML_closed_eyes', 
            'FEATURE_RMS_AP_closed_eyes', 
            'FEATURE_RMS_Radius_closed_eyes',
            'FEATURE_amplitude_ML_closed_eyes', 
            'FEATURE_amplitude_AP_closed_eyes', 
            'FEATURE_amplitude_ML_AND_AP_closed_eyes', 
            'FEATURE_sway_length_ML_closed_eyes', 
            'FEATURE_sway_length_AP_closed_eyes', 
            'FEATURE_sway_length_ML_AND_AP_closed_eyes', 
            'FEATURE_length_over_area_ML_AND_AP_closed_eyes', 
            'FEATURE_fractal_dimension_pd_ML_AND_AP_closed_eyes',
          
            'FEATURE_fractal_dimension_pd_ML_AND_AP_opened_eyes',
            'FEATURE_length_over_area_ML_AND_AP_opened_eyes',
            'FEATURE_maximum_value_ML_closed_eyes',
            'FEATURE_confidence_ellipse_area_ML_AND_AP_closed_eyes',
            'FEATURE_length_over_area_ML_AND_AP_opened_eyes'
            ]

dynamic = [
        
           "FEATURE_zero_crossing_SPD_ML_opened_eyes",
           "FEATURE_zero_crossing_SPD_AP_opened_eyes",
           "FEATURE_principal_sway_direction_ML_AND_AP_opened_eyes",
           "FEATURE_mean_velocity_ML_opened_eyes",
           "FEATURE_mean_velocity_AP_opened_eyes",
           "FEATURE_mean_velocity_ML_AND_AP_opened_eyes",
           "FEATURE_Coefficient_sway_direction_ML_AND_AP_opened_eyes",
           "FEATURE_planar_deviation_ML_AND_AP_opened_eyes",
           "FEATURE_peak_velocity_all_SPD_ML_opened_eyes",
           "FEATURE_peak_velocity_all_SPD_AP_opened_eyes",
           "FEATURE_peak_velocity_pos_SPD_ML_opened_eyes",
           "FEATURE_peak_velocity_pos_SPD_AP_opened_eyes",
 
           'FEATURE_mean_velocity_ML_closed_eyes', 
           'FEATURE_mean_velocity_AP_closed_eyes', 
           'FEATURE_mean_velocity_ML_AND_AP_closed_eyes', 
           'FEATURE_Coefficient_sway_direction_ML_AND_AP_closed_eyes', 
           'FEATURE_Quotient_both_direction_ML_AND_AP_closed_eyes', 
           'FEATURE_planar_deviation_ML_AND_AP_closed_eyes', 
           'FEATURE_peak_velocity_all_SPD_ML_closed_eyes', 
           'FEATURE_peak_velocity_all_SPD_AP_closed_eyes', 
           'FEATURE_peak_velocity_pos_SPD_ML_closed_eyes', 
           'FEATURE_peak_velocity_pos_SPD_AP_closed_eyes', 
           'FEATURE_peak_velocity_neg_SPD_ML_closed_eyes', 
           'FEATURE_peak_velocity_neg_SPD_AP_closed_eyes', 
           'FEATURE_mean_peak_Sway_Density_closed_eyes', 
           'FEATURE_mean_distance_peak_Sway_Density_closed_eyes', 
           'FEATURE_sway_area_per_second_ML_AND_AP_closed_eyes', 
           'FEATURE_phase_plane_parameters_ML_closed_eyes', 
           'FEATURE_phase_plane_parameters_AP_closed_eyes', 
           'FEATURE_fractal_dimension_cc_ML_AND_AP_closed_eyes', 
           'FEATURE_fractal_dimension_ce_ML_AND_AP_closed_eyes',
      
           'FEATURE_mean_frequency_ML_closed_eyes', 
           'FEATURE_mean_frequency_AP_closed_eyes', 
           'FEATURE_mean_frequency_ML_AND_AP_closed_eyes',
        
           'FEATURE_mean_frequency_ML_opened_eyes',
           'FEATURE_mean_frequency_AP_opened_eyes',
           'FEATURE_mean_frequency_ML_AND_AP_opened_eyes',
           'FEATURE_peak_velocity_neg_SPD_ML_opened_eyes',
           'FEATURE_peak_velocity_neg_SPD_AP_opened_eyes',
           'FEATURE_fractal_dimension_cc_ML_AND_AP_opened_eyes',
           'FEATURE_fractal_dimension_ce_ML_AND_AP_opened_eyes',
           'FEATURE_mean_peak_Sway_Density_opened_eyes',
           'FEATURE_phase_plane_parameters_AP_opened_eyes',
           'FEATURE_phase_plane_parameters_ML_opened_eyes',
           'FEATURE_sway_area_per_second_ML_AND_AP_opened_eyes',
           'FEATURE_zero_crossing_SPD_AP_closed_eyes',
           'FEATURE_zero_crossing_SPD_ML_closed_eyes',
           'FEATURE_principal_sway_direction_ML_AND_AP_closed_eyes'
           ]

frequency = [
             "FEATURE_frequency_mode_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_frequency_mode_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_total_power_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_total_power_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_power_frequency_50_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_power_frequency_50_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_power_frequency_95_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_power_frequency_95_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_centroid_frequency_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_centroid_frequency_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_frequency_dispersion_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_frequency_dispersion_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_energy_content_0_05_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_energy_content_0_05_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_energy_content_05_2_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_energy_content_05_2_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_energy_content_2_inf_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_energy_content_2_inf_Power_Spectrum_Density_AP_opened_eyes",
             "FEATURE_frequency_quotient_Power_Spectrum_Density_ML_opened_eyes",
             "FEATURE_frequency_quotient_Power_Spectrum_Density_AP_opened_eyes",
        
             'FEATURE_frequency_mode_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_frequency_mode_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_power_frequency_50_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_power_frequency_50_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_power_frequency_95_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_power_frequency_95_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_centroid_frequency_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_centroid_frequency_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_frequency_dispersion_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_frequency_dispersion_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_energy_content_0_05_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_energy_content_0_05_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_energy_content_05_2_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_energy_content_05_2_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_energy_content_2_inf_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_energy_content_2_inf_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_frequency_quotient_Power_Spectrum_Density_ML_closed_eyes',
             'FEATURE_frequency_quotient_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_total_power_Power_Spectrum_Density_AP_closed_eyes',
             'FEATURE_total_power_Power_Spectrum_Density_ML_closed_eyes'
             ]

stochastic = [
     
              'FEATURE_short_time_diffusion_Diffusion_ML_closed_eyes', 
              'FEATURE_long_time_diffusion_Diffusion_ML_closed_eyes', 
              'FEATURE_critical_time_Diffusion_ML_closed_eyes', 
              'FEATURE_long_time_scaling_Diffusion_ML_closed_eyes', 
              'FEATURE_short_time_diffusion_Diffusion_AP_closed_eyes', 
              'FEATURE_long_time_diffusion_Diffusion_AP_closed_eyes', 
              'FEATURE_critical_time_Diffusion_AP_closed_eyes', 
              'FEATURE_long_time_scaling_Diffusion_AP_closed_eyes', 
              'FEATURE_critical_displacement_Diffusion_ML_closed_eyes', 
              'FEATURE_critical_displacement_Diffusion_AP_closed_eyes',
  
              'FEATURE_short_time_diffusion_Diffusion_ML_opened_eyes',
              'FEATURE_long_time_diffusion_Diffusion_ML_opened_eyes',
              'FEATURE_critical_time_Diffusion_ML_opened_eyes',
              'FEATURE_critical_displacement_Diffusion_ML_opened_eyes',
              'FEATURE_long_time_scaling_Diffusion_ML_opened_eyes',
              'FEATURE_short_time_diffusion_Diffusion_AP_opened_eyes',
              'FEATURE_long_time_diffusion_Diffusion_AP_opened_eyes',
              'FEATURE_critical_time_Diffusion_AP_opened_eyes',
              'FEATURE_critical_displacement_Diffusion_AP_opened_eyes',
              'FEATURE_long_time_scaling_Diffusion_AP_opened_eyes'
              ]

interaction_vars = [
                     'FEATURE_maximum_value_ML_opened_eyes\FEATURE_power_frequency_95_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_maximum_value_ML_opened_eyes\FEATURE_centroid_frequency_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_phase_plane_parameters_ML_opened_eyes\FEATURE_long_time_scaling_Diffusion_AP_closed_eyes',
                     'FEATURE_maximum_value_ML_opened_eyes\FEATURE_frequency_quotient_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_frequency_quotient_Power_Spectrum_Density_ML_opened_eyes\FEATURE_maximum_value_ML_opened_eyes',
                     'FEATURE_power_frequency_95_Power_Spectrum_Density_ML_opened_eyes\FEATURE_maximum_value_ML_opened_eyes',
                     'FEATURE_amplitude_ML_closed_eyes\FEATURE_long_time_scaling_Diffusion_AP_closed_eyes',
                     'FEATURE_zero_crossing_SPD_AP_opened_eyes\FEATURE_length_over_area_ML_AND_AP_opened_eyes',
                     'FEATURE_amplitude_ML_closed_eyes\FEATURE_long_time_diffusion_Diffusion_AP_closed_eyes',
                     'FEATURE_RMS_ML_closed_eyes\FEATURE_long_time_scaling_Diffusion_AP_closed_eyes'
              ]


#patient = pd.Index(patient).unique()
position = pd.Index(position).unique()
dynamic = pd.Index(dynamic).unique()
frequency = pd.Index(frequency).unique()
stochastic = pd.Index(stochastic).unique()
interaction_vars = pd.Index(interaction_vars).unique()

clusters = dict(
    patient=patient,
    position=position,
    dynamic=dynamic,
    frequency=frequency,
    stochastic=stochastic,
    interaction_vars=interaction_vars
)

In [9]:
# dict of features and the corresponding cluster
dict_feature_cluster = dict()

for cluster_name, cluster in clusters.items():
  for feat in cluster:
    dict_feature_cluster[feat] = cluster_name

In [10]:
# selelected features from VIF
selected_features = [
                     "height",
                     "weight",
                     "sex",
                     "age",
                     "take_drugs",
                     "has_illness",

                     'FEATURE_Coefficient_sway_direction_ML_AND_AP_opened_eyes',
                     'FEATURE_length_over_area_ML_AND_AP_closed_eyes',
                     'FEATURE_length_over_area_ML_AND_AP_opened_eyes',
                     'FEATURE_confidence_ellipse_area_ML_AND_AP_closed_eyes',
                     'FEATURE_principal_sway_direction_ML_AND_AP_closed_eyes',
                     'FEATURE_principal_sway_direction_ML_AND_AP_opened_eyes',
                     'FEATURE_Coefficient_sway_direction_ML_AND_AP_closed_eyes',
                     'FEATURE_Coefficient_sway_direction_ML_AND_AP_opened_eyes',
                     'FEATURE_mean_peak_Sway_Density_closed_eyes',
                     'FEATURE_Quotient_both_direction_ML_AND_AP_closed_eyes',
                     'FEATURE_mean_peak_Sway_Density_opened_eyes',
                     'FEATURE_sway_area_per_second_ML_AND_AP_closed_eyes',
                     'FEATURE_mean_distance_peak_Sway_Density_closed_eyes',
                     'FEATURE_frequency_mode_Power_Spectrum_Density_AP_closed_eyes',
                     'FEATURE_frequency_mode_Power_Spectrum_Density_ML_opened_eyes',
                     'FEATURE_frequency_mode_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_frequency_mode_Power_Spectrum_Density_AP_opened_eyes',
                     'FEATURE_energy_content_2_inf_Power_Spectrum_Density_AP_opened_eyes',
                     'FEATURE_frequency_quotient_Power_Spectrum_Density_ML_opened_eyes',
                     'FEATURE_frequency_quotient_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_energy_content_05_2_Power_Spectrum_Density_AP_opened_eyes',
                     'FEATURE_frequency_quotient_Power_Spectrum_Density_AP_opened_eyes',
                     'FEATURE_energy_content_2_inf_Power_Spectrum_Density_AP_closed_eyes',
                     'FEATURE_energy_content_2_inf_Power_Spectrum_Density_ML_opened_eyes',
                     'FEATURE_energy_content_2_inf_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_power_frequency_50_Power_Spectrum_Density_AP_opened_eyes',
                     'FEATURE_frequency_quotient_Power_Spectrum_Density_AP_closed_eyes',
                     'FEATURE_power_frequency_50_Power_Spectrum_Density_ML_opened_eyes',
                     'FEATURE_power_frequency_50_Power_Spectrum_Density_AP_closed_eyes',
                     'FEATURE_energy_content_05_2_Power_Spectrum_Density_AP_closed_eyes',
                     'FEATURE_power_frequency_95_Power_Spectrum_Density_AP_closed_eyes',
                     'FEATURE_long_time_scaling_Diffusion_ML_opened_eyes',
                     'FEATURE_long_time_scaling_Diffusion_ML_closed_eyes',
                     'FEATURE_long_time_scaling_Diffusion_AP_closed_eyes',
                     'FEATURE_long_time_diffusion_Diffusion_AP_opened_eyes',
                     'FEATURE_long_time_scaling_Diffusion_AP_opened_eyes',
                     'FEATURE_long_time_diffusion_Diffusion_AP_closed_eyes',
                     'FEATURE_critical_time_Diffusion_ML_opened_eyes',
                     'FEATURE_short_time_diffusion_Diffusion_AP_closed_eyes',
                     'FEATURE_critical_time_Diffusion_ML_closed_eyes',
                     'FEATURE_short_time_diffusion_Diffusion_AP_opened_eyes',
                     'FEATURE_short_time_diffusion_Diffusion_ML_opened_eyes',
                     'FEATURE_critical_time_Diffusion_AP_closed_eyes',
                     'FEATURE_critical_time_Diffusion_AP_opened_eyes',
                     'FEATURE_long_time_diffusion_Diffusion_ML_opened_eyes',
                     'FEATURE_critical_displacement_Diffusion_ML_opened_eyes',
                     'FEATURE_critical_displacement_Diffusion_AP_closed_eyes',
                     'FEATURE_critical_displacement_Diffusion_AP_opened_eyes',
                     'FEATURE_short_time_diffusion_Diffusion_ML_closed_eyes',
                     'FEATURE_long_time_diffusion_Diffusion_ML_closed_eyes',
                     'FEATURE_critical_displacement_Diffusion_ML_closed_eyes',

                     'FEATURE_maximum_value_ML_opened_eyes\FEATURE_power_frequency_95_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_maximum_value_ML_opened_eyes\FEATURE_centroid_frequency_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_phase_plane_parameters_ML_opened_eyes\FEATURE_long_time_scaling_Diffusion_AP_closed_eyes',
                     'FEATURE_maximum_value_ML_opened_eyes\FEATURE_frequency_quotient_Power_Spectrum_Density_ML_closed_eyes',
                     'FEATURE_frequency_quotient_Power_Spectrum_Density_ML_opened_eyes\FEATURE_maximum_value_ML_opened_eyes',
                     'FEATURE_power_frequency_95_Power_Spectrum_Density_ML_opened_eyes\FEATURE_maximum_value_ML_opened_eyes',
                     'FEATURE_amplitude_ML_closed_eyes\FEATURE_long_time_scaling_Diffusion_AP_closed_eyes',
                     'FEATURE_zero_crossing_SPD_AP_opened_eyes\FEATURE_length_over_area_ML_AND_AP_opened_eyes',
                     'FEATURE_amplitude_ML_closed_eyes\FEATURE_long_time_diffusion_Diffusion_AP_closed_eyes',
                     'FEATURE_RMS_ML_closed_eyes\FEATURE_long_time_scaling_Diffusion_AP_closed_eyes'
                     ]
                     
# preserve distinct featues
selected_features=list(set(selected_features))

In [11]:
# appling stepwise
result_stepwise = Stepwise(data=data, target_feature="faller", features=selected_features, score_function=compute_score)
# create cluster name
result_stepwise["cluster"] = result_stepwise["name"].apply(lambda entry: dict_feature_cluster[entry])

In [12]:
colors = result_stepwise["cluster"].map({
    "position": "#636EF1", 
    "dynamic": "#EF553B", 
    "frequency": "#00CC96", 
    "stochastic": "#AB63FA", 
    "patient": "#FFA15A",
    "interaction": "#19D3F3"
    })

size = [15 if result_stepwise["cluster"].values[i] == "interaction_vars" else 7 for i in range(len(result_stepwise))]


fig = go.Figure(data=[
                      go.Scatter(
                          x=result_stepwise["name"],
                          y=result_stepwise["score"],
                          mode="markers+lines",
                          marker=dict(
                              color=colors,
                              size=size,
                              ),
                          line=dict(
                            shape="spline",
                            width=1  
                          ),
                          text=result_stepwise["cluster"]
                          )
])

fig.update_layout(
    title="Forward stepwise",
    yaxis_title="accuracy score",
    height=800
)
fig.update_xaxes(tickfont=dict(size=9), tickangle=45)

fig.show()

In [13]:
# for readability, show the 12 first features

nb_features_to_show = 12

colors = result_stepwise["cluster"].map({
    "position": "#636EF1", 
    "dynamic": "#EF553B", 
    "frequency": "#00CC96", 
    "stochastic": "#AB63FA", 
    "patient": "#FFA15A"
    })[:nb_features_to_show]

size = [15 for i in range(len(result_stepwise))][:nb_features_to_show]


fig = go.Figure(data=[
                      go.Scatter(
                          x=result_stepwise["name"][:nb_features_to_show],
                          y=result_stepwise["score"][:nb_features_to_show],
                          mode="markers+lines",
                          marker=dict(
                              color=colors,
                              size=size,
                              ),
                          line=dict(
                            shape="spline",
                            width=1  
                          ),
                          text=result_stepwise["cluster"][:nb_features_to_show]
                          )
])

fig.update_layout(
    title="Forward stepwise",
    yaxis_title="accuracy score",
    height=800
)
fig.update_xaxes(tickfont=dict(size=10), tickangle=45)

fig.show()

In [14]:
clusters_proportions = pd.DataFrame(columns=["cluster", "proportion"])

data_without_patient = result_stepwise[result_stepwise["cluster"] != "patient"]
length = len(data_without_patient)

for cluster in clusters.keys():
  if cluster == "patient":
    continue
   
  count = len(data_without_patient[data_without_patient["cluster"] == cluster])

  clusters_proportions = clusters_proportions.append({"cluster": cluster, "proportion": count / length}, ignore_index=True)


In [15]:
fig = px.pie(clusters_proportions, names="cluster", values="proportion", title="Propotion of clusters in the final set of the best linear combination of features")

fig.show()

In [16]:
result_stepwise["name"] = result_stepwise["name"].apply(lambda entry: entry.replace("FEATURE_", ""))
result_stepwise.to_latex(index=False, columns=["name", "score",	"cluster"])

'\\begin{tabular}{lrl}\n\\toprule\n                                              name &  score &           cluster \\\\\n\\midrule\n            length\\_over\\_area\\_ML\\_AND\\_AP\\_closed\\_eyes &   0.65 &          position \\\\\n      long\\_time\\_diffusion\\_Diffusion\\_ML\\_closed\\_eyes &   0.66 &        stochastic \\\\\n frequency\\_quotient\\_Power\\_Spectrum\\_Density\\_ML\\_o... &   0.67 &         frequency \\\\\n frequency\\_quotient\\_Power\\_Spectrum\\_Density\\_ML\\_o... &   0.68 &  interaction\\_vars \\\\\n maximum\\_value\\_ML\\_opened\\_eyes\\textbackslash centroid\\_frequenc... &   0.74 &  interaction\\_vars \\\\\n power\\_frequency\\_50\\_Power\\_Spectrum\\_Density\\_ML\\_o... &   0.77 &         frequency \\\\\n frequency\\_quotient\\_Power\\_Spectrum\\_Density\\_ML\\_c... &   0.78 &         frequency \\\\\n frequency\\_mode\\_Power\\_Spectrum\\_Density\\_ML\\_opene... &   0.78 &         frequency \\\\\n     confidence\\_ellipse\\_area\\_ML\\_AND\\_AP\\_closed\\_eye