### HISC-Lasso: Calcul du score RED pour $m=50$ et MCAcc

In [118]:
# install pyhsic lasso
!pip install pyHSICLasso



In [119]:
import numpy as np
import pandas as pd
from pyHSICLasso import HSICLasso
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [120]:
# red score
def res_score(features_index, X):
  features_values = X[:, features_index]
  correlation_matrix = np.corrcoef(features_values, rowvar=False)
  m_val = len(features_index)
  red = np.sum(np.abs(np.triu(correlation_matrix, k=1)))
  return 2 * (red / (m_val * (m_val - 1)))

In [121]:
# data process
X = pd.read_csv("ar10p.csv")
n, p = X.shape
p -= 1
Y = X.iloc[:, p].values
X = X.iloc[:, :p].values

#standardisation des données
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [122]:
rep = 50
features_index = np.arange(10, 51, 10)
top_m = features_index[-1]
MCAcc = np.zeros((len(features_index), rep))
RED_table50 = np.zeros((1, rep))

In [123]:
for i in range(5):
  print(i)
  x_index = np.random.choice(n, size=int(n * 0.8), replace=False)
  X_train, Y_train = X[x_index], Y[x_index]
  X_test, Y_test = np.delete(X, x_index, axis=0), np.delete(Y, x_index)

  # Sélection de variables avec HSIC Lasso
  hsic_lasso = HSICLasso()
  hsic_lasso.input(X_train, Y_train,kernelX="Gaussian",sigmaX=1,kernelY="Delta")

  hsic_lasso.classification(num_feat=top_m)
  ModelFeatureIndex = hsic_lasso.get_index()

  for m_index, nb_features in enumerate(features_index):
    x_features = ModelFeatureIndex[:nb_features]
    # Modèle de classification
    kernel = RBF()
    model = GaussianProcessClassifier(kernel=kernel,multi_class="one_vs_one")
    model.fit(X_train[:, x_features], Y_train)

    # Prédiction et évaluation
    y_pred = model.predict(X_test[: x_features])
    MCAcc[m_index, i] = accuracy_score(Y_test, y_pred)

    # Calcul de la redondance
    if nb_features == top_m:
      RED_table50[0, i] = res_score(x_features, X_train)

0
Block HSIC Lasso B = 20.
M set to 3.
Using Gaussian kernel for the features, Delta kernel for the outcomes.




TypeError: slice indices must be integers or None or have an __index__ method

In [None]:
# Résultats statistiques
rmean = MCAcc.mean(axis=1)
mean_acc = MCAcc[4, :].mean()
std_acc = MCAcc[4, :].std()
mean_red = RED_table50.mean()
std_red = RED_table50.std()

plt.plot(features_index, rmean, color='cyan', linestyle='-', marker='o')
plt.ylim(0, 1)
plt.show()

In [None]:
rmean

In [None]:
# Sauvegarde des résultats
np.savetxt("ar10p_hsic_mca.csv", MCAcc, delimiter=",")
np.savetxt("ar10p_hsic_red50.csv", RED_table50, delimiter=",")