In [31]:
import pandas as pd
from ec_feature_selection import ECFS
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

### Load and split the data

In [32]:
# Load the breast cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

X = data['data']
y = data['target']
feature_names = data['feature_names']

# Fix the seed so the 70/30 split -- and every ranking, table and eigenvalue
# computed from it further down -- is identical on each Restart & Run All.
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED
)

print(X_train.shape)

(398, 30)


In [33]:
# Create the feature selector. n_features=10 matches the 10 columns that the
# transformed arrays have in the cells below.
ecfs = ECFS(n_features=10)

### You can fit and transform the data separately

In [34]:
# Step 1: fit the selector on the training split only.
ecfs.fit(
    X=X_train,
    y=y_train,
    alpha=0.5,
    positive_class=1,
    negative_class=0,
)

# Step 2: apply the fitted selector to each split.
X_train_reduced = ecfs.transform(X_train)
X_test_reduced = ecfs.transform(X_test)

print(X_train_reduced.shape)

(398, 10)


### Or use `fit_transform` to fit and transform the training data in one call

In [35]:
# Fit on the training split and get its reduced version back in a single call.
X_train_reduced = ecfs.fit_transform(
    X=X_train,
    y=y_train,
    alpha=0.5,
    positive_class=1,
    negative_class=0,
)

# The test split is only transformed, never used for fitting.
X_test_reduced = ecfs.transform(X_test)

print(X_train_reduced.shape)

(398, 10)


### You can use the ecfs instance to explore the features and the metrics

In [36]:
# Gather the per-feature attributes exposed by the fitted selector in one table.
# (pandas is imported in the notebook's top import cell.)
# NOTE(review): the columns are paired positionally with feature_names, which
# assumes ecfs.ranking[i] is the rank of feature i. If ranking is instead an
# array of feature indices ordered best-first, the 'Ranking' column is
# misaligned with 'Feature' -- confirm against the ECFS documentation.
summary = pd.DataFrame({
    'Feature': feature_names,
    'Ranking': ecfs.ranking,
    'MI': ecfs.mutual_information,
    'Fisher Score': ecfs.fisher_score,
})

# Display the table ordered by the 'Ranking' column (ascending).
summary.sort_values(by='Ranking')

Unnamed: 0,Feature,Ranking,MI,Fisher Score
7,mean concave points,0,0.282755,2.810265
17,concave points error,1,0.134749,0.538584
6,mean concavity,2,0.258248,1.771509
1,mean texture,3,0.106682,0.508055
19,fractal dimension error,4,0.041846,0.022056
11,texture error,5,0.014854,0.000317
8,mean symmetry,6,0.047411,0.238228
4,mean smoothness,7,0.068865,0.3666
23,worst area,8,0.299226,1.98169
27,worst concave points,9,0.274446,3.571345


### You can explore and use the eigenvalues and eigenvectors

In [37]:
# Display the eigenvalues held by the fitted selector.
ecfs.eigenvalues
# The eigenvectors are available the same way: ecfs.eigenvectors

array([-2.49553526e+00, -8.62494620e-02, -5.60693122e-02, -1.07981280e-02,
       -7.71889859e-03, -2.93811088e-03, -1.27298250e-03, -6.59763859e-04,
       -4.33235426e-04, -3.03557399e-04, -1.64625747e-04, -1.16883095e-04,
       -5.55038929e-05, -3.98895455e-05, -2.60899111e-05, -1.58655235e-05,
       -1.14320881e-05, -7.34073281e-06, -4.51190498e-06, -3.64559329e-06,
       -2.83238923e-06, -1.83207443e-06, -1.62845359e-06, -1.00274176e-06,
       -7.06611192e-07, -2.59659626e-07, -1.66571579e-07, -1.56145496e-07,
        7.97934926e-01,  5.44193469e+00])