In [3]:
import pandas as pd
from ec_feature_selection import ECFS
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

### Load and split the data

In [4]:
# Load scikit-learn's built-in breast cancer dataset and pull out the pieces
# used below: the feature matrix, the labels, and the column names.
data = load_breast_cancer()

X = data['data']
y = data['target']
feature_names = data['feature_names']

# Fix the seed so the 70/30 split -- and every result derived from it in the
# cells below -- is the same after Restart Kernel -> Run All. Without it each
# run draws a different split and the recorded outputs cannot be reproduced.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

print(X_train.shape)

(398, 30)


In [5]:
# Create the ECFS selector. n_features=10 matches the 10 columns of the
# reduced arrays printed further down, i.e. presumably the number of
# features kept by transform() -- confirm against the ECFS documentation.
ecfs = ECFS(n_features=10)

### You can fit and transform the data separately

In [6]:
# Fit the selector on the training split only, then apply the same fitted
# selector to both splits so the test data plays no part in the fit.
fit_options = {'alpha': 0.5, 'positive_class': 1, 'negative_class': 0}
ecfs.fit(X=X_train, y=y_train, **fit_options)

# transform() is applied to the train split first, then the test split.
X_train_reduced, X_test_reduced = (
    ecfs.transform(split) for split in (X_train, X_test)
)
print(X_train_reduced.shape)

(398, 10)


### Or use fit_transform

In [7]:
# One-step alternative: fit on the training split and reduce it in a single
# call, then reuse the fitted selector on the test split.
X_train_reduced = ecfs.fit_transform(
    X=X_train,
    y=y_train,
    alpha=0.5,
    positive_class=1,
    negative_class=0,
)
X_test_reduced = ecfs.transform(X_test)
print(X_train_reduced.shape)

(398, 10)


### You can use the ecfs instance to explore the features and the metrics

In [8]:
# Collect the per-feature attributes of the fitted selector into one table:
# each row pairs a feature name with its ranking value and its two scores.
# (pandas is imported in the notebook's first cell with the other imports.)
summary = pd.DataFrame({
    'Feature': feature_names,
    'Ranking': ecfs.ranking,
    'MI': ecfs.mutual_information,
    'Fisher Score': ecfs.fisher_score,
})

# Show the rows ordered by ascending Ranking value (presumably 0 is the
# top-ranked feature -- confirm against the ECFS documentation).
summary.sort_values(by='Ranking')

Unnamed: 0,Feature,Ranking,MI,Fisher Score
8,mean symmetry,0,0.044345,0.230506
7,mean concave points,1,0.280409,2.883455
4,mean smoothness,2,0.057427,0.2856486
1,mean texture,3,0.095725,0.5187806
22,worst perimeter,4,0.301938,2.80654
17,concave points error,5,0.111879,0.5667031
12,perimeter error,6,0.235063,0.8096157
11,texture error,7,0.018145,0.0007395344
24,worst smoothness,8,0.062251,0.3470334
27,worst concave points,9,0.258247,3.256289


### You can explore and use the eigenvalues and eigenvectors

In [9]:
# Display the eigenvalues exposed by the fitted selector. The recorded output
# below holds 30 values (one per input column) and includes negative entries.
ecfs.eigenvalues

array([-1.52333177e+03, -5.20671376e+01, -3.46827751e+01, -6.59155705e+00,
       -4.41425286e+00, -1.73401904e+00, -7.48270514e-01, -3.85449622e-01,
       -2.42953553e-01, -1.79007989e-01, -9.37644020e-02, -6.54108602e-02,
       -3.10540949e-02, -2.23882917e-02, -1.45221241e-02, -7.92699605e-03,
       -6.03447007e-03, -3.25261003e-03, -2.46505777e-03, -1.88400569e-03,
       -1.43512622e-03, -1.05572277e-03, -8.50986792e-04, -4.32349680e-04,
       -1.78862165e-04, -1.06403736e-04, -1.02982781e-04, -8.22789398e-05,
        1.04382763e+00,  2.16725686e+03])