[A brief introduction on how to perform feature selection with scikit-feature](https://jundongl.github.io/scikit-feature/tutorial.html)

In [1]:
import scipy.io

In [2]:
# Load the COIL20 dataset from a MATLAB .mat file located next to the notebook.
# The result is indexed by variable name below ('X' for features, 'Y' for labels).
mat = scipy.io.loadmat("COIL20.mat")

In [3]:
# Feature matrix: rows are samples, columns are the features that get ranked
# and selected further down.
X=mat['X']
print(X)

[[0.01568627 0.01568627 0.01568627 ... 0.01568627 0.01568627 0.01568627]
 [0.01960784 0.01960784 0.01960784 ... 0.01960784 0.01960784 0.01960784]
 [0.01568627 0.01568627 0.01568627 ... 0.01568627 0.01568627 0.01568627]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [4]:
# 'Y' is stored as a 2-D array; take its first column to get a flat label vector.
y = mat['Y'][:, 0] 
print(y)

[ 1  1  1 ... 20 20 20]


In [5]:
import numpy as np

# Despite its name, n_labels holds the *shape tuple* of the label vector
# (one entry per labelled sample), not the number of distinct classes.
n_labels = y.shape
print(n_labels)

(1440,)


In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=40,
)

In [7]:
from skfeature.function.similarity_based import fisher_score

In [8]:
# Score every feature using the training split only, so the held-out test
# samples do not influence which features get selected.
score = fisher_score.fisher_score(X_train, y_train)
print(score)

[13.96904931  0.5376816   0.19923194 ...  3.71944606 14.01720752
 14.05075518]


In [9]:
# Convert the per-feature scores into an ordering of feature indices.
# NOTE(review): presumably best-first (highest Fisher score first) -- confirm
# against the installed skfeature version.
idx = fisher_score.feature_ranking(score)

In [10]:
# Number of top-ranked features to keep.
num_fea = 5

# Apply the same column subset to both splits so the classifier sees
# identical features at fit and predict time.
selected_features_train = X_train[:, idx[:num_fea]]
selected_features_test = X_test[:, idx[:num_fea]]
print(selected_features_train)

[[0.         0.         0.         0.         0.        ]
 [0.02352941 0.02352941 0.02352941 0.02352941 0.02352941]
 [0.         0.         0.         0.         0.        ]
 ...
 [0.01568627 0.01568627 0.01568627 0.01568627 0.01568627]
 [0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.        ]]


In [11]:

from sklearn import svm

# LinearSVC's solver relies on a pseudo-random number generator, so results can
# vary slightly between runs. Pin the seed (same value as the train/test split)
# so the fitted model and the reported accuracy are reproducible.
clf = svm.LinearSVC(random_state=40)


In [12]:
# Train the linear SVM on the reduced (top-num_fea) training features.
# Left as the cell's last expression so the fitted estimator is displayed.
clf.fit(selected_features_train, y_train)

LinearSVC()

In [13]:
# Predict labels for the held-out samples, using the same selected columns
# the classifier was trained on.
y_predict = clf.predict(selected_features_test)
print(y_predict)

[19 19  2 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19
 19 19 19 19 19 19  2 19 19 19 19 19 19 19 19  2 19 19 19 19  2 19  2  2
 19 19 19 19  2 19 19 19 19 19 19 19 19 19 19 19 19  2 19 19  2 19 19 19
 19 19 19 19  2 19 19 19 19 19 19 19 19 19 19 19 19  2  2 19 19 19 19 19
  2 19  2 19 19 19 19  2  2  5 19 19 19 19 19 19  2 19 19 19 19 19  2 19
 19 19 19 19  2 19  1 19 19  2 19 19 19 19 19  2  2 19 19 19 19 19 19 19
 19 19 19 19 19 19 19 19  2 19 19 19 19 19 19 19 19 19  2 19 19 19 19 19
 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19
 19 19 19 19 19 19 19 19 19 19 19 19 19 19  2 19 19 19 19 19 19 19 19 19
 19 19  2 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19 19
 19 19 19 19 19 19 19 19 19 19 19 19  2 19 19 19  2 19 19 19 19  2 19 19
 19 19  2 19 19 19 19 19 19 19  2 19 19 19 19 19 19 19 19 19 19 19 19 19]


In [14]:
from sklearn.metrics import accuracy_score

# Fraction of held-out samples whose predicted label equals the true label.
# NOTE(review): the recorded value (0.09375) is barely above chance for 20
# classes and the predictions collapse onto very few labels -- the 5 selected
# features look insufficient here; consider a larger num_fea.
acc = accuracy_score(y_true=y_test, y_pred=y_predict)
print(acc)


0.09375


## For Unsupervised Learning Problems

In [15]:
from skfeature.utility import construct_W
from skfeature.function.similarity_based import lap_score

# Settings for the sample affinity graph: 5-nearest-neighbour connectivity
# under Euclidean distance, with heat-kernel edge weights (t = 1).
kwargs_W = {
    "metric": "euclidean",
    "neighbor_mode": "knn",
    "weight_mode": "heat_kernel",
    "k": 5,
    "t": 1,
}
W = construct_W.construct_W(X, **kwargs_W)

# Laplacian score of each feature, computed on the full X without any labels.
# This rebinds `score`, replacing the Fisher scores from the supervised section.
score = lap_score.lap_score(X, W=W)
print(score)


[0.01269462 0.00637613 0.00333286 ... 0.0123851  0.01271441 0.01269681]


In [16]:
# Rank features by Laplacian score. This rebinds `idx`, replacing the Fisher
# ranking from the supervised section.
# NOTE(review): ordering direction is decided inside skfeature -- confirm that
# the first entries are the features to keep.
idx = lap_score.feature_ranking(score)

In [17]:
# Keep the first num_fea features of the Laplacian ranking, over all samples
# (no train/test split in the unsupervised setting).
num_fea = 5
selected_features = X[:, idx[:num_fea]]

#### Performance Evaluation

Here, we use the normalized mutual information (NMI) score and accuracy (ACC) to measure the performance of the unsupervised feature selection algorithm Laplacian Score. Usually, the parameter n_clusters is set to the number of classes in the ground truth.

In [18]:
!pip install scikit-learn==0.22.2

Collecting scikit-learn==0.22.2
  Downloading scikit_learn-0.22.2-cp38-cp38-manylinux1_x86_64.whl (7.0 MB)
[K     |████████████████████████████████| 7.0 MB 9.9 MB/s eta 0:00:01
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.24.0
    Uninstalling scikit-learn-0.24.0:
      Successfully uninstalled scikit-learn-0.24.0
Successfully installed scikit-learn-0.22.2


In [19]:
from skfeature.utility import unsupervised_evaluation

import numpy as np

# NOTE: if this import raises ImportError from sklearn.utils.deprecation, the
# kernel is most likely still holding modules from a different scikit-learn
# version than the one installed on disk -- restart the kernel and run again.

# Cluster into as many groups as there are distinct ground-truth classes.
num_cluster = len(np.unique(y))
print(num_cluster)

# Cluster the samples on the selected features and compare the clustering with
# the true labels. The call returns the NMI and ACC described above.
# Note: this rebinds `acc`, replacing the supervised accuracy computed earlier.
nmi, acc = unsupervised_evaluation.evaluation(
    X_selected=selected_features,
    n_clusters=num_cluster,
    y=y,
)
print(nmi)
print(acc)



ImportError: cannot import name '_raise_dep_warning_if_not_pytest' from 'sklearn.utils.deprecation' (/opt/conda/lib/python3.8/site-packages/sklearn/utils/deprecation.py)