Skip to content

Commit

Permalink
feature importance
Browse files Browse the repository at this point in the history
  • Loading branch information
mgraffg committed Feb 23, 2024
1 parent 17a86b2 commit dbbf4f6
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 0 deletions.
43 changes: 43 additions & 0 deletions IngeoML/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2024 Mario Graff Guerrero

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sklearn.metrics import f1_score
import numpy as np


def _macro_f1(y_true, y_pred):
    """Return the macro-averaged F1 score of ``y_pred`` against ``y_true``."""
    return f1_score(y_true, y_pred, average='macro')


def feature_importance(model, X, y, predictions,
                       score=None):
    """Estimate the feature importance of the model.

    The importance is the score obtained by ``model`` on the untouched
    inputs minus the score of every prediction made with one feature
    shuffled.

    :param model: Fitted estimator exposing ``predict``.
    :param X: Inputs used to compute the baseline score.
    :param y: Ground truth associated with ``X``.
    :param predictions: Nested sequence indexed first by feature and then
        by repetition; each entry is a prediction that ``score`` can
        compare against ``y`` (e.g., the output of
        ``predict_shuffle_inputs``).
    :param score: Callable ``score(y, hy)`` returning a number. When it is
        ``None`` the macro-averaged F1 score is used.
    :return: Array with one row per feature and one column per repetition
        holding ``baseline - score_with_shuffled_feature``.
    """
    scorer = _macro_f1 if score is None else score
    base = scorer(y, model.predict(X))
    shuffled_scores = np.array([[scorer(y, hy) for hy in per_feature]
                                for per_feature in predictions])
    return base - shuffled_scores


def predict_shuffle_inputs(model, X, times: int = 100, random_state=None):
    """Predict X by shuffling all the inputs, one column at a time.

    For every column of ``X`` the column is shuffled ``times`` times and
    ``model.predict`` is called after each shuffle; the remaining columns
    keep their original values.

    :param model: Fitted estimator exposing ``predict``.
    :param X: Two-dimensional array of inputs. It is not modified.
    :param times: Number of shuffles (and predictions) per column.
    :param random_state: Seed or generator forwarded to
        ``numpy.random.default_rng``; ``None`` draws fresh entropy.
    :return: Array stacking, for each column, the ``times`` predictions;
        with one-dimensional predictions its shape is
        ``(X.shape[1], times, X.shape[0])``.
    """
    rng = np.random.default_rng(random_state)
    original = np.asarray(X)
    # Shuffle a private copy: shuffling a column view of the argument
    # would permute the caller's data in place.
    work = original.copy()
    output = []
    for i in range(work.shape[1]):
        inner = []
        for _ in range(times):
            rng.shuffle(work[:, i])
            inner.append(model.predict(work))
        output.append(np.vstack(inner))
        # Put the column back so the next feature is evaluated with every
        # other column untouched.
        work[:, i] = original[:, i]
    return np.array(output)


41 changes: 41 additions & 0 deletions IngeoML/tests/test_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright 2024 Mario Graff Guerrero

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from sklearn.datasets import load_iris
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import LinearSVC
from IngeoML.analysis import feature_importance, predict_shuffle_inputs


def test_feature_importance():
    """Test that feature_importance yields one value per feature and shuffle"""

    X, y = load_iris(return_X_y=True)
    # A fixed seed keeps the train/validation partition identical on
    # every run, so a failure can be reproduced.
    split = ShuffleSplit(n_splits=1, train_size=0.7,
                         random_state=0).split(X, y)
    tr, vs = next(split)
    m = LinearSVC(dual='auto').fit(X[tr], y[tr])
    predictions = predict_shuffle_inputs(m, X[vs], times=97)
    diff = feature_importance(m, X[vs], y[vs], predictions)
    # Iris has 4 features and 97 shuffles were requested per feature.
    assert diff.shape == (4, 97)


def test_predict_shuffle_inputs():
    """Test the shape returned by predict_shuffle_inputs with default times"""

    X, y = load_iris(return_X_y=True)
    # A fixed seed keeps the train/validation partition identical on
    # every run, so a failure can be reproduced.
    split = ShuffleSplit(n_splits=1, train_size=0.7,
                         random_state=0).split(X, y)
    tr, vs = next(split)
    m = LinearSVC(dual='auto').fit(X[tr], y[tr])
    hy = predict_shuffle_inputs(m, X[vs])
    # 4 features, the default of 100 shuffles, one prediction per sample.
    assert hy.shape == (4, 100, vs.shape[0])

0 comments on commit dbbf4f6

Please sign in to comment.