From a46e70c94e0ed96cc3160d370df35d3bc0bc7888 Mon Sep 17 00:00:00 2001
From: Michael Garod
Date: Mon, 26 Oct 2020 10:30:04 -0400
Subject: [PATCH] Add param to limit Feature Importances to top_n (#1102)

Feature importances can now visualize only the top or bottom N features
(the bottom N via a negative argument), ensuring that the absolute
magnitude of each feature is respected during selection. The filter works
for the relative, absolute, and stacked modes of the importance chart.

Thank you to @mgarod for this excellent contribution!

Co-authored-by: Michael Garod
---
 docs/api/model_selection/importances.rst      | 44 ++++++++++++
 .../test_importances/test_topn.png            | Bin 0 -> 3493 bytes
 .../test_importances/test_topn_negative.png   | Bin 0 -> 3449 bytes
 .../test_topn_negative_stacked.png            | Bin 0 -> 3581 bytes
 .../test_importances/test_topn_stacked.png    | Bin 0 -> 3597 bytes
 .../test_model_selection/test_importances.py  | 66 ++++++++++++++++++
 yellowbrick/model_selection/importances.py    | 63 ++++++++++++++++-
 7 files changed, 170 insertions(+), 3 deletions(-)
 create mode 100644 tests/baseline_images/test_model_selection/test_importances/test_topn.png
 create mode 100644 tests/baseline_images/test_model_selection/test_importances/test_topn_negative.png
 create mode 100644 tests/baseline_images/test_model_selection/test_importances/test_topn_negative_stacked.png
 create mode 100644 tests/baseline_images/test_model_selection/test_importances/test_topn_stacked.png

diff --git a/docs/api/model_selection/importances.rst b/docs/api/model_selection/importances.rst
index 325bf337c..d2f58441f 100644
--- a/docs/api/model_selection/importances.rst
+++ b/docs/api/model_selection/importances.rst
@@ -111,6 +111,50 @@ Taking the mean of the importances may be undesirable for several reasons. For e
     viz.fit(X, y)
     viz.show()

+Top and Bottom Feature Importances
+----------------------------------
+
+It is often illuminating during feature engineering to identify the most or
+least informative features. To view only the N most informative features,
+specify the ``topn`` argument to the visualizer. Similar to slicing a list
+ranked by importance, if ``topn`` is a positive integer, the N most highly
+ranked features are displayed. If ``topn`` is a negative integer, the N
+lowest ranked features are displayed instead.
+
+.. plot::
+    :context: close-figs
+    :alt: Coefficient importances for LASSO regression
+
+    from sklearn.linear_model import Lasso
+    from yellowbrick.datasets import load_concrete
+    from yellowbrick.model_selection import FeatureImportances
+
+    # Load the regression dataset
+    dataset = load_concrete(return_dataset=True)
+    X, y = dataset.to_data()
+
+    # Title case the features for better display and create the visualizer
+    labels = list(map(lambda s: s.title(), dataset.meta['features']))
+    viz = FeatureImportances(Lasso(), labels=labels, relative=False, topn=3)
+
+    # Fit and show the feature importances
+    viz.fit(X, y)
+    viz.show()
+
+Using ``topn=3``, we can identify the three most informative features in the
+concrete dataset as ``splast``, ``cement``, and ``water``. This approach to
+visualization may assist with *factor analysis* - the study of how variables
+contribute to an overall model. Note that although ``water`` has a negative
+coefficient, it is the magnitude (absolute value) of the coefficient that
+determines the ranking; the sign merely reflects the negative correlation of
+``water`` with the strength of concrete. Alternatively, ``topn=-3`` would
+reveal the three least informative features in the model.
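The slicing analogy can be made concrete with a short, purely illustrative
sketch (the feature names echo the concrete dataset above, but the ordering
here is invented):

.. code-block:: python

    # Features ranked from least to most informative (ascending order),
    # mirroring how the visualizer ranks them internally
    ranked = ["ash", "age", "fine", "water", "cement", "splast"]

    ranked[-3:]  # topn=3  -> the three most informative features
    ranked[:3]   # topn=-3 -> the three least informative features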
+This approach is useful for model tuning, similar to :doc:`rfecv`, but
+instead of automatically removing features it allows you to observe how the
+lowest-ranked features change across different model instantiations. In
+either case, if you have many features, using ``topn`` can significantly
+improve the readability and analytical value of the chart.
+
+The ``topn`` parameter can also be used when ``stack=True``. In the context
+of stacked feature importance graphs, the information of a feature is the
+width of the entire bar, or the sum of the absolute value of all
+coefficients contained therein.
+
+.. plot::
+    :context: close-figs
+    :alt: Stacked per-class importances with Logistic Regression
+
+    from yellowbrick.model_selection import FeatureImportances
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.datasets import load_iris
+
+    data = load_iris()
+    X, y = data.data, data.target
+
+    model = LogisticRegression(multi_class="auto", solver="liblinear")
+    viz = FeatureImportances(model, stack=True, relative=False, topn=-3)
+    viz.fit(X, y)
+    viz.show()
+
 Discussion
 ----------
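The stacked ranking described above can be traced with a few lines of numpy
that mirror the selection logic added to ``fit()`` further down in this
patch (the coefficient values are invented for illustration):

.. code-block:: python

    import numpy as np

    # Per-class coefficients: 3 classes x 4 features (illustrative values)
    coefs = np.array([
        [ 0.4, -2.1,  0.3,  1.8],
        [-0.2,  1.5, -0.1, -2.2],
        [ 0.1,  0.6,  0.2,  0.4],
    ])

    # The width of each stacked bar is the summed absolute contribution
    widths = np.sum(np.absolute(coefs), 0)  # -> [0.7, 4.2, 0.6, 4.4]

    # topn=-3 keeps the three narrowest bars (least informative features)
    bottom3 = np.argsort(widths)[:3]        # -> indices [2, 0, 1]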
diff --git a/tests/baseline_images/test_model_selection/test_importances/test_topn.png b/tests/baseline_images/test_model_selection/test_importances/test_topn.png
new file mode 100644
index 0000000000000000000000000000000000000000..75fe59b7ee62f5b38e5f010cb4edc349fb19b317
GIT binary patch
literal 3493
[base85 binary image data omitted]

diff --git a/tests/baseline_images/test_model_selection/test_importances/test_topn_negative.png b/tests/baseline_images/test_model_selection/test_importances/test_topn_negative.png
new file mode 100644
index 0000000000000000000000000000000000000000..4983b418eb70e278dc53e36bd2d07e89f1d15b2b
GIT binary patch
literal 3449
[base85 binary image data omitted]

diff --git a/tests/baseline_images/test_model_selection/test_importances/test_topn_negative_stacked.png b/tests/baseline_images/test_model_selection/test_importances/test_topn_negative_stacked.png
new file mode 100644
index 0000000000000000000000000000000000000000..4d81f699783ad9faeedb2f57d7ffc8b7fc0b8eee
GIT binary patch
literal 3581
[base85 binary image data omitted]

diff --git a/tests/baseline_images/test_model_selection/test_importances/test_topn_stacked.png b/tests/baseline_images/test_model_selection/test_importances/test_topn_stacked.png
new file mode 100644
index 0000000000000000000000000000000000000000..99151181176957a38290bd253e70e986175c0f3d
GIT binary patch
literal 3597
[base85 binary image data omitted]

[diff for .../test_model_selection/test_importances.py (66 insertions) is garbled beyond recovery in this copy of the patch]

diff --git a/yellowbrick/model_selection/importances.py b/yellowbrick/model_selection/importances.py
[earlier hunks adding the ``topn`` parameter are garbled in this copy; the patch resumes in ``fit()`` below]
+        if self.topn and abs(self.topn) > self.features_.shape[0]:
+            raise YellowbrickValueError(
+                "topn '{}' cannot be greater than the number of "
+                "features '{}'".format(self.topn, self.features_.shape[0])
+            )
+
         # Sort the features and their importances
         if self.stack:
-            sort_idx = np.argsort(np.mean(self.feature_importances_, 0))
+            if self.topn:
+                abs_sort_idx = np.argsort(
+                    np.sum(np.absolute(self.feature_importances_), 0)
+                )
+                sort_idx = self._reduce_topn(abs_sort_idx)
+            else:
+                sort_idx = np.argsort(np.mean(self.feature_importances_, 0))
+
             self.features_ = self.features_[sort_idx]
             self.feature_importances_ = self.feature_importances_[:, sort_idx]
         else:
+            if self.topn:
+                abs_sort_idx = np.argsort(np.absolute(self.feature_importances_))
+                abs_sort_idx = self._reduce_topn(abs_sort_idx)
+
+                self.features_ = self.features_[abs_sort_idx]
+                self.feature_importances_ = self.feature_importances_[abs_sort_idx]
+
+            # Sort features by value (sorting a second time if topn)
             sort_idx = np.argsort(self.feature_importances_)
             self.features_ = self.features_[sort_idx]
             self.feature_importances_ = self.feature_importances_[sort_idx]
@@ -276,7 +303,7 @@ def finalize(self, **kwargs):
         # Set the title
         self.set_title(
             "Feature Importances of {} Features using {}".format(
-                len(self.features_), self.name
+                self._get_topn_title(), self.name
             )
         )
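The interaction between the magnitude filter and the final display sort in
``fit()`` can be traced with a small sketch (the importance values are
invented for illustration):

.. code-block:: python

    import numpy as np

    # Signed importances for five hypothetical features
    imp = np.array([0.2, -3.0, 1.5, -0.1, 2.4])

    # Rank feature indices by absolute magnitude, ascending
    abs_sort_idx = np.argsort(np.absolute(imp))  # -> [3, 0, 2, 4, 1]

    # topn=3 keeps the three largest magnitudes (see _reduce_topn below)
    keep = abs_sort_idx[-3:]                     # -> [2, 4, 1]

    # The survivors are then re-sorted by signed value for display
    order = np.argsort(imp[keep])
    print(imp[keep][order])                      # [-3.   1.5  2.4]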
+ """ + if self.topn > 0: + arr = arr[-self.topn:] + elif self.topn < 0: + arr = arr[:-self.topn] + return arr + + def _get_topn_title(self): + """ + Return an appropriate title for the plot: Top N, Bottom N, or N + """ + if self.topn: + if self.topn > 0: + return "Top {}".format(len(self.features_)) + else: + return "Bottom {}".format(len(self.features_)) + else: + return str(len(self.features_)) + ########################################################################## ## Quick Method @@ -365,6 +416,7 @@ def feature_importances( colors=None, colormap=None, is_fitted="auto", + topn=None, show=True, **kwargs ): @@ -431,6 +483,10 @@ def feature_importances( call ``plt.savefig`` from this signature, nor ``clear_figure``. If False, simply calls ``finalize()`` + topn : int, default=None + Display only the top N results with a positive integer, or the bottom N + results with a negative integer. If None or 0, all results are shown. + kwargs : dict Keyword arguments that are passed to the base class and may influence the visualization as defined in other Visualizers. @@ -452,6 +508,7 @@ def feature_importances( colors=colors, colormap=colormap, is_fitted=is_fitted, + topn=topn, **kwargs )