Skip to content

Cannot extract important variables with accuracy_decrease  #3

@gundalav

Description

@gundalav

I have the following importance_frame:

# Example data for the reproduction: a data.frame of variable-importance
# measures with 20 rows (one per level "A" ... "Y" of the `variable` factor)
# and 8 columns: variable, mean_min_depth, times_a_root, no_of_nodes,
# no_of_trees, p_value, gini_decrease, accuracy_decrease.
# The object also carries an `na.action` attribute of class "omit" with
# entries 80 and 180.
# NOTE(review): the structure(...) form looks like dput() output, and the
# `na.action` attribute is presumably left over from an earlier na.omit() on a
# larger table -- confirm with the reporter.
importance_frame <- structure(list(variable = structure(1:20, .Label = c(
  "A", "C",
  "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R",
  "S", "T", "V", "W", "Y"
), class = "factor"), mean_min_depth = c(
  1.9761861386314,
  2.5220853029533, 2.15539883255869, 1.61935396654558, 1.45123463631321,
  1.53296953170083, 1.77518115811586, 1.52151167552988, 1.89182019096144,
  2.14429040818413, 1.26326405034901, 1.93502763567771, 1.26898183744519,
  2.02060547195198, 1.54217481302459, 1.67384650439192, 1.5485857685783,
  2.09727178410599, 2.75747046937195, 2.35864404092358
), times_a_root = c(
  23.4,
  5.5, 13.3, 27.9, 39.3, 31.3, 29.7, 34.2, 24.2, 13, 43, 22.7,
  45.3, 16.8, 31.5, 30.1, 33.5, 19.3, 1.75, 14.6
), no_of_nodes = c(
  68.1,
  32.6, 62.2, 103.2, 103.3, 104.7, 75.6, 105.7, 72.4, 64.6, 118.4,
  73.6, 116.6, 74.5, 104.6, 95.6, 103.2, 60.3, 8.875, 36.1
), no_of_trees = c(
  65.1,
  32.3, 59.8, 96.1, 94.7, 99.9, 74.8, 100.6, 69.4, 62.8, 111.2,
  71.2, 108.3, 72.4, 98.8, 90, 97.6, 58.4, 8.875, 35.9
), p_value = c(
  0.669119230058558,
  0.999999783867775, 0.824720803698331, 0.10305110839386, 0.160596787513604,
  0.141119826647113, 0.52735342045046, 0.162403671879659, 0.713272963278132,
  0.817225145266696, 0.0104446472288876, 0.546649197487473, 0.0330726857615005,
  0.672936592800508, 0.0310135225001855, 0.182169849737794, 0.274905137508873,
  0.873388429679101, 1, 0.999021554764331
), gini_decrease = c(
  0.233831386391386,
  0.0886505361305361, 0.185330422910423, 0.358267377067377, 0.401108053058053,
  0.397634655344655, 0.308835228105228, 0.389097318237318, 0.250707615717616,
  0.191033563103563, 0.476535763125763, 0.249038827838828, 0.47133199023199,
  0.243902473082473, 0.372547632367632, 0.33646759018759, 0.382999447219447,
  0.203790450660451, 0.0253906843156843, 0.133164814074814
), accuracy_decrease = c(
  -0.00445119047619048,
  -0.00289380952380952, -0.00482809523809524, -0.00530904761904762,
  0.0051652380952381, 0.00616785714285714, 0.00289238095238095,
  -0.00079095238095238, -0.00239095238095238, -0.00648809523809524,
  0.00383690476190476, -0.00413857142857143, 0.00331214285714286,
  -0.00290619047619048, -0.00131714285714286, -0.0046781746031746,
  0.00534214285714286, -0.00532571428571429, 0, -0.000374047619047619
)), class = "data.frame", .Names = c(
  "variable", "mean_min_depth",
  "times_a_root", "no_of_nodes", "no_of_trees", "p_value", "gini_decrease",
  "accuracy_decrease"
), row.names = c(NA, -20L), na.action = structure(c(
  80L,
  180L
), .Names = c("80", "180"), class = "omit"))

# Auto-print the data frame; the `#>` lines that follow are its console output.
importance_frame
#>    variable mean_min_depth times_a_root no_of_nodes no_of_trees    p_value
#> 1         A       1.976186        23.40      68.100      65.100 0.66911923
#> 2         C       2.522085         5.50      32.600      32.300 0.99999978
#> 3         D       2.155399        13.30      62.200      59.800 0.82472080
#> 4         E       1.619354        27.90     103.200      96.100 0.10305111
#> 5         F       1.451235        39.30     103.300      94.700 0.16059679
#> 6         G       1.532970        31.30     104.700      99.900 0.14111983
#> 7         H       1.775181        29.70      75.600      74.800 0.52735342
#> 8         I       1.521512        34.20     105.700     100.600 0.16240367
#> 9         K       1.891820        24.20      72.400      69.400 0.71327296
#> 10        L       2.144290        13.00      64.600      62.800 0.81722515
#> 11        M       1.263264        43.00     118.400     111.200 0.01044465
#> 12        N       1.935028        22.70      73.600      71.200 0.54664920
#> 13        P       1.268982        45.30     116.600     108.300 0.03307269
#> 14        Q       2.020605        16.80      74.500      72.400 0.67293659
#> 15        R       1.542175        31.50     104.600      98.800 0.03101352
#> 16        S       1.673847        30.10      95.600      90.000 0.18216985
#> 17        T       1.548586        33.50     103.200      97.600 0.27490514
#> 18        V       2.097272        19.30      60.300      58.400 0.87338843
#> 19        W       2.757470         1.75       8.875       8.875 1.00000000
#> 20        Y       2.358644        14.60      36.100      35.900 0.99902155
#>    gini_decrease accuracy_decrease
#> 1     0.23383139     -0.0044511905
#> 2     0.08865054     -0.0028938095
#> 3     0.18533042     -0.0048280952
#> 4     0.35826738     -0.0053090476
#> 5     0.40110805      0.0051652381
#> 6     0.39763466      0.0061678571
#> 7     0.30883523      0.0028923810
#> 8     0.38909732     -0.0007909524
#> 9     0.25070762     -0.0023909524
#> 10    0.19103356     -0.0064880952
#> 11    0.47653576      0.0038369048
#> 12    0.24903883     -0.0041385714
#> 13    0.47133199      0.0033121429
#> 14    0.24390247     -0.0029061905
#> 15    0.37254763     -0.0013171429
#> 16    0.33646759     -0.0046781746
#> 17    0.38299945      0.0053421429
#> 18    0.20379045     -0.0053257143
#> 19    0.02539068      0.0000000000
#> 20    0.13316481     -0.0003740476

And I tried to get the important variables with the following code:

# Attach randomForestExplainer (presumably the package that provides
# important_variables() -- confirm).
library(randomForestExplainer)
# Names of the importance_frame columns to use as ranking measures.
x_measure <- "gini_decrease"
y_measure <- "accuracy_decrease"
# NOTE(review): `size_measure` is used below but never assigned in this
# snippet. As written, a fresh session would fail on the missing object, so the
# reporter's session presumably had it set to a name that is not a column of
# importance_frame -- confirm its value, since it likely explains the reported
# "undefined columns selected" error.
important_variables(importance_frame,
  k = 10,
  measures = c(x_measure, y_measure, size_measure)
)

The error I get is this:

Error in `[.data.frame`(rankings, , measures) : 
  undefined columns selected

How can I fix the issue?

Also, what is the meaning of a negative accuracy_decrease?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions