diff --git a/tutorials/Higgs_Boson.ipynb b/tutorials/Higgs_Boson.ipynb index 003541e6..9dbd6bed 100644 --- a/tutorials/Higgs_Boson.ipynb +++ b/tutorials/Higgs_Boson.ipynb @@ -38,25 +38,7 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2020-08-31 09:16:09-- https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz\n", - "Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252\n", - "Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 2816407858 (2.6G) [application/x-httpd-php]\n", - "Saving to: ‘./HIGGS.csv.gz’\n", - "\n", - "HIGGS.csv.gz 100%[===================>] 2.62G 102MB/s in 29s \n", - "\n", - "2020-08-31 09:16:38 (93.5 MB/s) - ‘./HIGGS.csv.gz’ saved [2816407858/2816407858]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# This is a 2.7 GB file.\n", "# Please make sure you have enough space available before\n", @@ -71,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -102,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -141,30 +123,30 @@ "output_type": "stream", "text": [ "\n", - "Generation 1 - Current best internal CV score: 0.7103025000000001\n", - "Generation 2 - Current best internal CV score: 0.7103025000000001\n", - "Generation 3 - Current best internal CV score: 0.725755\n", - "Generation 4 - Current best internal CV score: 0.727995\n", - "Generation 5 - Current best internal CV score: 0.727995\n", - "Generation 6 - Current best internal CV score: 0.730315\n", - "Generation 7 - Current best internal CV score: 0.730315\n", - "Generation 8 - Current best internal CV score: 0.730315\n", - "Generation 9 - Current best internal CV score: 0.7308699999999999\n", - "Generation 10 - Current best internal CV score: 0.7347775\n", - "Best pipeline: XGBClassifier(input_matrix, alpha=1, learning_rate=0.1, max_depth=8, min_child_weight=19, n_estimators=100, nthread=1, subsample=0.8, tree_method=gpu_hist)\n", - "CPU times: user 5min 19s, sys: 54.7 s, total: 6min 14s\n", - "Wall time: 6min 17s\n" + "Generation 1 - Current best internal CV score: 0.730335\n", + "Generation 2 - Current best internal CV score: 0.730335\n", + "Generation 3 - Current best internal CV score: 0.730335\n", + "Generation 4 - Current best internal CV score: 0.735615\n", + "Generation 5 - Current best internal CV score: 0.7359375\n", + "Generation 6 - Current best internal CV score: 0.7359375\n", + "Generation 7 - Current best internal CV score: 0.7359375\n", + "Generation 8 - Current best internal CV score: 0.7359375\n", + "Generation 9 - Current best internal CV score: 0.736115\n", + "Generation 10 - Current best internal CV score: 0.7361850000000001\n", + "Best pipeline: XGBClassifier(ZeroCount(SelectPercentile(ZeroCount(input_matrix), percentile=99)), alpha=1, learning_rate=0.1, max_depth=9, min_child_weight=11, n_estimators=100, nthread=1, subsample=0.7000000000000001, tree_method=gpu_hist)\n", + "CPU times: user 8min 15s, sys: 1min 17s, total: 9min 33s\n", + "Wall time: 9min 39s\n" ] }, { "data": { "text/plain": [ "TPOTClassifier(config_dict='TPOT cuML', cv=2, generations=10,\n", - " log_file=,\n", + " log_file=,\n", " population_size=10, random_state=12, verbosity=2)" ] }, - "execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -193,16 +175,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.73669\n", - "CPU times: user 770 ms, sys: 31.7 ms, total: 802 ms\n", - "Wall time: 801 ms\n" + "0.73853\n", + "CPU times: user 950 ms, sys: 36.2 ms, total: 986 ms\n", + "Wall time: 984 ms\n" ] } ], @@ -308,12 +290,12 @@ "metadata": {}, "source": [ "## Performance Comparison\n", - "With the example configuration above (10 generations, population size of 10, two-fold cross validation), the `TPOT cuML` configuration provides a significant speedup while achieving essentially equivalent accuracy.\n", + "With the example configuration above (10 generations, population size of 10, two-fold cross validation), the `TPOT cuML` configuration provided a significant speedup while achieving essentially equivalent accuracy.\n", "\n", - "The GPU-accelerated version achieves an out-of-sample accuracy of 73.7% in **seven minutes**, while the default version achieves an accuracy of 73.8% after more than **five hours**. This kind of speedup also means you can create larger evolutionary search strategies while **still** obtaining faster results.\n", + "The GPU-accelerated version achieved an out-of-sample accuracy of 73.85% in **fewer than 10 minutes**, while the default version achieved an accuracy of 73.79% after more than **five hours** (specific performance values will vary across runs). This kind of speedup also means you can create larger evolutionary search strategies while **still** obtaining faster results.\n", "\n", "### Hardware\n", - "The following hardware was used for this test. Results and speedups will vary.\n", + "The following hardware was used for this test. Results and speedups will vary across systems and configurations.\n", "\n", "- CPU: 2x Intel(R) Xeon(R) Platinum 8168 CPU @ 2.70GHz (24 cores)\n", "- GPU: 1x NVIDIA V100 32GB"