/
machine_learn.html
executable file
·377 lines (306 loc) · 16.4 KB
/
machine_learn.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
<!-- Generated by pkgdown: do not edit by hand -->
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Character encoding is declared before any other head content -->
<meta charset="utf-8">
<!-- Asks Internet Explorer to use its newest rendering engine (IE=edge) -->
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<!-- Responsive viewport: layout width follows the device width, initial zoom 1.0 -->
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Title format: page heading, then topic name, then site name -->
<title>Machine learning made easy — machine_learn • healthcareai</title>
<!-- Favicons: two PNG tab icons (16 and 32 px) followed by four Apple touch
     icons (180, 120, 76 and 60 px). All are void link elements, written
     uniformly without a trailing slash. -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
<link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png">
<link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png">
<link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png">
<link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png">
<link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png">
<!-- jquery: loaded without async/defer, ahead of the Bootstrap script and the
     jQuery-named plugins (jQuery.headroom, jquery.mark) further down -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script>
<!-- Bootstrap: Bootswatch "yeti" theme stylesheet plus the Bootstrap 3.3.7 script.
     NOTE(review): the stylesheet link carries crossorigin but no integrity
     attribute, unlike the other CDN resources in this block; confirm whether
     a hash should be added. -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.3.7/yeti/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha256-U5ZEeKfGNOja007MMD3YBI0A3OSZOQbeG6z2f2Y0hu8=" crossorigin="anonymous"></script>
<!-- Font Awesome icons: version 5.7.1 core stylesheet plus v4-shims (presumably
     what lets the navbar's version-4 style class names such as fa-github
     resolve; confirm) -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/all.min.css" integrity="sha256-nAmazAk6vS34Xqo0BSrTb+abbtFlgsFK7NKSi6o7Y78=" crossorigin="anonymous" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.7.1/css/v4-shims.min.css" integrity="sha256-6qHlizsOWFskGlwVOKuns+D1nB6ssZrHQrNj1wGplHc=" crossorigin="anonymous" />
<!-- clipboard.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.4/clipboard.min.js" integrity="sha256-FiZwavyI2V6+EXO1U+xzLG3IKldpiTFf3153ea9zikQ=" crossorigin="anonymous"></script>
<!-- headroom.js: core library followed by its jQuery plugin build -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/headroom.min.js" integrity="sha256-DJFC1kqIhelURkuza0AvYal5RxMtpzLjFhsnVIeuk+U=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.4/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
<!-- pkgdown: site-local stylesheet and script, one directory above this page -->
<link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script>
<!-- docsearch: site-local script and styles plus the CDN stylesheet. The inline
     script at the end of the body calls updateHitURL(), which is not defined
     in this page (presumably it comes from ../docsearch.js; confirm). -->
<script src="../docsearch.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/docsearch.js/2.6.1/docsearch.min.css" integrity="sha256-QOSRU/ra9ActyXkIBbiIB144aDBdtvXBcNc3OTNuX/Q=" crossorigin="anonymous" />
<link href="../docsearch.css" rel="stylesheet">
<script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/jquery.mark.min.js" integrity="sha256-4HLtjeVgH0eIB3aZ9mLYF6E8oU5chNdjU6p6rrXpl9U=" crossorigin="anonymous"></script>
<!-- Open Graph and Twitter card metadata for link previews -->
<meta property="og:title" content="Machine learning made easy — machine_learn" />
<meta property="og:description" content="Prepare data and train machine learning models." />
<meta property="og:image" content="https://docs.healthcare.ai/logo.png" />
<meta name="twitter:card" content="summary" />
<!-- mathjax: 2.7.5 core script followed by the TeX-AMS-MML_HTMLorMML configuration file -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<!-- Global site tag (gtag.js) - Google Analytics.
     The loader script is fetched asynchronously; the inline script below only
     queues entries on window.dataLayer. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-85609357-1"></script>
<script>
// Reuse window.dataLayer if it already exists, otherwise start with an empty array.
window.dataLayer = window.dataLayer || [];
// gtag() pushes its raw arguments object (not a copied array) onto dataLayer.
function gtag(){dataLayer.push(arguments);}
// Queue a 'js' entry stamped with the current time, then a 'config' entry for
// the same id that appears in the loader URL above.
gtag('js', new Date());
gtag('config', 'UA-85609357-1');
</script>
</head>
<body>
<div class="container template-reference-topic">
<header>
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<!-- Navbar header: a collapse toggle button targeting #navbar, followed by the
     brand (link to the site index) and a version badge with a tooltip. -->
<div class="navbar-header">
  <button class="navbar-toggle collapsed" type="button" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
    <span class="sr-only">Toggle navigation</span>
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
    <span class="icon-bar"></span>
  </button>
  <span class="navbar-brand">
    <a class="navbar-link" href="../index.html">healthcareai</a>
    <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">2.4.0</span>
  </span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<!-- Primary navigation: a "Vignettes" dropdown with six article links, then
     direct links to the function reference index and the news page. -->
<ul class="nav navbar-nav">
  <li class="dropdown">
    <a class="dropdown-toggle" href="#" data-toggle="dropdown" role="button" aria-expanded="false">
      Vignettes
      <span class="caret"></span>
    </a>
    <ul class="dropdown-menu" role="menu">
      <li>
        <a href="../articles/site_only/healthcareai.html">Getting Started</a>
      </li>
      <li>
        <a href="../articles/site_only/db_connections.html">Database Connections</a>
      </li>
      <li>
        <a href="../articles/site_only/deploy_model.html">Deploying a Model</a>
      </li>
      <li>
        <a href="../articles/site_only/best_levels.html">Variables with Many Categories</a>
      </li>
      <li>
        <a href="../articles/site_only/performance.html">Performance with Big Data</a>
      </li>
      <li>
        <a href="../articles/site_only/transitioning.html">Transition from Version 1</a>
      </li>
    </ul>
  </li>
  <li>
    <a href="../reference/index.html">Functions</a>
  </li>
  <li>
    <a href="../news/index.html">News</a>
  </li>
</ul>
<!-- Icon-only external links. The icon spans contain no text, so each link
     takes its accessible name from aria-label, and the icon span itself is
     hidden from assistive technology with aria-hidden. -->
<ul class="nav navbar-nav navbar-right">
  <li>
    <a href="https://github.com/HealthCatalyst/healthcareai-r" aria-label="healthcareai on GitHub">
      <span class="fa fa-github" aria-hidden="true"></span>
    </a>
  </li>
  <li>
    <a href="https://healthcare-ai.slack.com/" aria-label="healthcareai Slack community">
      <span class="fa fa-users" aria-hidden="true"></span>
    </a>
  </li>
</ul>
<!-- Site search box. The input below is the element matched by the
     'input#search-input.form-control' selector passed to docsearch() in the
     inline script at the end of the body. The form declares no action; its
     accessible name comes from aria-label because there is no visible label. -->
<form class="navbar-form navbar-right hidden-xs hidden-sm" role="search">
<div class="form-group">
<input type="search" class="form-control" name="search-input" id="search-input" placeholder="Search..." aria-label="Search for..." autocomplete="off">
</div>
</form>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
</header>
<div class="row">
<div class="col-md-9 contents">
<!-- Page heading plus an element with class "hidden" holding the name
     machine_learn.Rd (presumably the Rd source this page was generated
     from; confirm). -->
<div class="page-header">
<h1>Machine learning made easy</h1>
<div class="hidden name"><code>machine_learn.Rd</code></div>
</div>
<!-- One-sentence summary; the same sentence is the og:description value in the head. -->
<div class="ref-description">
<p>Prepare data and train machine learning models.</p>
</div>
<!-- Usage signature of machine_learn(). The span classes (fu, no, kw, fl)
     presumably drive syntax colouring via the site stylesheet; confirm.
     Text and line breaks inside the pre are significant and left untouched. -->
<pre class="usage"><span class="fu">machine_learn</span>(
<span class="no">d</span>,
<span class="no">...</span>,
<span class="no">outcome</span>,
<span class="no">models</span>,
<span class="no">metric</span>,
<span class="kw">tune</span> <span class="kw">=</span> <span class="fl">TRUE</span>,
<span class="no">positive_class</span>,
<span class="kw">n_folds</span> <span class="kw">=</span> <span class="fl">5</span>,
<span class="kw">tune_depth</span> <span class="kw">=</span> <span class="fl">10</span>,
<span class="kw">impute</span> <span class="kw">=</span> <span class="fl">TRUE</span>,
<span class="kw">model_name</span> <span class="kw">=</span> <span class="kw">NULL</span>,
<span class="kw">allow_parallel</span> <span class="kw">=</span> <span class="fl">FALSE</span>
)</pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<!-- Argument reference: one row per machine_learn() argument. The argument
     name is the row header (scope="row") and the description is the data
     cell, so assistive technology can associate each description with its
     argument. -->
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th scope="row">d</th>
<td><p>A data frame</p></td>
</tr>
<tr>
<th scope="row">...</th>
<td><p>Columns to be ignored in model training, e.g. ID columns,
unquoted.</p></td>
</tr>
<tr>
<th scope="row">outcome</th>
<td><p>Name of the target column, i.e. what you want to predict.
Unquoted. Must be named, i.e. you must specify <code>outcome = </code>.</p></td>
</tr>
<tr>
<th scope="row">models</th>
<td><p>Names of models to try. See <code><a href='get_supported_models.html'>get_supported_models</a></code>
for available models. Default is all available models.</p></td>
</tr>
<tr>
<th scope="row">metric</th>
<td><p>Which metric should be used to assess model performance?
Options for classification: "ROC" (default) (area under the receiver
operating characteristic curve) or "PR" (area under the precision-recall
curve). Options for regression: "RMSE" (default) (root-mean-squared error),
"MAE" (mean-absolute error), or "Rsquared". Options for
multiclass: "Accuracy" (default) or "Kappa" (accuracy, adjusted for class
imbalance).</p></td>
</tr>
<tr>
<th scope="row">tune</th>
<td><p>If TRUE (default) models will be tuned via
<code><a href='tune_models.html'>tune_models</a></code>. If FALSE, models will be trained via
<code><a href='flash_models.html'>flash_models</a></code> which is substantially faster but produces
less predictively powerful models.</p></td>
</tr>
<tr>
<th scope="row">positive_class</th>
<td><p>For classification only, which outcome level is the
"yes" case, i.e. should be associated with high probabilities? Defaults to
"Y" or "yes" if present, otherwise is the first level of the outcome
variable (first alphabetically if the training data outcome was not already
a factor).</p></td>
</tr>
<tr>
<th scope="row">n_folds</th>
<td><p>How many folds to use to assess out-of-fold accuracy? Default
= 5. Models are evaluated on out-of-fold predictions whether tune is TRUE
or FALSE.</p></td>
</tr>
<tr>
<th scope="row">tune_depth</th>
<td><p>How many hyperparameter combinations to try? Default = 10.
Value is multiplied by 5 for regularized regression. Ignored if tune is
FALSE.</p></td>
</tr>
<tr>
<th scope="row">impute</th>
<td><p>Logical, if TRUE (default) missing values will be filled by
<code><a href='hcai_impute.html'>hcai_impute</a></code>.</p></td>
</tr>
<tr>
<th scope="row">model_name</th>
<td><p>Quoted, name of the model. Defaults to the name of the
outcome variable.</p></td>
</tr>
<tr>
<th scope="row">allow_parallel</th>
<td><p>Logical, defaults to FALSE. If TRUE and a parallel
backend is set up (e.g. with <code>doMC</code>) models with support for parallel
training will be trained across cores.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>A model_list object. You can call <code>plot</code>, <code>summary</code>,
<code>evaluate</code>, or <code>predict</code> on a model_list.</p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>This is a high-level wrapper function. For finer control of data
cleaning and preparation use <code><a href='prep_data.html'>prep_data</a></code> or the functions it
wraps. For finer control of model tuning use <code><a href='tune_models.html'>tune_models</a></code>.</p>
<h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
<!-- Example R session, wrapped in if (FALSE) so it is shown but not run.
     The less-than sign of each R assignment arrow is written as the &lt;
     character reference so that it is parsed as text rather than as the
     start of a tag. Whitespace inside the pre is significant. -->
<pre class="examples"><div class='input'><span class='co'># These examples take about 30 seconds to execute so aren't run automatically,</span>
<span class='co'># but you should be able to execute this code locally.</span>
<span class='kw'>if</span> (<span class='fl'>FALSE</span>) {
<span class='co'># Split the data into training and test sets</span>
<span class='no'>d</span> <span class='kw'>&lt;-</span> <span class='fu'><a href='split_train_test.html'>split_train_test</a></span>(<span class='kw'>d</span> <span class='kw'>=</span> <span class='no'>pima_diabetes</span>,
<span class='kw'>outcome</span> <span class='kw'>=</span> <span class='no'>diabetes</span>,
<span class='kw'>percent_train</span> <span class='kw'>=</span> <span class='fl'>.9</span>)
<span class='co'>### Classification ###</span>
<span class='co'># Clean and prep the training data, specifying that patient_id is an ID column,</span>
<span class='co'># and tune algorithms over hyperparameter values to predict diabetes</span>
<span class='no'>diabetes_models</span> <span class='kw'>&lt;-</span> <span class='fu'>machine_learn</span>(<span class='no'>d</span>$<span class='no'>train</span>, <span class='no'>patient_id</span>, <span class='kw'>outcome</span> <span class='kw'>=</span> <span class='no'>diabetes</span>)
<span class='co'># Inspect model specification and performance</span>
<span class='no'>diabetes_models</span>
<span class='co'># Make predictions (predicted probability of diabetes) on test data</span>
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span>(<span class='no'>diabetes_models</span>, <span class='no'>d</span>$<span class='no'>test</span>)
<span class='co'>### Regression ###</span>
<span class='co'># If the outcome variable is numeric, regression models will be trained</span>
<span class='no'>age_model</span> <span class='kw'>&lt;-</span> <span class='fu'>machine_learn</span>(<span class='no'>d</span>$<span class='no'>train</span>, <span class='no'>patient_id</span>, <span class='kw'>outcome</span> <span class='kw'>=</span> <span class='no'>age</span>)
<span class='co'># Get detailed information about performance over tuning values</span>
<span class='fu'><a href='https://rdrr.io/r/base/summary.html'>summary</a></span>(<span class='no'>age_model</span>)
<span class='co'># Get available performance metrics</span>
<span class='fu'><a href='evaluate.html'>evaluate</a></span>(<span class='no'>age_model</span>)
<span class='co'># Plot training performance on tuning metric (default = RMSE)</span>
<span class='fu'><a href='https://rdrr.io/r/graphics/plot.html'>plot</a></span>(<span class='no'>age_model</span>)
<span class='co'># If new data isn't specified, get predictions on training data</span>
<span class='fu'><a href='https://rdrr.io/r/stats/predict.html'>predict</a></span>(<span class='no'>age_model</span>)
<span class='co'>### Faster model training without tuning hyperparameters ###</span>
<span class='co'># Train models at set hyperparameter values by setting tune to FALSE. This is</span>
<span class='co'># faster (especially on larger datasets), but produces models with less</span>
<span class='co'># predictive power.</span>
<span class='fu'>machine_learn</span>(<span class='no'>d</span>$<span class='no'>train</span>, <span class='no'>patient_id</span>, <span class='kw'>outcome</span> <span class='kw'>=</span> <span class='no'>diabetes</span>, <span class='kw'>tune</span> <span class='kw'>=</span> <span class='fl'>FALSE</span>)
<span class='co'>### Train models optimizing given metric ###</span>
<span class='fu'>machine_learn</span>(<span class='no'>d</span>$<span class='no'>train</span>, <span class='no'>patient_id</span>, <span class='kw'>outcome</span> <span class='kw'>=</span> <span class='no'>diabetes</span>, <span class='kw'>metric</span> <span class='kw'>=</span> <span class='st'>"PR"</span>)
}</div></pre>
</div>
<!-- In-page table of contents. Each link's fragment matches the id of one of
     the h2 headings in the contents column (arguments, value, details,
     examples). The hidden-xs and hidden-sm classes are also used on the
     navbar search form. -->
<div class="col-md-3 hidden-xs hidden-sm" id="sidebar">
<h2>Contents</h2>
<ul class="nav nav-pills nav-stacked">
<li><a href="#arguments">Arguments</a></li>
<li><a href="#value">Value</a></li>
<li><a href="#details">Details</a></li>
<li><a href="#examples">Examples</a></li>
</ul>
</div>
</div>
<!-- Page footer: package author credits followed by the pkgdown build credit. -->
<footer>
  <div class="copyright">
    <p>Developed by Levi Thatcher, Michael Levy, Mike Mastanduno, Taylor Larsen, Taylor Miller, Rex Sumsion.</p>
  </div>
  <div class="pkgdown">
    <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.4.1.</p>
  </div>
</footer>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/docsearch.js/2.6.1/docsearch.min.js" integrity="sha256-GKvGqXDznoRYHCwKXGnuchvKSwmx9SRMrZOTh2g4Sb0=" crossorigin="anonymous"></script>
<script>
  // Initialise DocSearch on the navbar search input.
  // transformData receives the array of hits and returns a mapped array in
  // which each hit's url has been replaced by the value of updateHitURL(hit).
  // updateHitURL is not defined in this page; it must already be in scope
  // when this callback runs.
  docsearch({
    indexName: 'healthcareai',
    apiKey: 'ac39465bc37cbef616f5de1e646b6037',
    inputSelector: 'input#search-input.form-control',
    transformData: function (results) {
      return results.map(function (result) {
        result.url = updateHitURL(result);
        return result;
      });
    }
  });
</script>
</body>
</html>