diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
index b8631f521..42c58cd2f 100644
--- a/CONTRIBUTE.md
+++ b/CONTRIBUTE.md
@@ -93,11 +93,11 @@ Julia Computing and New Zealand eScience Infrastructure.
**Julia language consultants.** Mike Innes, Avik Sengupta
-**Other contributors, past and present.** Diego Arenas, Edoardo Barp,
-Gergö Bohner, Michael K. Borregaard, Valentin Churavy, Harvey
-Devereux, Mosè Giordano, Thibaut Lienart, Mohammed Nook, Annika
-Stechemesser, Ayush Shridar, Yiannis Simillides
-
+**Other contributors, past and present.** Dilum Aluthge, Diego
+ Arenas, Edoardo Barp, Gergö Bohner, Michael K. Borregaard,
+ Valentin Churavy, Harvey Devereux, Mosè Giordano, Thibaut Lienart,
+ Mohammed Nook, Piotr Oleśkiewicz, Julian Samaroo, Ayush Shridar,
+ Yiannis Simillides, Annika Stechemesser
diff --git a/Project.toml b/Project.toml
index e76d0f323..f835d6840 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,15 +26,18 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
[compat]
-MLJBase = "0.2.6"
+CSV = "0.5"
+MLJBase = "0.2.6, 0.3"
MLJModels = "0.2.0"
julia = "1"
[extras]
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
+MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
[targets]
-test = ["DecisionTree", "RDatasets", "Test", "UnicodePlots"]
+test = ["CSV", "DecisionTree", "MLJBase", "RDatasets", "Test", "UnicodePlots"]
diff --git a/README.md b/README.md
index d8553f39b..59ba9c370 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,7 @@ A pure Julia machine learning framework.
## `join!(MLJ, YourModel)`
-**Call for help.** MLJ is [getting
-attention](https://github.com/trending/julia?since=monthly) but its
-small project team needs help to ensure its success. This depends
+**Call for help.** MLJ needs your help to ensure its success. This depends
crucially on:
- Existing and developing ML algorithms implementing the MLJ model interface
diff --git a/examples/JuliaCon2019/Manifest.toml b/examples/JuliaCon2019/Manifest.toml
new file mode 100644
index 000000000..24bbf9675
--- /dev/null
+++ b/examples/JuliaCon2019/Manifest.toml
@@ -0,0 +1,415 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[Arpack]]
+deps = ["BinaryProvider", "Libdl", "LinearAlgebra"]
+git-tree-sha1 = "07a2c077bdd4b6d23a40342a8a108e2ee5e58ab6"
+uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97"
+version = "0.3.1"
+
+[[Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
+[[BinDeps]]
+deps = ["Compat", "Libdl", "SHA", "URIParser"]
+git-tree-sha1 = "12093ca6cdd0ee547c39b1870e0c9c3f154d9ca9"
+uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee"
+version = "0.8.10"
+
+[[BinaryProvider]]
+deps = ["Libdl", "Logging", "SHA"]
+git-tree-sha1 = "c7361ce8a2129f20b0e05a89f7070820cfed6648"
+uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
+version = "0.5.6"
+
+[[CSV]]
+deps = ["CategoricalArrays", "DataFrames", "Dates", "Mmap", "Parsers", "PooledArrays", "Profile", "Tables", "Unicode", "WeakRefStrings"]
+git-tree-sha1 = "a7df9250dff3aba96436580dd6ac00d712364cab"
+uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+version = "0.5.9"
+
+[[CategoricalArrays]]
+deps = ["Compat", "Future", "Missings", "Printf", "Reexport", "Requires"]
+git-tree-sha1 = "94d16e77dfacc59f6d6c1361866906dbb65b6f6b"
+uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
+version = "0.5.2"
+
+[[CodecZlib]]
+deps = ["BinaryProvider", "Libdl", "Test", "TranscodingStreams"]
+git-tree-sha1 = "36bbf5374c661054d41410dc53ff752972583b9b"
+uuid = "944b1d66-785c-5afd-91f1-9de20f533193"
+version = "0.5.2"
+
+[[ColorTypes]]
+deps = ["FixedPointNumbers", "Random"]
+git-tree-sha1 = "10050a24b09e8e41b951e9976b109871ce98d965"
+uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
+version = "0.8.0"
+
+[[Compat]]
+deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
+git-tree-sha1 = "84aa74986c5b9b898b0d1acaf3258741ee64754f"
+uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
+version = "2.1.0"
+
+[[DataFrames]]
+deps = ["CategoricalArrays", "Compat", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "StatsBase", "TableTraits", "Tables", "Unicode"]
+git-tree-sha1 = "7c0f86a01be0f77cc7f3f9096ed875f1217487e1"
+uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+version = "0.18.4"
+
+[[DataStructures]]
+deps = ["InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "0809951a1774dc724da22d26e4289bbaab77809a"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.17.0"
+
+[[DataValueInterfaces]]
+git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
+uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
+version = "1.0.0"
+
+[[Dates]]
+deps = ["Printf"]
+uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
+
+[[DecisionTree]]
+deps = ["DelimitedFiles", "Distributed", "LinearAlgebra", "Random", "ScikitLearnBase", "Statistics", "Test"]
+git-tree-sha1 = "594057e7171467e2983ab49739a3019ce2eae67f"
+uuid = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
+version = "0.8.3"
+
+[[DelimitedFiles]]
+deps = ["Mmap"]
+uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+
+[[Distances]]
+deps = ["LinearAlgebra", "Printf", "Random", "Statistics", "Test"]
+git-tree-sha1 = "a135c7c062023051953141da8437ed74f89d767a"
+uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+version = "0.8.0"
+
+[[Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[[Distributions]]
+deps = ["LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
+git-tree-sha1 = "56a158bc0abe4af5d4027af2275fde484261ca6d"
+uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
+version = "0.19.2"
+
+[[EzXML]]
+deps = ["BinaryProvider", "Libdl", "Printf"]
+git-tree-sha1 = "724e13b7522563a18ae4a5cc4a9792ae3b0da3e6"
+uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
+version = "0.9.3"
+
+[[FileIO]]
+deps = ["Pkg"]
+git-tree-sha1 = "351f001a78aa1b7ad2696e386e110b5abd071c71"
+uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
+version = "1.0.7"
+
+[[FixedPointNumbers]]
+git-tree-sha1 = "d14a6fa5890ea3a7e5dcab6811114f132fec2b4b"
+uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
+version = "0.6.1"
+
+[[Future]]
+deps = ["Random"]
+uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+
+[[HTTP]]
+deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets"]
+git-tree-sha1 = "03ddc88af7f2d963fac5aa9f3ac8e11914d68a78"
+uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
+version = "0.8.4"
+
+[[IniFile]]
+deps = ["Test"]
+git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8"
+uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
+version = "0.5.0"
+
+[[InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[IteratorInterfaceExtensions]]
+git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
+uuid = "82899510-4779-5014-852e-03e436cf321d"
+version = "1.0.0"
+
+[[LIBLINEAR]]
+deps = ["DelimitedFiles", "Libdl", "SparseArrays", "Test"]
+git-tree-sha1 = "42cacc29d9b4ae77b6702c181bbfa58f14d8ef7a"
+uuid = "2d691ee1-e668-5016-a719-b2531b85e0f5"
+version = "0.5.1"
+
+[[LIBSVM]]
+deps = ["Compat", "DelimitedFiles", "LIBLINEAR", "Libdl", "ScikitLearnBase", "SparseArrays", "Test"]
+git-tree-sha1 = "f17068e3f13a83da68c05f36c47a696b22129cff"
+uuid = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
+version = "0.3.1"
+
+[[LibGit2]]
+uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
+
+[[Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[[LinearAlgebra]]
+deps = ["Libdl"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[MLJ]]
+deps = ["CSV", "CategoricalArrays", "DataFrames", "Dates", "Distributed", "Distributions", "InteractiveUtils", "LinearAlgebra", "MLJBase", "MLJModels", "Pkg", "ProgressMeter", "Random", "RecipesBase", "RemoteFiles", "Statistics", "StatsBase", "Tables"]
+git-tree-sha1 = "8b5de48dda61ba6f1d7e0c22c38ade273155cafa"
+repo-rev = "master"
+repo-url = "https://github.com/alan-turing-institute/MLJ.jl.git"
+uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
+version = "0.2.5"
+
+[[MLJBase]]
+deps = ["CSV", "CategoricalArrays", "ColorTypes", "Distributions", "InteractiveUtils", "Random", "SparseArrays", "Statistics", "StatsBase", "Tables"]
+git-tree-sha1 = "fab2cfd5f28850133b686d55cdbddadf8dd3f5a9"
+uuid = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
+version = "0.2.6"
+
+[[MLJModels]]
+deps = ["CategoricalArrays", "Distances", "Distributions", "LIBSVM", "LinearAlgebra", "MLJBase", "Pkg", "Random", "Requires"]
+git-tree-sha1 = "641995f074fa5a790301c56e57c888ab938c58fc"
+uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
+version = "0.2.5"
+
+[[Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[MbedTLS]]
+deps = ["BinaryProvider", "Dates", "Distributed", "Libdl", "Random", "Sockets", "Test"]
+git-tree-sha1 = "2d94286a9c2f52c63a16146bb86fd6cdfbf677c6"
+uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
+version = "0.6.8"
+
+[[Missings]]
+deps = ["SparseArrays", "Test"]
+git-tree-sha1 = "f0719736664b4358aa9ec173077d4285775f8007"
+uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
+version = "0.4.1"
+
+[[Mmap]]
+uuid = "a63ad114-7e13-5084-954f-fe012c677804"
+
+[[Mocking]]
+deps = ["Compat", "Dates"]
+git-tree-sha1 = "4bf69aaf823b119b034e091e16b18311aa191663"
+uuid = "78c3b35d-d492-501b-9361-3d52fe80e533"
+version = "0.5.7"
+
+[[MultivariateStats]]
+deps = ["Arpack", "LinearAlgebra", "Printf", "Random", "SparseArrays", "Statistics", "StatsBase", "Test"]
+git-tree-sha1 = "cf1c990020bc4a52ff34ba2ee058b7cb677141f2"
+uuid = "6f286f6a-111f-5878-ab1e-185364afe411"
+version = "0.6.0"
+
+[[OrderedCollections]]
+deps = ["Random", "Serialization", "Test"]
+git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.1.0"
+
+[[PDMats]]
+deps = ["Arpack", "LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"]
+git-tree-sha1 = "8b68513175b2dc4023a564cb0e917ce90e74fd69"
+uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
+version = "0.9.7"
+
+[[Parsers]]
+deps = ["Dates", "Test"]
+git-tree-sha1 = "db2b35dedab3c0e46dc15996d170af07a5ab91c9"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "0.3.6"
+
+[[Pkg]]
+deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
+uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+
+[[PooledArrays]]
+git-tree-sha1 = "6e8c38927cb6e9ae144f7277c753714861b27d14"
+uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
+version = "0.5.2"
+
+[[Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+
+[[Profile]]
+deps = ["Printf"]
+uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+
+[[ProgressMeter]]
+deps = ["Distributed", "Printf"]
+git-tree-sha1 = "0f08e0e74e5b160ca20d3962a2620038b75881c7"
+uuid = "92933f4c-e287-5a05-a399-4b506db050ca"
+version = "1.0.0"
+
+[[QuadGK]]
+deps = ["DataStructures", "LinearAlgebra", "Test"]
+git-tree-sha1 = "3ce467a8e76c6030d4c3786e7d3a73442017cdc0"
+uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
+version = "2.0.3"
+
+[[RData]]
+deps = ["CategoricalArrays", "CodecZlib", "DataFrames", "Dates", "FileIO", "TimeZones"]
+git-tree-sha1 = "3b0fc2f7df61b8890502851281c1eb0d2407d6ac"
+uuid = "df47a6cb-8c03-5eed-afd8-b6050d6c41da"
+version = "0.6.2"
+
+[[RDatasets]]
+deps = ["CSV", "CodecZlib", "DataFrames", "FileIO", "Printf", "RData", "Reexport"]
+git-tree-sha1 = "f701bd7dc55cba37dd81a7053c20aadfde425ad0"
+uuid = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
+version = "0.6.2"
+
+[[REPL]]
+deps = ["InteractiveUtils", "Markdown", "Sockets"]
+uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
+
+[[Random]]
+deps = ["Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[RecipesBase]]
+git-tree-sha1 = "7bdce29bc9b2f5660a6e5e64d64d91ec941f6aa2"
+uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
+version = "0.7.0"
+
+[[Reexport]]
+deps = ["Pkg"]
+git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0"
+uuid = "189a3867-3050-52da-a836-e630ba90ab69"
+version = "0.2.0"
+
+[[RemoteFiles]]
+deps = ["Dates", "HTTP", "Test"]
+git-tree-sha1 = "0bf57958308f3e3a6dcce1d34ec8225d6bc247ea"
+uuid = "cbe49d4c-5af1-5b60-bb70-0a60aa018e1b"
+version = "0.2.1"
+
+[[Requires]]
+deps = ["Test"]
+git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "0.5.2"
+
+[[Rmath]]
+deps = ["BinaryProvider", "Libdl", "Random", "Statistics", "Test"]
+git-tree-sha1 = "9a6c758cdf73036c3239b0afbea790def1dabff9"
+uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
+version = "0.5.0"
+
+[[SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+
+[[ScikitLearnBase]]
+deps = ["LinearAlgebra", "Random", "Statistics"]
+git-tree-sha1 = "7877e55c1523a4b336b433da39c8e8c08d2f221f"
+uuid = "6e75b9c4-186b-50bd-896f-2d2496a4843e"
+version = "0.5.0"
+
+[[Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[[SharedArrays]]
+deps = ["Distributed", "Mmap", "Random", "Serialization"]
+uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
+
+[[Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+
+[[SortingAlgorithms]]
+deps = ["DataStructures", "Random", "Test"]
+git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd"
+uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
+version = "0.3.1"
+
+[[SparseArrays]]
+deps = ["LinearAlgebra", "Random"]
+uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[[SpecialFunctions]]
+deps = ["BinDeps", "BinaryProvider", "Libdl", "Test"]
+git-tree-sha1 = "0b45dc2e45ed77f445617b99ff2adf0f5b0f23ea"
+uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
+version = "0.7.2"
+
+[[Statistics]]
+deps = ["LinearAlgebra", "SparseArrays"]
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[[StatsBase]]
+deps = ["DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"]
+git-tree-sha1 = "2b6ca97be7ddfad5d9f16a13fe277d29f3d11c23"
+uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+version = "0.31.0"
+
+[[StatsFuns]]
+deps = ["Rmath", "SpecialFunctions", "Test"]
+git-tree-sha1 = "b3a4e86aa13c732b8a8c0ba0c3d3264f55e6bb3e"
+uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
+version = "0.8.0"
+
+[[SuiteSparse]]
+deps = ["Libdl", "LinearAlgebra", "SparseArrays"]
+uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
+
+[[TableTraits]]
+deps = ["IteratorInterfaceExtensions"]
+git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e"
+uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
+version = "1.0.0"
+
+[[Tables]]
+deps = ["DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "Requires", "TableTraits", "Test"]
+git-tree-sha1 = "2e5d1a0d9b574ee2ed0c1a2fe32807de022376dd"
+uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+version = "0.2.9"
+
+[[Test]]
+deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[[TimeZones]]
+deps = ["Dates", "EzXML", "Mocking", "Printf", "Serialization", "Unicode"]
+git-tree-sha1 = "859bfc1832ea52e413c96fa5c92130516db62bdb"
+uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53"
+version = "0.9.1"
+
+[[TranscodingStreams]]
+deps = ["Random", "Test"]
+git-tree-sha1 = "a25d8e5a28c3b1b06d3859f30757d43106791919"
+uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
+version = "0.9.4"
+
+[[URIParser]]
+deps = ["Test", "Unicode"]
+git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69"
+uuid = "30578b45-9adc-5946-b283-645ec420af67"
+version = "0.4.0"
+
+[[UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
+[[Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+[[WeakRefStrings]]
+deps = ["Random", "Test"]
+git-tree-sha1 = "9a0bb82eede528debe631b642eeb48a631a69bc2"
+uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
+version = "0.6.1"
diff --git a/examples/JuliaCon2019/Project.toml b/examples/JuliaCon2019/Project.toml
new file mode 100644
index 000000000..a3c9ab383
--- /dev/null
+++ b/examples/JuliaCon2019/Project.toml
@@ -0,0 +1,7 @@
+[deps]
+DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
+LIBSVM = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b"
+MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
+MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
+MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
+RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
diff --git a/examples/JuliaCon2019/demo.ipynb b/examples/JuliaCon2019/demo.ipynb
new file mode 100644
index 000000000..dd9b51d75
--- /dev/null
+++ b/examples/JuliaCon2019/demo.ipynb
@@ -0,0 +1,1027 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m registry at `~/.julia/registries/General`\n",
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m git-repo `https://github.com/JuliaRegistries/General.git`\n",
+ "\u001b[?25l\u001b[2K\u001b[?25h"
+ ]
+ }
+ ],
+ "source": [
+ "using Pkg\n",
+ "Pkg.activate(@__DIR__)\n",
+ "Pkg.instantiate()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Loading model metadata\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/MLJ.jl:114\n"
+ ]
+ }
+ ],
+ "source": [
+ "using MLJ"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Getting some data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
| SepalLength | SepalWidth | PetalLength | PetalWidth |
---|
| Float64 | Float64 | Float64 | Float64 |
---|
4 rows × 4 columns
1 | 7.2 | 3.2 | 6.0 | 1.8 |
---|
2 | 5.0 | 3.5 | 1.3 | 0.3 |
---|
3 | 5.0 | 3.5 | 1.6 | 0.6 |
---|
4 | 5.7 | 2.9 | 4.2 | 1.3 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cccc}\n",
+ "\t& SepalLength & SepalWidth & PetalLength & PetalWidth\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64 & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 7.2 & 3.2 & 6.0 & 1.8 \\\\\n",
+ "\t2 & 5.0 & 3.5 & 1.3 & 0.3 \\\\\n",
+ "\t3 & 5.0 & 3.5 & 1.6 & 0.6 \\\\\n",
+ "\t4 & 5.7 & 2.9 & 4.2 & 1.3 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "4×4 DataFrame\n",
+ "│ Row │ SepalLength │ SepalWidth │ PetalLength │ PetalWidth │\n",
+ "│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼─────────────┼────────────┼─────────────┼────────────┤\n",
+ "│ 1 │ 7.2 │ 3.2 │ 6.0 │ 1.8 │\n",
+ "│ 2 │ 5.0 │ 3.5 │ 1.3 │ 0.3 │\n",
+ "│ 3 │ 5.0 │ 3.5 │ 1.6 │ 0.6 │\n",
+ "│ 4 │ 5.7 │ 2.9 │ 4.2 │ 1.3 │"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "using RDatasets\n",
+ "iris = dataset(\"datasets\", \"iris\"); # a DataFrame\n",
+ "scrambled = shuffle(1:size(iris, 1))\n",
+ "X = iris[scrambled, 1:4];\n",
+ "y = iris[scrambled, 5];\n",
+ "\n",
+ "first(X, 4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5-element CategoricalArray{String,1,UInt8}:\n",
+ " \"virginica\" \n",
+ " \"setosa\" \n",
+ " \"setosa\" \n",
+ " \"versicolor\"\n",
+ " \"setosa\" "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y[1:5]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Basic fit and predict:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "import MLJModels ✔\n",
+ "import LIBSVM ✔\n",
+ "import MLJModels.LIBSVM_.SVC ✔\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Training \u001b[34mMachine{SVC} @ 1…12\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:140\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "*\n",
+ "optimization finished, #iter = 33\n",
+ "nu = 0.038907\n",
+ "obj = -1.945147, rho = -0.167869\n",
+ "nSV = 10, nBSV = 0\n",
+ "*\n",
+ "optimization finished, #iter = 48\n",
+ "nu = 0.293514\n",
+ "obj = -21.377494, rho = -0.144367\n",
+ "nSV = 33, nBSV = 26\n",
+ "*\n",
+ "optimization finished, #iter = 35\n",
+ "nu = 0.046521\n",
+ "obj = -2.403410, rho = 0.039522\n",
+ "nSV = 10, nBSV = 2\n",
+ "Total nSV = 44\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "150-element Array{CategoricalString{UInt8},1}:\n",
+ " \"virginica\" \n",
+ " \"setosa\" \n",
+ " \"setosa\" \n",
+ " \"versicolor\"\n",
+ " \"setosa\" \n",
+ " \"virginica\" \n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"virginica\" \n",
+ " \"versicolor\"\n",
+ " \"virginica\" \n",
+ " \"virginica\" \n",
+ " \"virginica\" \n",
+ " ⋮ \n",
+ " \"setosa\" \n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"setosa\" \n",
+ " \"virginica\" \n",
+ " \"virginica\" \n",
+ " \"versicolor\"\n",
+ " \"setosa\" \n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"versicolor\""
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "@load SVC()\n",
+ "classifier_ = SVC()\n",
+ "classifier = machine(classifier_, X, y)\n",
+ "fit!(classifier)\n",
+ "ŷ = predict(classifier, X) # or some Xnew"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Evaluating the model:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Evaluating using a holdout set. \n",
+ "│ fraction_train=0.8 \n",
+ "│ shuffle=false \n",
+ "│ measure=MLJ.misclassification_rate \n",
+ "│ operation=StatsBase.predict \n",
+ "│ Resampling from all rows. \n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/resampling.jl:100\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "import MLJModels ✔\n",
+ "import MultivariateStats ✔\n",
+ "import MLJModels.MultivariateStats_.PCA ✔\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Training \u001b[34mMachine{PCA} @ 1…98\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:140\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ " | x1 | x2 | x3 |
---|
| Float64 | Float64 | Float64 |
---|
3 rows × 3 columns
1 | -2.61409 | 0.560901 | 0.205535 |
---|
2 | 2.7701 | 0.263528 | -0.0772477 |
---|
3 | 2.40561 | 0.188871 | -0.263868 |
---|
"
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|ccc}\n",
+ "\t& x1 & x2 & x3\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & -2.61409 & 0.560901 & 0.205535 \\\\\n",
+ "\t2 & 2.7701 & 0.263528 & -0.0772477 \\\\\n",
+ "\t3 & 2.40561 & 0.188871 & -0.263868 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "3×3 DataFrame\n",
+ "│ Row │ x1 │ x2 │ x3 │\n",
+ "│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
+ "├─────┼──────────┼──────────┼────────────┤\n",
+ "│ 1 │ -2.61409 │ 0.560901 │ 0.205535 │\n",
+ "│ 2 │ 2.7701 │ 0.263528 │ -0.0772477 │\n",
+ "│ 3 │ 2.40561 │ 0.188871 │ -0.263868 │"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "evaluate!(classifier,\n",
+ " resampling=Holdout(fraction_train=0.8),\n",
+ " measure=misclassification_rate)\n",
+ "# Adding dimension reduction:\n",
+ "@load PCA\n",
+ "dim_reducer_ = PCA()\n",
+ "dim_reducer = machine(dim_reducer_, X)\n",
+ "fit!(dim_reducer)\n",
+ "Xsmall = transform(dim_reducer, X);\n",
+ "\n",
+ "first(Xsmall, 3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "*\n",
+ "optimization finished, #iter = 23\n",
+ "nu = 0.038664\n",
+ "obj = -1.933164, rho = -0.165650\n",
+ "nSV = 8, nBSV = 0\n",
+ "*\n",
+ "optimization finished, #iter = 38\n",
+ "nu = 0.293883\n",
+ "obj = -21.597810, rho = -0.082448\n",
+ "nSV = 34, nBSV = 26\n",
+ "*\n",
+ "optimization finished, #iter = 30\n",
+ "nu = 0.045664\n",
+ "obj = -2.380751, rho = 0.053250\n",
+ "nSV = 9, nBSV = 2\n",
+ "Total nSV = 45\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Training \u001b[34mMachine{SVC} @ 1…52\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:140\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "150-element Array{CategoricalString{UInt8},1}:\n",
+ " \"virginica\" \n",
+ " \"setosa\" \n",
+ " \"setosa\" \n",
+ " \"versicolor\"\n",
+ " \"setosa\" \n",
+ " \"virginica\" \n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"virginica\" \n",
+ " \"versicolor\"\n",
+ " \"virginica\" \n",
+ " \"virginica\" \n",
+ " \"virginica\" \n",
+ " ⋮ \n",
+ " \"setosa\" \n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"setosa\" \n",
+ " \"virginica\" \n",
+ " \"virginica\" \n",
+ " \"versicolor\"\n",
+ " \"setosa\" \n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"versicolor\"\n",
+ " \"versicolor\""
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "classifier = machine(classifier_, Xsmall, y)\n",
+ "fit!(classifier)\n",
+ "ŷ = predict(classifier, Xsmall)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Building a composite model:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Method 1: Compact syntax (but not generalizable):"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "(not implemented at the time of the talk)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# composite_ = @pipeline dim_reducer_ classifier_\n",
+ "\n",
+ "# composite = machine(composite_, X, y)\n",
+ "# evaluate!(composite, measure=misclassification_rate)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Method 2: Re-interpret unstreamlined code:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[34mNode @ 1…92\u001b[39m = predict(\u001b[0m\u001b[1m1…90\u001b[22m, transform(\u001b[0m\u001b[1m1…02\u001b[22m, \u001b[34m5…24\u001b[39m))"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "Xraw = X;\n",
+ "yraw = y;\n",
+ "\n",
+ "X = source(Xraw)\n",
+ "y = source(yraw)\n",
+ "\n",
+ "dim_reducer = machine(dim_reducer_, X)\n",
+ "Xsmall = transform(dim_reducer, X)\n",
+ "\n",
+ "classifier = machine(classifier_, Xsmall, y)\n",
+ "ŷ = predict(classifier, Xsmall)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Training \u001b[34mNodalMachine{PCA} @ 1…02\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:140\n",
+ "┌ Info: Training \u001b[34mNodalMachine{SVC} @ 1…90\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:140\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "*\n",
+ "optimization finished, #iter = 23\n",
+ "nu = 0.038664\n",
+ "obj = -1.933164, rho = -0.165650\n",
+ "nSV = 8, nBSV = 0\n",
+ "*\n",
+ "optimization finished, #iter = 38\n",
+ "nu = 0.293883\n",
+ "obj = -21.597810, rho = -0.082448\n",
+ "nSV = 34, nBSV = 26\n",
+ "*\n",
+ "optimization finished, #iter = 30\n",
+ "nu = 0.045664\n",
+ "obj = -2.380751, rho = 0.053250\n",
+ "nSV = 9, nBSV = 2\n",
+ "Total nSV = 45\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[34mNode @ 1…92\u001b[39m = predict(\u001b[0m\u001b[1m1…90\u001b[22m, transform(\u001b[0m\u001b[1m1…02\u001b[22m, \u001b[34m5…24\u001b[39m))"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fit!(ŷ)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2-element Array{CategoricalString{UInt8},1}:\n",
+ " \"setosa\" \n",
+ " \"versicolor\""
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ŷ(rows=3:4)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Updating \u001b[34mNodalMachine{PCA} @ 1…02\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:152\n",
+ "┌ Info: Training \u001b[34mNodalMachine{SVC} @ 1…90\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:140\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "*\n",
+ "optimization finished, #iter = 13\n",
+ "nu = 0.030533\n",
+ "obj = -1.526884, rho = -0.270704\n",
+ "nSV = 4, nBSV = 1\n",
+ "*\n",
+ "optimization finished, #iter = 20\n",
+ "nu = 0.355841\n",
+ "obj = -30.258034, rho = 0.019778\n",
+ "nSV = 36, nBSV = 34\n",
+ "*\n",
+ "optimization finished, #iter = 8\n",
+ "nu = 0.048815\n",
+ "obj = -2.645552, rho = 0.204566\n",
+ "nSV = 7, nBSV = 4\n",
+ "Total nSV = 44\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[34mNode @ 1…92\u001b[39m = predict(\u001b[0m\u001b[1m1…90\u001b[22m, transform(\u001b[0m\u001b[1m1…02\u001b[22m, \u001b[34m5…24\u001b[39m))"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dim_reducer_.ncomp = 1 # maximum output dimension\n",
+ "fit!(ŷ)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2-element Array{CategoricalString{UInt8},1}:\n",
+ " \"setosa\" \n",
+ " \"versicolor\""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ŷ(rows=3:4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Changing a classifier hyperparameter does not trigger retraining of\n",
+ "the upstream dimension reducer:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "*\n",
+ "optimization finished, #iter = 13\n",
+ "nu = 0.033696\n",
+ "obj = -1.838789, rho = -0.128178\n",
+ "nSV = 5, nBSV = 2\n",
+ "*\n",
+ "optimization finished, #iter = 24\n",
+ "nu = 0.429648\n",
+ "obj = -35.588638, rho = -0.040530\n",
+ "nSV = 44, nBSV = 42\n",
+ "*\n",
+ "optimization finished, #iter = 5\n",
+ "nu = 0.080000\n",
+ "obj = -4.676483, rho = -0.106043\n",
+ "nSV = 8, nBSV = 8\n",
+ "Total nSV = 53\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Not retraining \u001b[34mNodalMachine{PCA} @ 1…02\u001b[39m.\n",
+ "│ It appears up-to-date. Use force=true to force retraining.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:146\n",
+ "┌ Info: Updating \u001b[34mNodalMachine{SVC} @ 1…90\u001b[39m.\n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/machines.jl:152\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[34mNode @ 1…92\u001b[39m = predict(\u001b[0m\u001b[1m1…90\u001b[22m, transform(\u001b[0m\u001b[1m1…02\u001b[22m, \u001b[34m5…24\u001b[39m))"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "classifier_.gamma = 0.1\n",
+ "fit!(ŷ)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2-element Array{CategoricalString{UInt8},1}:\n",
+ " \"setosa\" \n",
+ " \"versicolor\""
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ŷ(rows=3:4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Predicting on new data (`Xnew` is substituted for `Xraw` in `source(Xraw)`):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2-element Array{CategoricalString{UInt8},1}:\n",
+ " \"setosa\"\n",
+ " \"setosa\""
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "Xnew = (SepalLength = [4.0, 5.2],\n",
+ " SepalWidth = [3.2, 3.0],\n",
+ " PetalLength = [1.2, 1.5],\n",
+ " PetalWidth = [0.1, 0.4],)\n",
+ "ŷ(Xnew)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Exporting network as stand-alone reusable model:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(pca = (ncomp = 1,\n",
+ " method = :auto,\n",
+ " pratio = 0.99,\n",
+ " mean = nothing,),\n",
+ " svc = (kernel = RadialBasis::KERNEL = 2,\n",
+ " gamma = 0.1,\n",
+ " weights = nothing,\n",
+ " cost = 1.0,\n",
+ " degree = 3,\n",
+ " coef0 = 0.0,\n",
+ " tolerance = 0.001,\n",
+ " shrinking = true,\n",
+ " probability = false,),)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "composite_ = @from_network Composite(pca=dim_reducer_, svc=classifier_) <= (X, y, ŷ)\n",
+ "params(composite_)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Evaluating using cross-validation. \n",
+ "│ nfolds=6. \n",
+ "│ shuffle=false \n",
+ "│ measure=MLJ.misclassification_rate \n",
+ "│ operation=StatsBase.predict \n",
+ "│ Resampling from all rows. \n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/resampling.jl:151\n",
+ "\u001b[33mCross-validating: 100%[=========================] Time: 0:00:02\u001b[39m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "6-element Array{Float64,1}:\n",
+ " 0.08\n",
+ " 0.08\n",
+ " 0.0 \n",
+ " 0.12\n",
+ " 0.08\n",
+ " 0.04"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "composite = machine(composite_, Xraw, yraw)\n",
+ "evaluate!(composite, measure=misclassification_rate)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Evaluating a \"self-tuning\" random forest (nested resampling):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Dict{Any,Any} with 6 entries:\n",
+ " \"MultivariateStats\" => Any[\"RidgeRegressor\"]\n",
+ " \"MLJ\" => Any[\"MLJ.Constant.DeterministicConstantRegressor\", \"ML…\n",
+ " \"DecisionTree\" => Any[\"DecisionTreeRegressor\"]\n",
+ " \"ScikitLearn\" => Any[\"SVMLRegressor\", \"ElasticNet\", \"ElasticNetCV\", \"SV…\n",
+ " \"LIBSVM\" => Any[\"EpsilonSVR\", \"NuSVR\"]\n",
+ " \"XGBoost\" => Any[\"XGBoostRegressor\"]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "task = load_boston()\n",
+ "models(task)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Evaluating a single tree:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "import MLJModels ✔\n",
+ "import DecisionTree ✔\n",
+ "import MLJModels.DecisionTree_.DecisionTreeRegressor ✔\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Evaluating using a holdout set. \n",
+ "│ fraction_train=0.7 \n",
+ "│ shuffle=false \n",
+ "│ measure=Function[rms, mav] \n",
+ "│ operation=StatsBase.predict \n",
+ "│ Resampling from all rows. \n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/resampling.jl:100\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(MLJ.rms = 8.795939100833767,\n",
+ " MLJ.mav = 5.785953164160401,)"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "@load DecisionTreeRegressor # load code\n",
+ "\n",
+ "tree_ = DecisionTreeRegressor(n_subfeatures=3)\n",
+ "tree = machine(tree_, task)\n",
+ "evaluate!(tree,\n",
+ " resampling=Holdout(fraction_train=0.7),\n",
+ " measure=[rms, mav])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Use ensembling wrapper to create a random forest:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MLJ.DeterministicEnsembleModel(atom = \u001b[34mDecisionTreeRegressor @ 7…75\u001b[39m,\n",
+ " weights = Float64[],\n",
+ " bagging_fraction = 0.8,\n",
+ " rng = MersenneTwister(UInt32[0x08804db9, 0xfc38831f, 0xd5683001, 0x444075ec]),\n",
+ " n = 10,\n",
+ " parallel = true,\n",
+ " out_of_bag_measure = Any[],)\u001b[34m @ 9…74\u001b[39m"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "forest_ = EnsembleModel(atom=tree_, n=10)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Wrapping in a tuning strategy creates a \"self-tuning\" random forest:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MLJ.DeterministicTunedModel(model = \u001b[34mDeterministicEnsembleModel{DecisionTreeRegressor} @ 9…74\u001b[39m,\n",
+ " tuning = \u001b[34mGrid @ 2…87\u001b[39m,\n",
+ " resampling = \u001b[34mCV @ 1…01\u001b[39m,\n",
+ " measure = MLJ.rms,\n",
+ " operation = StatsBase.predict,\n",
+ " ranges = MLJ.NumericRange{T,Symbol} where T[\u001b[34mNumericRange @ 1…81\u001b[39m, \u001b[34mNumericRange @ 1…80\u001b[39m],\n",
+ " minimize = true,\n",
+ " full_report = true,\n",
+ " train_best = true,)\u001b[34m @ 6…25\u001b[39m"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r1 = range(forest_, :bagging_fraction, lower=0.4, upper=1.0);\n",
+ "r2 = range(forest_, :(atom.n_subfeatures), lower=1, upper=12)\n",
+ "\n",
+ "self_tuning_forest_ = TunedModel(model=forest_,\n",
+ " tuning=Grid(),\n",
+ " resampling=CV(),\n",
+ " ranges=[r1,r2],\n",
+ " measure=rms)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Evaluate the self_tuning_forest (nested resampling):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Info: Evaluating using cross-validation. \n",
+ "│ nfolds=6. \n",
+ "│ shuffle=false \n",
+ "│ measure=Function[rms, rmslp1] \n",
+ "│ operation=StatsBase.predict \n",
+ "│ Resampling from all rows. \n",
+ "└ @ MLJ /Users/anthony/.julia/packages/MLJ/tod7z/src/resampling.jl:151\n",
+ "\u001b[33mCross-validating: 100%[=========================] Time: 0:00:18\u001b[39m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(MLJ.rms = [2.91827, 3.40544, 4.60971, 4.54709, 8.12081, 3.79819],\n",
+ " MLJ.rmslp1 = [0.148546, 0.119118, 0.148812, 0.134863, 0.345141, 0.221093],)"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "self_tuning_forest = machine(self_tuning_forest_, task)\n",
+ "\n",
+ "evaluate!(self_tuning_forest,\n",
+ " resampling=CV(),\n",
+ " measure=[rms,rmslp1])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.1.0",
+ "language": "julia",
+ "name": "julia-1.1"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.1.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 3
+}
diff --git a/examples/JuliaCon2019/demo.jl b/examples/JuliaCon2019/demo.jl
new file mode 100644
index 000000000..1e8e5857f
--- /dev/null
+++ b/examples/JuliaCon2019/demo.jl
@@ -0,0 +1,174 @@
+using Pkg
+Pkg.activate(@__DIR__) # use the Project.toml that accompanies this script
+Pkg.instantiate()
+
+#-
+
+using MLJ
+
+
+# ## Getting some data:
+
+using RDatasets
+iris = dataset("datasets", "iris"); # a DataFrame
+scrambled = shuffle(1:size(iris, 1))
+X = iris[scrambled, 1:4];
+y = iris[scrambled, 5];
+
+first(X, 4)
+
+#-
+
+y[1:5]
+
+
+# ## Basic fit and predict:
+
+@load SVC # load code
+classifier_ = SVC()
+classifier = machine(classifier_, X, y)
+fit!(classifier)
+ŷ = predict(classifier, X) # or some Xnew
+
+#-
+
+# ## Evaluating the model:
+evaluate!(classifier,
+          resampling=Holdout(fraction_train=0.8),
+          measure=misclassification_rate)
+
+
+# ## Adding dimension reduction:
+
+@load PCA
+dim_reducer_ = PCA()
+dim_reducer = machine(dim_reducer_, X)
+fit!(dim_reducer)
+Xsmall = transform(dim_reducer, X);
+
+first(Xsmall, 3)
+
+#-
+
+classifier = machine(classifier_, Xsmall, y)
+fit!(classifier)
+ŷ = predict(classifier, Xsmall)
+
+
+# ## Building a composite model:
+
+# ### Method 1: Compact syntax (but not generalizable):
+
+# (not implemented at time of talk)
+
+## composite_ = @pipeline dim_reducer_ classifier_
+
+## composite = machine(composite_, X, y)
+## evaluate!(composite, measure=misclassification_rate)
+
+
+# ### Method 2: Re-interpret unstreamlined code:
+
+Xraw = X;
+yraw = y;
+
+X = source(Xraw)
+y = source(yraw)
+
+dim_reducer = machine(dim_reducer_, X)
+Xsmall = transform(dim_reducer, X)
+
+classifier = machine(classifier_, Xsmall, y)
+ŷ = predict(classifier, Xsmall)
+
+#-
+
+fit!(ŷ)
+
+#-
+
+ŷ(rows=3:4)
+
+#-
+
+dim_reducer_.ncomp = 1 # maximum output dimension
+fit!(ŷ)
+
+#-
+
+ŷ(rows=3:4)
+
+# Changing a classifier hyperparameter does not trigger retraining of
+# the upstream dimension reducer:
+
+classifier_.gamma = 0.1
+fit!(ŷ)
+
+#-
+
+ŷ(rows=3:4)
+
+# Predicting on new data (`Xnew` is substituted for `Xraw` in `source(Xraw)`):
+
+Xnew = (SepalLength = [4.0, 5.2],
+        SepalWidth = [3.2, 3.0],
+        PetalLength = [1.2, 1.5],
+        PetalWidth = [0.1, 0.4],)
+ŷ(Xnew)
+
+
+# #### Exporting network as stand-alone reusable model:
+
+composite_ = @from_network Composite(pca=dim_reducer_, svc=classifier_) <= (X, y, ŷ)
+params(composite_)
+
+#-
+
+composite = machine(composite_, Xraw, yraw)
+evaluate!(composite, measure=misclassification_rate)
+
+# ## Evaluating a "self-tuning" random forest (nested resampling):
+
+task = load_boston()
+models(task)
+
+#-
+
+# ### Evaluating a single tree:
+
+@load DecisionTreeRegressor # load code
+
+tree_ = DecisionTreeRegressor(n_subfeatures=3)
+tree = machine(tree_, task)
+evaluate!(tree,
+          resampling=Holdout(fraction_train=0.7),
+          measure=[rms, mav])
+
+# ### Use ensembling wrapper to create a random forest:
+
+forest_ = EnsembleModel(atom=tree_, n=10)
+
+
+# ### Wrapping in a tuning strategy creates a "self-tuning" random forest:
+
+r1 = range(forest_, :bagging_fraction, lower=0.4, upper=1.0);
+r2 = range(forest_, :(atom.n_subfeatures), lower=1, upper=12)
+
+self_tuning_forest_ = TunedModel(model=forest_,
+                                 tuning=Grid(),
+                                 resampling=CV(),
+                                 ranges=[r1,r2],
+                                 measure=rms)
+
+# ### Evaluate the self_tuning_forest (nested resampling):
+
+self_tuning_forest = machine(self_tuning_forest_, task)
+
+evaluate!(self_tuning_forest,
+          resampling=CV(),
+          measure=[rms,rmslp1])
+
+
+
+
+
diff --git a/examples/JuliaCon2019/talk.pdf b/examples/JuliaCon2019/talk.pdf
new file mode 100644
index 000000000..0770a43d9
Binary files /dev/null and b/examples/JuliaCon2019/talk.pdf differ
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 000000000..31cfca5cf
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,3 @@
+To ensure the correct package versions are loaded, run each script or
+notebook from the directory containing the Project.toml file that
+accompanies it in this repo.
diff --git a/src/tuning.jl b/src/tuning.jl
index a1eca5000..8c49e921f 100644
--- a/src/tuning.jl
+++ b/src/tuning.jl
@@ -92,6 +92,7 @@ function TunedModel(;model=nothing,
!isempty(ranges) || error("You need to specify ranges=... ")
model !== nothing || error("You need to specify model=... ")
+ model isa Supervised || error("model must be a SupervisedModel. ")
message = clean!(model)
isempty(message) || @info message
diff --git a/test/Transformers.jl b/test/Transformers.jl
index e75056ae0..6aba3f77a 100644
--- a/test/Transformers.jl
+++ b/test/Transformers.jl
@@ -1,7 +1,9 @@
module TestTransformer
# using Revise
-using MLJ, MLJBase
+using MLJ
+using MLJBase
+using CSV
using Test
using Statistics
using DataFrames
diff --git a/test/datasets.jl b/test/datasets.jl
index ad806a486..0a7ea56a2 100644
--- a/test/datasets.jl
+++ b/test/datasets.jl
@@ -2,6 +2,8 @@ module TestDatasets
# using Revise
using MLJ
+using MLJBase
+using CSV
load_ames()
load_boston()
diff --git a/test/ensembles.jl b/test/ensembles.jl
index bafe09aa6..ee2cfe8e5 100644
--- a/test/ensembles.jl
+++ b/test/ensembles.jl
@@ -10,7 +10,8 @@ module TestEnsembles
using Test
using Random
using MLJ
-import MLJBase
+using MLJBase
+using CSV
using CategoricalArrays
using Distributions
diff --git a/test/machines.jl b/test/machines.jl
index c50122c7c..ca09a0eab 100644
--- a/test/machines.jl
+++ b/test/machines.jl
@@ -2,7 +2,8 @@ module TestMachines
# using Revise
using MLJ
-import MLJBase
+using MLJBase
+using CSV
using Test
using Statistics
diff --git a/test/networks.jl b/test/networks.jl
index 854d97f76..bb8a6ba80 100644
--- a/test/networks.jl
+++ b/test/networks.jl
@@ -3,7 +3,8 @@ module TestLearningNetworks
# using Revise
using Test
using MLJ
-import MLJBase
+using MLJBase
+using CSV
using CategoricalArrays
# TRAINABLE MODELS
diff --git a/test/resampling.jl b/test/resampling.jl
index 4d154a0c8..981394faf 100644
--- a/test/resampling.jl
+++ b/test/resampling.jl
@@ -4,6 +4,7 @@ module TestResampling
using Test
using MLJ
using MLJBase
+using CSV
using DataFrames
x1 = ones(4)
diff --git a/test/runtests.jl b/test/runtests.jl
index a7f005e36..44cec0e71 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,8 @@
# eg, `module TestDatasets` for code testing `datasets.jl`.
using MLJ
+using MLJBase
+using CSV
using Test
@constant junk=KNNRegressor()
diff --git a/test/tuning.jl b/test/tuning.jl
index b3ed54799..cddac2c96 100644
--- a/test/tuning.jl
+++ b/test/tuning.jl
@@ -4,7 +4,8 @@ module TestTuning
using Test
using MLJ
# using UnicodePlots
-import MLJBase
+using MLJBase
+using CSV
x1 = rand(100);
x2 = rand(100);