JuliaData · mrchaos · Jun 22, 2021 · Jun 22, 2021 · Jun 22, 2021 · Jul 12, 2021
diff --git a/Project.toml b/Project.toml
@@ -33,7 +33,7 @@ OnlineStats = "1"
 OnlineStatsBase = "1"
 PooledArrays = "0.5.2,1"
 RecipesBase = "0.7,1"
-StatsBase = "0.32,0.33"
+StatsBase = "0.32,0.33,0.33.8"
 TextParse = "0.9.1,1"
 WeakRefStrings = "0.6"
 julia = "1"

diff --git a/docs/src/ml.md b/docs/src/ml.md
@@ -10,7 +10,7 @@ using JuliaDB
 download("https://raw.githubusercontent.com/agconti/"*
           "kaggle-titanic/master/data/train.csv", "train.csv")
 
-train_table = loadtable("train.csv", escapechar='"')
+train_table = dropmissing(loadtable("train.csv", escapechar='"'))
 select(train_table, Not((:Name, :Ticket, :Cabin))) # hide
 ```
 
@@ -42,9 +42,6 @@ You may note that `Survived` column contains only 1s and 0s to denote whether a
 sch = ML.schema(train_table, hints=Dict(
         :Pclass => ML.Categorical,
         :Survived => ML.Categorical,
-        :Parch => nothing,
-        :SibSp => nothing,
-        :Fare => nothing,
         )
 )
 ```
@@ -62,11 +59,11 @@ input_sch, output_sch = ML.splitschema(sch, :Survived)
 Once the schema has been created, you can extract the feature matrix according to the given schema using `ML.featuremat`:
 
 ```@example titanic
-train_input = ML.featuremat(input_sch, train_table)
+train_input = ML.featuremat(input_sch, collect(train_table))
 ```
 
 ```@example titanic
-train_output = ML.featuremat(output_sch, train_table)
+train_output = ML.featuremat(output_sch, collect(train_table))
 ```
 
 ## Learning
@@ -78,22 +75,23 @@ Let us create a simple neural network to learn whether a passenger will survive
 ```@example titanic
 using Flux
 
+data = [(train_input, train_output)]
+
 model = Chain(
   Dense(ML.width(input_sch), 32, relu),
   Dense(32, ML.width(output_sch)),
   softmax)
 
 loss(x, y) = Flux.mse(model(x), y)
-opt = Flux.ADAM(Flux.params(model))
+opt = Flux.ADAM()
 evalcb = Flux.throttle(() -> @show(loss(first(data)...)), 2);
 ```
 
 Train the data in 10 iterations
 
 ```@example titanic
-data = [(train_input, train_output)]
 for i = 1:10
-  Flux.train!(loss, data, opt, cb = evalcb)
+  Flux.train!(loss, Flux.params(model), data, opt, cb = evalcb)
 end
 ```
 
@@ -108,9 +106,9 @@ Now let's load some testing data to use the model we learned to predict survival
 download("https://raw.githubusercontent.com/agconti/"*
           "kaggle-titanic/master/data/test.csv", "test.csv")
 
-test_table = loadtable("test.csv", escapechar='"')
+test_table = dropmissing(loadtable("test.csv", escapechar='"'))
 
-test_input = ML.featuremat(input_sch, test_table) ;
+test_input = ML.featuremat(input_sch, collect(test_table)) ;
 ```
 
 Run the model on one observation:

diff --git a/src/ml.jl b/src/ml.jl
@@ -153,8 +153,8 @@ function featuremat!(A, schemas::Schema, t::Dataset)
 end
 
 splitschema(xs::Schema, ks...) =
-    filter((k,v) -> k ∉ ks, xs),
-    filter((k,v) -> k ∈ ks, xs)
+    filter(k -> k.first ∉ ks, xs),
+    filter(k -> k.first ∈ ks, xs)
 
 function featuremat(sch, xs)
     featuremat!(zeros(Float32, length(xs), width(sch)), sch, xs)'