# TSML Demo

## Let's add workers for parallel processing

In [56]:
using Distributed
# Start worker processes only when this session has a single process,
# so re-running the cell does not keep adding workers.
if nprocs() == 1
    addprocs()
end
# Last expression of the cell: the current number of workers.
nworkers()

7

## Load TSML Modules and other Dependencies

In [99]:
using TSML, TSMLextra
@everywhere using Random
@everywhere using TSML, TSMLextra, Plots
@everywhere using TSML.TSMLTypes
@everywhere using TSML: TSClassifier
@everywhere using TSML.TSClassifiers.FileStats
@everywhere using TSML.TSMLTransformers
@everywhere using TSML.EnsembleMethods
@everywhere using TSML.DecisionTreeLearners
@everywhere using TSML.Utils
@everywhere using TSMLextra.CaretLearners
@everywhere using TSMLextra.SKLearners
@everywhere using DataFrames
@everywhere using Statistics
@everywhere using StatsBase: iqr

## Functions for feature extraction and prediction

In [58]:
# getprediction(model, data)
#
# Fit `model` on the training split stored in `data`, predict on the testing
# split, and return the fraction of predictions equal to the expected labels.
#
# `data` is indexed with the keys :trfeatures, :tstfeatures, :troutput and
# :tstoutput. `model` is fitted in place through `fit!`.
@everywhere function getprediction(model::TSLearner,data::Dict)
    # Reseed the global RNG with a fixed value on every call.
    Random.seed!(126)

    xtrain, xtest = data[:trfeatures], data[:tstfeatures]
    ytrain, ytest = data[:troutput], data[:tstoutput]

    fit!(model, xtrain, ytrain)
    predicted = transform!(model, xtest)

    # Accuracy = number of element-wise matches / number of test labels.
    nhits = sum(predicted .== ytest)
    return nhits / length(ytest)
end


# extract_features_from_timeseries(modeldirname, trdirname, tstdirname)
#
# Compute the `getstats` table for the training and testing directories and
# return a Dict with the feature matrices (:trfeatures, :tstfeatures) and the
# `:dtype` label columns (:troutput, :tstoutput).
#
# `modeldirname` is accepted but not used inside this function.
@everywhere function extract_features_from_timeseries(modeldirname,trdirname,tstdirname)
    # Columns 5 through 20 of the stats table form the feature matrix.
    featurecols = 5:20

    # Print a banner, compute the stats table for one directory, then split it
    # into (feature matrix, label column).
    function featurize(banner, dir)
        println(banner)
        stats = getstats(dir)
        return Matrix(stats[:, featurecols]), stats[:, :dtype]
    end

    trfeatures, troutput = featurize("*** Extracting training features ***", trdirname)
    println()
    tstfeatures, tstoutput = featurize("*** Extracting testing features ***", tstdirname)

    return Dict(
        :trfeatures => trfeatures,
        :tstfeatures => tstfeatures,
        :troutput => troutput,
        :tstoutput => tstoutput,
    )
end

## Data processing and feature extraction

In [59]:
# Resolve the model/training/testing directories relative to the directory
# that contains the TSML package source file.
modeldir, trainingdir, testingdir = let srcdir = dirname(pathof(TSML))
    (
        joinpath(srcdir, "../data/realdatatsclassification/model"),
        joinpath(srcdir, "../data/realdatatsclassification/training"),
        joinpath(srcdir, "../data/realdatatsclassification/testing"),
    )
end

# Build the train/test feature dictionary used by the cells below.
data = extract_features_from_timeseries(modeldir, trainingdir, testingdir)

*** Extracting training features ***
getting stats of AirOffTemp1.csv
getting stats of AirOffTemp2.csv
getting stats of AirOffTemp3.csv
getting stats of Energy1.csv
getting stats of Energy2.csv
getting stats of Energy3.csv
getting stats of Energy4.csv
getting stats of Energy6.csv
getting stats of Energy7.csv
getting stats of Energy8.csv
getting stats of Energy9.csv
getting stats of Pressure1.csv
getting stats of Pressure3.csv
getting stats of Pressure4.csv
getting stats of Pressure6.csv
getting stats of RetTemp11.csv
getting stats of RetTemp21.csv
getting stats of RetTemp41.csv
getting stats of RetTemp51.csv

*** Extracting testing features ***
getting stats of AirOffTemp4.csv
getting stats of AirOffTemp5.csv
getting stats of Energy10.csv
getting stats of Energy5.csv
getting stats of Pressure5.csv
getting stats of RetTemp31.csv


Dict{Symbol,Array} with 4 entries:
  :tstfeatures => [6.0 2.15 … 1.44612 0.679828; 9.0 3.7 … 2.47319 1.11812; … ; …
  :tstoutput   => SubString{String}["AirOffTemp", "AirOffTemp", "Energy", "Ener…
  :troutput    => SubString{String}["AirOffTemp", "AirOffTemp", "AirOffTemp", "…
  :trfeatures  => [8.9 3.1 … 2.88221 1.18164; 5.2 2.0 … 1.80057 0.921901; … ; 1…

## Initialize ML models from Julia, Caret, and scikit-learn

In [119]:
# Define every learner on all processes so that each one has its own
# instance bound to the same global name.
@everywhere begin
    # Learners built through CaretLearner.
    caret_svmlinear = CaretLearner(Dict(:learner => "svmLinear"))
    caret_treebag   = CaretLearner(Dict(:learner => "treebag"))
    caret_rpart     = CaretLearner(Dict(:learner => "rpart"))
    caret_rf        = CaretLearner(Dict(:learner => "rf"))

    # Learners built through SKLearner.
    sk_ridge     = SKLearner(Dict(:learner => "RidgeClassifier"))
    sk_sgd       = SKLearner(Dict(:learner => "SGDClassifier"))
    sk_knn       = SKLearner(Dict(:learner => "KNeighborsClassifier"))
    sk_gb        = SKLearner(Dict(:learner => "GradientBoostingClassifier"))
    sk_extratree = SKLearner(Dict(:learner => "ExtraTreesClassifier"))

    # Tree-based learners constructed directly.
    jrf  = RandomForest(Dict(:impl_args => Dict(:num_trees => 30)))
    jpt  = PrunedTree()
    jada = Adaboost()

    # Ensembles over jrf and sk_gb, plus a vote ensemble over those ensembles.
    jvote_ens  = VoteEnsemble(Dict(:learners => [jrf, sk_gb]))
    jstack_ens = StackEnsemble(Dict(:learners => [jrf, sk_gb]))
    jbest_ens  = BestLearner(Dict(:learners => [jrf, sk_gb]))
    jsuper_ens = VoteEnsemble(Dict(:learners => [jvote_ens, jstack_ens, jbest_ens]))
end

## Run all models in parallel across multiple trials

In [120]:
"""
    runme()

Evaluate each registered learner with `getprediction` over 10 trials, using
nested `@distributed` loops (outer over learners, inner over trials), and
return a `DataFrame` with one row per learner.

The columns are `model`, `max`, `median`, `iqr` (each statistic rounded to two
digits) and `trials`; rows are sorted by `max` in descending order.
"""
function runme()
    registry = Dict(
        :jvote_ens => jvote_ens,
        :jstack_ens => jstack_ens,
        :jbest_ens => jbest_ens,
        :jrf => jrf,
        :jada => jada,
        :jsuper_ens => jsuper_ens,
        :crt_svmlinear => caret_svmlinear,
        :crt_treebag => caret_treebag,
        :crt_rpart => caret_rpart,
        :crt_rf => caret_rf,
        :skl_knn => sk_knn,
        :skl_gb => sk_gb,
        :skl_extratree => sk_extratree,
    )

    modelkeys = collect(keys(registry))
    ntrials = 10

    # Each outer iteration yields one 1x5 row; `vcat` stacks them into a matrix.
    summary = @distributed (vcat) for key in modelkeys
        # `vcat` gathers the per-trial accuracies of this learner.
        scores = @distributed (vcat) for i in 1:ntrials
            # NOTE(review): getprediction itself calls Random.seed!(126), which
            # appears to override this per-trial seed — confirm the intent.
            Random.seed!(i*i)
            accuracy = getprediction(registry[key], data)
            println(key, " => ", round(accuracy, digits=2))
            accuracy
        end
        best = round(maximum(scores), digits=2)
        middle = round(median(scores), digits=2)
        spread = round(iqr(scores), digits=2)
        [key best middle spread length(scores)]
    end

    # Columns are referred to as :x1..:x5 here; rank by :x2 (max accuracy)
    # before giving them readable names.
    frame = DataFrame(summary)
    ranked = sort(frame, :x2, rev=true)
    rename!(
        ranked,
        Dict(:x1 => :model, :x2 => :max, :x3 => :median, :x4 => :iqr, :x5 => :trials),
    )
    return ranked
end
runme()

      From worker 8:	jada => 0.33
      From worker 9:	jada => 0.33
      From worker 9:	jada => 0.33
      From worker 7:	jada => 0.33
      From worker 8:	jada => 0.33
      From worker 7:	jada => 0.33
      From worker 2:	jada => 0.33
      From worker 2:	jada => 0.33
      From worker 4:	crt_rf => 0.83


│   There were missing values in resampled performance measures.
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113


      From worker 4:	crt_rf => 0.67
      From worker 4:	jada => 0.33
      From worker 4:	jada => 0.33
      From worker 2:	crt_rf => 0.83
      From worker 8:	crt_rf => 0.83
      From worker 9:	crt_rf => 0.83
      From worker 7:	crt_rf => 0.83


│   There were missing values in resampled performance measures.
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113


      From worker 9:	crt_rf => 0.83
      From worker 9:	jrf => 0.67
      From worker 7:	crt_rf => 0.83
      From worker 7:	jrf => 0.67
      From worker 2:	jrf => 0.67
      From worker 2:	crt_rf => 0.83
      From worker 8:	crt_rf => 0.83
      From worker 4:	jrf => 0.67
      From worker 8:	jrf => 0.67
      From worker 8:	crt_svmlinear => 0.83


│   There were missing values in resampled performance measures.
└ @ RCall ~/.julia/packages/RCall/iojZI/src/io.jl:113


      From worker 2:	crt_svmlinear => 0.83
      From worker 8:	crt_svmlinear => 0.83
      From worker 8:	jstack_ens => 0.33
      From worker 7:	crt_svmlinear => 0.83
      From worker 9:	crt_svmlinear => 0.83
      From worker 4:	crt_svmlinear => 0.83
      From worker 2:	crt_svmlinear => 0.83
      From worker 2:	jstack_ens => 0.33
      From worker 2:	jrf => 0.67
      From worker 7:	crt_svmlinear => 0.83
      From worker 7:	jstack_ens => 0.33
      From worker 7:	jrf => 0.67
      From worker 9:	crt_svmlinear => 0.83
      From worker 9:	jstack_ens => 0.33
      From worker 9:	jrf => 0.67
      From worker 4:	crt_svmlinear => 0.83
      From worker 4:	jstack_ens => 0.33
      From worker 4:	jrf => 0.67
      From worker 4:	crt_rpart => 0.33
      From worker 4:	crt_rpart => 0.33
      From worker 9:	crt_rpart => 0.33
      From worker 7:	crt_rpart => 0.33
      From worker 8:	crt_rpart => 0.33
      From worker 2:	crt_rpart => 0.33
      From worker 2:	crt_rpart => 0.33
      Fr

Unnamed: 0_level_0,model,max,median,iqr,trials
Unnamed: 0_level_1,Any,Any,Any,Any,Any
1,jbest_ens,0.83,0.58,0.17,10
2,skl_extratree,0.83,0.83,0.0,10
3,crt_rf,0.83,0.83,0.0,10
4,crt_treebag,0.83,0.83,0.0,10
5,skl_gb,0.83,0.83,0.0,10
6,crt_svmlinear,0.83,0.83,0.0,10
7,jsuper_ens,0.83,0.67,0.29,10
8,jrf,0.67,0.67,0.0,10
9,jvote_ens,0.67,0.67,0.0,10
10,skl_knn,0.67,0.67,0.0,10
