## Load TSML filters

In [None]:
using TSML
using TSML.Utils
using TSML.TSMLTypes

using TSML: CSVDateValReader, CSVDateValWriter, Statifier
using TSML: Monotonicer, Outliernicer, Plotter

## Create artificial data function

In [None]:
using DataFrames
using Dates
using Random

# Report a very wide terminal (value is stored as the string "1000") for DataFrame column display.
ENV["COLUMNS"] = "1000"

"""
    generateXY()

Build a reproducible artificial time series with missing values.

Seeds the global RNG with 123, creates timestamps every 15 minutes from
2014-01-01 through 2014-01-05, draws one uniform random value per timestamp,
and replaces 30% of the values (rounded down) with `missing` at randomly
shuffled positions.

Returns the tuple `(X, Y)`: `X` is a `DataFrame` with columns `Date` and
`Value`; `Y` is a second vector of random numbers with one entry per timestamp.
"""
function generateXY()
    Random.seed!(123)
    timestamps = DateTime(2014,1,1):Dates.Minute(15):DateTime(2014,1,5)
    npoints = length(timestamps)
    # Widen the element type up front so `missing` can be stored later.
    readings = convert(Vector{Union{Missing,Float64}}, rand(npoints))
    nholes = Integer(floor(0.30 * npoints))
    holes = Random.shuffle(1:npoints)[1:nholes]
    X = DataFrame(Date=timestamps, Value=readings)
    X.Value[holes] .= missing
    Y = rand(npoints)
    return (X, Y)
end;

## Generate artificial data with missing values

In [None]:
# Unpack the generated frame and the second random vector, then preview the first 10 rows.
df, outY = generateXY()
first(df, 10)

## Use Pipeline and Plotter to plot artificial data

In [None]:
# Plotter configured with :interactive => false; reused by later cells.
pltr = Plotter(Dict(:interactive => false))

# Pipeline whose only stage is the plotter.
mypipeline = Pipeline(Dict(:transformers => [pltr]))

fit!(mypipeline, df)
transform!(mypipeline, df)

## Get statistics including blocks of missing data

In [None]:
# Statifier configured with :processmissing => true.
statfier = Statifier(Dict(:processmissing => true))

# Pipeline whose only stage is the statifier.
mypipeline = Dict(:transformers => [statfier]) |> Pipeline

fit!(mypipeline, df)
res = transform!(mypipeline, df)

## Use Pipeline: aggregate, impute, and plot 

In [None]:
# Both stages are configured with a one-hour date interval.
valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))
valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))

# Run valnner between valgator and the plotter so the pipeline matches the
# "aggregate, impute, and plot" section title; it was constructed here but
# never added to the transformer list.
mypipeline = Pipeline(Dict(
  :transformers => [valgator,valnner,pltr]
 )
)

fit!(mypipeline, df)
transform!(mypipeline, df)

## Try real data

In [None]:
# Input CSV located relative to the TSML source directory.
fname = joinpath(dirname(pathof(TSML)),"../data/testdata.csv")
csvreader = CSVDateValReader(Dict(:filename=>fname,:dateformat=>"dd/mm/yyyy HH:MM"))

# Write output into the system temp directory. Joining an absolute "/tmp/..."
# component onto the package directory makes joinpath drop the package
# directory and hard-codes a Unix-only location.
outputname = joinpath(tempdir(),"testdata_output.csv")
csvwriter = CSVDateValWriter(Dict(:filename=>outputname))

# Transformers shared by the real-data pipelines in the following cells.
valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))
valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))
stfier = Statifier(Dict(:processmissing=>true))
outliernicer = Outliernicer(Dict(:dateinterval=>Dates.Hour(1)));

## Plot real data with missing values

In [None]:
# Stage order: csvreader -> valgator -> pltr.
mpipeline1 = Pipeline(Dict(:transformers => [csvreader, valgator, pltr]))

fit!(mpipeline1)
transform!(mpipeline1)

## Get statistics including blocks of missing data

In [None]:
# Stage order: csvreader -> valgator -> stfier.
mpipeline1 = Dict(:transformers => [csvreader, valgator, stfier]) |> Pipeline

fit!(mpipeline1)
respipe1 = transform!(mpipeline1)

## Try imputing and get statistics

In [None]:
# Stage order: csvreader -> valgator -> valnner -> stfier.
mpipeline2 = Pipeline(Dict(:transformers => [csvreader, valgator, valnner, stfier]))

fit!(mpipeline2)
respipe2 = transform!(mpipeline2)

## Plot imputed data

In [None]:
# Stage order: csvreader -> valgator -> valnner -> pltr.
mpipeline2 = Dict(:transformers => [csvreader, valgator, valnner, pltr]) |> Pipeline

fit!(mpipeline2)
transform!(mpipeline2)

### Monotonicer

In [None]:
# Sample CSV files located relative to the TSML source directory.
regularfile = joinpath(dirname(pathof(TSML)),"../data/typedetection/regular.csv")
monofile = joinpath(dirname(pathof(TSML)),"../data/typedetection/monotonic.csv")
dailymonofile = joinpath(dirname(pathof(TSML)),"../data/typedetection/dailymonotonic.csv")

# One reader per file; all three use the same date format.
regularfilecsv = CSVDateValReader(Dict(:filename=>regularfile,:dateformat=>"dd/mm/yyyy HH:MM"))
monofilecsv = CSVDateValReader(Dict(:filename=>monofile,:dateformat=>"dd/mm/yyyy HH:MM"))
dailymonofilecsv = CSVDateValReader(Dict(:filename=>dailymonofile,:dateformat=>"dd/mm/yyyy HH:MM"))

# Transformers used by the pipelines in the following cells.
# (stfier is built once; an identical second assignment was redundant.)
valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))
valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))
stfier = Statifier(Dict(:processmissing=>true))
mono = Monotonicer(Dict())
outliernicer = Outliernicer(Dict(:dateinterval=>Dates.Hour(1)));

## Plot of monotonic data

In [None]:
# Stage order: monofilecsv -> valgator -> valnner -> pltr.
monopipeline = Pipeline(Dict(:transformers => [monofilecsv, valgator, valnner, pltr]))

fit!(monopipeline)
transform!(monopipeline)

## Plot after normalization of monotonic data

In [None]:
# Stage order: monofilecsv -> valgator -> valnner -> mono -> pltr.
monopipeline = Dict(:transformers => [monofilecsv, valgator, valnner, mono, pltr]) |> Pipeline

fit!(monopipeline)
transform!(monopipeline)

## Plot with Monotonicer and Outliernicer

In [None]:
# Stage order: monofilecsv -> valgator -> valnner -> mono -> outliernicer -> pltr.
monopipeline = Pipeline(
    Dict(:transformers => [monofilecsv, valgator, valnner, mono, outliernicer, pltr]),
)

fit!(monopipeline)
transform!(monopipeline)


## Plot of daily monotonic

In [None]:
# Stage order: dailymonofilecsv -> valgator -> valnner -> pltr.
dailymonopipeline = Pipeline(Dict(:transformers => [dailymonofilecsv, valgator, valnner, pltr]))

fit!(dailymonopipeline)
transform!(dailymonopipeline)

## Plot of daily monotonic data with Monotonicer

In [None]:
# Stage order: dailymonofilecsv -> valgator -> valnner -> mono -> pltr.
dailymonopipeline =
    Dict(:transformers => [dailymonofilecsv, valgator, valnner, mono, pltr]) |> Pipeline

fit!(dailymonopipeline)
transform!(dailymonopipeline)

## Plot of daily monotonic with Monotonicer and Outliernicer

In [None]:
# Stage order: dailymonofilecsv -> valgator -> valnner -> mono -> outliernicer -> pltr.
dailymonopipeline = Pipeline(
    Dict(:transformers => [dailymonofilecsv, valgator, valnner, mono, outliernicer, pltr]),
)

fit!(dailymonopipeline)
transform!(dailymonopipeline)

## Plot regular TS after monotonic normalization

In [None]:
# Stage order: regularfilecsv -> valgator -> valnner -> mono -> pltr.
regpipeline = Pipeline(Dict(:transformers => [regularfilecsv, valgator, valnner, mono, pltr]))

fit!(regpipeline)
transform!(regpipeline)

## Plot of regular TS with outlier normalization

In [None]:
# Stage order: regularfilecsv -> valgator -> valnner -> mono -> outliernicer -> pltr.
regpipeline = Pipeline(
    Dict(:transformers => [regularfilecsv, valgator, valnner, mono, outliernicer, pltr]),
)

fit!(regpipeline)
transform!(regpipeline)

## TS Discovery by automatic data type classification

In [None]:
using TSML: TSClassifier
# Seed the global RNG (seed value 12) before building and fitting the classifier.
Random.seed!(12)

# Training, testing and model directories, located relative to the TSML source directory.
trdirname = joinpath(pathof(TSML) |> dirname, "../data/realdatatsclassification/training")
tstdirname = joinpath(pathof(TSML) |> dirname, "../data/realdatatsclassification/testing")
modeldirname = joinpath(pathof(TSML) |> dirname, "../data/realdatatsclassification/model")

# Classifier options: the three directories above, a 6:20 feature range and 10 trees.
tscl = TSClassifier(
    Dict(
        :trdirectory => trdirname,
        :tstdirectory => tstdirname,
        :modeldirectory => modeldirname,
        :feature_range => 6:20,
        :num_trees => 10,
    ),
)

fit!(tscl)
dfresults = transform!(tscl)

In [None]:
# Use property access for the columns: the single-symbol form `df[:col]` is
# rejected by current DataFrames.jl.
# NOTE(review): assumes dfresults is a DataFrame with `predtype` and `fname` columns - confirm.
apredict = dfresults.predtype
fnames = dfresults.fname

# Split each file name into a type label, an optional number and an extension.
# NOTE(review): inside the character class, ` - ` appears to parse as a
# space-to-space range (so a literal hyphen is not matched), and the `.` before
# the extension is unescaped - confirm whether both are intended.
myregex = r"(?<dtype>[A-Z _ - a-z]+)(?<number>\d*).(?<ext>\w+)"
mtypes = map(fnames) do fname
  mymatch = match(myregex,fname)
  mymatch[:dtype]
end

# Percentage of files whose name-derived type equals the predicted type.
sum(mtypes .== apredict)/length(mtypes) * 100