In [44]:
using TSML: fit,fit!,transform,transform!
using TSML
using DataFrames
using Dates
using CSV
using Random

In [45]:
fname ="testdata.csv"
dat = CSV.read(fname, DataFrame)
rename!(dat,names(dat)[1]=>:Date,names(dat)[2]=>:Value)
dat.Date = DateTime.(dat.Date,"d/m/y H:M")
orig = deepcopy(dat)
filter1 = DateValgator()
filter2 = DateValLinearImputer();

In [46]:
fit!(filter1,dat,[])
res1=transform!(filter1,dat)
(first(res1,5),size(res1,1))

([1m5×2 DataFrame[0m
[1m Row [0m│[1m Date                [0m[1m Value    [0m
     │[90m DateTime            [0m[90m Float64? [0m
─────┼───────────────────────────────
   1 │ 2014-01-01T00:00:00      10.0
   2 │ 2014-01-01T01:00:00       9.9
   3 │ 2014-01-01T02:00:00      10.0
   4 │ 2014-01-01T03:00:00      10.0
   5 │ 2014-01-01T04:00:00      10.0, 8761)

In [47]:
fit!(filter2,res1,[])
res2=transform!(filter2,res1)
(first(res2,5),size(res2,1))

([1m5×2 DataFrame[0m
[1m Row [0m│[1m Date                [0m[1m Value    [0m
     │[90m DateTime            [0m[90m Float64? [0m
─────┼───────────────────────────────
   1 │ 2014-01-01T00:00:00      10.0
   2 │ 2014-01-01T01:00:00       9.9
   3 │ 2014-01-01T02:00:00      10.0
   4 │ 2014-01-01T03:00:00      10.0
   5 │ 2014-01-01T04:00:00      10.0, 8761)

In [48]:
mypipeline = filter1 |> filter2;

In [49]:
fit_transform!(mypipeline,dat) |> x->first(x,5)

Row,Date,Value
Unnamed: 0_level_1,DateTime,Float64?
1,2014-01-01T00:00:00,10.0
2,2014-01-01T01:00:00,9.9
3,2014-01-01T02:00:00,10.0
4,2014-01-01T03:00:00,10.0
5,2014-01-01T04:00:00,10.0


In [50]:
module MCSVReaders
using TSML
using Statistics
using Random
using DataFrames
import TSML.AbsTypes.fit!
import TSML.AbsTypes.transform!
export fit!,transform!
export MCSVDateValReader

mutable struct MCSVDateValReader <: Transformer
   name::String
   model::Dict{Symbol,Any}

   function MCSVDateValReader(args=Dict())
      default_args = Dict(
          :name => "csvrdr",
          :filename => "",
          :dateformat => ""
      )
      cargs=nested_dict_merge(default_args,args)
      cargs[:name] = cargs[:name]*"_"*randstring(3)
      new(cargs[:name],cargs)

   end
end

function fit!(csvrdr::MCSVDateValReader,x::DataFrame=DataFrame(),y::Vector=[])::Nothing
   fname = csvrdr.model[:filename]
   fmt = csvrdr.model[:dateformat]
   (fname != "" && fmt != "") || throw(ArgumentError("missing filename or date format"))
   return nothing
end

function transform!(csvrdr::MCSVDateValReader,x::DataFrame=DataFrame())::DataFrame
   fname = csvrdr.model[:filename]
   fmt = csvrdr.model[:dateformat]
   df = CSV.File(fname) |> DataFrame
   ncol(df) == 2 || throw(ArgumentError("dataframe should have only two columns: Date,Value"))
   rename!(df,names(df)[1]=>:Date,names(df)[2]=>:Value)
   if !(eltype(df.Date) <: DateTime )
      df.Date = DateTime.(df.Date,fmt)
   end
   df
end
end
using .MCSVReaders



In [51]:
csvreader = MCSVDateValReader(Dict(:filename=>"testdata.csv",:dateformat=>"d/m/y H:M"))
fit!(csvreader)
res=transform!(csvreader)
first(res,5)

Row,Date,Value
Unnamed: 0_level_1,DateTime,Float64
1,2014-01-01T00:06:00,10.0
2,2014-01-01T00:18:00,10.0
3,2014-01-01T00:29:00,10.0
4,2014-01-01T00:40:00,9.9
5,2014-01-01T00:51:00,9.9


In [52]:
mypipeline = csvreader |> filter1 |> filter2
    
fit_transform!(mypipeline) |> x->first(x,5)

Row,Date,Value
Unnamed: 0_level_1,DateTime,Float64?
1,2014-01-01T00:00:00,10.0
2,2014-01-01T01:00:00,9.9
3,2014-01-01T02:00:00,10.0
4,2014-01-01T03:00:00,10.0
5,2014-01-01T04:00:00,10.0


In [53]:
mydate=DateTime(2014,1,1):Dates.Minute(15):DateTime(2014,1,3) # 15 minutes interval
values = Array{Union{Float64,Missing}}(sin.(1:0.1:length(mydate)) .+ cos.(1:0.1:length(mydate)))[1:length(mydate)];
x = DataFrame(Date=mydate,Value=values); xx = deepcopy(x);first(x,10)
nmissing=floor(nrow(x) * 0.80) |> Integer
ndxmissing=Random.shuffle(1:nrow(x))[1:nmissing]
x.Value[ndxmissing] .= missing; first(x,15)
dvtr = TSML.DateValgator()
TSML.fit!(dvtr,x,[]);TSML.fit!(dvtr,xx,[])
inputx = TSML.transform!(dvtr,x); inputxx = TSML.transform!(dvtr,xx);
dvnnr = TSML.DateValLinearImputer(Dict(:dateinterval=>Dates.Hour(1),:nnsize=>1))
TSML.fit!(dvnnr,inputx,[])
pred_y=TSML.transform!(dvnnr,inputx);pred_yy=TSML.transform!(dvnnr,inputxx);

In [54]:
sqrt(sum(pred_y.Value-pred_yy.Value).^2)

0.007418461947682808

In [55]:
rename!(pred_y,:Value=>:MissingVals);

In [56]:
jx = leftjoin(pred_y,pred_yy,on=:Date)
jx.error = (jx.MissingVals .- jx.Value).^2
first(jx,5)

Row,Date,MissingVals,Value,error
Unnamed: 0_level_1,DateTime,Float64?,Float64?,Float64
1,2014-01-01T00:00:00,0.86282,1.36329,0.250468
2,2014-01-01T01:00:00,0.86282,1.19324,0.109175
3,2014-01-01T02:00:00,0.86282,0.804733,0.00337414
4,2014-01-01T03:00:00,0.149712,0.289179,0.0194511
5,2014-01-01T04:00:00,-0.403921,-0.272029,0.0173954
