FluxML · lorenzoh · Jul 29, 2022 · Jun 18, 2022 · Jun 22, 2022 · Jun 23, 2022
diff --git a/src/TimeSeries/TimeSeries.jl b/src/TimeSeries/TimeSeries.jl
@@ -31,12 +31,22 @@ using InlineTest
 # Blocks
 include("blocks/timeseriesrow.jl")
 
+include("encodings/timeseriespreprocessing.jl")
+
+# Learning-task registry entries of this submodule, keyed by a short name. It has to be
+# defined before "tasks/classification.jl" is included, because that file stores its
+# entry in `_tasks` while it is being loaded.
+const _tasks = Dict{String, Any}()
+include("tasks/classification.jl")
+
 include("recipes.jl")
 
 function __init__()
     _registerrecipes()
+    # Add every entry collected in `_tasks` to FastAI's learning-task registry, skipping
+    # ids that are already present there.
+    for task in values(_tasks)
+        haskey(FastAI.learningtasks(), task.id) || push!(FastAI.learningtasks(), task)
+    end
 end
 
-export TimeSeriesRow
-
+export 
+    TimeSeriesRow, TSClassificationSingle, TimeSeriesPreprocessing
-export 
-    TimeSeriesRow, TSClassificationSingle, TimeSeriesPreprocessing
+export 
+    TimeSeriesRow, TSClassificationSingle, TSPreprocessing
-export 
-    TimeSeriesRow, TSClassificationSingle, TimeSeriesPreprocessing
+export 
+    TimeSeriesRow, TSClassificationSingle, TSPreprocessing
 end
diff --git a/src/TimeSeries/encodings/timeseriespreprocessing.jl b/src/TimeSeries/encodings/timeseriespreprocessing.jl
@@ -0,0 +1,51 @@
+"""
+    TSPreprocessing(tfms) <: Encoding
+    TSPreprocessing()
+
+Encoding for `TimeSeriesRow` blocks that applies every transformation in `tfms`, one
+after the other, to an observation. It is meant for normalizing the time-series values,
+either by each variable or by each time-step. The zero-argument constructor installs no
+transformations, so encoding with it returns the observation unchanged.
+
+Encodes
+- `TimeSeriesRow` -> `TimeSeriesRow`
+"""
+struct TSPreprocessing <: Encoding
+    # Collection of callables; each one receives the observation produced by the
+    # previous one.
+    tfms
+end
+
+# Convenience constructor: a `TSPreprocessing` whose transformation list is empty.
+TSPreprocessing() = TSPreprocessing([])
+
+# The encoding does not change the block: a `TimeSeriesRow` is returned as it was given.
+encodedblock(p::TSPreprocessing, block::TimeSeriesRow) = block
+
+# Encode `obs` by threading it through every transformation stored in `p.tfms`, in
+# iteration order. `context` and `block` are not consulted; with no transformations the
+# observation is returned unchanged.
+function encode(p::TSPreprocessing, context, block::TimeSeriesRow, obs)
+    return foldl((acc, tfm) -> tfm(acc), values(p.tfms); init = obs)
+end
+
+"""
+    tsdatasetstats(data; by_var = false, by_step = false)
+
+Return `(means, stds)`: the mean and standard deviation of `data`, which is treated as
+having axes 1, 2 and 3, reduced over every axis that is not kept.
+
+# Keywords
+- `by_var`: keep axis 2, i.e. compute separate statistics for each index along it.
+- `by_step`: keep axis 3. Axis 3 is also kept whenever `by_var` is `false`, so the
+  default call reduces over axes 1 and 2.
+
+Reduced axes remain in both results as singleton dimensions.
+NOTE(review): axis 2 is presumably the variable axis and axis 3 the time-step axis —
+confirm against the layout produced by the data container.
+"""
+function tsdatasetstats(data; by_var = false, by_step = false)
+    kept = Int[]
+    by_var && push!(kept, 2)
+    # `by_step` used to be ignored; it now keeps axis 3 together with `by_var`. Without
+    # `by_var`, axis 3 is kept as before.
+    (by_step || !by_var) && push!(kept, 3)
+    reduced = setdiff(1:3, kept)
+    means = Statistics.mean(data; dims = reduced)
+    stds = Statistics.std(data; dims = reduced)
+    return means, stds
+end
+
+# Compute dataset statistics for `data` with the default settings of `tsdatasetstats`
+# and return them as a `(means, stds)` tuple; the block argument only drives dispatch.
+# NOTE(review): this returns raw statistics rather than a `TSPreprocessing` instance —
+# confirm that this is what callers of `setup` expect.
+function setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data)
+    return tsdatasetstats(data)
+end
-function setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data)
-    means, stds = tsdatasetstats(data)
-end
+# Returns the `(means, stds)` tuple computed by `tsdatasetstats(data)` with defaults.
+setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data) = tsdatasetstats(data)
-function setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data)
-    means, stds = tsdatasetstats(data)
-end
+# Returns the `(means, stds)` tuple computed by `tsdatasetstats(data)` with defaults.
+setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data) = tsdatasetstats(data)
diff --git a/src/TimeSeries/tasks/classification.jl b/src/TimeSeries/tasks/classification.jl
@@ -0,0 +1,29 @@
+"""
+    TSClassificationSingle(blocks, data)
+
+Learning task for single-label time-series classification: every time series is
+classified into one of the `classes`.
+
+`blocks` is a `(TimeSeriesRow, Label)` tuple. The only encoding applied is `OneHot()`.
+`data` is required but not used yet (NOTE(review): presumably reserved for fitting
+normalization statistics — confirm).
+"""
+function TSClassificationSingle(blocks::Tuple{<:TimeSeriesRow, <:Label}, data)
+    # The trailing comma matters: `(OneHot())` is just `OneHot()`, whereas `(OneHot(),)`
+    # is the 1-tuple of encodings intended here.
+    return SupervisedTask(blocks, (OneHot(),))
+end
+
+# Registry entry for the single-label time-series classification task, stored in the
+# module-level `_tasks` dictionary under the key "tsclfsingle".
+_tasks["tsclfsingle"] = (;
+    id = "timeseries/single",
+    name = "Time-Series Classification (single-label)",
+    constructor = TSClassificationSingle,
+    blocks = (TimeSeriesRow, Label),
+    category = "supervised",
+    description = """
+        Time-Series classification task where every time-series has a single 
+        class label associated with it.
+        """,
+    package = @__MODULE__,
+)
diff --git a/src/datasets/containers.jl b/src/datasets/containers.jl
@@ -356,26 +356,13 @@ end
     end
 end
 
-#! How to include the file.
-# @testset "TimeSeriesDataset" begin
-#     @testset "TimeSeriesDataset from TS" begin
-#         # Size 159 KB
-#         tsd = TimeSeriesDataset("/Users/saksham/Downloads/AtrialFibrillation/AtrialFibrillation_TRAIN.ts") 
-#         @test tsd isa TimeSeriesDataset{}
-#         @test size(getindex(tsd, 10)) == (2, 640)
-#         @test length(tsd) ==15
-#     end
-# end
-
-# @testset "TimeSeriesDataset" begin
-#     @testset "TimeSeriesDataset from TS" begin
-#         temp = mktempdir()
-#         downpath = joinpath(temp, "temp.zip")
-#         path = Downloads.download("http://timeseriesclassification.com/Downloads/AtrialFibrillation.zip", downpath)
-#         InfoZIP.unzip(path, temp)
-#         tsd = TimeSeriesDataset(joinpath(temp, "AtrialFibrillation_TRAIN.ts"))
-#         @test tsd isa TimeSeriesDataset{}
-#         @test size(getindex(tsd, 10)) == (2, 640)
-#         @test length(tsd) ==15
-#     end
-# end
+@testset "TimeSeriesDataset" begin
+    @testset "TimeSeriesDataset from TS" begin
+        # Load the training split of the registered "atrial" dataset and check the
+        # container type, the size of one observation and the number of observations.
+        tsfile = joinpath(datasetpath("atrial"), "AtrialFibrillation_TRAIN.ts")
+        tsd = TimeSeriesDataset(tsfile)
+        @test tsd isa TimeSeriesDataset{}
+        @test size(tsd[10]) == (2, 640)
+        @test length(tsd) == 15
+    end
+end
diff --git a/src/datasets/fastaidatasets.jl b/src/datasets/fastaidatasets.jl
@@ -44,7 +44,8 @@ const DESCRIPTIONS = Dict(
     "imagenette" => "A subset of 10 easily classified classes from Imagenet: tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute",
     "imagewoof" => "A subset of 10 harder to classify classes from Imagenet (all dog breeds): Australian terrier, Border terrier, Samoyed, beagle, Shih-Tzu, English foxhound, Rhodesian ridgeback, dingo, golden retriever, Old English sheepdog",
     "food-101" => "101 food categories, with 101,000 images; 250 test images and 750 training images per class. The training images were not cleaned. All images were rescaled to have a maximum side length of 512 pixels.",
-    "ECG5000" => "The original dataset for \"ECG5000\" is a 20-hour long ECG downloaded from Physionet. The name is BIDMC Congestive Heart Failure Database(chfdb) and it is record \"chf07\"."
+    "ECG5000" => "The original dataset for \"ECG5000\" is a 20-hour long ECG downloaded from Physionet. The name is BIDMC Congestive Heart Failure Database(chfdb) and it is record \"chf07\".",
+    "AtrialFibrillation" => "This is a physionet dataset of two-channel ECG recordings has been created from data used in the Computers in Cardiology Challenge 2004, an open competition with the goal of developing automated methods for predicting spontaneous termination of atrial fibrillation (AF).",
 )
 
 const DATASETCONFIGS = [
@@ -127,6 +128,8 @@ const DATASETCONFIGS = [
 
     # timeseries
     TSClassificationDataset("ECG5000", "41f6de20ac895e9ce31753860995518951f1ed42a405d0e51c909d27e3b3c5a4", description = DESCRIPTIONS["ECG5000"] ,datadepname="ecg5000", size="10MB" ),
+    TSClassificationDataset("AtrialFibrillation", "218abad67d58190a6daa1a27f4bd58ace6e18f80fb59fb2c7385f0d2d4b411a2", description = DESCRIPTIONS["AtrialFibrillation"], datadepname = "atrial", size = "226KB"),
+
 ]
 
 const DATASETS = [d.datadepname for d in DATASETCONFIGS]