Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Time Series Block #239

Merged
merged 22 commits into from
Jul 29, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 12 additions & 2 deletions src/TimeSeries/TimeSeries.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,22 @@ using InlineTest
# Blocks
include("blocks/timeseriesrow.jl")

# Encodings
include("encodings/timeseriespreprocessing.jl");

# Registry of learning-task descriptors, keyed by a short string identifier.
# It must be defined BEFORE `tasks/classification.jl` is included, because that
# file inserts its entry into `_tasks` at load time. `__init__` later copies the
# collected entries into `FastAI.learningtasks()`.
const _tasks = Dict{String, Any}()
include("tasks/classification.jl")

# NOTE(review): presumably provides `_registerrecipes`, which `__init__` calls —
# confirm against recipes.jl.
include("recipes.jl")

# Module initialiser: registers the dataset recipes and then publishes every
# task descriptor collected in `_tasks` to FastAI's learning-task registry,
# skipping descriptors whose `id` is already present.
function __init__()
    _registerrecipes()
    for task in values(_tasks)
        haskey(FastAI.learningtasks(), task.id) || push!(FastAI.learningtasks(), task)
    end
    return nothing
end

export TimeSeriesRow

# Public API of the time-series submodule. The preprocessing encoding is
# defined as `TSPreprocessing` (see encodings/timeseriespreprocessing.jl), so
# that is the name that has to be exported.
export TimeSeriesRow, TSClassificationSingle, TSPreprocessing
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
export
TimeSeriesRow, TSClassificationSingle, TimeSeriesPreprocessing
export
TimeSeriesRow, TSClassificationSingle, TSPreprocessing

end
51 changes: 51 additions & 0 deletions src/TimeSeries/encodings/timeseriespreprocessing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
    TSPreprocessing(tfms) <: Encoding

Encoding for `TimeSeriesRow` observations that preprocesses (e.g. normalizes)
the time-series values by applying each transform stored in `tfms`, in
iteration order, to the observation.

# Fields
- `tfms`: collection of callables; every element is called with the current
  observation and must return the transformed observation.

Encodes
- `TimeSeriesRow` -> `TimeSeriesRow`
"""
struct TSPreprocessing <: Encoding
    tfms
end

# Zero-argument convenience constructor: creates a `TSPreprocessing` whose
# transform list is empty (an untyped, empty `Vector{Any}`).
TSPreprocessing() = TSPreprocessing([])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of transforms will be in here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently only Standardize; that's the only one used in the tutorials.
If time permits, we can also add min-max normalisation, clipping of outliers based on IQR, and handling of missing values in the time series.


# The encoding does not change the block type: a `TimeSeriesRow` is encoded
# into the very same `TimeSeriesRow` block.
encodedblock(p::TSPreprocessing, block::TimeSeriesRow) = block
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the format of the time series is changed by the encoding, this should return a different block

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the format won't be changed. As I discussed with Brian earlier, different models might require different formats, so the encoding shouldn't depend on the model.


# Encode one observation by threading it through every transform in `p.tfms`,
# in iteration order. With no transforms, `obs` is returned unchanged.
# `context` and `block` are accepted but not used by this method.
function encode(p::TSPreprocessing, context, block::TimeSeriesRow, obs)
    return foldl((acc, tfm) -> tfm(acc), values(p.tfms); init = obs)
end

"""
    tsdatasetstats(data; by_var = false, by_step = false)

Compute the mean and standard deviation of `data`, reducing over two of the
dimensions 1, 2 and 3.

# Keywords
- `by_var`: if `true`, reduce over dimensions 1 and 3 (dimension 2 is kept);
  otherwise reduce over dimensions 1 and 2 (dimension 3 is kept).
- `by_step`: accepted, but currently has no effect on the result.
  NOTE(review): confirm whether a separate per-step mode is intended.

# Returns
A tuple `(means, stds)` holding the results of `Statistics.mean` and
`Statistics.std` called with the selected `dims`; reduced dimensions have
size 1 in both arrays.
"""
function tsdatasetstats(data; by_var = false, by_step = false)
    # Exactly one of dimensions 2/3 is kept; the other two are reduced.
    reduce_dims = by_var ? (1, 3) : (1, 2)
    means = Statistics.mean(data; dims = reduce_dims)
    stds = Statistics.std(data; dims = reduce_dims)
    return means, stds
end

# Derive preprocessing statistics from `data` for a `TimeSeriesRow` block.
# Returns the `(means, stds)` tuple produced by `tsdatasetstats(data)`.
# NOTE(review): this returns the raw statistics rather than a `TSPreprocessing`
# instance — confirm that callers expect a tuple here.
setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data) = tsdatasetstats(data)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
function setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data)
means, stds = tsdatasetstats(data)
end
setup(::Type{TSPreprocessing}, ::TimeSeriesRow, data) = means, stds = tsdatasetstats(data)

29 changes: 29 additions & 0 deletions src/TimeSeries/tasks/classification.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
    TSClassificationSingle(blocks, data)

Learning task for single-label time-series classification. Each time series is
classified into one of the `classes` of the `Label` block; the target is
one-hot encoded.

# Arguments
- `blocks`: a `(TimeSeriesRow, Label)` tuple describing input and target.
- `data`: the data container. It is currently not used when building the
  task, so no dataset-dependent preprocessing is applied yet.

# Returns
A `SupervisedTask` over `blocks` with the encodings `(OneHot(),)`.
"""
function TSClassificationSingle(blocks::Tuple{<:TimeSeriesRow, <:Label}, data)
    # The trailing comma is required: `(OneHot())` is just `OneHot()`, whereas
    # the encodings argument is meant to be a tuple of encodings.
    encodings = (OneHot(),)
    return SupervisedTask(blocks, encodings)
end

# Register the descriptor for single-label time-series classification in the
# module-level `_tasks` dictionary under the key "tsclfsingle". The descriptor
# is a named tuple; its `id` field is what `__init__` checks against
# `FastAI.learningtasks()` before pushing the entry into that registry.
_tasks["tsclfsingle"] = (
id = "timeseries/single",
name = "Time-Series Classification (single-label)",
# Function that builds the task from `(blocks, data)`.
constructor = TSClassificationSingle,
# Block types (input, target) this task applies to.
blocks = (TimeSeriesRow, Label),
category = "supervised",
description = """
Time-Series classification task where every time-series has a single
class label associated with it.
""",
# Module in which this task is defined.
package = @__MODULE__,
)
33 changes: 10 additions & 23 deletions src/datasets/containers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -356,26 +356,13 @@ end
end
end

#! How to include the file.
# @testset "TimeSeriesDataset" begin
# @testset "TimeSeriesDataset from TS" begin
# # Size 159 KB
# tsd = TimeSeriesDataset("/Users/saksham/Downloads/AtrialFibrillation/AtrialFibrillation_TRAIN.ts")
# @test tsd isa TimeSeriesDataset{}
# @test size(getindex(tsd, 10)) == (2, 640)
# @test length(tsd) ==15
# end
# end

# @testset "TimeSeriesDataset" begin
# @testset "TimeSeriesDataset from TS" begin
# temp = mktempdir()
# downpath = joinpath(temp, "temp.zip")
# path = Downloads.download("http://timeseriesclassification.com/Downloads/AtrialFibrillation.zip", downpath)
# InfoZIP.unzip(path, temp)
# tsd = TimeSeriesDataset(joinpath(temp, "AtrialFibrillation_TRAIN.ts"))
# @test tsd isa TimeSeriesDataset{}
# @test size(getindex(tsd, 10)) == (2, 640)
# @test length(tsd) ==15
# end
# end
# Loads the training split of the "atrial" dataset from its `.ts` file and
# checks the container type, the size of one observation and the number of
# observations.
@testset "TimeSeriesDataset" begin
    @testset "TimeSeriesDataset from TS" begin
        tsfile = joinpath(datasetpath("atrial"), "AtrialFibrillation_TRAIN.ts")
        dataset = TimeSeriesDataset(tsfile)
        @test dataset isa TimeSeriesDataset{}
        @test size(dataset[10]) == (2, 640)
        @test length(dataset) == 15
    end
end
5 changes: 4 additions & 1 deletion src/datasets/fastaidatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ const DESCRIPTIONS = Dict(
"imagenette" => "A subset of 10 easily classified classes from Imagenet: tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute",
"imagewoof" => "A subset of 10 harder to classify classes from Imagenet (all dog breeds): Australian terrier, Border terrier, Samoyed, beagle, Shih-Tzu, English foxhound, Rhodesian ridgeback, dingo, golden retriever, Old English sheepdog",
"food-101" => "101 food categories, with 101,000 images; 250 test images and 750 training images per class. The training images were not cleaned. All images were rescaled to have a maximum side length of 512 pixels.",
"ECG5000" => "The original dataset for \"ECG5000\" is a 20-hour long ECG downloaded from Physionet. The name is BIDMC Congestive Heart Failure Database(chfdb) and it is record \"chf07\"."
"ECG5000" => "The original dataset for \"ECG5000\" is a 20-hour long ECG downloaded from Physionet. The name is BIDMC Congestive Heart Failure Database(chfdb) and it is record \"chf07\".",
"AtrialFibrillation" => "This is a physionet dataset of two-channel ECG recordings has been created from data used in the Computers in Cardiology Challenge 2004, an open competition with the goal of developing automated methods for predicting spontaneous termination of atrial fibrillation (AF).",
)

const DATASETCONFIGS = [
Expand Down Expand Up @@ -127,6 +128,8 @@ const DATASETCONFIGS = [

# timeseries
TSClassificationDataset("ECG5000", "41f6de20ac895e9ce31753860995518951f1ed42a405d0e51c909d27e3b3c5a4", description = DESCRIPTIONS["ECG5000"] ,datadepname="ecg5000", size="10MB" ),
TSClassificationDataset("AtrialFibrillation", "218abad67d58190a6daa1a27f4bd58ace6e18f80fb59fb2c7385f0d2d4b411a2", description = DESCRIPTIONS["AtrialFibrillation"], datadepname = "atrial", size = "226KB"),

]

const DATASETS = [d.datadepname for d in DATASETCONFIGS]
Expand Down