diff --git a/Project.toml b/Project.toml
index 1a7f4b6b..2a9d3c79 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,9 +13,11 @@ FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
 GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
 Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
+ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
 ImageShow = "4e3cecfd-b093-5904-9786-8bbb286a6a31"
 JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
+JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"
 LazyModules = "8cdb02fc-e678-4876-92c5-9defec4f444e"
 MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
@@ -36,9 +38,11 @@ FixedPointNumbers = "0.8"
 GZip = "0.5"
 Glob = "1.3"
 HDF5 = "0.16.2"
+ImageCore = "0.9"
 ImageShow = "0.3"
 JLD2 = "0.4.21"
 JSON3 = "1"
+JpegTurbo = "0.1"
 LazyModules = "0.3"
 MAT = "0.10"
 MLUtils = "0.2.0, 0.3, 0.4"
diff --git a/docs/src/datasets/imagenet_installation.md b/docs/src/datasets/imagenet_installation.md
new file mode 100644
index 00000000..6b473925
--- /dev/null
+++ b/docs/src/datasets/imagenet_installation.md
@@ -0,0 +1,71 @@
+# Installing ImageNet
+The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017) can be downloaded at
+[image-net.org](https://image-net.org/) after signing up and accepting the terms of access.
+You therefore need to download this dataset manually.
+
+## Existing installation
+The dataset structure is assumed to look as follows:
+```
+ImageNet
+├── train
+├── val
+│   ├── n01440764
+│   │   ├── ILSVRC2012_val_00000293.JPEG
+│   │   ├── ILSVRC2012_val_00002138.JPEG
+│   │   └── ...
+│   ├── n01443537
+│   └── ...
+├── test
+└── devkit
+    ├── data
+    │   ├── meta.mat
+    │   └── ...
+    └── ...
+```
+If your existing copy of the ImageNet dataset uses another file structure,
+we recommend creating symbolic links, e.g. using `ln` on Unix-like operating
+systems (use absolute paths as link targets):
+```bash
+cd ~/.julia/datadeps
+mkdir -p ImageNet
+ln -s /my/path/to/imagenet/val ImageNet/val
+mkdir -p ImageNet/devkit
+ln -s /my/path/to/imagenet/devkit/data ImageNet/devkit/data
+```
+
+## New installation
+Download the following files from the [ImageNet website](https://image-net.org/):
+* `ILSVRC2012_devkit_t12.tar.gz`, required for all splits
+* `ILSVRC2012_img_train.tar`, only required for the `:train` split
+* `ILSVRC2012_img_val.tar`, only required for the `:val` split
+
+After downloading the data, move and extract the training and validation images
+into labeled subfolders by running the following shell script:
+```bash
+# Extract the training data:
+mkdir -p ImageNet/train && tar -xvf ILSVRC2012_img_train.tar -C ImageNet/train
+# Unpack all 1000 compressed tar-files, one for each category:
+cd ImageNet/train
+find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done
+
+# Extract the validation data:
+cd ../..
+mkdir -p ImageNet/val && tar -xvf ILSVRC2012_img_val.tar -C ImageNet/val
+
+# Run soumith's script to create all class directories and move images into them:
+cd ImageNet/val
+wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
+
+# Extract the metadata from the devkit:
+cd ../..
+mkdir -p ImageNet/devkit && tar -xzvf ILSVRC2012_devkit_t12.tar.gz -C ImageNet/devkit --strip-components=1
+```
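+
+## Verifying the installation
+Once the files are in place, loading the dataset from Julia should succeed. A
+minimal smoke test (assuming the default `~/.julia/datadeps` location):
+```julia
+using MLDatasets: ImageNet
+dataset = ImageNet(:val)    # should report 50,000 observations
+img, target = dataset[1]    # loads and preprocesses a single image
+```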
diff --git a/docs/src/datasets/vision.md b/docs/src/datasets/vision.md
index f6ab6f28..c84a1a07 100644
--- a/docs/src/datasets/vision.md
+++ b/docs/src/datasets/vision.md
@@ -24,6 +24,7 @@ CIFAR10
 CIFAR100
 EMNIST
 FashionMNIST
+ImageNet
 MNIST
 Omniglot
 SVHN2
diff --git a/src/MLDatasets.jl b/src/MLDatasets.jl
index eb531bee..26fe19db 100644
--- a/src/MLDatasets.jl
+++ b/src/MLDatasets.jl
@@ -32,6 +32,8 @@ include("require.jl") # export @require
 @lazy import HDF5="f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 # @lazy import JLD2
+@lazy import JpegTurbo="b835a17e-a41a-41e7-81f0-2f016b05efe0" # Required for ImageNet
+
 export getobs, numobs # From MLUtils.jl
 
 include("abstract_datasets.jl")
@@ -91,6 +93,9 @@ include("datasets/vision/omniglot.jl")
 export Omniglot
 include("datasets/vision/svhn2.jl")
 export SVHN2
+include("datasets/vision/imagenet_reader/ImageNetReader.jl")
+include("datasets/vision/imagenet.jl")
+export ImageNet
 
 ## Text
 
@@ -162,6 +167,7 @@ function __init__()
     __init__cifar100()
     __init__emnist()
     __init__fashionmnist()
+    __init__imagenet()
     __init__mnist()
     __init__omniglot()
     __init__svhn2()
diff --git a/src/datasets/vision/imagenet.jl b/src/datasets/vision/imagenet.jl
new file mode 100644
index 00000000..b7010756
--- /dev/null
+++ b/src/datasets/vision/imagenet.jl
@@ -0,0 +1,203 @@
+const IMAGENET_WEBSITE = "https://image-net.org/"
+
+function __init__imagenet()
+    DEPNAME = "ImageNet"
+    return register(ManualDataDep(DEPNAME,
+                                  """The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017) can be downloaded at
+                                  $(IMAGENET_WEBSITE) after signing up and accepting the terms of access.
+                                  You therefore need to download this dataset manually.
+
+                                  Please follow the installation instructions at
+                                  https://github.com/JuliaML/MLDatasets.jl/blob/master/docs/src/datasets/imagenet_installation.md.
+                                  """))
+end
+
+"""
+    ImageNet(; Tx=Float32, split=:train, dir=nothing, kwargs...)
+    ImageNet([Tx, split])
+
+The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017).
+This is the most widely used subset of ImageNet. It spans 1000 object classes and contains
+1,281,167 training images, 50,000 validation images and 100,000 test images.
+By default, each image is loaded as a 224x224x3 array in RGB color space.
+
+# Arguments
+
+$ARGUMENTS_SUPERVISED_ARRAY
+- `train_dir`, `val_dir`, `test_dir`, `devkit_dir`: optional subdirectory names of `dir`.
+  Default to `"train"`, `"val"`, `"test"` and `"devkit"`.
+- `split`: selects the data partition. Can take the values `:train`, `:val` and `:test`.
+  Defaults to `:train`.
+- `Tx`: datatype used to load data. Defaults to `Float32`.
+- `img_size`: size of the loaded images. Defaults to `(224, 224)`.
+- `preprocess`: function applied to an image to convert it to a feature array.
+  Takes a matrix of `RGB` pixels as input and returns an array in WHC format.
+  Defaults to `ImageNetReader.default_preprocess`, which center-crops the image
+  and permutes its dimensions from Images.jl's CHW convention to Flux's WHC.
+- `inverse_preprocess`: inverse function of `preprocess`, used in `convert2image`.
+  Defaults to `ImageNetReader.default_inverse_preprocess`.
+
+**Note:** When providing a custom `preprocess`, also provide the matching `inverse_preprocess`.
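+
+For example, a matching pair that horizontally flips each image on top of the
+defaults might look like this (an illustrative sketch; `ImageNetReader` is an
+unexported submodule of MLDatasets):
+
+```julia
+flip(im, sz) = reverse(MLDatasets.ImageNetReader.default_preprocess(im, sz); dims=1)
+unflip(x) = MLDatasets.ImageNetReader.default_inverse_preprocess(reverse(x; dims=1))
+dataset = ImageNet(:val; preprocess=flip, inverse_preprocess=unflip)
+```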
+
+# Fields
+
+- `metadata`: A dictionary containing additional information on the dataset.
+- `split`: Symbol indicating the selected data partition.
+- `dataset`: A `FileDataset` containing paths to the ImageNet images as well as a
+  `loadfn` used to load images, which applies `preprocess`.
+- `targets`: An array storing the targets for supervised learning.
+- `inverse_preprocess`: inverse function of `preprocess`, used in `convert2image`.
+
+# Methods
+
+$METHODS_SUPERVISED_ARRAY
+- [`convert2image`](@ref) converts features to `RGB` images.
+
+# Examples
+
+```julia-repl
+julia> using MLDatasets: ImageNet
+
+julia> dataset = ImageNet(:val);
+
+julia> dataset[1:5].targets
+5-element Vector{Int64}:
+ 1
+ 1
+ 1
+ 1
+ 1
+
+julia> X, y = dataset[1:5];
+
+julia> size(X)
+(5,)
+
+julia> size(X[1])
+(224, 224, 3)
+
+julia> X, y = dataset[2000];
+
+julia> convert2image(dataset, X)
+
+julia> dataset.metadata
+Dict{String, Any} with 8 entries:
+  "features_dir"      => "/Users/funks/.julia/datadeps/ImageNet/val"
+  "class_WNIDs"       => ["n01440764", "n01443537", "n01484850", "n01491361", "n01494475", "…
+  "class_description" => ["freshwater dace-like game fish of Europe and western Asia noted f…
+  "n_observations"    => 50000
+  "class_names"       => Vector{SubString{String}}[["tench", "Tinca tinca"], ["goldfish", "C…
+  "metadata_path"     => "/Users/funks/.julia/datadeps/ImageNet/devkit/data/meta.mat"
+  "n_classes"         => 1000
+  "img_size"          => (224, 224)
+  "wnid_to_label"     => Dict("n07693725"=>932, "n03775546"=>660, "n01689811"=>45, "n0210087…
+
+julia> dataset.metadata["class_names"][y]
+3-element Vector{SubString{String}}:
+ "common iguana"
+ "iguana"
+ "Iguana iguana"
+```
+
+# References
+
+[1]: [Russakovsky et al., "ImageNet Large Scale Visual Recognition Challenge"](https://arxiv.org/abs/1409.0575)
+"""
+struct ImageNet <: SupervisedDataset
+    metadata::Dict{String, Any}
+    split::Symbol
+    dataset::FileDataset
+    targets::Vector{Int}
+    inverse_preprocess::Function
+end
+
+ImageNet(; split = :train, Tx = Float32, kws...) = ImageNet(Tx, split; kws...)
+ImageNet(split::Symbol; kws...) = ImageNet(; split, kws...)
+ImageNet(Tx::Type; kws...) = ImageNet(; Tx, kws...)
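+
+# The convenience constructors above make the following calls equivalent:
+#   ImageNet(; split=:val, Tx=Float32)
+#   ImageNet(:val)
+#   ImageNet(Float32, :val)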
+
+function ImageNet(Tx::Type,
+                  split::Symbol;
+                  img_size::Tuple{Int, Int} = (224, 224),
+                  preprocess = ImageNetReader.default_preprocess,
+                  inverse_preprocess = ImageNetReader.default_inverse_preprocess,
+                  dir = nothing,
+                  train_dir = "train",
+                  val_dir = "val",
+                  test_dir = "test",
+                  devkit_dir = "devkit")
+    @assert split ∈ (:train, :val, :test)
+
+    DEPNAME = "ImageNet"
+    METADATA_FILENAME = joinpath(devkit_dir, "data", "meta.mat")
+
+    TRAINSET_SIZE = 1_281_167
+    VALSET_SIZE = 50_000
+    TESTSET_SIZE = 100_000
+
+    root_dir = @datadep_str DEPNAME
+    if split == :train
+        split_dir = train_dir
+        n_observations = TRAINSET_SIZE
+    elseif split == :val
+        split_dir = val_dir
+        n_observations = VALSET_SIZE
+    else # :test
+        split_dir = test_dir
+        n_observations = TESTSET_SIZE
+    end
+    features_dir = joinpath(root_dir, split_dir)
+
+    # Load metadata
+    metadata_path = datafile(DEPNAME, METADATA_FILENAME, dir)
+    metadata = ImageNetReader.read_wordnet_metadata(metadata_path)
+    metadata["metadata_path"] = metadata_path
+    metadata["features_dir"] = features_dir
+    metadata["n_observations"] = n_observations
+    metadata["n_classes"] = ImageNetReader.NCLASSES
+    metadata["img_size"] = img_size
+
+    # Create FileDataset
+    dataset = ImageNetReader.get_file_dataset(Tx, img_size, preprocess, features_dir)
+    @assert length(dataset) == n_observations
+
+    targets = [metadata["wnid_to_label"][wnid]
+               for wnid in ImageNetReader.get_wnids(dataset)]
+    @assert length(targets) == n_observations
+    return ImageNet(metadata, split, dataset, targets, inverse_preprocess)
+end
+
+convert2image(d::ImageNet, x::AbstractArray) = d.inverse_preprocess(x)
+
+Base.length(d::ImageNet) = length(d.dataset)
+
+const IMAGENET_MEM_WARNING = """Loading the entire ImageNet dataset into memory might not be possible.
+                                If you are sure you want to load all of ImageNet, use `dataset[1:end]` instead of `dataset[:]`.
+                                """
+Base.getindex(::ImageNet, ::Colon) = throw(ArgumentError(IMAGENET_MEM_WARNING))
+Base.getindex(d::ImageNet, i::Integer) = (features = d.dataset[i], targets = d.targets[i])
+function Base.getindex(d::ImageNet, is::AbstractVector)
+    return (features = d.dataset[is], targets = d.targets[is])
+end
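+
+# Usage sketch (illustrative): rather than indexing the full dataset at once,
+# iterate it lazily, e.g. in mini-batches via MLUtils.DataLoader:
+#
+#   using MLUtils: DataLoader
+#   for (x, y) in DataLoader(ImageNet(:val); batchsize=32)
+#       # `x` is a vector of image arrays, `y` the corresponding integer targets
+#   end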
diff --git a/src/datasets/vision/imagenet_reader/ImageNetReader.jl b/src/datasets/vision/imagenet_reader/ImageNetReader.jl
new file mode 100644
index 00000000..67a52d71
--- /dev/null
+++ b/src/datasets/vision/imagenet_reader/ImageNetReader.jl
@@ -0,0 +1,48 @@
+module ImageNetReader
+using ImageCore: channelview, colorview, AbstractRGB, RGB
+
+import ..FileDataset
+import ..read_mat
+import ..@lazy
+
+@lazy import JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"
+
+const NCLASSES = 1000
+
+include("preprocess.jl")
+
+function get_file_dataset(Tx::Type{<:Real}, img_size::Tuple{Int, Int}, preprocess::Function,
+                          dir::AbstractString)
+    # Construct a function that decodes a JPEG from a FileDataset path at a size
+    # close to `img_size` in element type `Tx`, then applies `preprocess`.
+    function load_image(file::AbstractString)
+        im = JpegTurbo.jpeg_decode(RGB{Tx}, file; preferred_size = img_size)
+        return preprocess(im, img_size)
+    end
+    return FileDataset(load_image, dir, "*.JPEG")
+end
+
+function read_wordnet_metadata(file::AbstractString)
+    meta = read_mat(file)["synsets"]
+
+    # Only leaf nodes in the WordNet metadata correspond to classes
+    is_child = iszero.(meta["num_children"])
+    @assert meta["ILSVRC2012_ID"][is_child] == 1:NCLASSES
+
+    # Sort classes by WNID for Metalhead compatibility
+    I = sortperm(meta["WNID"][is_child])
+
+    metadata = Dict{String, Any}()
+    metadata["class_WNIDs"] = Vector{String}(meta["WNID"][is_child][I]) # WordNet IDs
+    metadata["class_names"] = split.(meta["words"][is_child][I], ", ")
+    metadata["class_description"] = Vector{String}(meta["gloss"][is_child][I])
+    metadata["wnid_to_label"] = Dict(metadata["class_WNIDs"] .=> 1:NCLASSES)
+    return metadata
+end
+
+# Get the WordNet ID from the parent directory of a file path
+get_wnids(d::FileDataset) = get_wnids(d.paths)
+get_wnids(paths::AbstractVector{<:AbstractString}) = path_to_wnid.(paths)
+path_to_wnid(path::AbstractString) = splitpath(path)[end - 1]
+
+end # ImageNetReader module
diff --git a/src/datasets/vision/imagenet_reader/preprocess.jl b/src/datasets/vision/imagenet_reader/preprocess.jl
new file mode 100644
index 00000000..e4a65c8c
--- /dev/null
+++ b/src/datasets/vision/imagenet_reader/preprocess.jl
@@ -0,0 +1,28 @@
+# Image preprocessing defaults for ImageNet models.
+
+function default_preprocess(im::AbstractMatrix{<:AbstractRGB}, outsize)
+    im = channelview(center_crop(im, outsize))
+    return PermutedDimsArray(im, (3, 2, 1)) # Convert from Images.jl's CHW to Flux's WHC
+end
+
+function default_inverse_preprocess(x::AbstractArray{T, N}) where {T, N}
+    @assert N == 3 || N == 4
+    x = PermutedDimsArray(x, (3, 2, 1, 4:N...)) # Convert from WHC[N] to CHW[N]
+    return colorview(RGB, x)
+end
+
+# Take the rectangle of pixels of shape `outsize` at the center of image `im`
+function center_crop(im::AbstractMatrix, outsize)
+    h2, w2 = div.(outsize, 2) # half height, half width of view
+    h_adjust, w_adjust = _adjust.(outsize)
+    return @view im[((div(end, 2) - h2):(div(end, 2) + h2 - h_adjust)) .+ 1,
+                    ((div(end, 2) - w2):(div(end, 2) + w2 - w_adjust)) .+ 1]
+end
+
+# For even output sizes, shorten the upper index bound by one so the cropped
+# view has exactly `outsize` pixels
+_adjust(i::Integer) = ifelse(iszero(i % 2), 1, 0)
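+
+# Worked example (illustrative): for a 300×400 input image and an `outsize` of
+# (224, 224), `center_crop` returns the view `im[39:262, 89:312]`, i.e. a
+# centered 224×224 window.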