Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ImageNet #146

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7852e48
Add Images and ImageMagick deps using LazyModules
adrhill Jun 23, 2022
20be9a6
Add Image preprocessing script
adrhill Jun 23, 2022
59afe92
Add ImageNet dataset
adrhill Jun 23, 2022
1f4dfaf
Rename ImageNetReader file to match struct name
adrhill Jun 23, 2022
dfdeaa5
Formatting fixes
adrhill Jun 23, 2022
d2ded7e
Remove lowpass on image before resizing
adrhill Jun 23, 2022
4809296
Use `FileDataset` and replace ImageMagick with JpegTurbo
adrhill Jun 23, 2022
cac14d2
Add missing reference URL to comment
adrhill Jun 23, 2022
1e850ba
Remove use of `imresize`
adrhill Jun 23, 2022
2aa0170
Replace Images dependency by ImageCore
adrhill Jun 23, 2022
06ad214
Use StackViews.jl for batching
adrhill Jun 23, 2022
3097302
Load ImageCore and StackViews non-lazily
adrhill Jun 24, 2022
02e966d
Bake `Tx` into FileDataset's `loadfn`
adrhill Jun 24, 2022
9fb811c
Fix indexing bug in `center_crop_view`
adrhill Jun 24, 2022
4e0e8d4
Move installation guide into separate markdown file
adrhill Jul 8, 2022
0daca90
Include feedback from code review
adrhill Jul 8, 2022
09feb3d
Support custom preprocessing functions
adrhill Feb 2, 2023
df14fea
Sort classes by WordNet ID
adrhill Feb 2, 2023
c92ae00
Update docstring
adrhill Feb 2, 2023
8637ebe
Merge branch 'master' into ah/imagenet
adrhill Feb 2, 2023
944bd83
Update docstrings
adrhill Feb 2, 2023
fe38d43
Remove StackViews dependency
adrhill Feb 7, 2023
6af86c6
Remove normalization constants
adrhill Feb 7, 2023
95b13d9
Add more metadata
adrhill Feb 7, 2023
09d5be4
Add `img_size` argument
adrhill Feb 8, 2023
ae1929d
Format to SciML code style, matching #205
adrhill Feb 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,19 @@ FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
ImageShow = "4e3cecfd-b093-5904-9786-8bbb286a6a31"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"
LazyModules = "8cdb02fc-e678-4876-92c5-9defec4f444e"
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
Pickle = "fbb45041-c46e-462f-888f-7c521cafbc2c"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StackViews = "cae243ae-269e-4f55-b966-ac2d0dc13c15"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
Expand All @@ -33,15 +36,18 @@ FixedPointNumbers = "0.8"
GZip = "0.5"
Glob = "1.3"
HDF5 = "0.16.2"
ImageCore = "0.9"
ImageShow = "0.3"
JLD2 = "0.4.21"
JSON3 = "1"
JpegTurbo = "0.1"
LazyModules = "0.3"
MAT = "0.10"
MLUtils = "0.2.0"
NPZ = "0.4.1"
Pickle = "0.3"
Requires = "1"
StackViews = "0.1"
Tables = "1.6"
julia = "1.6"

Expand Down
1 change: 1 addition & 0 deletions docs/src/datasets/vision.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ convert2image
```@docs
CIFAR10
CIFAR100
ImageNet
adrhill marked this conversation as resolved.
Show resolved Hide resolved
EMNIST
FashionMNIST
MNIST
Expand Down
7 changes: 7 additions & 0 deletions src/MLDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ using MLUtils: getobs, numobs, AbstractDataContainer
using Glob
using DelimitedFiles: readdlm
using FileIO
using StackViews: StackView
using LazyModules: @lazy

include("require.jl") # export @require
Expand All @@ -29,6 +30,8 @@ import CSV
@lazy import HDF5="f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
# @lazy import JLD2

@lazy import JpegTurbo="b835a17e-a41a-41e7-81f0-2f016b05efe0" # Required for ImageNet

export getobs, numobs # From MLUtils.jl

include("abstract_datasets.jl")
Expand Down Expand Up @@ -86,6 +89,9 @@ include("datasets/vision/cifar100.jl")
export CIFAR100
include("datasets/vision/svhn2.jl")
export SVHN2
include("datasets/vision/imagenet_reader/ImageNetReader.jl")
include("datasets/vision/imagenet.jl")
export ImageNet

## Text

Expand Down Expand Up @@ -147,6 +153,7 @@ function __init__()
__init__fashionmnist()
__init__mnist()
__init__svhn2()
__init__imagenet()
adrhill marked this conversation as resolved.
Show resolved Hide resolved
end

end #module
198 changes: 198 additions & 0 deletions src/datasets/vision/imagenet.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
const IMAGENET_WEBSITE = "https://image-net.org/"

# Register the "ImageNet" `ManualDataDep`. ImageNet can only be obtained after signing up
# on the website, so nothing is downloaded here; instead, the registered message explains
# which directory layout is expected and how to produce it from the official archives.
function __init__imagenet()
    DEPNAME = "ImageNet"
    return register(
        ManualDataDep(
            DEPNAME,
            # TODO: currently markdown formatting is not applied
            """
            The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017) can be downloaded at
            $IMAGENET_WEBSITE after signing up and accepting the terms of access.
            It is therefore required that you download this dataset manually.

            ## Existing installation
            The dataset structure is assumed to look as follows:
            ```
            ImageNet
            ├── train
            ├── val
            │   ├── n01440764
            │   │   ├── ILSVRC2012_val_00000293.JPEG
            │   │   ├── ILSVRC2012_val_00002138.JPEG
            │   │   └── ...
            │   ├── n01443537
            │   └── ...
            ├── test
            └── devkit
                ├── data
                │   ├── meta.mat
                │   └── ...
                └── ...
            ```
            If your existing copy of the ImageNet dataset uses another file structure,
            we recommend creating symbolic links, e.g. using `ln` on Unix-like operating
            systems:
            ```bash
            cd ~/.julia/datadeps
            mkdir -p ImageNet/devkit
            ln -s /absolute/path/to/imagenet/val ImageNet/val
            ln -s /absolute/path/to/imagenet/devkit/data ImageNet/devkit/data
            ```

            ## New installation
            Download the following files from the ImageNet website ($IMAGENET_WEBSITE):
            * `ILSVRC2012_devkit_t12.tar.gz`
            * `ILSVRC2012_img_train.tar`, only required for `:train` split
            * `ILSVRC2012_img_val.tar`, only required for `:val` split

            After downloading the data, move and extract the training and validation images to
            labeled subfolders by running the following shell script:
            ```bash
            # Extract the training data:
            mkdir -p ImageNet/train && tar -xvf ILSVRC2012_img_train.tar -C ImageNet/train
            # Unpack all 1000 compressed tar-files, one for each category:
            cd ImageNet/train
            find . -name "*.tar" | while read NAME ; do mkdir -p "\${NAME%.tar}"; tar -xvf "\${NAME}" -C "\${NAME%.tar}"; rm -f "\${NAME}"; done

            # Extract the validation data:
            cd ../..
            mkdir -p ImageNet/val && tar -xvf ILSVRC2012_img_val.tar -C ImageNet/val

            # Run script from soumith to create all class directories and move images into corresponding directories:
            cd ImageNet/val
            wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash

            # Extract metadata from the devkit (dropping the archive's top-level folder):
            cd ../..
            mkdir -p ImageNet/devkit && tar -xvzf ILSVRC2012_devkit_t12.tar.gz -C ImageNet/devkit --strip-components=1
            ```
            """,
            # shell script based on PyTorch example "ImageNet training in PyTorch":
            # https://github.com/pytorch/examples/blob/d5478765d38210addf474dd73faf0d103052027a/imagenet/extract_ILSVRC.sh
        ),
    )
end

"""
    ImageNet(; Tx=Float32, split=:train, dir=nothing)
    ImageNet([Tx, split])

The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017).
This is the most highly-used subset of ImageNet. It spans 1000 object classes and contains
1,281,167 training images, 50,000 validation images and 100,000 test images.
Each image is in 224x224x3 format using RGB color space.

- Authors: Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh,
  Sean Ma, Zhiheng Huang, Andrej Karpathy, Aditya Khosla, Michael Bernstein,
  Alexander C. Berg, Li Fei-Fei
- Website: $IMAGENET_WEBSITE
- Reference: Russakovsky et al., ImageNet Large Scale Visual Recognition Challenge
  (https://arxiv.org/abs/1409.0575)

# Arguments

$ARGUMENTS_SUPERVISED_ARRAY
- `split`: selects the data partition. Can take the values `:train`, `:val` or `:test`.
  Defaults to `:train`.

# Fields

$FIELDS_SUPERVISED_ARRAY
- `split`.

# Methods

$METHODS_SUPERVISED_ARRAY
- [`convert2image`](@ref) converts features to `RGB` images.

# Examples

```julia-repl
julia> using MLDatasets: ImageNet

julia> dataset = ImageNet(:val);

julia> dataset[1:5].targets
5-element Vector{Int64}:
 1
 1
 1
 1
 1

julia> X, y = dataset[1:5];

julia> size(X)
(224, 224, 3, 5)

julia> dataset.metadata
Dict{String, Any} with 4 entries:
  "class_WNIDs" => ["n02119789", "n02100735", "n02110185", "n02096294", "n02102040", "n02066245", "n02509815", "n02124075", "n02417914", "n02123394" … "n02815834", "n09229709", "n07697313", "n03888605", "n03355925", "n03…
  "class_description" => ["small grey fox of southwestern United States; may be a subspecies of Vulpes velox", "an English breed having a plumed tail and a soft silky coat that is chiefly white", "breed of sled dog developed in …
  "class_names" => Vector{SubString{String}}[["kit fox", "Vulpes macrotis"], ["English setter"], ["Siberian husky"], ["Australian terrier"], ["English springer", "English springer spaniel"], ["grey whale", "gray whale", "d…
  "wnid_to_label" => Dict("n07693725"=>768, "n03775546"=>829, "n01689811"=>469, "n02100877"=>192, "n02441942"=>48, "n04371774"=>569, "n07717410"=>741, "n03347037"=>919, "n04355338"=>526, "n02097474"=>158…)
```
"""
struct ImageNet <: SupervisedDataset
    # Class metadata as produced by `ImageNetReader.read_metadata`
    metadata::Dict{String,Any}
    # Data partition this instance was built for
    split::Symbol
    # File-backed image collection as produced by `ImageNetReader.readdata`
    dataset::FileDataset
    # Integer class label of every observation, in the order of `dataset`
    targets::Vector{Int}
end

# Convenience constructors; all of them end up in `ImageNet(Tx, split; kws...)`.
# Keyword arguments are forwarded untouched so that `dir` as well as the
# `train_dir`/`val_dir`/`test_dir`/`devkit_dir` overrides accepted by that method
# can be supplied through every entry point.
ImageNet(; split=:train, Tx=Float32, kws...) = ImageNet(Tx, split; kws...)
ImageNet(split::Symbol; kws...) = ImageNet(; split, kws...)
ImageNet(Tx::Type; kws...) = ImageNet(; Tx, kws...)

# Main constructor: assemble the `ImageNet` dataset for a single `split`.
#
# Arguments
# - `Tx`: element type forwarded to `ImageNetReader.readdata` for the loaded images.
# - `split`: data partition, one of `:train`, `:val` or `:test`.
#
# Keywords
# - `dir`: dataset root directory; when `nothing`, the "ImageNet" datadep is resolved.
# - `train_dir`, `val_dir`, `test_dir`: image folder of each split, relative to the root.
# - `devkit_dir`: devkit folder relative to the root; `data/meta.mat` is read from it.
#
# Throws an `ArgumentError` for an unknown `split` and an `AssertionError` when the
# number of files found does not match the official size of the split.
function ImageNet(
    Tx::Type,
    split::Symbol;
    dir=nothing,
    train_dir="train",
    val_dir="val",
    test_dir="test",
    devkit_dir="devkit",
)
    split ∈ (:train, :val, :test) || throw(
        ArgumentError("`split` must be one of :train, :val or :test, got :$split")
    )

    DEPNAME = "ImageNet"
    METADATA_FILENAME = joinpath(devkit_dir, "data", "meta.mat")

    # Load metadata
    file_path = datafile(DEPNAME, METADATA_FILENAME, dir)
    metadata = ImageNetReader.read_metadata(file_path)

    # Read the images from the same root the metadata came from. Previously a
    # user-supplied `dir` was honoured for the metadata only.
    # NOTE(review): assumes `datafile` treats `dir` as a replacement for the datadep
    # root directory — confirm against its definition.
    root_dir = if dir === nothing
        @datadep_str DEPNAME
    else
        dir
    end

    # Folder and official number of images of the requested split
    split_dir, expected_size = if split == :train
        (train_dir, 1_281_167)
    elseif split == :val
        (val_dir, 50_000)
    else
        (test_dir, 100_000)
    end

    dataset = ImageNetReader.readdata(Tx, joinpath(root_dir, split_dir))
    @assert length(dataset) == expected_size

    # NOTE(review): labels are looked up from the name of each file's parent folder.
    # Presumably the test split has no per-class folders, in which case this lookup
    # would throw a `KeyError` — confirm.
    wnid_to_label = metadata["wnid_to_label"]
    targets = [wnid_to_label[wnid] for wnid in ImageNetReader.load_wnids(dataset)]
    return ImageNet(metadata, split, dataset, targets)
end

# Map preprocessed features back to `RGB` images; the work is delegated to
# `ImageNetReader.inverse_preprocess`.
function convert2image(::Type{<:ImageNet}, x)
    return ImageNetReader.inverse_preprocess(x)
end

# The dataset has as many observations as the wrapped file dataset has entries.
function Base.length(d::ImageNet)
    return length(d.dataset)
end

# Error message shown when the whole dataset is requested with `[:]`.
const IMAGENET_MEM_WARNING = """Loading the entire ImageNet dataset into memory might not be possible.
If you are sure you want to load all of ImageNet, index the dataset with `[1:end]` instead of `[:]`.
"""

# `dataset[:]` is rejected on purpose, see `IMAGENET_MEM_WARNING`.
Base.getindex(::ImageNet, ::Colon) = throw(ArgumentError(IMAGENET_MEM_WARNING))

# Single observation: the loaded image together with its label.
Base.getindex(d::ImageNet, i::Integer) = (features=d.dataset[i], targets=d.targets[i])

# Several observations: the loaded images are combined into one array with `StackView`.
function Base.getindex(d::ImageNet, is::AbstractVector)
    return (features=StackView(d.dataset[is]), targets=d.targets[is])
end
adrhill marked this conversation as resolved.
Show resolved Hide resolved
47 changes: 47 additions & 0 deletions src/datasets/vision/imagenet_reader/ImageNetReader.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
module ImageNetReader
using ImageCore: channelview, colorview, AbstractRGB, RGB

import ..FileDataset
import ..read_mat
import ..@lazy

@lazy import JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"
CarloLucibello marked this conversation as resolved.
Show resolved Hide resolved

const NCLASSES = 1000
const IMGSIZE = (224, 224)
adrhill marked this conversation as resolved.
Show resolved Hide resolved

include("preprocess.jl")

# Read the "synsets" table from the devkit's MAT file and collect, for the `NCLASSES`
# synsets that have no children, their WordNet IDs, names, descriptions and a
# WordNet-ID => label lookup.
function read_metadata(file::AbstractString)
    synsets = read_mat(file)["synsets"]
    # Only synsets without children are kept
    leaf = iszero.(synsets["num_children"])
    # Sanity check: the kept synsets must be numbered 1, 2, …, NCLASSES in order
    @assert synsets["ILSVRC2012_ID"][leaf] == 1:NCLASSES

    wnids = Vector{String}(synsets["WNID"][leaf]) # WordNet IDs
    class_names = split.(synsets["words"][leaf], ", ")
    descriptions = Vector{String}(synsets["gloss"][leaf])

    metadata = Dict{String,Any}()
    metadata["class_WNIDs"] = wnids
    metadata["class_names"] = class_names
    metadata["class_description"] = descriptions
    metadata["wnid_to_label"] = Dict(wnids .=> 1:NCLASSES)
    return metadata
end

# ImageNet as a whole does not fit into memory, so instead of image data a
# `FileDataset` over all "*.JPEG" files in `dir` is returned, paired with the loading
# function created by `image_loader(Tx)`.
function readdata(Tx::Type{<:Real}, dir::AbstractString)
    loadfn = image_loader(Tx)
    return FileDataset(loadfn, dir, "*.JPEG")
end

# WordNet IDs of all files in a `FileDataset`, derived from its stored paths
function load_wnids(d::FileDataset)
    return load_wnids(d.paths)
end
# The WordNet ID of an image is the name of the directory that contains it.
# `dirname`/`basename` are used rather than splitting on "/" so that paths written
# with the platform's native separator (e.g. `\` on Windows) are handled as well.
function load_wnids(fs::AbstractVector{<:AbstractString})
    return [basename(dirname(f)) for f in fs]
end

# Create the loading function used with `FileDataset` paths: the JPEG at `file` is
# decoded as `RGB{Tx}` (passing `preferred_size=IMGSIZE` to JpegTurbo) and then run
# through `preprocess`; the result is returned as an `AbstractArray{Tx,3}`.
function image_loader(Tx::Type{<:Real})
    load_image(file::AbstractString)::AbstractArray{Tx,3} = preprocess(
        Tx, JpegTurbo.jpeg_decode(RGB{Tx}, file; preferred_size=IMGSIZE)
    )
    return load_image
end

end # module
31 changes: 31 additions & 0 deletions src/datasets/vision/imagenet_reader/preprocess.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Image preprocessing for ImageNet models.
# Code adapted from Metalhead 0.5.3's utils.jl

# `adjust` is 1 for even sizes and 0 for odd sizes. It shortens the upper end of the
# crop range so that the window spans exactly the requested number of pixels.
adjust(i::Integer) = iseven(i) ? 1 : 0

# Return a view of the `outsize` window sitting at the center of image `im`
function center_crop_view(im::AbstractMatrix, outsize=IMGSIZE)
    half_h, half_w = outsize .÷ 2 # half height, half width of the window
    trim_h, trim_w = map(adjust, outsize)
    mid_h = lastindex(im, 1) ÷ 2
    mid_w = lastindex(im, 2) ÷ 2
    rows = (mid_h - half_h + 1):(mid_h + half_h - trim_h + 1)
    cols = (mid_w - half_w + 1):(mid_w + half_w - trim_w + 1)
    return view(im, rows, cols)
end

# Coefficients taken from PyTorch's ImageNet normalization code
const PYTORCH_MEAN = [0.485f0, 0.456f0, 0.406f0]
const PYTORCH_STD = [0.229f0, 0.224f0, 0.225f0]

# Center-crop `im`, normalize its color channels with `PYTORCH_MEAN` / `PYTORCH_STD`
# and return the result as an array with element type `Tx`.
function preprocess(Tx::Type, im::AbstractMatrix{<:AbstractRGB})
    cropped = center_crop_view(im)
    # `channelview` puts the color channels first, so the 3-element constants
    # broadcast along that first dimension
    normalized = (channelview(cropped) .- PYTORCH_MEAN) ./ PYTORCH_STD
    # Convert from CHW (Image.jl's channel ordering) to WHC:
    whc = PermutedDimsArray(normalized, (3, 2, 1))
    return Tx.(whc)
end

# Undo `preprocess` for a single image (3 dimensions) or a batch (4 dimensions):
# move the channel dimension back to the front, revert the normalization and wrap
# the result as `RGB` colors.
function inverse_preprocess(x::AbstractArray{T,N}) where {T,N}
    @assert N == 3 || N == 4
    # Swap dimensions 1 and 3; a batch dimension, if present, stays in place
    perm = (3, 2, 1, 4:N...)
    channels_first = PermutedDimsArray(x, perm)
    denormalized = channels_first .* PYTORCH_STD .+ PYTORCH_MEAN
    return colorview(RGB, denormalized)
end