Skip to content

Commit

Permalink
Dev flow.utils.data part3 (#5644)
Browse files Browse the repository at this point in the history
* add more datasets

* add more transform funcs

* export interface

* export datasets interface

* auto format by CI

* fix docs

* skip test

* support DistributedSampler

* refine

* add more transform function

* fix err import

* fix comment

* refine

* add more transform test

* refactor dataloader test

* refine

* add ddp test

* refine

* refine

* add ddp test case

* skip test

* add ddp test case

* add test case

* refine

* rm ddp test

* remove ddp test

* auto format by CI

* format

* update api docs

* add utils.rst

* auto format by CI

* fix ddp grad size

Signed-off-by: daquexian <daquexian566@gmail.com>

* remove print

Signed-off-by: daquexian <daquexian566@gmail.com>

* refine as comments

* refine

* auto format by CI

* auto format by CI

* refine

* add ddp test

* auto format by CI

* rm test case

* fix reshape

Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
Co-authored-by: daquexian <daquexian566@gmail.com>
Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
  • Loading branch information
4 people committed Aug 13, 2021
1 parent 3afdd83 commit 2ad6321
Show file tree
Hide file tree
Showing 25 changed files with 3,576 additions and 173 deletions.
2 changes: 2 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ OneFlow API Reference
linalg
image
optim
utils



Indices and tables
Expand Down
62 changes: 62 additions & 0 deletions docs/source/utils.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
oneflow.utils
===================================
Utils
----------------------------------
.. currentmodule:: oneflow.utils
.. automodule:: oneflow.utils.data
:members: DataLoader,
Dataset,
IterableDataset,
TensorDataset,
ConcatDataset,
Subset,
random_split,
Sampler,
SequentialSampler,
RandomSampler,
SubsetRandomSampler,
BatchSampler

.. currentmodule:: oneflow.utils
.. automodule:: oneflow.utils.data.distributed
:members: DistributedSampler

.. currentmodule:: oneflow.utils
.. automodule:: oneflow.utils.vision.datasets
:members: MNIST,
FashionMNIST,
CIFAR10,
CIFAR100,
ImageNet,
CocoCaptions,
CocoDetection,
VOCDetection,
VOCSegmentation,
DatasetFolder,
ImageFolder

.. currentmodule:: oneflow.utils
.. automodule:: oneflow.utils.vision.transforms
:members: Compose,
ToTensor,
PILToTensor,
ConvertImageDtype,
ToPILImage,
Normalize,
Resize,
Scale,
CenterCrop,
Pad,
Lambda,
RandomTransforms,
RandomApply,
RandomOrder,
RandomChoice,
RandomCrop,
RandomHorizontalFlip,
RandomVerticalFlip,
RandomResizedCrop,
RandomSizedCrop,
FiveCrop,
TenCrop,
InterpolationMode
148 changes: 148 additions & 0 deletions python/oneflow/test/dataloader/data_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import oneflow as flow
import oneflow.utils.vision.transforms as transforms


def load_data_cifar10(
    batch_size,
    data_dir="./data-test/cifar10",
    download=True,
    transform=None,
    source_url=None,
    num_workers=0,
):
    """Return ``(train_iter, test_iter)`` DataLoaders over CIFAR10.

    Both splits are created from ``data_dir`` with the same ``download``,
    ``transform`` and ``source_url`` arguments. The training loader
    shuffles its samples; the test loader does not. ``batch_size`` and
    ``num_workers`` are forwarded to both loaders.
    """
    # The train split is constructed first, then the test split.
    train_set, test_set = [
        flow.utils.vision.datasets.CIFAR10(
            root=data_dir,
            train=is_train,
            download=download,
            transform=transform,
            source_url=source_url,
        )
        for is_train in (True, False)
    ]

    loader_options = {"batch_size": batch_size, "num_workers": num_workers}
    train_loader = flow.utils.data.DataLoader(
        train_set, shuffle=True, **loader_options
    )
    test_loader = flow.utils.data.DataLoader(
        test_set, shuffle=False, **loader_options
    )
    return train_loader, test_loader


def load_data_mnist(
    batch_size,
    resize=None,
    root="./data/mnist",
    download=True,
    source_url=None,
    num_workers=0,
):
    """Download the MNIST dataset and return train/test DataLoaders.

    Args:
        batch_size: samples per batch, forwarded to both loaders.
        resize: optional size handed to ``transforms.Resize``; the resize
            step is skipped when this is falsy.
        root: dataset directory; a leading ``~`` is expanded.
        download: forwarded to the ``MNIST`` dataset constructor.
        source_url: forwarded to the ``MNIST`` dataset constructor.
        num_workers: forwarded to both loaders, mirroring the parameter
            of the same name on the other ``load_data_*`` helpers in this
            module. NOTE(review): the default of 0 is assumed to equal
            DataLoader's own default so that existing callers see no
            change -- confirm against the DataLoader documentation.

    Returns:
        A ``(train_iter, test_iter)`` tuple. The training loader shuffles;
        the test loader keeps dataset order.
    """
    root = os.path.expanduser(root)

    # Optional resize runs before the tensor conversion.
    steps = []
    if resize:
        steps.append(transforms.Resize(resize))
    steps.append(transforms.ToTensor())
    transformer = transforms.Compose(steps)

    mnist_train = flow.utils.vision.datasets.MNIST(
        root=root,
        train=True,
        transform=transformer,
        download=download,
        source_url=source_url,
    )
    mnist_test = flow.utils.vision.datasets.MNIST(
        root=root,
        train=False,
        transform=transformer,
        download=download,
        source_url=source_url,
    )

    train_iter = flow.utils.data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=num_workers
    )
    test_iter = flow.utils.data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=num_workers
    )
    return train_iter, test_iter


def get_fashion_mnist_dataset(
    resize=None, root="./data-test/fashion-mnist", download=True, source_url=None,
):
    """Return the ``(train, test)`` FashionMNIST datasets.

    ``root`` has a leading ``~`` expanded. Both splits share a single
    composed transform: an optional ``Resize(resize)`` (only when
    ``resize`` is truthy) followed by ``ToTensor()``. ``download`` and
    ``source_url`` are forwarded to the dataset constructor unchanged.
    """
    dataset_dir = os.path.expanduser(root)

    pipeline = [transforms.Resize(resize)] if resize else []
    pipeline.append(transforms.ToTensor())

    # Everything except the ``train`` flag is identical for the two splits.
    common = {
        "root": dataset_dir,
        "transform": transforms.Compose(pipeline),
        "download": download,
        "source_url": source_url,
    }
    train_split = flow.utils.vision.datasets.FashionMNIST(train=True, **common)
    test_split = flow.utils.vision.datasets.FashionMNIST(train=False, **common)
    return train_split, test_split


# reference: http://tangshusen.me/Dive-into-DL-PyTorch/#/chapter03_DL-basics/3.10_mlp-pytorch
def load_data_fashion_mnist(
    batch_size,
    resize=None,
    root="./data-test/fashion-mnist",
    download=True,
    source_url=None,
    num_workers=0,
):
    """Download the Fashion-MNIST dataset and return train/test DataLoaders.

    Args:
        batch_size: samples per batch, forwarded to both loaders.
        resize: optional size for a ``Resize`` transform applied before
            ``ToTensor``; skipped when falsy.
        root: dataset directory; a leading ``~`` is expanded.
        download: forwarded to the ``FashionMNIST`` dataset constructor.
        source_url: forwarded to the ``FashionMNIST`` dataset constructor.
        num_workers: forwarded to both loaders.

    Returns:
        A ``(train_iter, test_iter)`` tuple. The training loader shuffles;
        the test loader keeps dataset order.
    """
    # Dataset construction (path expansion, transform pipeline, both splits)
    # lives in get_fashion_mnist_dataset; reuse it rather than duplicating it.
    mnist_train, mnist_test = get_fashion_mnist_dataset(
        resize=resize, root=root, download=download, source_url=source_url
    )

    train_iter = flow.utils.data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=num_workers
    )
    test_iter = flow.utils.data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=num_workers
    )
    return train_iter, test_iter
20 changes: 6 additions & 14 deletions python/oneflow/test/dataloader/test_cifar_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import oneflow as flow
import oneflow.nn as nn
import oneflow.optim as optim
from data_utils import load_data_cifar10


classes = (
Expand Down Expand Up @@ -81,21 +82,19 @@ def test(test_case):
os.getenv("ONEFLOW_TEST_CACHE_DIR", "./data-test"), "cifar10"
)

trainset = flow.utils.vision.datasets.CIFAR10(
root=data_dir,
train=True,
train_iter, test_iter = load_data_cifar10(
batch_size=batch_size,
data_dir=data_dir,
download=True,
transform=transform,
source_url="https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/cifar/cifar-10-python.tar.gz",
)
trainloader = flow.utils.data.DataLoader(
trainset, batch_size=batch_size, shuffle=False, num_workers=0
num_workers=0,
)

final_loss = 0
for epoch in range(1, train_epoch + 1): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(trainloader, 1):
for i, data in enumerate(train_iter, 1):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
inputs = inputs.to(dtype=flow.float32, device=device)
Expand Down Expand Up @@ -130,10 +129,3 @@ def test_cifar_dataset(test_case):

if __name__ == "__main__":
unittest.main()
# 1 epoch training log
# epoch: 1 step: 2000 loss: 2.107
# epoch: 1 step: 4000 loss: 1.838
# epoch: 1 step: 6000 loss: 1.644
# epoch: 1 step: 8000 loss: 1.535
# epoch: 1 step: 10000 loss: 1.528
# epoch: 1 step: 12000 loss: 1.476
42 changes: 2 additions & 40 deletions python/oneflow/test/dataloader/test_fashion_mnist_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,42 +20,7 @@
import oneflow.unittest
import oneflow as flow
import oneflow.nn as nn


# reference: http://tangshusen.me/Dive-into-DL-PyTorch/#/chapter03_DL-basics/3.10_mlp-pytorch
def load_data_fashion_mnist(
batch_size, resize=None, root="./data/fashion-mnist", download=True, source_url=None
):
"""Download the Fashion-MNIST dataset and then load into memory."""
root = os.path.expanduser(root)
transformer = []
if resize:
transformer += [flow.utils.vision.transforms.Resize(resize)]
transformer += [flow.utils.vision.transforms.ToTensor()]
transformer = flow.utils.vision.transforms.Compose(transformer)

mnist_train = flow.utils.vision.datasets.FashionMNIST(
root=root,
train=True,
transform=transformer,
download=download,
source_url=source_url,
)
mnist_test = flow.utils.vision.datasets.FashionMNIST(
root=root,
train=False,
transform=transformer,
download=download,
source_url=source_url,
)
num_workers = 0
train_iter = flow.utils.data.DataLoader(
mnist_train, batch_size, shuffle=True, num_workers=num_workers
)
test_iter = flow.utils.data.DataLoader(
mnist_test, batch_size, shuffle=False, num_workers=num_workers
)
return train_iter, test_iter
from data_utils import load_data_fashion_mnist


def get_fashion_mnist_labels(labels):
Expand Down Expand Up @@ -124,7 +89,7 @@ def test(test_case):
)
source_url = "https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/mnist/Fashion-MNIST/"
train_iter, test_iter = load_data_fashion_mnist(
batch_size, root=data_dir, download=True, source_url=source_url
batch_size, resize=None, root=data_dir, download=True, source_url=source_url
)
loss = nn.CrossEntropyLoss()
loss.to(device)
Expand Down Expand Up @@ -174,6 +139,3 @@ def test_fashion_mnist_dataset(test_case):

if __name__ == "__main__":
unittest.main()
# 1 epoch training log
# epoch 1, loss 0.0034, train acc 0.718, test acc 0.771, cost >>>>>>> 158.32699990272522(s)
# epoch 2, loss 0.0022, train acc 0.807, test acc 0.726, cost >>>>>>> 159.64465260505676(s)
46 changes: 1 addition & 45 deletions python/oneflow/test/dataloader/test_lenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import oneflow as flow
import oneflow.nn as nn
import oneflow.unittest
from data_utils import load_data_fashion_mnist


# reference: http://tangshusen.me/Dive-into-DL-PyTorch/#/chapter05_CNN/5.5_lenet
Expand Down Expand Up @@ -49,46 +50,6 @@ def forward(self, img):
return output


def load_data_fashion_mnist(
batch_size,
resize=None,
root="./data-test/fashion-mnist",
download=True,
source_url=None,
num_workers=0,
):
"""Download the Fashion-MNIST dataset and then load into memory."""
root = os.path.expanduser(root)
trans = []
if resize:
trans.append(flow.utils.vision.transforms.Resize(resize))
trans.append(flow.utils.vision.transforms.ToTensor())
transform = flow.utils.vision.transforms.Compose(trans)

mnist_train = flow.utils.vision.datasets.FashionMNIST(
root=root,
train=True,
transform=transform,
download=download,
source_url=source_url,
)
mnist_test = flow.utils.vision.datasets.FashionMNIST(
root=root,
train=False,
transform=transform,
download=download,
source_url=source_url,
)

train_iter = flow.utils.data.DataLoader(
mnist_train, batch_size, shuffle=True, num_workers=num_workers
)
test_iter = flow.utils.data.DataLoader(
mnist_test, batch_size, shuffle=False, num_workers=num_workers
)
return train_iter, test_iter


def evaluate_accuracy(data_iter, net, device=None):
if device is None and isinstance(net, nn.Module):
device = list(net.parameters())[0].device
Expand Down Expand Up @@ -176,8 +137,3 @@ def test_lenet(test_case):

if __name__ == "__main__":
unittest.main()
# 1 epoch training log
# epoch 1, loss 1.1473, train acc 0.569, test acc 0.742, time 162.4 sec
# epoch 2, loss 0.5736, train acc 0.784, test acc 0.796, time 158.1 sec
# epoch 3, loss 0.4761, train acc 0.826, test acc 0.821, time 154.0 sec
# epoch 4, loss 0.4215, train acc 0.848, test acc 0.855, time 160.3 sec

0 comments on commit 2ad6321

Please sign in to comment.