Adding flags to datamodules (#388)
* Adding flags to datamodules

* Finishing up changes

* Fixing syntax error

* More syntax errors

* More

* Adding drop_last flag to sklearn test

* Adding drop_last flag to sklearn test

* Updating doc to reflect drop_last=False

* Cleaning up parameters and docstring

* Fixing syntax error

* Fixing documentation

* Hardcoding shuffle=False for val and test
briankosw committed Dec 16, 2020
1 parent 2948123 commit 7beb933
Showing 12 changed files with 230 additions and 97 deletions.
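
For context, a minimal usage sketch of the new flags (illustration only, not part of this commit; it assumes the CIFAR10DataModule constructor shown in the cifar10_datamodule.py hunks below, the usual pl_bolts import path, and the standard LightningDataModule prepare_data/*_dataloader hooks):

    # Hypothetical usage of the flags added in this commit.
    from pl_bolts.datamodules import CIFAR10DataModule

    dm = CIFAR10DataModule(
        data_dir="./data",
        num_workers=4,
        batch_size=32,
        seed=42,
        shuffle=True,      # train loader: previously hardcoded to True
        pin_memory=True,   # previously hardcoded to True
        drop_last=False,   # previously hardcoded to True
    )
    dm.prepare_data()                      # downloads CIFAR-10 if needed
    train_loader = dm.train_dataloader()   # uses shuffle/pin_memory/drop_last from the flags
    val_loader = dm.val_dataloader()       # shuffle stays hardcoded to False

Validation and test loaders keep shuffle=False regardless of the flag, while pin_memory and drop_last now follow the constructor arguments.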
29 changes: 20 additions & 9 deletions pl_bolts/datamodules/binary_mnist_datamodule.py
@@ -48,8 +48,11 @@ def __init__(
             val_split: int = 5000,
             num_workers: int = 16,
             normalize: bool = False,
-            seed: int = 42,
             batch_size: int = 32,
+            seed: int = 42,
+            shuffle: bool = False,
+            pin_memory: bool = False,
+            drop_last: bool = False,
             *args,
             **kwargs,
     ):
@@ -60,6 +63,11 @@ def __init__(
             num_workers: how many workers to use for loading data
             normalize: If true applies image normalize
             batch_size: size of batch
+            seed: random seed to be used for train/val/test splits
+            shuffle: If true shuffles the data every epoch
+            pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before
+                returning them
+            drop_last: If true drops the last incomplete batch
         """
         super().__init__(*args, **kwargs)

@@ -73,8 +81,11 @@ def __init__(
         self.val_split = val_split
         self.num_workers = num_workers
         self.normalize = normalize
-        self.seed = seed
         self.batch_size = batch_size
+        self.seed = seed
+        self.shuffle = shuffle
+        self.pin_memory = pin_memory
+        self.drop_last = drop_last

     @property
     def num_classes(self):
@@ -107,10 +118,10 @@ def train_dataloader(self):
         loader = DataLoader(
             dataset_train,
             batch_size=self.batch_size,
-            shuffle=True,
+            shuffle=self.shuffle,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -131,8 +142,8 @@ def val_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -148,8 +159,8 @@ def test_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader

25 changes: 18 additions & 7 deletions pl_bolts/datamodules/cifar10_datamodule.py
@@ -68,6 +68,9 @@ def __init__(
             num_workers: int = 16,
             batch_size: int = 32,
             seed: int = 42,
+            shuffle: bool = False,
+            pin_memory: bool = False,
+            drop_last: bool = False,
             *args,
             **kwargs,
     ):
@@ -77,6 +80,11 @@ def __init__(
             val_split: how many of the training images to use for the validation split
             num_workers: how many workers to use for loading data
             batch_size: number of examples per training/eval step
+            seed: random seed to be used for train/val/test splits
+            shuffle: If true shuffles the data every epoch
+            pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before
+                returning them
+            drop_last: If true drops the last incomplete batch
         """
         super().__init__(*args, **kwargs)

@@ -91,6 +99,9 @@ def __init__(
         self.num_workers = num_workers
         self.batch_size = batch_size
         self.seed = seed
+        self.shuffle = shuffle
+        self.pin_memory = pin_memory
+        self.drop_last = drop_last
         self.data_dir = data_dir if data_dir is not None else os.getcwd()
         self.num_samples = 50000 - val_split

@@ -125,10 +136,10 @@ def train_dataloader(self):
         loader = DataLoader(
             dataset_train,
             batch_size=self.batch_size,
-            shuffle=True,
+            shuffle=self.shuffle,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -150,8 +161,8 @@ def val_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            pin_memory=True,
-            drop_last=True
+            pin_memory=self.pin_memory,
+            drop_last=self.drop_last
         )
         return loader
@@ -167,8 +178,8 @@ def test_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader

25 changes: 18 additions & 7 deletions pl_bolts/datamodules/cityscapes_datamodule.py
@@ -66,6 +66,9 @@ def __init__(
             num_workers: int = 16,
             batch_size: int = 32,
             seed: int = 42,
+            shuffle: bool = False,
+            pin_memory: bool = False,
+            drop_last: bool = False,
             *args,
             **kwargs,
     ):
@@ -77,6 +80,11 @@ def __init__(
             target_type: targets to use, either 'instance' or 'semantic'
             num_workers: how many workers to use for loading data
             batch_size: number of examples per training/eval step
+            seed: random seed to be used for train/val/test splits
+            shuffle: If true shuffles the data every epoch
+            pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before
+                returning them
+            drop_last: If true drops the last incomplete batch
         """
         super().__init__(*args, **kwargs)

@@ -95,6 +103,9 @@ def __init__(
         self.num_workers = num_workers
         self.batch_size = batch_size
         self.seed = seed
+        self.shuffle = shuffle
+        self.pin_memory = pin_memory
+        self.drop_last = drop_last
         self.target_transforms = None

     @property
@@ -123,10 +134,10 @@ def train_dataloader(self):
         loader = DataLoader(
             dataset,
             batch_size=self.batch_size,
-            shuffle=True,
+            shuffle=self.shuffle,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -150,8 +161,8 @@ def val_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            pin_memory=True,
-            drop_last=True
+            pin_memory=self.pin_memory,
+            drop_last=self.drop_last
         )
         return loader
@@ -174,8 +185,8 @@ def test_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader

29 changes: 20 additions & 9 deletions pl_bolts/datamodules/fashion_mnist_datamodule.py
@@ -48,8 +48,11 @@ def __init__(
             data_dir: str,
             val_split: int = 5000,
             num_workers: int = 16,
-            seed: int = 42,
             batch_size: int = 32,
+            seed: int = 42,
+            shuffle: bool = False,
+            pin_memory: bool = False,
+            drop_last: bool = False,
             *args,
             **kwargs,
     ):
@@ -59,6 +62,11 @@ def __init__(
             val_split: how many of the training images to use for the validation split
             num_workers: how many workers to use for loading data
             batch_size: size of batch
+            seed: random seed to be used for train/val/test splits
+            shuffle: If true shuffles the data every epoch
+            pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before
+                returning them
+            drop_last: If true drops the last incomplete batch
         """
         super().__init__(*args, **kwargs)

@@ -71,8 +79,11 @@ def __init__(
         self.data_dir = data_dir
         self.val_split = val_split
         self.num_workers = num_workers
-        self.seed = seed
         self.batch_size = batch_size
+        self.seed = seed
+        self.shuffle = shuffle
+        self.pin_memory = pin_memory
+        self.drop_last = drop_last

     @property
     def num_classes(self):
@@ -105,10 +116,10 @@ def train_dataloader(self):
         loader = DataLoader(
             dataset_train,
             batch_size=self.batch_size,
-            shuffle=True,
+            shuffle=self.shuffle,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -130,8 +141,8 @@ def val_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -147,8 +158,8 @@ def test_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader

23 changes: 17 additions & 6 deletions pl_bolts/datamodules/imagenet_datamodule.py
@@ -55,6 +55,9 @@ def __init__(
             image_size: int = 224,
             num_workers: int = 16,
             batch_size: int = 32,
+            shuffle: bool = False,
+            pin_memory: bool = False,
+            drop_last: bool = False,
             *args,
             **kwargs,
     ):
@@ -66,6 +69,10 @@ def __init__(
             image_size: final image size
             num_workers: how many data workers
             batch_size: batch_size
+            shuffle: If true shuffles the data every epoch
+            pin_memory: If true, the data loader will copy Tensors into CUDA pinned memory before
+                returning them
+            drop_last: If true drops the last incomplete batch
         """
         super().__init__(*args, **kwargs)

@@ -81,6 +88,9 @@ def __init__(
         self.meta_dir = meta_dir
         self.num_imgs_per_val_class = num_imgs_per_val_class
         self.batch_size = batch_size
+        self.shuffle = shuffle
+        self.pin_memory = pin_memory
+        self.drop_last = drop_last
         self.num_samples = 1281167 - self.num_imgs_per_val_class * self.num_classes

     @property
@@ -143,10 +153,10 @@ def train_dataloader(self):
         loader = DataLoader(
             dataset,
             batch_size=self.batch_size,
-            shuffle=True,
+            shuffle=self.shuffle,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -170,7 +180,8 @@ def val_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
         )
         return loader
@@ -190,8 +201,8 @@ def test_dataloader(self):
             batch_size=self.batch_size,
             shuffle=False,
             num_workers=self.num_workers,
-            drop_last=True,
-            pin_memory=True
+            drop_last=self.drop_last,
+            pin_memory=self.pin_memory
        )
         return loader

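With this change the loaders take drop_last from the constructor (default False), where most of them previously hardcoded drop_last=True. As a standalone illustration of the underlying torch.utils.data.DataLoader behaviour (not part of this diff):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    ds = TensorDataset(torch.arange(10).float())

    # drop_last=False keeps the final incomplete batch, so every sample is seen
    print([len(b[0]) for b in DataLoader(ds, batch_size=4, drop_last=False)])  # [4, 4, 2]
    # drop_last=True silently drops the trailing partial batch
    print([len(b[0]) for b in DataLoader(ds, batch_size=4, drop_last=True)])   # [4, 4]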
