Skip to content

Commit

Permalink
MANIFEST.in and setup.py clean-up (#7614)
Browse files: browse the repository at this point in the history
  • Loading branch information
carmocca committed Nov 19, 2021
1 parent 94390ab commit 3d2d0f2
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 86 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/ci_pkg-install.yml
Expand Up @@ -26,12 +26,11 @@ jobs:

- name: Prepare env
run: |
pip install check-manifest "twine==3.2" setuptools wheel
pip install "twine==3.2" setuptools wheel
- name: Create package
run: |
check-manifest
# python setup.py check --metadata --strict
python setup.py check --metadata --strict
python setup.py sdist bdist_wheel
- name: Check package
Expand Down
67 changes: 3 additions & 64 deletions MANIFEST.in
Expand Up @@ -11,69 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Manifest syntax https://docs.python.org/2/distutils/sourcedist.html
graft wheelhouse

recursive-exclude __pycache__ *.py[cod] *.orig

# Include the README and CHANGELOG
include *.md

# Include the license file
include LICENSE

# Include the citation info
include *.cff

exclude *.sh
exclude *.svg
recursive-include pytorch_lightning *.py

# Include marker file for PEP 561
include pytorch_lightning/py.typed

# include examples
recursive-include pl_examples *.py *.md *.sh *.txt *.toml

# exclude tests from package
recursive-exclude tests *
recursive-exclude site *
exclude tests

# Exclude the documentation files
recursive-exclude docs *
exclude docs
recursive-include docs/source/_static/images/logos/ *
recursive-include docs/source/_static/images/general/ pl_overview* tf_* tutorial_* PTL101_*

# Include the Requirements
include pytorch_lightning/py.typed # marker file for PEP 561
include CHANGELOG.md
recursive-include requirements *.txt
recursive-exclude requirements *.sh *.py
include requirements.txt
include pyproject.toml

# Exclude build configs
exclude *.yml
exclude *.yaml
exclude *.toml
exclude *.jsonnet

# Exclude pyright config
exclude .pyrightconfig.json

# Exclude submodules
exclude .gitmodules
exclude _notebooks

# Exclude Makefile
exclude Makefile

prune .git
prune .github
prune .circleci
prune temp*
prune test*
prune benchmark*
prune dockers
prune legacy
include *.cff # citation info
102 changes: 99 additions & 3 deletions pl_examples/basic_examples/mnist_datamodule.py
Expand Up @@ -11,13 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import platform
from typing import Optional
import random
import time
import urllib
from typing import Optional, Tuple
from urllib.error import HTTPError
from warnings import warn

from torch.utils.data import DataLoader, random_split
import torch
from torch.utils.data import DataLoader, Dataset, random_split

from pl_examples import _DATASETS_PATH
from pytorch_lightning import LightningDataModule
Expand All @@ -27,6 +32,97 @@
from torchvision import transforms as transform_lib


class _MNIST(Dataset):
    """Carbon copy of ``tests.helpers.datasets.MNIST``.

    We cannot import the tests as they are not distributed with the package.
    See https://github.com/PyTorchLightning/pytorch-lightning/pull/7614#discussion_r671183652 for more context.

    The dataset reads pre-processed ``training.pt`` / ``test.pt`` files from
    ``<root>/MNIST/complete`` and, when requested, downloads them from the URLs
    listed in ``RESOURCES`` first.

    Args:
        root: Directory under which the ``MNIST/complete`` cache folder lives.
        train: Load ``training.pt`` when ``True``, otherwise ``test.pt``.
        normalize: ``(mean, std)`` pair applied to every image in ``__getitem__``.
            Normalization is skipped when this is ``None`` or not of length 2.
        download: Download the files if they are not already cached.
        **kwargs: Accepted and ignored.

    Raises:
        RuntimeError: If the data files are missing after the (optional) download.
    """

    RESOURCES = (
        "https://pl-public-data.s3.amazonaws.com/MNIST/processed/training.pt",
        "https://pl-public-data.s3.amazonaws.com/MNIST/processed/test.pt",
    )

    TRAIN_FILE_NAME = "training.pt"
    TEST_FILE_NAME = "test.pt"
    cache_folder_name = "complete"

    def __init__(
        self, root: str, train: bool = True, normalize: tuple = (0.1307, 0.3081), download: bool = True, **kwargs
    ):
        super().__init__()
        self.root = root
        self.train = train  # training set or test set
        self.normalize = normalize

        self.prepare_data(download)

        data_file = self.TRAIN_FILE_NAME if self.train else self.TEST_FILE_NAME
        self.data, self.targets = self._try_load(os.path.join(self.cached_folder_path, data_file))

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        """Return the ``idx``-th sample as ``(image, target)``.

        The image is converted to float and gets a new leading dimension;
        the target is converted to a plain ``int``.
        """
        img = self.data[idx].float().unsqueeze(0)
        target = int(self.targets[idx])

        if self.normalize is not None and len(self.normalize) == 2:
            img = self.normalize_tensor(img, *self.normalize)

        return img, target

    def __len__(self) -> int:
        """Return the number of loaded samples."""
        return len(self.data)

    @property
    def cached_folder_path(self) -> str:
        """Folder in which the data files are expected: ``<root>/MNIST/<cache_folder_name>``."""
        return os.path.join(self.root, "MNIST", self.cache_folder_name)

    def _check_exists(self, data_folder: str) -> bool:
        """Return ``True`` only if both the train and the test file exist in ``data_folder``."""
        return all(
            os.path.isfile(os.path.join(data_folder, fname)) for fname in (self.TRAIN_FILE_NAME, self.TEST_FILE_NAME)
        )

    def prepare_data(self, download: bool = True):
        """Make sure the data files are available, downloading them if allowed.

        Raises:
            RuntimeError: If the files are still missing afterwards.
        """
        if download and not self._check_exists(self.cached_folder_path):
            self._download(self.cached_folder_path)
        if not self._check_exists(self.cached_folder_path):
            raise RuntimeError("Dataset not found.")

    def _download(self, data_folder: str) -> None:
        """Download every file in ``RESOURCES`` into ``data_folder`` (created if missing)."""
        # A bare ``import urllib`` does not load the ``request`` submodule; import it
        # explicitly so this method does not rely on another module having done so.
        import urllib.request

        os.makedirs(data_folder, exist_ok=True)
        for url in self.RESOURCES:
            logging.info(f"Downloading {url}")
            fpath = os.path.join(data_folder, os.path.basename(url))
            urllib.request.urlretrieve(url, fpath)

    @staticmethod
    def _try_load(path_data, trials: int = 30, delta: float = 1.0):
        """Load ``path_data`` with ``torch.load``, retrying on failure.

        Retrying resolves loading the same file at the same time from multiple
        concurrent processes.

        Args:
            path_data: Path of the file to load; it must already exist.
            trials: Maximum number of load attempts.
            delta: Upper bound, in seconds, of the random pause after a failed attempt.

        Returns:
            Whatever ``torch.load`` returns for the file.

        Raises:
            Exception: The error of the last attempt when every attempt failed.
        """
        assert trials, "at least some trial has to be set"
        assert os.path.isfile(path_data), f"missing file: {path_data}"
        last_error = None
        for _ in range(trials):
            try:
                # Return on the first success: an error from an earlier attempt
                # must not be re-raised once a retry has worked.
                return torch.load(path_data)
            # todo: specify the possible exception
            except Exception as err:
                last_error = err
                time.sleep(delta * random.random())
        if last_error is not None:
            # every attempt failed: raise the most recent error
            raise last_error
        # only reachable when no attempt was made at all (e.g. negative ``trials``)
        return None

    @staticmethod
    def normalize_tensor(tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0) -> torch.Tensor:
        """Return ``(tensor - mean) / std`` with ``mean``/``std`` cast to the tensor's dtype and device."""
        mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device)
        std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device)
        return tensor.sub(mean).div(std)


def MNIST(*args, **kwargs):
torchvision_mnist_available = not bool(os.getenv("PL_USE_MOCKED_MNIST", False))
if torchvision_mnist_available:
Expand All @@ -39,7 +135,7 @@ def MNIST(*args, **kwargs):
torchvision_mnist_available = False
if not torchvision_mnist_available:
print("`torchvision.datasets.MNIST` not available. Using our hosted version")
from tests.helpers.datasets import MNIST
MNIST = _MNIST
return MNIST(*args, **kwargs)


Expand Down
1 change: 1 addition & 0 deletions pl_examples/run_examples.sh
@@ -1,6 +1,7 @@
#!/bin/bash
set -ex

export PYTHONPATH="${PYTHONPATH}:$(pwd)"
dir_path=$(dirname "${BASH_SOURCE[0]}")
args="
--data.batch_size=32
Expand Down
6 changes: 4 additions & 2 deletions pl_examples/test_examples.py
Expand Up @@ -14,9 +14,10 @@
from unittest import mock

import pytest
import torch

from pl_examples import _DALI_AVAILABLE
from tests.helpers.runif import RunIf
from pytorch_lightning.utilities.imports import _IS_WINDOWS

ARGS_DEFAULT = (
"--trainer.default_root_dir %(tmpdir)s "
Expand All @@ -31,7 +32,8 @@


@pytest.mark.skipif(not _DALI_AVAILABLE, reason="Nvidia DALI required")
@RunIf(min_gpus=1, skip_windows=True)
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required")
@pytest.mark.skipif(_IS_WINDOWS, reason="Not supported on Windows")
@pytest.mark.parametrize("cli_args", [ARGS_GPU])
def test_examples_mnist_dali(tmpdir, cli_args):
from pl_examples.integration_examples.dali_image_classifier import cli_main
Expand Down
1 change: 0 additions & 1 deletion requirements/test.txt
Expand Up @@ -2,7 +2,6 @@ coverage>5.2.0
codecov>=2.1
pytest>=6.0
pytest-rerunfailures>=10.2
check-manifest
twine==3.2
mypy>=0.900
flake8>=3.9.2
Expand Down
8 changes: 0 additions & 8 deletions setup.cfg
Expand Up @@ -73,14 +73,6 @@ ignore =
W503 # Ignore "Line break occurred before a binary operator"
E203 # Ignore "whitespace before ':'"

# setup.cfg or tox.ini
[check-manifest]
ignore =
*.yml
.github
.github/*
.circleci


[metadata]
license_file = LICENSE
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Expand Up @@ -74,10 +74,10 @@ def _load_py_module(fname, pkg="pytorch_lightning"):
url=about.__homepage__,
download_url="https://github.com/PyTorchLightning/pytorch-lightning",
license=about.__license__,
packages=find_packages(exclude=["tests", "tests/*", "benchmarks", "legacy", "legacy/*"]),
packages=find_packages(exclude=["tests*", "pl_examples*", "legacy*"]),
include_package_data=True,
long_description=long_description,
long_description_content_type="text/markdown",
include_package_data=True,
zip_safe=False,
keywords=["deep learning", "pytorch", "AI"],
python_requires=">=3.6",
Expand Down
5 changes: 2 additions & 3 deletions tests/helpers/datasets.py
Expand Up @@ -19,7 +19,6 @@
from typing import Optional, Sequence, Tuple

import torch
from torch import Tensor
from torch.utils.data import Dataset


Expand Down Expand Up @@ -70,7 +69,7 @@ def __init__(
data_file = self.TRAIN_FILE_NAME if self.train else self.TEST_FILE_NAME
self.data, self.targets = self._try_load(os.path.join(self.cached_folder_path, data_file))

def __getitem__(self, idx: int) -> Tuple[Tensor, int]:
def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
img = self.data[idx].float().unsqueeze(0)
target = int(self.targets[idx])

Expand Down Expand Up @@ -126,7 +125,7 @@ def _try_load(path_data, trials: int = 30, delta: float = 1.0):
return res

@staticmethod
def normalize_tensor(tensor: Tensor, mean: float = 0.0, std: float = 1.0) -> Tensor:
def normalize_tensor(tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0) -> torch.Tensor:
mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device)
std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device)
return tensor.sub(mean).div(std)
Expand Down
1 change: 1 addition & 0 deletions tests/special_tests.sh
Expand Up @@ -81,6 +81,7 @@ fi
# report+="Ran\ttests/plugins/environments/torch_elastic_deadlock.py\n"

# test that a user can manually launch individual processes
export PYTHONPATH="${PYTHONPATH}:$(pwd)"
args="--trainer.gpus 2 --trainer.strategy ddp --trainer.max_epochs=1 --trainer.limit_train_batches=1 --trainer.limit_val_batches=1 --trainer.limit_test_batches=1"
MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=1 python pl_examples/basic_examples/mnist_examples/image_classifier_5_lightning_datamodule.py ${args} &
MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=0 python pl_examples/basic_examples/mnist_examples/image_classifier_5_lightning_datamodule.py ${args}
Expand Down

0 comments on commit 3d2d0f2

Please sign in to comment.