Skip to content
This repository has been archived by the owner on Jul 4, 2023. It is now read-only.

Commit

Permalink
Update Utils Docs
Browse files Browse the repository at this point in the history
  • Loading branch information
PetrochukM committed Mar 17, 2018
1 parent ef3877a commit d2fd3fb
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 21 deletions.
11 changes: 11 additions & 0 deletions build_tools/pypi/release.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# NOTE: First, update the version number in `torchnlp.__init__.__version__`
# REFERENCE: https://packaging.python.org/tutorials/distributing-packages/

# Create a source distribution
python3.6 setup.py sdist

# Create a wheel for the project
python3.6 setup.py bdist_wheel

# Upload your distributions to PyPI using twine.
twine upload dist/*
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ sphinx-autobuild

# Auto Formatter
yapf

# Release to PyPI
wheel
59 changes: 38 additions & 21 deletions torchnlp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,35 @@
# TODO: Add back padding


def shuffle(dataset, random_seed=123):
""" Shuffle dataset deterministically based on random_seed. """
random.Random(random_seed).shuffle(dataset.rows)
def shuffle(list_, random_seed=123):
""" Shuffle list deterministically based on ``random_seed``.
Reference:
https://stackoverflow.com/questions/19306976/python-shuffling-with-a-parameter-to-get-the-same-result
Example:
>>> a = [1, 2, 3, 4, 5]
>>> b = [1, 2, 3, 4, 5]
>>> shuffle(a, random_seed=456)
>>> shuffle(b, random_seed=456)
>>> a == b
True
>>> a, b
([1, 3, 2, 5, 4], [1, 3, 2, 5, 4])
Args:
list_ (list): List to be shuffled.
random_seed (int): Random seed used to shuffle.
Returns:
None:
"""
random.Random(random_seed).shuffle(list_)


def resplit_datasets(dataset, other_dataset, random_seed=None, split=None):
"""Deterministic shuffle and split algorithm.
Given the same two datasets and the same `random_seed`, the split happens the same exact way
Given the same two datasets and the same ``random_seed``, the split happens the same exact way
every call.
Args:
Expand All @@ -28,17 +48,13 @@ def resplit_datasets(dataset, other_dataset, random_seed=None, split=None):
after split otherwise the original proportions are kept.
Returns:
dataset (lib.datasets.Dataset)
other_dataset (lib.datasets.Dataset)
:class:`torchnlp.datasets.Dataset`, :class:`torchnlp.datasets.Dataset`: Resplit datasets.
"""
# Prevent circular dependency
from torchnlp.datasets import Dataset

concat = dataset.rows + other_dataset.rows
# Reference:
# https://stackoverflow.com/questions/19306976/python-shuffling-with-a-parameter-to-get-the-same-result
# NOTE: Shuffle the same way every call of `shuffle_datasets` where the `random_seed` is given
random.Random(random_seed).shuffle(concat)
shuffle(concat, random_seed=random_seed)
if split is None:
return Dataset(concat[:len(dataset)]), Dataset(concat[len(dataset):])
else:
Expand All @@ -48,12 +64,12 @@ def resplit_datasets(dataset, other_dataset, random_seed=None, split=None):

def torch_equals_ignore_index(tensor, tensor_other, ignore_index=None):
"""
Compute torch.equals with the optional mask parameter.
Compute ``torch.equal`` with the optional mask parameter.
Args:
ignore_index (int, optional): specifies a tensor1 index that is ignored
ignore_index (int, optional): Specifies a ``tensor`` index that is ignored.
Returns:
(bool) iff target and prediction are equal
(bool) Returns ``True`` if ``tensor`` and ``tensor_other`` are equal.
"""
if ignore_index is not None:
assert tensor.size() == tensor_other.size()
Expand All @@ -65,18 +81,19 @@ def torch_equals_ignore_index(tensor, tensor_other, ignore_index=None):


def reporthook(t):
""" Shuffle dataset deterministically based on random_seed. """
"""https://github.com/tqdm/tqdm"""
""" ``reporthook`` to use with ``urllib.request`` that prints the process of the download.
Reference:
https://github.com/tqdm/tqdm
"""
last_b = [0]

def inner(b=1, bsize=1, tsize=None):
"""
b: int, optionala
Number of blocks just transferred [default: 1].
bsize: int, optional
Size of each block (in tqdm units) [default: 1].
tsize: int, optional
Total size (in tqdm units). If [default: None] remains unchanged.
Args:
b (int, optional): Number of blocks just transferred [default: 1].
bsize (int, optional): Size of each block (in tqdm units) [default: 1].
tsize (int, optional): Total size (in tqdm units). If ``None``, the total remains unchanged [default: None].
"""
if tsize is not None:
t.total = tsize
Expand Down

0 comments on commit d2fd3fb

Please sign in to comment.