Skip to content

Commit

Permalink
fix: cleaning up unnecessary type ignores
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Dec 26, 2023
1 parent f4ffc6e commit 0e4b222
Show file tree
Hide file tree
Showing 11 changed files with 145 additions and 114 deletions.
10 changes: 6 additions & 4 deletions src/augmenty/character/casing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@
from spacy.language import Language
from spacy.training import Example

from augmenty.util import Augmenter

from ..augment_utilities import make_text_from_orth


def random_casing_augmenter_v1(
nlp: Language,
example: Example,
level: float,
) -> Iterator[Example]: # type: ignore
) -> Iterator[Example]:
def __casing(c):
if random.random() < level:
return c.lower() if random.random() < 0.5 else c.upper()
Expand All @@ -31,14 +33,14 @@ def __casing(c):
@spacy.registry.augmenters("random_casing_v1") # type: ignore
def create_random_casing_augmenter_v1(
level: float,
) -> Callable[[Language, Example], Iterator[Example]]: # type: ignore
) -> Augmenter:
"""Create an augment that randomly changes the casing of the document.
Args:
level (float): The percentage of characters that will have their casing changed.
level: The percentage of characters that will have their casing changed.
Returns:
Callable[[Language, Example], Iterator[Example]]: The augmenter.
The augmenter.
Example:
>>> import augmenty
Expand Down
40 changes: 19 additions & 21 deletions src/augmenty/character/replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@

import spacy
from spacy.language import Language
from spacy.tokens import Token
from spacy.training import Example

from augmenty.util import Augmenter

from ..augment_utilities import make_text_from_orth
from ..keyboard import Keyboard

Expand All @@ -18,10 +21,10 @@ def char_replace_augmenter_v1(
example: Example,
level: float,
replace: dict,
) -> Iterator[Example]: # type: ignore
def __replace(t):
) -> Iterator[Example]:
def __replace(t: Token) -> str:
t_ = []
for i, c in enumerate(t.text):
for c in t.text:
if random.random() < level and c in replace:
c = random.choice(replace[c])
t_.append(c)
Expand All @@ -38,19 +41,17 @@ def __replace(t):
def create_char_random_augmenter_v1(
level: float,
keyboard: str = "en_qwerty_v1",
) -> Callable[[Language, Example], Iterator[Example]]: # type: ignore
) -> Augmenter:
"""Creates an augmenter that replaces a character with a random character
from the keyboard.
Args:
level (float): The probability to replace a character with a neighbouring
character.
keyboard (str, optional): A defined keyboard in the keyboard registry. To see a
list of all keyboards you can run `augmenty.keyboards()`. Defaults
level: The probability to replace a character with a neighbouring character.
keyboard: A defined keyboard in the keyboard registry. To see a list of all keyboards you can run `augmenty.keyboards()`. Defaults
to "en_qwerty_v1".
Returns:
Callable[[Language, Example], Iterator[Example]]: The augmenter function.
The augmenter.
Example:
>>> import augmenty
Expand All @@ -71,17 +72,17 @@ def create_char_random_augmenter_v1(
def create_char_replace_augmenter_v1(
level: float,
replace: dict,
) -> Callable[[Language, Example], Iterator[Example]]: # type: ignore
) -> Augmenter:
"""Creates an augmenter that replaces a character with a random character
from replace dict.
Args:
level (float): probability to augment character, if document is augmented.
replace (dict): A dictionary mapping each character to its potential
level: probability to augment character, if document is augmented.
replace: A dictionary mapping each character to its potential
replacements.
Returns:
Callable[[Language, Example], Iterator[Example]]: The augmenter function.
The augmenter function.
Example:
>>> create_char_replace_augmenter_v1(level=0.02,
Expand All @@ -99,21 +100,18 @@ def create_keystroke_error_augmenter_v1(
level: float,
distance: float = 1.5,
keyboard: str = "en_qwerty_v1",
) -> Callable[[Language, Example], Iterator[Example]]: # type: ignore
) -> Augmenter:
"""Creates a augmenter which augments a text with plausible typos based on
keyboard distance.
Args:
level (float): The probability to replace a character with a neighbouring
character.
distance (float, optional): keyboard distance. Defaults to 1.5 corresponding to
neighbouring keys including diagonals.
keyboard (str, optional): A defined keyboard in the keyboard registry. To see a
list of all keyboards you can run `augmenty.keyboards.get_all()`. Defaults
level: The probability to replace a character with a neighbouring character.
distance: keyboard distance. Defaults to 1.5 corresponding to neighbouring keys including diagonals.
keyboard: A defined keyboard in the keyboard registry. To see a list of all keyboards you can run `augmenty.keyboards.get_all()`. Defaults
to "en_qwerty_v1".
Returns:
Callable[[Language, Example], Iterator[Example]]: The augmentation function
The augmenter.
Example:
>>> import augmenty
Expand Down
10 changes: 6 additions & 4 deletions src/augmenty/character/spacing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
from spacy.language import Language
from spacy.training import Example

from augmenty.util import Augmenter

from ..augment_utilities import make_text_from_orth


def remove_spacing_augmenter_v1(
nlp: Language,
example: Example,
level: float,
) -> Iterator[Example]: # type: ignore
) -> Iterator[Example]:
def __replace(s):
if random.random() < level and (s is True):
return False
Expand All @@ -33,14 +35,14 @@ def __replace(s):
@spacy.registry.augmenters("remove_spacing_v1") # type: ignore
def create_remove_spacing_augmenter_v1(
level: float,
) -> Callable[[Language, Example], Iterator[Example]]: # type: ignore
) -> Augmenter:
"""Creates an augmenter that removes spacing with a given probability.
Args:
level (float): The probability to remove a space.
level: The probability to remove a space.
Returns:
Callable[[Language, Example], Iterator[Example]]: The augmenter.
The augmenter.
Example:
>>> import augmenty
Expand Down
12 changes: 8 additions & 4 deletions src/augmenty/character/swap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@
from spacy.language import Language
from spacy.training import Example

from augmenty.util import Augmenter

from ..augment_utilities import make_text_from_orth


def char_swap_augmenter_v1(nlp: Language, example: Example, level) -> Iterator[Example]: # type: ignore
def char_swap_augmenter_v1(
nlp: Language, example: Example, level: float
) -> Iterator[Example]:
def __replace(t):
for i, c in enumerate(t.text[:-1]):
if random.random() < level:
Expand All @@ -29,15 +33,15 @@ def __replace(t):
@spacy.registry.augmenters("char_swap_v1") # type: ignore
def create_char_swap_augmenter_v1(
level: float,
) -> Callable[[Language, Example], Iterator[Example]]: # type: ignore
) -> Augmenter:
"""Creates an augmenter that swaps two neighbouring characters in a token
with a given probability.
Args:
level (float): probability to replace a character.
level: probability to replace a character.
Returns:
Callable[[Language, Example], Iterator[Example]]: The augmenter.
The augmenter.
Example:
>>> import augmenty
Expand Down
20 changes: 18 additions & 2 deletions src/augmenty/doc/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,26 @@ def paragraph_subset_augmenter_v1(
start, end = start_n, end_n

# Respect entity spans
while start != 0 and example.y[start].ent_iob_ not in {"O", "B", ""}:
start_is_in_entity = start != 0 and example.y[start].ent_iob_ not in {"O", "B", ""}
end_is_in_entity = end < doc_len - 1 and example.y[end].ent_iob_ not in {
"O",
"B",
"",
}
while start_is_in_entity:
start -= 1
while end < doc_len - 1 and example.y[end - 1].ent_iob_ not in {"O", "B", ""}:
start_is_in_entity = start != 0 and example.y[start].ent_iob_ not in {
"O",
"B",
"",
}
while end_is_in_entity:
end += 1
end_is_in_entity = end < doc_len - 1 and example.y[end].ent_iob_ not in {
"O",
"B",
"",
}

for k in token_anno:
token_anno[k] = token_anno[k][start:end]
Expand Down
Loading

0 comments on commit 0e4b222

Please sign in to comment.