forked from Belval/TextRecognitionDataGenerator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
from_wikipedia.py
70 lines (64 loc) · 1.85 KB
/
from_wikipedia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from .from_strings import GeneratorFromStrings
from ..data_generator import FakeTextDataGenerator
from ..string_generator import create_strings_from_wikipedia
from ..utils import load_dict, load_fonts
class GeneratorFromWikipedia:
    """Generator that uses sentences taken from random Wikipedia articles.

    Wraps a ``GeneratorFromStrings`` whose pool of strings is obtained from
    ``create_strings_from_wikipedia`` and is replaced with a freshly fetched
    pool each time a full pool's worth of samples has been produced.
    """

    # Number of sentences requested from Wikipedia per fetch.
    STRINGS_PER_FETCH = 1000

    def __init__(
        self,
        count=-1,
        minimum_length=1,
        fonts="fonts/latin",
        language="en",
        size=32,
        skewing_angle=0,
        random_skew=False,
        blur=0,
        random_blur=False,
        background_type=0,
        distorsion_type=0,
        distorsion_orientation=0,
        is_handwritten=False,
        width=-1,
        alignment=1,
        text_color="#282828",
        orientation=0,
        space_width=1.0,
        margins=(5, 5, 5, 5),
        fit=False,
    ):
        """Fetch an initial pool of sentences and build the wrapped generator.

        ``minimum_length`` and ``language`` are passed to
        ``create_strings_from_wikipedia``; ``count``, ``language`` and every
        remaining argument are forwarded positionally, unchanged, to
        ``GeneratorFromStrings``. Their exact meaning is defined by those
        callees, not by this class.

        NOTE(review): constructing an instance calls
        ``create_strings_from_wikipedia`` immediately; presumably this needs
        network access -- confirm against its implementation.
        """
        self.count = count
        self.minimum_length = minimum_length
        self.language = language
        self.generator = GeneratorFromStrings(
            self._fetch_strings(),
            count,
            fonts,
            language,
            size,
            skewing_angle,
            random_skew,
            blur,
            random_blur,
            background_type,
            distorsion_type,
            distorsion_orientation,
            is_handwritten,
            width,
            alignment,
            text_color,
            orientation,
            space_width,
            margins,
            fit,
        )

    def _fetch_strings(self):
        """Return a new pool of ``STRINGS_PER_FETCH`` Wikipedia sentences."""
        return create_strings_from_wikipedia(
            self.minimum_length, self.STRINGS_PER_FETCH, self.language
        )

    def __iter__(self):
        """Return this object as its own iterator.

        Returning the wrapped generator here would make ``for`` loops skip
        ``next()`` below, so the string pool would never be refreshed.
        """
        return self

    def __next__(self):
        """Return the next sample; see ``next``."""
        return self.next()

    def next(self):
        """Return the next sample from the wrapped generator.

        Before delegating, the string pool is replaced with a fresh one when
        the number of samples produced so far is a positive multiple of
        ``STRINGS_PER_FETCH``, i.e. exactly once per exhausted pool rather
        than on every call past a threshold.

        Any exception raised by the wrapped generator's ``next()`` propagates
        to the caller unchanged.
        """
        # Assumes generated_count is the number of samples already produced
        # by the wrapped generator -- TODO confirm in GeneratorFromStrings.
        produced = self.generator.generated_count
        if produced > 0 and produced % self.STRINGS_PER_FETCH == 0:
            self.generator.strings = self._fetch_strings()
        return self.generator.next()