-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #64 from SekouD/cooljugator
- Loading branch information
Showing
6 changed files
with
730 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,18 @@ | ||
pip==18.0 | ||
pip==18.1 | ||
bump2version==0.5.10 | ||
wheel==0.31.1 | ||
wheel==0.32.1 | ||
watchdog==0.9.0 | ||
flake8==3.5.0 | ||
tox==3.4.0 | ||
coverage==4.5.1 | ||
Sphinx==1.8.0 | ||
Sphinx==1.8.1 | ||
cryptography==2.3.1 | ||
PyYAML==4.2b4 | ||
pytest==3.8.0 | ||
pytest-runner>=4.2 | ||
cython==0.28.5 | ||
numpy==1.15.1 | ||
scikit-learn>=0.19.1 | ||
numpy==1.15.2 | ||
scikit-learn==0.19.1 | ||
scipy==1.1.0 | ||
click==6.7 | ||
click==7.0 | ||
mypy==0.630 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import pickle | ||
from collections import OrderedDict, defaultdict | ||
|
||
|
||
def detect_root(verb, info_verb, language): | ||
if 'type' in info_verb: | ||
return None | ||
result = OrderedDict() | ||
verb_forms = [forms for key, forms in info_verb.items() if key != 'doubles'] | ||
flat_verb_forms = [] | ||
for tense in verb_forms: | ||
for form in tense: | ||
if '/' in form[-1]: | ||
if not language == 'it': | ||
standard_form = form[-1][:form[-1].index('/')] | ||
else: | ||
standard_form = form[-1][form[-1].index('/') +1:] | ||
if len(standard_form) > 0: | ||
flat_verb_forms.append(standard_form.strip().split(' ')[-1]) | ||
pass | ||
elif not form[-1] in ('' or '-'): | ||
standard_form = form[-1].strip().split(' ')[-1] | ||
if len(standard_form) > 0: | ||
flat_verb_forms.append(standard_form) | ||
root = '' | ||
for i in range(1, len(verb)+1): | ||
prefix = verb[:i] | ||
if all([elmt.startswith(prefix) for elmt in flat_verb_forms]): | ||
root = prefix | ||
else: | ||
break | ||
suffix = verb[len(root):] | ||
return (root, suffix) | ||
|
||
|
||
def construct_template(verb, info_verb, root_info, language, pronouns_dict): | ||
if 'type' in info_verb: | ||
return None | ||
result = OrderedDict() | ||
verb_roots = OrderedDict() | ||
verb_roots[verb] = root_info[0] | ||
for key, val in info_verb.items(): | ||
if key != 'doubles': | ||
forms = [] | ||
for form in val: | ||
if form[-1].startswith('('): | ||
continue | ||
if not form[-1] in ('' or '-'): | ||
forms.append(form[-1].strip().split(' ')[-1][len(root_info[0]):]) | ||
if form[-1].strip().split(' ')[-1] == '-': | ||
forms.append(form[-1].strip().split(' ')[-1]) | ||
if len(form) > 1: | ||
pronouns_dict[language].add(form[0]) | ||
result[key] = tuple(forms) | ||
return result | ||
|
||
|
||
def group_by_template(model_verbs_dict, all_verbs_dict, verb_roots): | ||
results = defaultdict(dict) | ||
for key, val in model_verbs_dict.items(): | ||
if val: | ||
pattern = frozenset(val.items()) | ||
results[pattern]['model_verb'] = key | ||
results[pattern]['members'] = [key, ] | ||
root = verb_roots[key] | ||
template = root + ':' + key[len(root):] | ||
results[pattern]['template'] = template | ||
pass | ||
for key, val in all_verbs_dict.items(): | ||
if val: | ||
pattern = frozenset(val.items()) | ||
if pattern in results: | ||
results[pattern]['members'].append(key) | ||
pass | ||
else: | ||
results[pattern]['model_verb'] = key | ||
results[pattern]['members'] = [key, ] | ||
root = verb_roots[key] | ||
template = root + ':' + key[len(root):] | ||
results[pattern]['template'] = template | ||
pass | ||
pass | ||
return results | ||
|
||
|
||
def construct_verbs_dict(hastable, verb_roots): | ||
verb_dict = {} | ||
for val in hastable.values(): | ||
root = verb_roots[val['model_verb']] | ||
template = root + ':' + val['model_verb'][len(root):] | ||
for verb in set(val['members']): | ||
verb_dict[verb] = template | ||
pass | ||
pass | ||
return verb_dict | ||
|
||
|
||
def construct_conjug_dict(hastable, verb_roots, all_verbs_conjugation): | ||
conjug_dict = {} | ||
for val in hastable.values(): | ||
root = verb_roots[val['model_verb']] | ||
template = root + ':' + val['model_verb'][len(root):] | ||
conjug = all_verbs_conjugation[val['model_verb']] | ||
conjug_dict[template] = conjug | ||
pass | ||
return conjug_dict | ||
|
||
|
||
if __name__ == "__main__": | ||
# conjug = defaultdict(dict) | ||
with open('C:/Users/SekouD/Documents/Projets_Python/mlconjug/utils' | ||
'/raw_data/cooljugator_dump_temp.pickle', 'rb') as f: | ||
conjug = pickle.load(f) | ||
print('ok.') |
Oops, something went wrong.