Skip to content

Commit

Permalink
Merge pull request #69 from victorBigand/master
Browse files (browse the repository at this point in the history)
Fix bad tagging of sentences due to extra spaces
  • Loading branch information
TFA-MAIF committed Oct 30, 2020
2 parents: 9b6d14b + d5fe6d5; commit 6029472
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 22 deletions.
2 changes: 1 addition & 1 deletion melusine/prepare_email/mail_segmenting.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def split_message_to_sentences(text, sep_=r"(.*?[;.,?!])"):
text = re.sub(regex3, regex4, text) # remove double punctuation
sentence_list = re.findall(regex_pattern, text, flags=re.M)
sentence_list = [
r for s in sentence_list for r in re.split(regex_piece_jointe, s) if r
r.strip() for s in sentence_list for r in re.split(regex_piece_jointe, s) if r
]

return sentence_list
Expand Down
42 changes: 21 additions & 21 deletions tests/unit_tests/prepare_email/test_mail_segmenting.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@
"structured_text": {
"header": None,
"text": [
{"part": " Bonjours, ", "tags": "HELLO"},
{"part": "Bonjours,", "tags": "HELLO"},
{
"part": " Suite a notre conversation \
téléphonique de Mardi , pourriez vous me dire la somme que je vous dois \
afin d'd'être en régularisation . \n \n ",
"part": " Suite a notre conversation \
téléphonique de Mardi , pourriez vous me dire la somme que je vous dois \
afin d'd'être en régularisation .",
"tags": "BODY",
},
{"part": "Merci bonne journée", "tags": "GREETINGS"},
Expand All @@ -44,18 +44,18 @@
"structured_text": {
"header": None,
"text": [
{"part": " Bonjour. \n \n ", "tags": "HELLO"},
{"part": "Bonjour.", "tags": "HELLO"},
{
"part": "Merci de bien vouloir prendre \
connaissance du document ci-joint : 1 - Relevé d'identité postal MUTUELLE \
(contrats) ",
connaissance du document ci-joint : 1 - Relevé d'identité postal MUTUELLE \
(contrats) ",
"tags": "BODY",
},
{"part": " Sentiments mutualistes. ", "tags": "GREETINGS"},
{"part": " La Mutuelle ", "tags": "BODY"},
{"part": "Sentiments mutualistes.", "tags": "GREETINGS"},
{"part": "La Mutuelle ", "tags": "BODY"},
{
"part": " La visualisation des fichiers \
PDF nécessite Adobe Reader. \n",
"part": "La visualisation des fichiers \
PDF nécessite Adobe Reader.",
"tags": "FOOTER",
},
],
Expand Down Expand Up @@ -96,11 +96,11 @@ def test_structure_email():
"structured_text": {
"header": None,
"text": [
{"part": " Bonjours, ", "tags": "HELLO"},
{"part": "Bonjours,", "tags": "HELLO"},
{
"part": " Suite a notre conversation \
téléphonique de Mardi , pourriez vous me dire la somme que je vous dois \
afin d'd'être en régularisation . \n \n ",
"part": " Suite a notre conversation \
téléphonique de Mardi , pourriez vous me dire la somme que je vous dois \
afin d'd'être en régularisation .",
"tags": "BODY",
},
{"part": "Merci bonne journée", "tags": "GREETINGS"},
Expand All @@ -117,17 +117,17 @@ def test_structure_email():
"structured_text": {
"header": None,
"text": [
{"part": " Bonjour. \n \n ", "tags": "HELLO"},
{"part": "Bonjour.", "tags": "HELLO"},
{
"part": "Merci de bien vouloir prendre \
connaissance du document ci-joint : 1 - Relevé d'identité postal MUTUELLE \
(contrats) ",
connaissance du document ci-joint : 1 - Relevé d'identité postal MUTUELLE \
(contrats) ",
"tags": "BODY",
},
{"part": " Sentiments mutualistes. ", "tags": "GREETINGS"},
{"part": " La Mutuelle ", "tags": "BODY"},
{"part": "Sentiments mutualistes.", "tags": "GREETINGS"},
{"part": "La Mutuelle ", "tags": "BODY"},
{
"part": " La visualisation des fichiers PDF nécessite Adobe Reader. \n",
"part": "La visualisation des fichiers PDF nécessite Adobe Reader.",
"tags": "FOOTER",
},
],
Expand Down

0 comments on commit 6029472

Please sign in to comment.