-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1154 from tarteo/11.0-mig-html_text
[11.0][MIG] html_text
- Loading branch information
Showing
14 changed files
with
385 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
.. image:: https://img.shields.io/badge/license-AGPL--3-blue.png | ||
:target: https://www.gnu.org/licenses/agpl | ||
:alt: License: AGPL-3 | ||
|
||
==================== | ||
Text from HTML field | ||
==================== | ||
|
||
This module provides some technical features that allow extracting text from | |
any chunk of HTML, without HTML tags or attributes. You can choose either: | |
|
||
* To truncate the result by amount of words or characters. | ||
* To append an ellipsis (or any character(s)) at the end of the result. | ||
|
||
It can be used to easily generate excerpts. | ||
|
||
Usage | ||
===== | ||
|
||
This module just adds a technical utility, but nothing for the end user. | ||
|
||
If you are a developer and need this utility for your module, see these | ||
examples and read the docs inside the code. | ||
|
||
Python example:: | ||
|
||
@api.multi | ||
def some_method(self): | ||
# Get truncated text from an HTML field. It will have 40 words and 100 | |
# characters at most, and will have "..." appended at the end if it | |
# gets truncated. | |
truncated_text = self.env["ir.fields.converter"].text_from_html( | ||
self.html_field, 40, 100, "...") | ||
|
||
QWeb example:: | ||
|
||
<t t-esc="env['ir.fields.converter'].text_from_html(doc.html_field)"/> | ||
|
||
.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas | ||
:alt: Try me on Runbot | ||
:target: https://runbot.odoo-community.org/runbot/149/11.0 | ||
|
||
Known issues / Roadmap | ||
====================== | ||
|
||
* An option could be added to try to respect the basic HTML tags inside the | ||
excerpt (``<b>``, ``<i>``, ``<p>``, etc.). | ||
|
||
Bug Tracker | ||
=========== | ||
|
||
Bugs are tracked on `GitHub Issues | ||
<https://github.com/OCA/server-tools/issues>`_. In case of trouble, please | ||
check there if your issue has already been reported. If you spotted it first, | ||
help us smash it by providing detailed and welcome feedback. | |
|
||
Credits | ||
======= | ||
|
||
Contributors | ||
------------ | ||
|
||
* Jairo Llopis <yajo.sk8@gmail.com> | ||
* Vicent Cubells <vicent.cubells@tecnativa.com> | ||
* Dennis Sluijk <d.sluijk@onestein.nl> | ||
|
||
Do not contact contributors directly about support or help with technical issues. | ||
|
||
Maintainer | ||
---------- | ||
|
||
.. image:: https://odoo-community.org/logo.png | ||
:alt: Odoo Community Association | ||
:target: https://odoo-community.org | ||
|
||
This module is maintained by the OCA. | ||
|
||
OCA, or the Odoo Community Association, is a nonprofit organization whose | ||
mission is to support the collaborative development of Odoo features and | ||
promote its widespread use. | ||
|
||
To contribute to this module, please visit https://odoo-community.org. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). | ||
|
||
from . import models |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com> | ||
# Copyright 2016 Tecnativa - Vicent Cubells | ||
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). | ||
{
    # Module title and one-line summary.
    "name": "Text from HTML field",
    "summary": "Generate excerpts from any HTML field",
    # The leading "11.0" matches the series this migration targets.
    "version": "11.0.1.0.0",
    "category": "Tools",
    "website": "https://github.com/OCA/server-tools",
    # Adjacent string literals are concatenated into one comma-separated
    # author list.
    "author": "Grupo ESOC Ingeniería de Servicios, "
              "Tecnativa, "
              "Onestein, "
              "Odoo Community Association (OCA)",
    "license": "AGPL-3",
    # Technical utility only: not an application, but installable.
    "application": False,
    "installable": True,
    # Python package used by the model code (``from lxml import etree, html``).
    "external_dependencies": {
        "python": [
            "lxml.html",
        ],
    },
    # Only the "base" module is declared as a dependency.
    "depends": [
        "base",
    ],
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Translation of Odoo Server. | ||
# This file contains the translation of the following modules: | ||
# * html_text | ||
# | ||
# Translators: | ||
# Marc Tormo i Bochaca <mtbochaca@gmail.com>, 2017 | ||
msgid "" | ||
msgstr "" | ||
"Project-Id-Version: Odoo Server 9.0c\n" | ||
"Report-Msgid-Bugs-To: \n" | ||
"POT-Creation-Date: 2017-04-19 18:00+0000\n" | ||
"PO-Revision-Date: 2017-04-19 18:00+0000\n" | ||
"Last-Translator: Marc Tormo i Bochaca <mtbochaca@gmail.com>, 2017\n" | ||
"Language-Team: Catalan (https://www.transifex.com/oca/teams/23907/ca/)\n" | ||
"MIME-Version: 1.0\n" | ||
"Content-Type: text/plain; charset=UTF-8\n" | ||
"Content-Transfer-Encoding: \n" | ||
"Language: ca\n" | ||
"Plural-Forms: nplurals=2; plural=(n != 1);\n" | ||
|
||
#. module: html_text | ||
#: model:ir.model,name:html_text.model_ir_fields_converter | ||
msgid "ir.fields.converter" | ||
msgstr "ir.fields.converter" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Translation of Odoo Server. | ||
# This file contains the translation of the following modules: | ||
# * html_text | ||
# | ||
# Translators: | ||
# Rudolf Schnapka <rs@techno-flex.de>, 2017 | ||
msgid "" | ||
msgstr "" | ||
"Project-Id-Version: Odoo Server 9.0c\n" | ||
"Report-Msgid-Bugs-To: \n" | ||
"POT-Creation-Date: 2017-04-19 18:00+0000\n" | ||
"PO-Revision-Date: 2017-04-19 18:00+0000\n" | ||
"Last-Translator: Rudolf Schnapka <rs@techno-flex.de>, 2017\n" | ||
"Language-Team: German (https://www.transifex.com/oca/teams/23907/de/)\n" | ||
"MIME-Version: 1.0\n" | ||
"Content-Type: text/plain; charset=UTF-8\n" | ||
"Content-Transfer-Encoding: \n" | ||
"Language: de\n" | ||
"Plural-Forms: nplurals=2; plural=(n != 1);\n" | ||
|
||
#. module: html_text | ||
#: model:ir.model,name:html_text.model_ir_fields_converter | ||
msgid "ir.fields.converter" | ||
msgstr "ir.fields.converter" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Translation of Odoo Server. | ||
# This file contains the translation of the following modules: | ||
# * html_text | ||
# | ||
# Translators: | ||
# Pedro M. Baeza <pedro.baeza@gmail.com>, 2016 | ||
msgid "" | ||
msgstr "" | ||
"Project-Id-Version: Odoo Server 9.0c\n" | ||
"Report-Msgid-Bugs-To: \n" | ||
"POT-Creation-Date: 2016-12-17 02:07+0000\n" | ||
"PO-Revision-Date: 2016-12-17 02:07+0000\n" | ||
"Last-Translator: Pedro M. Baeza <pedro.baeza@gmail.com>, 2016\n" | ||
"Language-Team: Spanish (https://www.transifex.com/oca/teams/23907/es/)\n" | ||
"MIME-Version: 1.0\n" | ||
"Content-Type: text/plain; charset=UTF-8\n" | ||
"Content-Transfer-Encoding: \n" | ||
"Language: es\n" | ||
"Plural-Forms: nplurals=2; plural=(n != 1);\n" | ||
|
||
#. module: html_text | ||
#: model:ir.model,name:html_text.model_ir_fields_converter | ||
msgid "ir.fields.converter" | ||
msgstr "ir.fields.converter" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Translation of Odoo Server. | ||
# This file contains the translation of the following modules: | ||
# * html_text | ||
# | ||
# Translators: | ||
# Fernando Lara <gennesis45@gmail.com>, 2017 | ||
msgid "" | ||
msgstr "" | ||
"Project-Id-Version: Odoo Server 9.0c\n" | ||
"Report-Msgid-Bugs-To: \n" | ||
"POT-Creation-Date: 2017-02-16 10:39+0000\n" | ||
"PO-Revision-Date: 2017-02-16 10:39+0000\n" | ||
"Last-Translator: Fernando Lara <gennesis45@gmail.com>, 2017\n" | ||
"Language-Team: Spanish (Spain) (https://www.transifex.com/oca/teams/23907/es_ES/)\n" | ||
"MIME-Version: 1.0\n" | ||
"Content-Type: text/plain; charset=UTF-8\n" | ||
"Content-Transfer-Encoding: \n" | ||
"Language: es_ES\n" | ||
"Plural-Forms: nplurals=2; plural=(n != 1);\n" | ||
|
||
#. module: html_text | ||
#: model:ir.model,name:html_text.model_ir_fields_converter | ||
msgid "ir.fields.converter" | ||
msgstr "ir.documentos.conversor" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Translation of Odoo Server. | ||
# This file contains the translation of the following modules: | ||
# * html_text | ||
# | ||
# Translators: | ||
# Paolo Valier <paolo.valier@hotmail.it>, 2018 | ||
msgid "" | ||
msgstr "" | ||
"Project-Id-Version: Odoo Server 10.0\n" | ||
"Report-Msgid-Bugs-To: \n" | ||
"POT-Creation-Date: 2018-01-06 02:25+0000\n" | ||
"PO-Revision-Date: 2018-01-06 02:25+0000\n" | ||
"Last-Translator: Paolo Valier <paolo.valier@hotmail.it>, 2018\n" | ||
"Language-Team: Italian (https://www.transifex.com/oca/teams/23907/it/)\n" | ||
"MIME-Version: 1.0\n" | ||
"Content-Type: text/plain; charset=UTF-8\n" | ||
"Content-Transfer-Encoding: \n" | ||
"Language: it\n" | ||
"Plural-Forms: nplurals=2; plural=(n != 1);\n" | ||
|
||
#. module: html_text | ||
#: model:ir.model,name:html_text.model_ir_fields_converter | ||
msgid "ir.fields.converter" | ||
msgstr "ir.fields.converter" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Translation of Odoo Server. | ||
# This file contains the translation of the following modules: | ||
# * html_text | ||
# | ||
# Translators: | ||
# Ahmet Altinisik <aaltinisik@altinkaya.com.tr>, 2016 | ||
msgid "" | ||
msgstr "" | ||
"Project-Id-Version: Odoo Server 9.0c\n" | ||
"Report-Msgid-Bugs-To: \n" | ||
"POT-Creation-Date: 2016-12-29 03:40+0000\n" | ||
"PO-Revision-Date: 2016-12-29 03:40+0000\n" | ||
"Last-Translator: Ahmet Altinisik <aaltinisik@altinkaya.com.tr>, 2016\n" | ||
"Language-Team: Turkish (https://www.transifex.com/oca/teams/23907/tr/)\n" | ||
"MIME-Version: 1.0\n" | ||
"Content-Type: text/plain; charset=UTF-8\n" | ||
"Content-Transfer-Encoding: \n" | ||
"Language: tr\n" | ||
"Plural-Forms: nplurals=2; plural=(n > 1);\n" | ||
|
||
#. module: html_text | ||
#: model:ir.model,name:html_text.model_ir_fields_converter | ||
msgid "ir.fields.converter" | ||
msgstr "ir.fields.converter" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). | ||
|
||
from . import ir_fields_converter |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com> | ||
# Copyright 2016 Tecnativa - Vicent Cubells | ||
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). | ||
|
||
import logging | ||
from lxml import etree, html | ||
from odoo import api, models | ||
|
||
_logger = logging.getLogger(__name__) | ||
|
||
|
||
class IrFieldsConverter(models.AbstractModel):
    # Extends the existing ``ir.fields.converter`` model with a helper that
    # turns a chunk of HTML into plain (optionally truncated) text.
    _inherit = "ir.fields.converter"

    @api.model
    def text_from_html(self, html_content, max_words=None, max_chars=None,
                       ellipsis=u"…", fail=False):
        """Extract the plain text contained in a chunk of HTML.

        All text nodes of the parsed document are concatenated and any run of
        whitespace is collapsed into a single space.

        :param str html_content:
            HTML contents from where to extract the text.

        :param int max_words:
            Maximum amount of words allowed in the resulting string.

        :param int max_chars:
            Maximum amount of characters allowed in the resulting string. If
            you apply this limit, beware that the last word could get cut in an
            unexpected place. Room for ``ellipsis`` is reserved inside this
            limit; if ``ellipsis`` alone is longer than ``max_chars``, the
            result is just the ellipsis.

        :param str ellipsis:
            Character(s) to be appended to the end of the resulting string if
            it gets truncated after applying limits set in :param:`max_words`
            or :param:`max_chars`. If you want nothing applied, just set an
            empty string.

        :param bool fail:
            If ``True``, exceptions will be raised. Otherwise, an empty string
            will be returned on failure.

        :return str:
            The extracted text, or an empty string if parsing failed and
            ``fail`` is falsy.

        :raise TypeError, ValueError, lxml.etree.XMLSyntaxError,
               lxml.etree.ParserError:
            Only when ``fail`` is ``True`` and the HTML cannot be parsed.
        """
        # Parse HTML
        try:
            doc = html.fromstring(html_content)
        except (TypeError, ValueError, etree.XMLSyntaxError,
                etree.ParserError):
            # ValueError is included because lxml rejects unicode strings
            # that carry an XML encoding declaration with a plain ValueError.
            if fail:
                raise
            _logger.exception("Failure parsing this HTML:\n%s", html_content)
            return ""

        # Get words
        words = u"".join(doc.xpath("//text()")).split()

        # Truncate words
        suffix = bool(max_words) and len(words) > max_words
        if max_words:
            words = words[:max_words]

        # Get text
        text = u" ".join(words)

        # Truncate text
        suffix = suffix or (bool(max_chars) and len(text) > max_chars)
        if max_chars:
            # Reserve room for the ellipsis, but never let the slice end go
            # negative: ``text[:-n]`` would keep almost the whole text
            # instead of truncating it.
            keep = max_chars - (len(ellipsis) if suffix else 0)
            text = text[:max(keep, 0)].strip()

        # Append ellipsis if needed
        if suffix:
            text += ellipsis

        return text
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). | ||
|
||
from . import test_extractor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com> | ||
# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). | ||
|
||
from odoo.tools import mute_logger | ||
from odoo.tests.common import TransactionCase | ||
|
||
|
||
class ExtractorCase(TransactionCase):
    # Exercises ``text_from_html`` of the ``ir.fields.converter`` model.

    def setUp(self):
        super().setUp()
        # Shortcut to the method under test
        converter = self.env["ir.fields.converter"]
        self.text_from_html = converter.text_from_html

    def test_excerpts(self):
        """Text gets correctly extracted."""
        markup = u"""
<html>
<body>
<div class="this should not appear">
<h1>I'm a title</h1>
<p>I'm a paragraph</p>
<small>¡Pues yo soy español!</small>
</div>
</body>
</html>
"""
        # (positional limits, keyword options, expected excerpt)
        scenarios = (
            ((), {},
             u"I'm a title I'm a paragraph ¡Pues yo soy español!"),
            ((8,), {},
             u"I'm a title I'm a paragraph ¡Pues yo…"),
            ((8, 31), {},
             u"I'm a title I'm a paragraph ¡P…"),
            ((7,), {"ellipsis": ""},
             u"I'm a title I'm a paragraph ¡Pues"),
        )
        for limits, options, expected in scenarios:
            self.assertEqual(
                self.text_from_html(markup, *limits, **options),
                expected)

    @mute_logger("odoo.addons.html_text.models.ir_fields_converter")
    def test_empty_html(self):
        """Empty HTML handled correctly."""
        extract = self.text_from_html
        # Silent mode yields an empty string ...
        self.assertEqual(extract(""), "")
        # ... while strict mode propagates the parsing error.
        with self.assertRaises(Exception):
            extract("", fail=True)

    @mute_logger("odoo.addons.html_text.models.ir_fields_converter")
    def test_false_html(self):
        """``False`` HTML handled correctly."""
        extract = self.text_from_html
        # Silent mode yields an empty string ...
        self.assertEqual(extract(False), "")
        # ... while strict mode propagates the parsing error.
        with self.assertRaises(Exception):
            extract(False, fail=True)