Skip to content

Commit

Permalink
support images
Browse files: browse the repository at this point in the history
  • Loading branch information
Tishka17 committed Sep 7, 2023
1 parent f4d4df7 commit 65618ef
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 27 deletions.
31 changes: 19 additions & 12 deletions src/sulguk/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Any, List, Optional, Tuple

from sulguk.render.numbers import NumberFormat

from .entities import (
Blockquote,
Bold,
Expand Down Expand Up @@ -39,7 +38,7 @@

# Prefix looked for in class names — presumably marks a code block's
# language (e.g. "language-python"); its usage is outside this excerpt.
LANG_CLASS_PREFIX = "language-"

# Tag names treated as single tags with no closing counterpart.
# NOTE(review): inferred from the name and from the single-tag handler
# branching on these same tags — confirm against where this is read.
# Earlier value, without "img"; rebound by the assignment that follows.
NO_CLOSING_TAGS = ("br", "hr", "meta", "link")
# Effective value: "img" is included as well.
NO_CLOSING_TAGS = ("br", "hr", "meta", "link", "img")


class Transformer(HTMLParser):
Expand Down Expand Up @@ -71,13 +70,20 @@ def _get_a(self, attrs: Attrs) -> Entity:
if url:
return Link(url=url)
return Group()
def _get_img(self, attrs: Attrs) -> Optional[Entity]:
    """Build the entity that stands in for an ``<img>`` tag.

    The image is rendered as a picture emoji followed by a label.  The
    label is looked up under ``alt``, with the ``src`` URL passed to
    ``_find_attr`` as the fallback value.  When a URL is present the
    label is wrapped in a link pointing at it.

    Args:
        attrs: attributes of the tag as received by the parser callbacks.

    Returns:
        A ``Link`` containing the label when ``src`` is set, a bare
        ``Text`` when only a label is available, or ``None`` when there
        is neither a label nor a URL.
    """
    url = self._find_attr("src", attrs)
    text = self._find_attr("alt", attrs, url)
    if not text and not url:
        return None

    # NOTE(review): ``text`` could be None here if ``_find_attr`` hands
    # back the raw value of a valueless attribute (``<img src=".." alt>``);
    # its body is not visible from this block, so guard the concatenation.
    text_entity = Text(text="🖼️" + (text or ""))
    if not url:
        return text_entity
    link = Link(url=url)
    link.add(text_entity)
    return link

def _get_ul(self, attrs: Attrs) -> Entity:
    """Build the entity for a ``<ul>`` tag: a list group that is not numbered.

    ``attrs`` is accepted for signature parity but is not read.
    """
    bullet_list = ListGroup(numbered=False)
    return bullet_list

Expand Down Expand Up @@ -182,11 +188,14 @@ def handle_startendtag(self, tag: str, attrs: Attrs) -> None:
entity = NewLine()
elif tag == "hr":
entity = HorizontalLine()
elif tag in ("img",):
entity = self._get_img(attrs)
elif tag in ("meta", "link"):
return # ignored tag
else:
raise ValueError(f"Unsupported single tag: `{tag}`")
self.current.add(entity)
if entity:
self.current.add(entity)

def handle_starttag(
self,
Expand All @@ -209,10 +218,8 @@ def handle_starttag(
nested = entity = self._get_ol(attrs)
elif tag in ("li",):
nested = entity = self._get_li(attrs)
elif tag in ("a", ):
elif tag in ("a",):
nested = entity = self._get_a(attrs)
elif tag in ("img",):
nested = entity = self._get_img(attrs)
elif tag in ("b", "strong"):
nested = entity = Bold()
elif tag in ("i", "em", "cite", "var"):
Expand Down
34 changes: 34 additions & 0 deletions tests/test_img.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pytest

from sulguk import transform_html

# HTML inputs: a single <img> tag placed between the characters "0" and "1".
IMG_HTML_NOCLOSE = '0<img src="https://google.com">1'  # plain start tag
IMG_HTML_CLOSE = '0<img src="https://google.com" />1'  # self-closing form
IMG_HTML_ALT = '0<img src="https://google.com" alt="This is text" />1'
IMG_HTML_EMPTY_ALT = '0<img src="https://google.com" alt="" />1'
# URL the tests expect on the resulting text_link entity.
IMG_URL = "https://google.com"
# Expected plain text: the picture emoji followed by the URL (no alt given),
# by the alt text, or by nothing at all (empty alt).
IMG_TEXT_URL = "0🖼️https://google.com1"
IMG_TEXT_ALT = "0🖼️This is text1"
IMG_TEXT_EMPTY_ALT = "0🖼️1"


@pytest.mark.parametrize(
    "html, url, text",
    [
        (IMG_HTML_NOCLOSE, IMG_URL, IMG_TEXT_URL),
        (IMG_HTML_CLOSE, IMG_URL, IMG_TEXT_URL),
        (IMG_HTML_ALT, IMG_URL, IMG_TEXT_ALT),
        (IMG_HTML_EMPTY_ALT, IMG_URL, IMG_TEXT_EMPTY_ALT),
    ],
)
def test_link_extracted(html, url, text):
    """An ``<img>`` tag yields the expected text and exactly one text link."""
    rendered = transform_html(html)
    assert rendered.text == text

    # Exactly one entity must be produced, and it must be the link.
    found = rendered.entities
    assert len(found) == 1
    link = found[0]
    assert link['type'] == 'text_link'
    assert link['url'] == url


def test_empty_image():
    """An ``<img>`` with an empty ``src`` adds no text and no entities."""
    rendered = transform_html('0<img src="">1')
    assert rendered.text == "01"
    assert not rendered.entities
15 changes: 0 additions & 15 deletions tests/test_spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
PRE_P_HTML = "<p>1</p>\n<pre>\n 2</pre>"
PRE_P_PLAIN = "1\n\n 2\n\n"

IMG_HTML = '<img src="https://google.com">'
IMG_URL = "https://google.com"

@pytest.mark.parametrize("html, plain, name", [
(SPACES_SPAN_HTML, SPACES_SPAN_PLAIN, "span"),
Expand All @@ -65,16 +63,3 @@ def test_spaces(html, plain, name):
print(repr(plain))
print(repr(html))
assert result.text == plain



@pytest.mark.parametrize(
    "html, url",
    [
        (IMG_HTML, IMG_URL),
    ],
)
def test_link_extracted(html, url):
    """A bare ``<img>`` yields empty text and a single text link to its URL."""
    rendered = transform_html(html)
    assert rendered.text == ""

    # Exactly one entity must be produced, and it must be the link.
    found = rendered.entities
    assert len(found) == 1
    link = found[0]
    assert link['type'] == 'text_link'
    assert link['url'] == url

0 comments on commit 65618ef

Please sign in to comment.