Skip to content

Commit

Permalink
Fix absolutize URL for several cases
Browse files Browse the repository at this point in the history
There were a number of bugs related to the fact that `Item::get_links()` and `Item::get_base()` call each other, making a nice mess during initialisation. See tests.

Furthermore, the standard Atom `self` link was not supported, wrongly falling back to `alternate`. This fix is included in the same PR because otherwise the tests from both PRs would fail.
  • Loading branch information
Alkarex committed Apr 5, 2024
1 parent 1ec20d6 commit 76a4f62
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 23 deletions.
55 changes: 36 additions & 19 deletions src/Item.php
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,26 @@ public function get_item_tags(string $namespace, string $tag)
return null;
}

/**
 * Resolve the base URL belonging to the item itself.
 *
 * Yields the explicit `<xml:base>` of the given element when one is present,
 * and the feed base URL otherwise.
 * Unlike `get_base()`, this can safely be used during initialisation methods
 * such as `get_links()` (`get_base()` and `get_links()` call each other),
 * and it is not affected by enclosures.
 *
 * @param array<string, mixed> $element Element data; the `xml_base_explicit` and `xml_base` keys are consulted
 * @return string The element's `xml_base`, or the result of the feed's `get_base()`
 * @see get_base
 */
protected function get_own_base(array $element = []): string
{
    $hasExplicitBase = !empty($element['xml_base_explicit']) && isset($element['xml_base']);

    if (!$hasExplicitBase) {
        // No explicit xml:base on this element: defer to the feed.
        return $this->feed->get_base();
    }

    return $element['xml_base'];
}

/**
* Get the base URL value.
* Uses `<xml:base>`, or item link, or feed base URL.
* Uses `<xml:base>`, or item link, or enclosure link, or feed base URL.
*
* @param array<string, mixed> $element
* @return string
Expand Down Expand Up @@ -812,27 +829,27 @@ public function get_links(string $rel = 'alternate')
foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_10, 'link') as $link) {
if (isset($link['attribs']['']['href'])) {
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link));
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link));
}
}
foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_03, 'link') as $link) {
if (isset($link['attribs']['']['href'])) {
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link));
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link));
}
}
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_10, 'link')) {
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_090, 'link')) {
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'link')) {
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'guid')) {
if (!isset($links[0]['attribs']['']['isPermaLink']) || strtolower(trim($links[0]['attribs']['']['isPermaLink'])) === 'true') {
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0]));
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0]));
}
}

Expand Down Expand Up @@ -1199,11 +1216,11 @@ public function get_enclosures()
// PLAYER
if ($player_parent = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) {
if (isset($player_parent[0]['attribs']['']['url'])) {
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0]));
}
} elseif ($player_parent = $parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) {
if (isset($player_parent[0]['attribs']['']['url'])) {
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0]));
}
}

Expand Down Expand Up @@ -1323,13 +1340,13 @@ public function get_enclosures()
if ($thumbnails = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) {
foreach ($thumbnails as $thumbnail) {
if (isset($thumbnail['attribs']['']['url'])) {
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail));
}
}
} elseif ($thumbnails = $parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) {
foreach ($thumbnails as $thumbnail) {
if (isset($thumbnail['attribs']['']['url'])) {
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail));
}
}
}
Expand Down Expand Up @@ -1453,7 +1470,7 @@ public function get_enclosures()
if (isset($content['attribs']['']['width'])) {
$width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT);
}
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content));

// Checking the other optional media: elements. Priority: media:content, media:group, item, channel

Expand Down Expand Up @@ -1712,9 +1729,9 @@ public function get_enclosures()

// PLAYER
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) {
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player));

Check failure on line 1732 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.2

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1732 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.3

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1732 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.4

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1732 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.0

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1732 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.1

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1732 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.2

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1732 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.3

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.
} elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) {
$player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player));

Check failure on line 1734 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.2

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1734 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.3

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1734 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.4

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1734 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.0

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1734 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.1

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1734 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.2

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 1734 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.3

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.
} else {
$player = $player_parent;
}
Expand Down Expand Up @@ -1804,14 +1821,14 @@ public function get_enclosures()
// THUMBNAILS
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) {
foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) {
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail));
}
if (is_array($thumbnails)) {
$thumbnails = array_values(array_unique($thumbnails));
}
} elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) {
foreach ($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) {
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail));
}
if (is_array($thumbnails)) {
$thumbnails = array_values(array_unique($thumbnails));
Expand Down Expand Up @@ -1909,7 +1926,7 @@ public function get_enclosures()
$width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT);
}
if (isset($content['attribs']['']['url'])) {
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content));
}
// Checking the other optional media: elements. Priority: media:content, media:group, item, channel

Expand Down Expand Up @@ -2064,7 +2081,7 @@ public function get_enclosures()
// PLAYER
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) {
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'])) {
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player));

Check failure on line 2084 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.2

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 2084 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.3

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 2084 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 7.4

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 2084 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.0

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 2084 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.1

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 2084 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.2

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.

Check failure on line 2084 in src/Item.php

View workflow job for this annotation

GitHub Actions / PHP: 8.3

Parameter #1 $element of method SimplePie\Item::get_base() expects array<string, mixed>, null given.
}
} else {
$player = $player_parent;
Expand Down Expand Up @@ -2120,7 +2137,7 @@ public function get_enclosures()
if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) {
foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) {
if (isset($thumbnail['attribs']['']['url'])) {
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI);
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail));
}
}
if (is_array($thumbnails)) {
Expand Down
13 changes: 9 additions & 4 deletions src/SimplePie.php
Original file line number Diff line number Diff line change
Expand Up @@ -2456,8 +2456,9 @@ public function get_image_tags(string $namespace, string $tag)
/**
* Get the base URL value from the feed
*
* Uses `<xml:base>` if available, otherwise uses the first link in the
* feed, or failing that, the URL of the feed itself.
* Uses `<xml:base>` if available,
* otherwise uses the first 'self' link or the first 'alternate' link of the feed,
* or failing that, the URL of the feed itself.
*
* @see get_link
* @see subscribe_url
Expand All @@ -2469,8 +2470,12 @@ public function get_base(array $element = [])
{
if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) {
return $element['xml_base'];
} elseif ($this->get_link() !== null) {
return $this->get_link();
}
if (($link = $this->get_link(0, 'self')) !== null) {
return $link;
}
if (($link = $this->get_link(0, 'alternate')) !== null) {
return $link;
}

return $this->subscribe_url() ?? '';
Expand Down
32 changes: 32 additions & 0 deletions tests/Unit/EnclosureTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,38 @@ public static function getLinkProvider(): iterable
,
'http://example.net/link?a=%22b%22&amp;c=%3Cd%3E',
];

yield 'Test RSS 2.0 with channel link and enclosure' => [
<<<XML
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<link>http://example.net/tests/</link>
<item>
<link>/tests/3/</link>
<media:content url="/images/3.jpg" medium="image"></media:content>
</item>
</channel>
</rss>
XML
,
'http://example.net/images/3.jpg',
];

yield 'Test RSS 2.0 with Atom channel link and enclosure' => [
<<<XML
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<atom:link href="http://example.net/tests/" rel="self" type="application/rss+xml" />
<item>
<link>/tests/4/</link>
<media:content url="/images/4.jpg" medium="image"></media:content>
</item>
</channel>
</rss>
XML
,
'http://example.net/images/4.jpg',
];
}

/**
Expand Down
44 changes: 44 additions & 0 deletions tests/Unit/ItemTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3262,6 +3262,50 @@ public static function getPermalinkDataProvider(): array
,
'http://example.com/',
],
'Test RSS 2.0 with channel link and enclosure from another domain' => [
<<<XML
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<link>http://example.net/tests/</link>
<item>
<link>/tests/1/</link>
<media:content url="http://example.com/images/1.jpg" medium="image"></media:content>
</item>
</channel>
</rss>
XML
,
'http://example.net/tests/1/',
],
'Test RSS 2.0 with Atom channel link and relative enclosure' => [
<<<XML
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<atom:link href="http://example.net/tests/" rel="self" type="application/rss+xml" />
<item>
<link>/tests/2/</link>
<media:content url="/images/2.jpg" medium="image"></media:content>
</item>
</channel>
</rss>
XML
,
'http://example.net/tests/2/',
],
'Test RSS 2.0 with xml:base and enclosure from another domain' => [
<<<XML
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<item>
<link xml:base="http://example.net/tests/">/tests/3/</link>
<media:content url="http://example.com/images/3.jpg" medium="image"></media:content>
</item>
</channel>
</rss>
XML
,
'http://example.net/tests/3/',
],
'Test Atom 1.0 xmlbase 1' => [
<<<EOT
<feed xmlns="http://www.w3.org/2005/Atom" xml:base="http://example.com/">
Expand Down

0 comments on commit 76a4f62

Please sign in to comment.