From 912bb39ab75529a581a6786f24dec41a63365cb5 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:33:34 +0200 Subject: [PATCH 1/7] Help PHPStan (#2) * Help PHPStan * PHP signature has changed --- composer.json | 2 +- src/Parser.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index d5bd2a6b..f70559be 100644 --- a/composer.json +++ b/composer.json @@ -32,7 +32,7 @@ "require-dev": { "friendsofphp/php-cs-fixer": "^2.19 || ^3.8", "mf2/mf2": "^0.5.0", - "phpstan/phpstan": "^1.10", + "phpstan/phpstan": "^1.11", "phpunit/phpunit": "^8 || ^9 || ^10", "psr/http-client": "^1.0", "psr/http-factory": "^1.0", diff --git a/src/Parser.php b/src/Parser.php index 9c78492a..b7c5a29d 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -144,7 +144,7 @@ public function parse(string &$data, string $encoding, string $url = '') //Parse by chunks not to use too much memory do { $stream_data = fread($stream, 1048576); - if (!xml_parse($xml, $stream_data === false ? '' : $stream_data, feof($stream))) { + if (!xml_parse($xml, $stream_data == false ? 
'' : $stream_data, feof($stream))) { $this->error_code = xml_get_error_code($xml); $this->error_string = xml_error_string($this->error_code); $return = false; From 97cfaf779aeaa836228324f1b458747edec06f9b Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:38:48 +0200 Subject: [PATCH 2/7] Disable coding style (#3) Because it is not in sync with SimplePie code at all and make all tests fail --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f2394040..0663c0a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,8 +35,8 @@ jobs: - name: "Install Composer dependencies" uses: "ramsey/composer-install@v2" - - name: "Check coding style" - run: composer cs + # - name: "Check coding style" + # run: composer cs test: runs-on: ubuntu-latest From b2585b7f35283d3fd813bec5eef9b6d982a5e3a0 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:45:08 +0200 Subject: [PATCH 3/7] Fix SimplePie autodiscovery for text/xml HTML pages (#1) * Fix SimplePie autodiscovery for text/xml HTML pages https://github.com/FreshRSS/FreshRSS/pull/1265 https://github.com/FreshRSS/FreshRSS/issues/1264 * Move comment --- src/Content/Type/Sniffer.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Content/Type/Sniffer.php b/src/Content/Type/Sniffer.php index d1da5fd7..0538b9b5 100644 --- a/src/Content/Type/Sniffer.php +++ b/src/Content/Type/Sniffer.php @@ -78,9 +78,7 @@ public function get_type() if ($official === 'unknown/unknown' || $official === 'application/unknown') { return $this->unknown(); - } elseif (substr($official, -4) === '+xml' - || $official === 'text/xml' - || $official === 'application/xml') { + } elseif (substr($official, -4) === '+xml') { // FreshRSS return $official; } elseif (substr($official, 0, 6) === 'image/') { if ($return = $this->image()) { @@ -88,7 +86,10 @@ public 
function get_type() } return $official; - } elseif ($official === 'text/html') { + } elseif ($official === 'text/html' + || $official === 'text/xml' // FreshRSS + || $official === 'application/xml' // FreshRSS + ) { return $this->feed_or_html(); } From b5c61d82b4b15a6d26538e99bd588b96636db73d Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 01:51:14 +0200 Subject: [PATCH 4/7] Trim body (#4) https://github.com/FreshRSS/FreshRSS/pull/1143 https://github.com/FreshRSS/FreshRSS/issues/1142 --- src/SimplePie.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SimplePie.php b/src/SimplePie.php index 115a0799..bd129ebb 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -2081,6 +2081,7 @@ protected function fetch_data(&$cache) } $this->raw_data = $file->get_body_content(); + $this->raw_data = trim($this->raw_data); $this->permanent_url = $file->get_permanent_uri(); $headers = []; From f792768ec21f6163210b84d25a380adb1f394c77 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 02:27:01 +0200 Subject: [PATCH 5/7] Simplify get_build (#6) That function has too much IO and is not up-to-date https://github.com/FreshRSS/FreshRSS/commit/02d1dac0bb07884b79ddea20980bfcf21131f2d7 --- src/Misc.php | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/src/Misc.php b/src/Misc.php index d5e07586..145e0d29 100644 --- a/src/Misc.php +++ b/src/Misc.php @@ -2117,28 +2117,8 @@ public static function get_build() return self::$SIMPLEPIE_BUILD; } - $root = dirname(__FILE__, 2); - if (file_exists($root . '/.git/index')) { - self::$SIMPLEPIE_BUILD = filemtime($root . '/.git/index'); - - return self::$SIMPLEPIE_BUILD; - } elseif (file_exists($root . '/SimplePie')) { - $time = 0; - foreach (glob($root . 
'/SimplePie/*.php') as $file) { - if (($mtime = filemtime($file)) > $time) { - $time = $mtime; - } - } - self::$SIMPLEPIE_BUILD = $time; - - return self::$SIMPLEPIE_BUILD; - } elseif (file_exists(dirname(__FILE__) . '/Core.php')) { - self::$SIMPLEPIE_BUILD = filemtime(dirname(__FILE__) . '/Core.php'); - - return self::$SIMPLEPIE_BUILD; - } - - self::$SIMPLEPIE_BUILD = filemtime(__FILE__); + $mtime = @filemtime(dirname(__FILE__) . '/SimplePie.php'); // FreshRSS + self::$SIMPLEPIE_BUILD = $mtime ?: (filemtime(__FILE__) ?: 0); // FreshRSS return self::$SIMPLEPIE_BUILD; } From d3ee906e2c7cf6ea172d9f7a8c5e96a900a704c8 Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 02:39:00 +0200 Subject: [PATCH 6/7] fix for Atom feeds using namespace for type (#7) https://github.com/FreshRSS/FreshRSS/pull/1893 https://github.com/FreshRSS/FreshRSS/issues/1892 --- src/Misc.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Misc.php b/src/Misc.php index 145e0d29..298ace07 100644 --- a/src/Misc.php +++ b/src/Misc.php @@ -1876,8 +1876,14 @@ public static function atom_10_construct_type(array $attribs) */ public static function atom_10_content_construct_type(array $attribs) { + $type = ''; if (isset($attribs['']['type'])) { - $type = strtolower(trim($attribs['']['type'])); + $type = trim($attribs['']['type']); + } elseif (isset($attribs[\SimplePie\SimplePie::NAMESPACE_ATOM_10]['type'])) { // FreshRSS + $type = trim($attribs[\SimplePie\SimplePie::NAMESPACE_ATOM_10]['type']); + } + if ($type != '') { // FreshRSS + $type = strtolower($type); // FreshRSS switch ($type) { case 'text': return \SimplePie\SimplePie::CONSTRUCT_TEXT; From 2edfc227eec039872260f086a13284d63b45903f Mon Sep 17 00:00:00 2001 From: Alexandre Alapetite Date: Sun, 16 Jun 2024 09:21:32 +0200 Subject: [PATCH 7/7] Fix absolutize URL for several cases (#8) * Fix absolutize URL for several cases There were a number of bugs related to the fact that `Item::get_links()` and 
`Item::get_base()` call each-other, making a nice mess during initialisation. See tests. Furthermore, the standard Atom `self` link was not supported, wrongly falling back to `alternate`. In the same PR because otherwise the tests from both PRs would fail. * Minor style * Fix PHPStan * Improved comment --- src/Item.php | 56 ++++++++++++++++++++++++------------ src/SimplePie.php | 13 ++++++--- tests/Unit/EnclosureTest.php | 32 +++++++++++++++++++++ tests/Unit/ItemTest.php | 44 ++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 23 deletions(-) diff --git a/src/Item.php b/src/Item.php index 5d75a1f7..1e7a29f0 100644 --- a/src/Item.php +++ b/src/Item.php @@ -117,9 +117,27 @@ public function get_item_tags(string $namespace, string $tag) return null; } + /** + * Get base URL of the item itself. + * Returns `<xml:base>` or feed base URL. + * Similar to `Item::get_base()` but can safely be used during initialisation methods + * such as `Item::get_links()` (`Item::get_base()` and `Item::get_links()` call each other) + * and is not affected by enclosures. + * + * @param array $element + * @see get_base + */ + protected function get_own_base(array $element = []): string + { + if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) { + return $element['xml_base']; + } + return $this->feed->get_base(); + } + /** * Get the base URL value. - * Uses `<xml:base>`, or item link, or feed base URL. + * Uses `<xml:base>`, or item link, or enclosure link, or feed base URL. * * @param array $element * @return string @@ -812,27 +830,27 @@ public function get_links(string $rel = 'alternate') foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_10, 'link') as $link) { if (isset($link['attribs']['']['href'])) { $link_rel = (isset($link['attribs']['']['rel'])) ? 
$link['attribs']['']['rel'] : 'alternate'; - $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link)); + $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link)); } } foreach ((array) $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_ATOM_03, 'link') as $link) { if (isset($link['attribs']['']['href'])) { $link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate'; - $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($link)); + $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($link)); } } if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_10, 'link')) { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_090, 'link')) { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'link')) { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } 
if ($links = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_RSS_20, 'guid')) { if (!isset($links[0]['attribs']['']['isPermaLink']) || strtolower(trim($links[0]['attribs']['']['isPermaLink'])) === 'true') { - $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($links[0])); + $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_own_base($links[0])); } } @@ -1199,11 +1217,11 @@ public function get_enclosures() // PLAYER if ($player_parent = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) { if (isset($player_parent[0]['attribs']['']['url'])) { - $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0])); } } elseif ($player_parent = $parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'player')) { if (isset($player_parent[0]['attribs']['']['url'])) { - $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player_parent = $this->sanitize($player_parent[0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($player_parent[0])); } } @@ -1323,13 +1341,13 @@ public function get_enclosures() if ($thumbnails = $this->get_item_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) { foreach ($thumbnails as $thumbnail) { if (isset($thumbnail['attribs']['']['url'])) { - $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } } } elseif ($thumbnails = 
$parent->get_channel_tags(\SimplePie\SimplePie::NAMESPACE_MEDIARSS, 'thumbnail')) { foreach ($thumbnails as $thumbnail) { if (isset($thumbnail['attribs']['']['url'])) { - $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails_parent[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } } } @@ -1453,7 +1471,7 @@ public function get_enclosures() if (isset($content['attribs']['']['width'])) { $width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT); } - $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content)); // Checking the other optional media: elements. Priority: media:content, media:group, item, channel @@ -1712,9 +1730,9 @@ public function get_enclosures() // PLAYER if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { - $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])); } elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { - $player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player = $this->sanitize($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, 
$this->get_base($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])); } else { $player = $player_parent; } @@ -1804,14 +1822,14 @@ public function get_enclosures() // THUMBNAILS if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { - $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } if (is_array($thumbnails)) { $thumbnails = array_values(array_unique($thumbnails)); } } elseif (isset($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { foreach ($group['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { - $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } if (is_array($thumbnails)) { $thumbnails = array_values(array_unique($thumbnails)); @@ -1909,7 +1927,7 @@ public function get_enclosures() $width = $this->sanitize($content['attribs']['']['width'], \SimplePie\SimplePie::CONSTRUCT_TEXT); } if (isset($content['attribs']['']['url'])) { - $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $url = $this->sanitize($content['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content)); } // Checking the other optional media: elements. 
Priority: media:content, media:group, item, channel @@ -2064,7 +2082,7 @@ public function get_enclosures() // PLAYER if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'])) { if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'])) { - $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $player = $this->sanitize($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['player'][0])); } } else { $player = $player_parent; @@ -2120,7 +2138,7 @@ public function get_enclosures() if (isset($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'])) { foreach ($content['child'][\SimplePie\SimplePie::NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { if (isset($thumbnail['attribs']['']['url'])) { - $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI); + $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], \SimplePie\SimplePie::CONSTRUCT_IRI, $this->get_base($thumbnail)); } } if (is_array($thumbnails)) { diff --git a/src/SimplePie.php b/src/SimplePie.php index bd129ebb..7e9d37b3 100644 --- a/src/SimplePie.php +++ b/src/SimplePie.php @@ -2457,8 +2457,9 @@ public function get_image_tags(string $namespace, string $tag) /** * Get the base URL value from the feed * - * Uses `<xml:base>` if available, otherwise uses the first link in the - * feed, or failing that, the URL of the feed itself. + * Uses `<xml:base>` if available, + * otherwise uses the first 'self' link or the first 'alternate' link of the feed, + * or failing that, the URL of the feed itself. 
* * @see get_link * @see subscribe_url @@ -2470,8 +2471,12 @@ public function get_base(array $element = []) { if (!empty($element['xml_base_explicit']) && isset($element['xml_base'])) { return $element['xml_base']; - } elseif ($this->get_link() !== null) { - return $this->get_link(); + } + if (($link = $this->get_link(0, 'self')) !== null) { + return $link; + } + if (($link = $this->get_link(0, 'alternate')) !== null) { + return $link; } return $this->subscribe_url() ?? ''; diff --git a/tests/Unit/EnclosureTest.php b/tests/Unit/EnclosureTest.php index 25fa9df6..bfd8ea46 100644 --- a/tests/Unit/EnclosureTest.php +++ b/tests/Unit/EnclosureTest.php @@ -88,6 +88,38 @@ public static function getLinkProvider(): iterable , 'http://example.net/link?a=%22b%22&c=%3Cd%3E', ]; + + yield 'Test RSS 2.0 with channel link and enclosure' => [ + << + + http://example.net/tests/ + + /tests/3/ + + + + +XML + , + 'http://example.net/images/3.jpg', + ]; + + yield 'Test RSS 2.0 with Atom channel link and enclosure' => [ + << + + + + /tests/4/ + + + + +XML + , + 'http://example.net/images/4.jpg', + ]; } /** diff --git a/tests/Unit/ItemTest.php b/tests/Unit/ItemTest.php index 458172df..a46abfd9 100644 --- a/tests/Unit/ItemTest.php +++ b/tests/Unit/ItemTest.php @@ -3262,6 +3262,50 @@ public static function getPermalinkDataProvider(): array , 'http://example.com/', ], + 'Test RSS 2.0 with channel link and enclosure from another domain' => [ +<< + + http://example.net/tests/ + + /tests/1/ + + + + +XML + , + 'http://example.net/tests/1/', + ], + 'Test RSS 2.0 with Atom channel link and relative enclosure' => [ +<< + + + + /tests/2/ + + + + +XML + , + 'http://example.net/tests/2/', + ], + 'Test RSS 2.0 with xml:base and enclosure from another domain' => [ +<< + + + /tests/3/ + + + + +XML + , + 'http://example.net/tests/3/', + ], 'Test Atom 1.0 xmlbase 1' => [ <<