Skip to content

Commit

Permalink
[MediapartBridge] Fix article parsing
Browse files Browse the repository at this point in the history
* Only process article items; fixes issue #1292
  • Loading branch information
killruana authored and teromene committed Sep 16, 2019
1 parent f12f6a2 commit c694810
Showing 1 changed file with 27 additions and 22 deletions.
49 changes: 27 additions & 22 deletions bridges/MediapartBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,34 @@ public function collectData() {
protected function parseItem($newsItem) {
$item = parent::parseItem($newsItem);

// Enable single page mode?
if ($this->getInput('single_page_mode') === true) {
$item['uri'] .= '?onglet=full';
}
// Mediapart provides multiple types of content.
// We only process items related to the newspaper.
// See issue #1292 - https://github.com/RSS-Bridge/rss-bridge/issues/1292
if (strpos($item['uri'], self::URI . 'journal/') === 0) {
// Enable single page mode?
if ($this->getInput('single_page_mode') === true) {
$item['uri'] .= '?onglet=full';
}

// If a session cookie is defined, get the full article
$mpsessid = $this->getInput('mpsessid');
if (!empty($mpsessid)) {
// Set the session cookie
$opt = array();
$opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid;

// Get the page
$articlePage = getSimpleHTMLDOM(
$newsItem->link . '?onglet=full',
array(),
$opt);

// If a session cookie is defined, get the full article
$mpsessid = $this->getInput('mpsessid');
if (!empty($mpsessid)) {
// Set the session cookie
$opt = array();
$opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid;

// Get the page
$articlePage = getSimpleHTMLDOM(
$newsItem->link . '?onglet=full',
array(),
$opt);

// Extract the article content
$content = $articlePage->find('div.content-article', 0)->innertext;
$content = sanitize($content);
$content = defaultLinkTo($content, static::URI);
$item['content'] .= $content;
// Extract the article content
$content = $articlePage->find('div.content-article', 0)->innertext;
$content = sanitize($content);
$content = defaultLinkTo($content, static::URI);
$item['content'] .= $content;
}
}

return $item;
Expand Down

0 comments on commit c694810

Please sign in to comment.