Skip to content

Commit

Permalink
[PcGamerBridge] Use meta tags to generate feed contents (#2271)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdemoss committed Oct 19, 2021
1 parent 8d0fc54 commit 8f98e07
Showing 1 changed file with 18 additions and 28 deletions.
46 changes: 18 additions & 28 deletions bridges/PcGamerBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,33 @@
/**
 * RSS-Bridge bridge for PC Gamer.
 *
 * Collects the article links found on the PC Gamer landing page and builds
 * one feed item per article from the <meta> tags of the article page
 * (parsely-title, description, parsely-author, parsely-image-url,
 * parsely-tags, pub_date) rather than from the article body markup.
 */
class PcGamerBridge extends BridgeAbstract
{
    const NAME = 'PC Gamer';
    const URI = 'https://www.pcgamer.com/';
    const DESCRIPTION = 'PC Gamer is your source for exclusive reviews, demos,
updates and news on all your favorite PC gaming franchises.';
    const MAINTAINER = 'IceWreck, mdemoss';
    const CACHE_TIMEOUT = 3600;

    /**
     * Populate $this->items with one entry per article linked from the
     * landing page.
     *
     * Each item always carries 'uri'; 'title', 'content', 'author',
     * 'enclosures', 'categories' and 'timestamp' are only set when the
     * corresponding meta tag is present and usable, so a page missing a tag
     * yields a partial item instead of null-filled fields.
     *
     * @return void
     */
    public function collectData()
    {
        // NOTE(review): the second argument is presumably a cache duration
        // in seconds - confirm against getSimpleHTMLDOMCached().
        $html = getSimpleHTMLDOMCached($this->getURI(), 300);
        if (!$html) {
            return;
        }

        foreach ($html->find('a.article-link') as $element) {
            $uri = $element->href;
            if (!is_string($uri) || $uri === '') {
                // An anchor without a usable href cannot be fetched.
                continue;
            }

            $articleHtml = getSimpleHTMLDOMCached($uri);
            if (!$articleHtml) {
                // Skip articles that could not be loaded instead of calling
                // find() on a non-object.
                continue;
            }

            $item = array();
            $item['uri'] = $uri;

            // Relying on meta tags ought to be more reliable than scraping
            // the article body.
            $title = $this->getMetaContent($articleHtml, 'parsely-title');
            if ($title !== null) {
                $item['title'] = $title;
            }

            $description = $this->getMetaContent($articleHtml, 'description');
            if ($description !== null) {
                $item['content'] = html_entity_decode($description);
            }

            $author = $this->getMetaContent($articleHtml, 'parsely-author');
            if ($author !== null) {
                $item['author'] = $author;
            }

            $image = $this->getMetaContent($articleHtml, 'parsely-image-url');
            if ($image !== null) {
                $item['enclosures'] = array($image);
            }

            $tags = $this->getMetaContent($articleHtml, 'parsely-tags');
            if ($tags !== null) {
                /* Every article carries two extra tags; because one of them
                collides with another common tag, "guide", both are
                removed. array_values() re-indexes the result because
                array_diff() preserves the original keys. */
                $item['categories'] = array_values(array_diff(
                    array_filter(array_map('trim', explode(',', $tags)), 'strlen'),
                    array('van_buying_guide_progressive', 'serversidehawk')
                ));
            }

            $published = $this->getMetaContent($articleHtml, 'pub_date');
            if ($published !== null) {
                $timestamp = strtotime($published);
                // strtotime() returns false for unparseable dates.
                if ($timestamp !== false) {
                    $item['timestamp'] = $timestamp;
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * Read the content attribute of the first <meta name="..."> tag.
     *
     * @param object $html Parsed document exposing find(selector, index).
     * @param string $name Value of the meta tag's name attribute.
     * @return string|null The attribute value, or null when the tag or its
     *                     content attribute is missing or empty.
     */
    private function getMetaContent($html, $name)
    {
        $meta = $html->find('meta[name=' . $name . ']', 0);
        if (!$meta) {
            return null;
        }

        $content = $meta->content;

        return (is_string($content) && $content !== '') ? $content : null;
    }
}

0 comments on commit 8f98e07

Please sign in to comment.