[PcGamerBridge] Use meta tags to generate feed contents (#2271)

RSS-Bridge · Oct 19, 2021 · 8f98e07 · 8f98e07
1 parent 8d0fc54
commit 8f98e07
Showing 1 changed file with 18 additions and 28 deletions.
diff --git a/bridges/PcGamerBridge.php b/bridges/PcGamerBridge.php
@@ -2,43 +2,33 @@
 class PcGamerBridge extends BridgeAbstract
 {
 	const NAME = 'PC Gamer';
-	const URI = 'https://www.pcgamer.com/archive/';
-	const DESCRIPTION = 'PC Gamer Most Read Stories';
-	const CACHE_TIMEOUT = 3600;
+	const URI = 'https://www.pcgamer.com/';
+	const DESCRIPTION = 'PC Gamer is your source for exclusive reviews, demos, '
+		. 'updates and news on all your favorite PC gaming franchises.';
 	const MAINTAINER = 'IceWreck, mdemoss';
 
 	public function collectData()
 	{
 		$html = getSimpleHTMLDOMCached($this->getURI(), 300);
-		$stories = $html->find('ul.basic-list li.day-article');
-		$i = 0;
-		// Find induvidual stories in the archive page
+		$stories = $html->find('a.article-link');
 		foreach ($stories as $element) {
-			if($i == 15) break;
-			$item['uri'] = $element->find('a', 0)->href;
-			// error_log(print_r($item['uri'], TRUE));
+			$item = array();
+			$item['uri'] = $element->href;
 			$articleHtml = getSimpleHTMLDOMCached($item['uri']);
-			$item['title'] = $element->find('a', 0)->plaintext;
-			$item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content);
-			$item['author'] = $articleHtml->find('span.by-author a', 0)->plaintext;
-
-			// Get the article content
-			$articleContents = $articleHtml->find('#article-body', 0);

-			/*
-				By default the img src has a link to an error image and then the actual image
-				is added in by JS. So we replace the error image with the actual full size image
-				whoose link is in one of the attributes of the img tag
-			*/
-			foreach($articleContents->find('img') as $img) {
-				$imgsrc = $img->getAttribute('data-original-mos');
-				// error_log($imgsrc);
-				$img->src = $imgsrc;
-			}
-
-			$item['content'] = $articleContents;
+			// Build the item from the article's <meta> tags instead of scraping its body markup.
+			$item['title'] = $articleHtml->find('meta[name=parsely-title]', 0)->content;
+			$item['content'] = html_entity_decode($articleHtml->find('meta[name=description]', 0)->content);
+			$item['author'] = $articleHtml->find('meta[name=parsely-author]', 0)->content;
+			$item['enclosures'][] = $articleHtml->find('meta[name=parsely-image-url]', 0)->content;
+			/* Every article carries two extra tags of unknown purpose; one resembles the common
+			"guide" tag, so both are dropped. Tags are trimmed and the list is re-indexed. */
+			$item['categories'] = array_values(array_diff(
+				array_map('trim', explode(',', $articleHtml->find('meta[name=parsely-tags]', 0)->content)),
+				array('van_buying_guide_progressive', 'serversidehawk')
+			));
+			$item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content);
 			$this->items[] = $item;
-			$i++;
 		}
 	}
 }