Skip to content

Commit

Permalink
Modifying ReutersBridge (#5)
Browse files Browse the repository at this point in the history
[ReutersBridge] Add all article from 'Editor\'s Highlight' to the feed, more categories, author name, full article text.
  • Loading branch information
csisoap authored and hollowleviathan committed Oct 8, 2020
1 parent a58b12e commit d4a689d
Showing 1 changed file with 160 additions and 54 deletions.
214 changes: 160 additions & 54 deletions bridges/ReutersBridge.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php
class ReutersBridge extends BridgeAbstract {

class ReutersBridge extends BridgeAbstract
{
const MAINTAINER = 'hollowleviathan, spraynard, csisoap';
const NAME = 'Reuters Bridge';
const URI = 'https://reuters.com/';
Expand All @@ -9,92 +9,198 @@ class ReutersBridge extends BridgeAbstract {
private $feedName = self::NAME;

// Wire item types that processData() keeps; items of any other 'wireitem_type' are dropped.
const ALLOWED_WIREITEM_TYPES = array(
'story'
'story',
'headlines'
);

// Template types that processData() collects from the allowed wire items.
const ALLOWED_TEMPLATE_TYPES = array(
'story'
);

const PARAMETERS = array(array(
'feed' => array(
'name' => 'News Feed',
'type' => 'list',
'exampleValue' => 'World',
'title' => 'Reuters feed. World, US, Tech...',
'values' => array(
'Tech' => 'tech',
'Wire' => 'wire',
'Health' => 'health',
'Business' => 'business',
'World' => 'world',
'Politics' => 'politics',
'Science' => 'science',
'Energy' => 'energy',
'Aerospace and Defence' => 'aerospace',
'China' => 'china',
'Top News' => 'home/topnews',
'Lifestyle' => 'lifestyle',
'Markets' => 'markets',
'Sports' => 'sports',
'Pic of the Day' => 'pictures', // This has a different configuration than the others.
'USA News' => 'us'
)
),
));

private function getJson($feedname) {
// Bridge input definition: a single 'feed' list parameter.
// Keys of 'values' are the labels of the list entries; the mapped value is read in
// collectData() via getInput('feed') and appended to the wire API URL by getJson().
const PARAMETERS = array(
array(
'feed' => array(
'name' => 'News Feed',
'type' => 'list',
'exampleValue' => 'World',
'title' => 'Reuters feed. World, US, Tech...',
'values' => array(
'Tech' => 'tech',
'Wire' => 'wire',
'Health' => 'health',
'Business' => 'business',
'World' => 'world',
'Politics' => 'politics',
'Science' => 'science',
'Lifestyle' => 'life',
'Energy' => 'energy',
'Aerospace and Defence' => 'aerospace',
'China' => 'china',
'Top News' => 'home/topnews',
'Markets' => 'markets',
'Sports' => 'sports',
'Pic of the Day' => 'pictures', // This has a different configuration than the others.
'USA News' => 'us',
),
),
),
);

/**
 * Downloads a feed from the Reuters wire API and decodes it.
 *
 * @param string $feedname Feed path segment appended to the tabbar feeds endpoint.
 * @return mixed Result of json_decode() in associative-array mode.
 */
private function getJson($feedname)
{
    return json_decode(
        getContents("https://wireapi.reuters.com/v8/feed/rapp/us/tabbar/feeds/$feedname"),
        true
    );
}

public function getName() {
/**
 * Returns the current feed title.
 *
 * This is the bridge NAME until collectData() overwrites it with
 * "<wire_name> | Reuters" taken from the API response.
 */
public function getName()
{
return $this->feedName;
}

public function collectData() {
$feed = $this->getInput('feed');
$data = $this->getJson($feed);
$reuters_wireitems = $data['wireitems'];
$this->feedName = $data['wire_name'] . ' | Reuters';
/**
 * Reduces a list of wire items to a flat list of story templates.
 *
 * First keeps the wire items whose 'wireitem_type' is listed in
 * ALLOWED_WIREITEM_TYPES, then gathers from their 'templates' every entry
 * whose 'type' is listed in ALLOWED_TEMPLATE_TYPES.
 *
 * @param array $data Wire items; each is read for 'wireitem_type' and 'templates'.
 * @return array The matching templates, merged into one array with array_merge().
 */
private function processData($data)
{
/**
* Gets a list of wire items which are groups of templates
*/
$reuters_allowed_wireitems = array_filter(
$reuters_wireitems, function ($wireitem) {
return in_array($wireitem['wireitem_type'], self::ALLOWED_WIREITEM_TYPES);
$data, function ($wireitem) {
return in_array(
$wireitem['wireitem_type'],
self::ALLOWED_WIREITEM_TYPES
);
}
);

/**
* Gets a list of "Templates", which is data containing a story
*/
/*
* Gets a list of "Templates", which is data containing a story
*/
$reuters_wireitem_templates = array_reduce(
$reuters_allowed_wireitems, function (array $carry, array $wireitem) {
$reuters_allowed_wireitems,
function (array $carry, array $wireitem) {
$wireitem_templates = $wireitem['templates'];
return array_merge(
$carry, array_filter(
$wireitem_templates, function (array $template_data) {
return in_array($template_data['type'], self::ALLOWED_TEMPLATE_TYPES);
$carry,
array_filter(
$wireitem_templates, function (
array $template_data
) {
return in_array(
$template_data['type'],
self::ALLOWED_TEMPLATE_TYPES
);
}
)
);
}, array()
},
array()
);

return $reuters_wireitem_templates;
}

/**
 * Fetches a single article from the wire API and renders its body and author line.
 *
 * Any part of the response that is missing or malformed is treated as empty,
 * so the method returns empty strings instead of raising warnings; the caller
 * can then fall back to other data for the description.
 *
 * @param string $feed_uri API path of the article, appended to the v8 base URL.
 * @return array Array with keys 'content' (HTML string) and 'author'
 *               (author names joined with ', ').
 */
private function getArticle($feed_uri)
{
    // This makes another request to the API to get the full article and the authors' names.
    $uri = "https://wireapi.reuters.com/v8$feed_uri";
    $response = json_decode(getContents($uri), true);

    // json_decode() yields null on invalid JSON; never hand a non-array to processData().
    $wireitems = array();
    if (is_array($response) && isset($response['wireitems']) && is_array($response['wireitems'])) {
        $wireitems = $response['wireitems'];
    }

    // reset() needs a variable (by-reference argument) and returns false on an empty array.
    $templates = $this->processData($wireitems);
    $first = reset($templates);
    $story = (is_array($first) && isset($first['story']) && is_array($first['story']))
        ? $first['story']
        : array();

    $authorlist = (isset($story['authors']) && is_array($story['authors']))
        ? $story['authors']
        : array();
    $body_items = (isset($story['body_items']) && is_array($story['body_items']))
        ? $story['body_items']
        : array();

    // "Name A, Name B, Name C" without a trailing separator.
    $author = implode(', ', array_column($authorlist, 'name'));

    $description = '';
    foreach ($body_items as $content) {
        $type = isset($content['type']) ? $content['type'] : '';
        // Not every body item carries a plain 'content' string (inline_items uses 'items').
        $text = (isset($content['content']) && is_string($content['content']))
            ? $content['content']
            : '';

        if ($text !== '' && preg_match('/\.(png|jpg)/i', $text) === 1) {
            // Content that names a PNG/JPG file is treated as an image URL (any letter case).
            $description .= "<img src=\"$text\">";
        } elseif ($type === 'inline_items') {
            // Paragraph split into fragments (e.g. around brand or company names): re-join them.
            $fragments = (isset($content['items']) && is_array($content['items']))
                ? $content['items']
                : array();
            $description .= '<p>';
            foreach ($fragments as $fragment) {
                if (isset($fragment['content'])) {
                    $description .= $fragment['content'];
                }
            }
            $description .= '</p>';
        } elseif ($text === '') {
            // Nothing to render; avoids emitting empty <h3></h3> / <p></p> elements.
            continue;
        } elseif ($type === 'heading'
            || ($text === strtoupper($text) && $text !== strtolower($text))
        ) {
            // Explicit headings, plus all-caps lines. The strtolower() test requires at
            // least one letter so that "2020" or "* * *" stay ordinary paragraphs.
            $description .= "<h3>$text</h3>";
        } else {
            $description .= "<p>$text</p>";
        }
    }

    return array(
        'content' => $description,
        'author' => $author,
    );
}

// Check to see if there have Editor's Highlight sections in the first index.
if($reuters_wireitems[0]['wireitem_type'] == 'headlines') {
$top_highlight = $reuters_wireitems[0]['templates'][1]['headlines'];
$reuters_wireitem_templates = array_merge($top_highlight, $reuters_wireitem_templates);
/**
 * Builds the feed items for the selected Reuters feed.
 *
 * Downloads the feed chosen through the 'feed' input, sets the feed name from
 * the response's 'wire_name', extracts the story templates with processData(),
 * prepends the headlines of the first wire item when it is of type 'headlines',
 * and appends one entry to $this->items per story.
 */
public function collectData()
{
$feed = $this->getInput('feed');
$data = $this->getJson($feed);
$reuters_wireitems = $data['wireitems'];
$this->feedName = $data['wire_name'] . ' | Reuters';
$processedData = $this->processData($reuters_wireitems);

// Merge all articles from Editor's Highlight section into existing array of templates.
$top_section = reset($reuters_wireitems);
if ($top_section['wireitem_type'] == 'headlines') {
// NOTE(review): assumes the headlines list is always the second template (index 1) - confirm against the API.
$top_articles = $top_section['templates'][1]['headlines'];
$processedData = array_merge($top_articles, $processedData);
}

foreach ($reuters_wireitem_templates as $story) {
$item['content'] = $story['story']['lede'];
foreach ($processedData as $story) {
$item['uid'] = $story['story']['usn'];
$article_uri = $story['template_action']['api_path'];
// getArticle() performs one additional API request for every story in the feed.
$content_detail = $this->getArticle($article_uri);
$description = $content_detail['content'];
$author = $content_detail['author'];
$item['author'] = $author;
if (!(bool) $description) {
$description = $story['story']['lede']; // Just in case the content doesn't have anything.
}
// $description = $story['story']['lede'];
$image_url = $story['image']['url'];
if (!(bool) $image_url) {
// $image_url =
// 'https://s4.reutersmedia.net/resources_v2/images/rcom-default.png'; //Just in case if there aren't any pictures.
$item['content'] = $description;
} else {
// Put the story's lead image in front of the article text.
$item['content'] = "<img src=\"$image_url\"> \n
$description";
}
$item['title'] = $story['story']['hed'];
$item['timestamp'] = $story['story']['updated_at'];
$item['uri'] = $story['template_action']['url'];

$this->items[] = $item;
}
}
Expand Down

0 comments on commit d4a689d

Please sign in to comment.