Skip to content

Commit

Permalink
Fix issue #10 : On peut désormais charger les articles complets pour …
Browse files Browse the repository at this point in the history
…des flux incomplets... Nécessite des tests plus approfondis
  • Loading branch information
marienfressinaud committed Nov 6, 2012
1 parent 6dfab0b commit 87ae244
Show file tree
Hide file tree
Showing 7 changed files with 1,788 additions and 5 deletions.
1 change: 1 addition & 0 deletions app/App_FrontController.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public function init () {
}

/**
 * Loads the library files this front controller depends on.
 *
 * The files are required from LIB_PATH in a fixed order.
 */
private function loadLibs () {
	$libs = array ('lib_simple_html_dom', 'lib_rss', 'lib_simplepie');

	foreach ($libs as $lib) {
		require (LIB_PATH . '/' . $lib . '.php');
	}
}
Expand Down
18 changes: 15 additions & 3 deletions app/models/Feed.php
Original file line number Diff line number Diff line change
Expand Up @@ -79,21 +79,33 @@ public function load () {
$feed->init ();

$title = $feed->get_title ();
$this->loadEntries ($feed);
$this->_name (!is_null ($title) ? $title : $this->url);
$this->_website ($feed->get_link ());
$this->_description ($feed->get_description ());
$this->loadEntries ($feed);
}
}
private function loadEntries ($feed) {
$entries = array ();

foreach ($feed->get_items () as $item) {
$title = $item->get_title ();
$author = $item->get_author ();
$content = $item->get_content ();
$link = $item->get_permalink ();
$date = strtotime ($item->get_date ());

// Gestion du contenu
// On cherche à récupérer les articles en entier... même si le flux ne le propose pas
$path = get_path ($this->website ());
if ($path) {
try {
$content = get_content_by_parsing ($item->get_permalink (), $path);
} catch (Exception $e) {
$content = $item->get_content ();
}
} else {
$content = $item->get_content ();
}

$entry = new Entry (
$this->id (),
Expand Down
42 changes: 42 additions & 0 deletions lib/lib_rss.php
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,45 @@ function getFeed ($outline, $cat_id) {

return $feed;
}

/*
 * Tells whether the content of a given site has to be parsed directly
 * from the site itself.
 * The lookup relies on a table of known sites, loaded from
 * PUBLIC_PATH/data/Sites.array.php.
 * Returns the path (HTML id and class) used to extract the content,
 * or false when the site is not listed.
 */
function get_path ($url) {
	$known_sites = include (PUBLIC_PATH . '/data/Sites.array.php');

	return isset ($known_sites[$url]) ? $known_sites[$url] : false;
}


/*
 * Strips the presentational attributes (style, class, id) from an HTML
 * element by setting each of them to null.
 */
function good_bye_extra ($element) {
	foreach (array ('style', 'class', 'id') as $attribute) {
		$element->$attribute = null;
	}
}
/*
 * Fetches the full content of an article for a feed that only provides
 * partial content.
 *
 * $url  : address of the page to download and parse
 * $path : selector of the element holding the article content
 *
 * Returns the HTML of the first element matching $path.
 * Throws an Exception when the page cannot be loaded, when no element
 * matches $path, or when the matching element is empty, so that callers
 * can fall back to the content provided by the feed.
 */
function get_content_by_parsing ($url, $path) {
	$dom = new simple_html_dom ();
	$dom->set_callback ('good_bye_extra');

	if ($dom->load_file ($url) === false) {
		throw new Exception ('Unable to load ' . $url);
	}

	// find () yields no object when nothing matches the selector; calling
	// __toString () on that result would be a fatal error that the
	// caller's catch (Exception) cannot intercept.
	$node = $dom->find ($path, 0);
	$html = is_object ($node) ? $node->__toString () : '';

	// The HTML is now a plain string, so the DOM can be released.
	$dom->clear ();

	if (!$html) {
		throw new Exception ('No content found at "' . $path . '" in ' . $url);
	}

	return $html;
}
Loading

0 comments on commit 87ae244

Please sign in to comment.