Permalink
Browse files

Moved the setUrlToNextPage() method from Crawler to Webpage for higher cohesion

  • Loading branch information...
1 parent 7ae7957 commit 05575b263144bd4d81a246bcf8ebd00846598c32 @gooh gooh committed Mar 19, 2012
Showing 2 changed files with 21 additions and 15 deletions.
  1. +11 −14 src/app/mvc/model/ChatSearch/Crawler.php
  2. +10 −1 src/app/mvc/model/ChatSearch/Webpage.php
View
25 src/app/mvc/model/ChatSearch/Crawler.php
@@ -7,6 +7,11 @@ class Crawler
protected $webpage;
/**
+ * @var integer
+ */
+ protected $maxScrapes = 25;
+
+ /**
* @return void
*/
public function __construct(Webpage $chatSearchUrl)
@@ -20,26 +25,18 @@ public function __construct(Webpage $chatSearchUrl)
public function findAllQuestionIds()
{
$allLinks = array();
- $noResultsFound = false;
- while (!$noResultsFound) {
- $this->setUrlToNextPage();
+ $maxScrapes = $this->maxScrapes;
+ do {
+ $this->webpage->setUrlToNextPage();
$links = $this->scrapeCurrentUrlForQuestionIds();
- $noResultsFound = empty($links);
$allLinks = array_merge($allLinks, $links);
- }
+ } while (
+ --$maxScrapes !== 0 && !empty($links)
+ );
return array_unique($allLinks);
}
/**
- * @return void
- */
- protected function setUrlToNextPage()
- {
- $query = $this->webpage->getQuery();
- $query['page'] = $query['page'] + 1;
- }
-
- /**
* @return array
*/
protected function scrapeCurrentUrlForQuestionIds()
View
11 src/app/mvc/model/ChatSearch/Webpage.php
@@ -7,7 +7,7 @@ class Webpage extends Url
protected $siteUrl = 'http://chat.stackoverflow.com/search';
/**
- * @var QueryString
+ * @var array
*/
protected $query = array(
'q' => 'cv-pls',
@@ -24,4 +24,13 @@ public function __construct()
parent::__construct($this->siteUrl);
$this->setQuery(new QueryString($this->query));
}
+
+ /**
+ * @return void
+ */
+ public function setUrlToNextPage()
+ {
+ $query = $this->getQuery();
+ $query['page'] = $query['page'] + 1;
+ }
}

0 comments on commit 05575b2

Please sign in to comment.