diff --git a/scraper.php b/scraper.php index af21dc0..0a1472e 100644 --- a/scraper.php +++ b/scraper.php @@ -1,34 +1,48 @@ -load($html); + + $DOM_batteries = $dom->find('table.result td table tbody tr'); - // Get table of batteries: - // Each row contains the following (amongst other stuff too): - // + - // Where the 11895 is the product Id we are looking for. - // So look for span tags that contain 'tst' in the id attribute. - $batteriesTableDom = $dom->find("span[id*=tst]"); - - foreach ($batteriesTableDom as $data) - { - $id = intval(str_replace("tst", "", $data->id)); - scraperwiki::save(array("id"), array( "id" => $id, "cells" => $cells)); - //print $id . "\n"; + /* Remove first element (The sort row) */ + array_shift($DOM_batteries); + + foreach($DOM_batteries as $data) { + $id = intval(str_replace('uh_viewItem.asp?idProduct=', '', $data->children(1)->childNodes(0)->getAttribute('href'))); + $cells = trim($data->children(2)->plaintext); + + scraperwiki::save(['id'], [ + 'id' => $id, + 'cells' => $cells + ]); + + _log($id); } + + /* Check to see if we're on the last page */ + $DOM_pages = $dom->find('.resultPager'); + if($page >= intval($DOM_pages[4]->plaintext)) { + _log('Completed'); + die(); + } } -?> \ No newline at end of file + +_log('Error: Max pages reached!'); +die(); \ No newline at end of file