Skip to content

Commit

Permalink
Updated scraper to work with site changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
Dustin Holden committed Sep 27, 2015
1 parent a190759 commit b6adbef
Showing 1 changed file with 36 additions and 22 deletions.
58 changes: 36 additions & 22 deletions scraper.php
@@ -1,34 +1,48 @@
<?php
<?PHP
require 'scraperwiki.php';
require 'scraperwiki/simple_html_dom.php';
$maxCells = 6;
$minCapacity = 0;
$maxCapacity = 99999;

for ($cells = 1; $cells <= $maxCells; $cells++)
{
print "Retrieving Lipo list for $cells cells\n";
/**
 * Write a message to standard output, terminated by a newline.
 *
 * @param mixed $str Value to print; it is converted to a string before output.
 */
function _log($str)
{
    $line = $str . "\n";
    print $line;
}

/* Sanity limit: maximum number of pages to scrape. As of 09/27/2015 there are only 49 pages. */
$max_pages = 100;

for($page = 1; $page <= $max_pages; $page++) {
_log('Retrieving Lipo Page: ' . $page);

$url = "http://www.hobbyking.com/hobbyking/store/lithium_polymer_battery_configuration.asp?con=$cells&cap1=$minCapacity&cap2=$maxCapacity&location=INT";
$url = 'http://www.hobbyking.com/hobbyking/store/lipofinderajax.asp?warehouseid=HK&column1=row2&column2=row4' .
'&column3=row3&pCapacityMin=0&pCapacityMax=6000&pCapoverMax=0&pConfig=1&pDischargeMin=0&pDischargeMax=110'.
'&pWeightMin=0&pWeightMax=2000&pAmin=0&pAmax=350&pBmin=0&pBmax=350&pCmin=0&pCmax=350&sqlcount=+top+(20)+&pageNumber=' . $page;

$html = scraperWiki::scrape($url);
//print $html . "\n";

$dom = new simple_html_dom();
$dom->load($html);

$DOM_batteries = $dom->find('table.result td table tbody tr');

// Get table of batteries:
// Each row contains the following (amongst other stuff too):
// <SPAN id="tst11895" onClick="toggle(11895)" style="cursor:pointer">+</SPAN>
// Where the 11895 is the product Id we are looking for.
// So look for span tags that contain 'tst' in the id attribute.
$batteriesTableDom = $dom->find("span[id*=tst]");

foreach ($batteriesTableDom as $data)
{
$id = intval(str_replace("tst", "", $data->id));
scraperwiki::save(array("id"), array( "id" => $id, "cells" => $cells));
//print $id . "\n";
/* Remove first element (The sort row) */
array_shift($DOM_batteries);

// Store one record per result row: the numeric product id and the cell-count text.
foreach($DOM_batteries as $data) {
// The href of the first node inside the row's child at index 1 has the
// 'uh_viewItem.asp?idProduct=' text removed; what remains is cast to an integer id.
$id = intval(str_replace('uh_viewItem.asp?idProduct=', '', $data->children(1)->childNodes(0)->getAttribute('href')));
// Trimmed plain text of the row's child at index 2, saved under the 'cells' key.
// NOTE(review): presumably this column is the cell configuration - confirm against the page markup.
$cells = trim($data->children(2)->plaintext);

// The first argument names 'id' as the key field - presumably the unique key, so a
// re-scraped product replaces its earlier record; confirm against the ScraperWiki API.
scraperwiki::save(['id'], [
'id' => $id,
'cells' => $cells
]);

// Echo the saved id as a progress line.
_log($id);
}

/*
 * Check to see if we're on the last page: read the plain text of the element at
 * index 4 among the '.resultPager' matches as an integer and end the script once
 * the current page number reaches it.
 * NOTE(review): presumably that element holds the total page count, and the page
 * always yields at least five '.resultPager' matches - confirm against the live markup.
 */
$DOM_pages = $dom->find('.resultPager');
if($page >= intval($DOM_pages[4]->plaintext)) {
_log('Completed');
die();
}
}
?>

// Reached only when the page loop runs through all $max_pages iterations without
// the last-page check ending the script first.
_log('Error: Max pages reached!');
die();

0 comments on commit b6adbef

Please sign in to comment.