Skip to content

Commit

Permalink
Fixed ScrapeAppReviews failing to parse review totals over 1,000.
Browse files Browse the repository at this point in the history
  • Loading branch information
Bilge committed Jan 12, 2020
1 parent dd0f24a commit 1b3bd2c
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 16 deletions.
25 changes: 12 additions & 13 deletions src/Resource/ScrapeAppReviews.php
Expand Up @@ -28,7 +28,7 @@ final class ScrapeAppReviews implements AsyncResource, Url
'filter_offtopic_activity' => 0,
'start_date' => -1,
'end_date' => -1,
// Must be set to "include" otherwise start/end date are ignored.
// Must be set to 'include' otherwise start/end date are ignored.
'date_range_type' => 'include',
];

Expand All @@ -50,7 +50,7 @@ public function fetchAsync(ImportConnector $connector): Iterator

return new AsyncGameReviewsRecords(
new Producer(function (\Closure $callable) use ($connector, $total): \Generator {
while (true) {
do {
/** @var HttpResponse $response */
$response = yield $connector->fetchAsync(new AsyncHttpDataSource($this->getUrl()));

Expand All @@ -60,23 +60,22 @@ public function fetchAsync(ImportConnector $connector): Iterator

$json = json_decode($response->getBody(), true);

// Stop condition is an empty recommendation list. This is quicker and easier than parsing HTML.
if (!$json['recommendationids']) {
break;
}

if (isset($json['review_score'])) {
$total->resolve($this->parseResultsTotal($json['review_score']));
}

$reviews = GameReviewsParser::parse(new Crawler($json['html']));
if ($json['recommendationids']) {
$reviews = GameReviewsParser::parse(new Crawler($json['html']));

foreach ($reviews as $review) {
yield $callable($review);
foreach ($reviews as $review) {
yield $callable($review);
}
}

$this->query['cursor'] = $json['cursor'];
}

// Stop condition is an empty recommendation list. This is quicker and easier than parsing HTML.
} while ($json['recommendationids']);
}),
$total->promise(),
$this
Expand All @@ -90,8 +89,8 @@ public function getUrl(): string

private function parseResultsTotal(string $reviewScore): int
{
if (preg_match('[<b>(\\d+)</b>]', $reviewScore, $matches)) {
return (int)$matches[1];
if (preg_match('[<b>([\\d,]+)</b>]', $reviewScore, $matches)) {
return (int)strtr($matches[1], [',' => '']);
}

throw new ParserException('Failed to parse results total.');
Expand Down
38 changes: 35 additions & 3 deletions test/Functional/ScrapeAppReviewsTest.php
Expand Up @@ -26,7 +26,21 @@ protected function setUp(): void
}

/**
* @see https://store.steampowered.com/app/719070/BlowOut/
* @link https://store.steampowered.com/app/1150670/Sorcerer_Lord/
*/
public function testZeroReviews(): \Generator
{
    // Import the reviews collection for app 1150670.
    $resource = new ScrapeAppReviews(1150670);
    $specification = new AsyncImportSpecification($resource);

    /** @var AsyncGameReviewsRecords $reviews */
    $reviews = $this->porter->importAsync($specification)->findFirstCollection();

    // Advance before reading the total, otherwise exceptions would be masked.
    // No record is expected, so advancing must report false.
    $hasRecord = yield $reviews->advance();
    self::assertFalse($hasRecord);

    // The reported total must be exactly zero.
    $total = yield $reviews->getTotal();
    self::assertSame(0, $total);
}

/**
* @link https://store.steampowered.com/app/719070/BlowOut/
*/
public function testOnePage(): \Generator
{
Expand All @@ -52,7 +66,7 @@ public function testOnePage(): \Generator
/**
* Tests that an app with two review pages is parsed correctly.
*
* @see https://store.steampowered.com/app/614770/Beachhead_DESERT_WAR/
* @link https://store.steampowered.com/app/614770/Beachhead_DESERT_WAR/
*/
public function testTwoPages(): \Generator
{
Expand All @@ -78,7 +92,7 @@ public function testTwoPages(): \Generator
/**
* Tests that an app with multiple review pages is parsed correctly.
*
* @see https://store.steampowered.com/app/302160/The_Egyptian_Prophecy_The_Fate_of_Ramses/
* @link https://store.steampowered.com/app/302160/The_Egyptian_Prophecy_The_Fate_of_Ramses/
*/
public function testMultiplePages(): \Generator
{
Expand Down Expand Up @@ -116,6 +130,24 @@ public function testDateRange(): \Generator
self::assertFalse(yield $reviews->advance());
}

/**
 * Tests that an app with a large total number of reviews is parsed successfully.
 *
 * @link https://store.steampowered.com/app/730/CounterStrike_Global_Offensive/
 */
public function testLargeTotal(): \Generator
{
    // Import the reviews collection for app 730.
    $resource = new ScrapeAppReviews(730);
    $specification = new AsyncImportSpecification($resource);

    /** @var AsyncGameReviewsRecords $reviews */
    $reviews = $this->porter->importAsync($specification)->findFirstCollection();

    // Advance before reading the total, otherwise exceptions would be masked.
    // At least one record is expected, so advancing must report true.
    $hasRecord = yield $reviews->advance();
    self::assertTrue($hasRecord);

    // The reported total must exceed the 3,800,000 lower bound.
    $total = yield $reviews->getTotal();
    self::assertGreaterThan(3800000, $total);
}

private static function assertLooksLikeReview(array $review): void
{
self::assertArrayHasKey('review_id', $review);
Expand Down

0 comments on commit 1b3bd2c

Please sign in to comment.