Skip to content

Commit

Permalink
closed #37 fixed a bug where image ratings were not being fetched sporadically
Browse files Browse the repository at this point in the history
  • Loading branch information
Kaishiyoku committed Jul 3, 2019
1 parent 8b7956c commit cd33063
Show file tree
Hide file tree
Showing 7 changed files with 336 additions and 52 deletions.
10 changes: 3 additions & 7 deletions app/Console/Commands/CrawlImages.php
Expand Up @@ -120,15 +120,11 @@ private function getImages(Collection $uris)
$externalId = $this->getExternalIdByUri($uri);

$crawler = new Crawler(getAstolfoContent($uri));
$imageInfoFieldValues = getImageInfoFieldValues($crawler);

$imageNode = $crawler->filter('img#main_image')->first();
$imageInfoFieldValues = collect(getImageInfoFieldValues($crawler));

// only save not yet crawled images, update others
$image = Image::find($externalId);

$imageUrl = $imageNode->count() > 0 ? env('CRAWLER_BASE_URL') . $imageNode->attr('src') : null;

$tags = collect($imageInfoFieldValues['tags'])
->reject(function ($value) {
return $value == 'tagme';
Expand All @@ -140,12 +136,12 @@ private function getImages(Collection $uris)

$values = array_merge([
'external_id' => $externalId,
'url' => $imageUrl,
'url' => $imageInfoFieldValues['imageUrl'],
], $imageInfoFieldValues->reject(function ($item, $key) {
return $key == 'tags';
})->toArray());

if ($imageUrl != null) {
if ( $imageInfoFieldValues['imageUrl'] != null) {
if ($image) {
$image->fill($values);
} else {
Expand Down
20 changes: 11 additions & 9 deletions app/Console/Commands/CrawlTest.php
Expand Up @@ -5,6 +5,7 @@
use App\Console\BaseCommand;
use App\Mail\CrawlerTestFailed;
use Illuminate\Support\Facades\Mail;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\DomCrawler\Crawler;

class CrawlTest extends BaseCommand
Expand Down Expand Up @@ -40,22 +41,23 @@ public function __construct()
*/
public function handle()
{
$hasError = false;
$expectedFieldNames = ['views', 'uploader', 'tags', 'source', 'locked', 'parent', 'rating', 'imageUrl'];
$crawler = new Crawler(getAstolfoContent('/post/view/1'));
$imageInfoFieldValues = getImageInfoFieldValues($crawler);

$collectedFields = collect($imageInfoFieldValues)->map(function ($value, $key) {
return [$key];
})->flatten()->toArray();
$fields = getImageInfoFieldValues($crawler);

if ($collectedFields != getImageInfoFields()) {
$hasError = true;
}
$fieldChecks = array_map(function ($fieldName) use ($fields) {
return array_key_exists($fieldName, $fields) && hasValueOrNull($fields[$fieldName]);
}, $expectedFieldNames);

$hasError = !array_reduce($fieldChecks, function ($acc, $value) {
return $acc && $value;
}, true);

if ($hasError) {
$this->logError('Crawler test failed.');

Mail::to(env('CRAWLER_NOTIFICATION_MAIL'))->send(new CrawlerTestFailed($collectedFields, getImageInfoFields()));
Mail::to(env('CRAWLER_NOTIFICATION_MAIL'))->send(new CrawlerTestFailed($fields, getImageInfoFields()));
} else {
$this->logInfo('No errors occurred.');
}
Expand Down
67 changes: 39 additions & 28 deletions app/helpers.php
@@ -1,15 +1,20 @@
<?php

use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\DomCrawler\Crawler;

if (!function_exists('getImageInfoFields')) {
    /**
     * List the names of the image info fields, in a fixed order.
     *
     * @return string[]
     */
    function getImageInfoFields()
    {
        $fieldNames = ['views', 'uploader', 'tags', 'source', 'locked', 'parent', 'rating', 'imageUrl'];

        return $fieldNames;
    }
}
Expand Down Expand Up @@ -49,39 +54,45 @@ function replaceNewLines($content)
if (!function_exists('getImageInfoFieldValues')) {
    /**
     * Extract the image info fields and the image URL from a crawled post page.
     *
     * The info values are read positionally: the second cell of each row of
     * `table.image_info`, in the order views, uploader, tags, source, locked,
     * parent, rating. Rows (or value cells) that are missing yield null instead
     * of raising an undefined-offset / null-property error.
     *
     * @param Crawler $crawler Crawler positioned on the post page document.
     *
     * @return array Map with the keys views, uploader, tags, source, locked,
     *               parent, rating and imageUrl. `tags` is an array of strings,
     *               `locked` is a bool (null when the row is missing), `source`
     *               and `parent` are null for the placeholders 'Unknown' / 'None',
     *               and `imageUrl` is null when no image or video source was found.
     */
    function getImageInfoFieldValues(Crawler $crawler)
    {
        $converter = new CssSelectorConverter();

        // Crawler::attr() throws on an empty node list, so check for a match first.
        // Callers test imageUrl against null to decide whether the page had an image.
        $image = $crawler->filterXPath($converter->toXPath('img#main_image, video#main_image source'));
        $src = $image->count() > 0 ? $image->attr('src') : null;
        $imageUrl = empty($src) ? null : env('CRAWLER_BASE_URL') . $src;

        $cellValues = $crawler
            ->filterXPath($converter->toXPath('table.image_info tr'))
            ->each(function (Crawler $row) {
                // Index 1 is the value cell; index 0 holds the label.
                $valueCell = $row->children()->getNode(1);

                return $valueCell === null ? null : trim($valueCell->textContent);
            });

        // Pad to the seven expected rows so the positional destructuring never
        // hits an undefined offset when the table is absent or shorter.
        list($views, $uploader, $tags, $source, $locked, $parent, $rating) = array_pad($cellValues, 7, null);

        $tags = $tags === null ? [] : explode(' ', $tags);
        $source = $source === 'Unknown' ? null : $source;
        $locked = $locked === null ? null : $locked !== 'No';
        $parent = $parent === 'None' ? null : $parent;

        return compact('views', 'uploader', 'tags', 'source', 'locked', 'parent', 'rating', 'imageUrl');
    }
}

if (!function_exists('hasValueOrNull')) {
    /**
     * Tell whether a value is either explicitly null or carries actual content.
     *
     * Null, booleans and numbers always count as valid. Strings must contain
     * at least one non-whitespace character (so '0' is a value, '' is not),
     * and arrays must be non-empty.
     *
     * The previous loose `$value == null` comparison treated '', [], 0 and false
     * as null, so every input except the string '0' passed the check.
     *
     * @param mixed $value Value to inspect.
     *
     * @return bool True when the value is null or non-empty.
     */
    function hasValueOrNull($value)
    {
        if ($value === null || is_bool($value) || is_int($value) || is_float($value)) {
            return true;
        }

        if (is_string($value)) {
            // empty('0') is true, so compare against the empty string instead.
            return trim($value) !== '';
        }

        return !empty($value);
    }
}

if (!function_exists('toString')) {
    /**
     * Render an array as a comma-separated string; pass any other value through.
     *
     * @param mixed $value Array to join, or any other value.
     *
     * @return mixed The elements joined with ', ' for arrays, otherwise $value unchanged.
     */
    function toString($value)
    {
        return is_array($value) ? implode(', ', $value) : $value;
    }
}

if (!function_exists('getSocialMediaLinks')) {
function getSocialMediaLinks() {
function getSocialMediaLinks()
{
$str = env('SOCIAL_MEDIA_LINKS');

$data = empty($str) ? collect() : collect(explode(';', env('SOCIAL_MEDIA_LINKS')))->map(function ($item) {
Expand Down
2 changes: 2 additions & 0 deletions composer.json
Expand Up @@ -8,9 +8,11 @@
"php": "^7.1.3",
"doctrine/dbal": "2.9.2",
"fideloper/proxy": "4.1.0",
"guzzlehttp/guzzle": "^6.3",
"laravel/framework": "5.8.27",
"laravel/tinker": "1.0.8",
"laravelcollective/html": "5.8.0",
"symfony/css-selector": "4.3.2",
"symfony/dom-crawler": "4.3.2",
"webup/laravel-sendinblue": "1.1.4"
},
Expand Down

0 comments on commit cd33063

Please sign in to comment.