From 7dea53055bf188cba40f6527b0997db3b03b3aa0 Mon Sep 17 00:00:00 2001 From: Kaishiyoku Date: Wed, 23 May 2018 15:41:38 +0200 Subject: [PATCH] closed #4 fixed rating crawl logic --- app/Console/Commands/CrawlImages.php | 35 +++++++++++++------ .../2018_05_13_165545_create_images_table.php | 7 +--- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/app/Console/Commands/CrawlImages.php b/app/Console/Commands/CrawlImages.php index 36ac460..2a66fff 100644 --- a/app/Console/Commands/CrawlImages.php +++ b/app/Console/Commands/CrawlImages.php @@ -97,7 +97,18 @@ public function handle() private function getContent($uri) { - return file_get_contents(env('CRAWLER_BASE_URL') . $uri); + $ch = curl_init(); + + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($ch, CURLOPT_FRESH_CONNECT, true); + curl_setopt($ch, CURLOPT_URL, env('CRAWLER_BASE_URL') . $uri); + + $content = curl_exec($ch); + + curl_close($ch); + + return $content; } private function getListContent($pageNumber = null) @@ -145,15 +156,17 @@ private function getImages(Collection $uris, $isTest = false) $crawler = new Crawler($this->getContent($uri)); - $rating = collect( - $crawler - ->filterXPath('//table/tr/td') - ->each(function (Crawler $node) { - return $this->replaceNewLines($node->text()); - }) - )->filter(function ($value) { - return in_array($value, $this->getRatings()); - })->first(); + $rating = collect($crawler + ->filterXPath('//table[@class="image_info form"]/tr') + ->each(function (Crawler $node) { + $label = $this->replaceNewLines($node->children()->getNode(0)->textContent); + $value = $this->replaceNewLines($node->children()->getNode(1)->textContent); + + return compact('label', 'value'); + }) + )->filter(function ($item) { + return $item['label'] == 'Rating' && in_array($item['value'], $this->getRatings()); + })->first()['value']; $imageNode = $crawler->filter('img#main_image')->first(); @@ -188,7 +201,7 @@ private function getImages(Collection $uris, $isTest = false) $image->save(); } - $this->verbose(function () use ($externalId) {$this->line(' #' . $externalId);}); + $this->verbose(function () use ($externalId, $rating) {$this->line(' #' . $externalId . '|' . $rating);}); }); } diff --git a/database/migrations/2018_05_13_165545_create_images_table.php b/database/migrations/2018_05_13_165545_create_images_table.php index 7330713..985cf5b 100644 --- a/database/migrations/2018_05_13_165545_create_images_table.php +++ b/database/migrations/2018_05_13_165545_create_images_table.php @@ -16,12 +16,7 @@ public function up() Schema::create('images', function (Blueprint $table) { $table->unsignedInteger('external_id')->unique(); $table->string('url', 1528); - $table->enum('rating', [ - 'Unknown', - 'Safe', - 'Questionable', - 'Explicit', - ]); + $table->enum('rating', explode(',', env('CRAWLER_RATINGS'))); $table->timestamps(); $table->primary('external_id');