Skip to content
This repository has been archived by the owner on Nov 3, 2020. It is now read-only.

Debug mode, fix exclusion, better testing & code style adjustments #10

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions composer.json
Original file line number Diff line number Diff line change
@@ -1,20 +1,8 @@
{
"name": "juhlinus/depictr",
"description": "A middleware for rendering static pages when crawled by search engines.",
"type": "library",
"license": "MIT",
"autoload": {
"psr-4": {
"Depictr\\": "src"
}
},
"autoload-dev": {
"psr-4": {
"Depictr\\Tests\\": "tests/"
}
},
"require-dev": {
"orchestra/testbench": "^5.1"
},
"authors": [
{
"name": "Linus Juhlin",
Expand All @@ -25,6 +13,19 @@
"php": "^7.4",
"symfony/panther": "^1.0@dev"
},
"require-dev": {
"orchestra/testbench": "^5.1"
},
"autoload": {
"psr-4": {
"Depictr\\": "src"
}
},
"autoload-dev": {
"psr-4": {
"Depictr\\Tests\\": "tests/"
}
},
"extra": {
"laravel": {
"providers": [
Expand Down
33 changes: 28 additions & 5 deletions config/depictr.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
<?php

return [

/*
|--------------------------------------------------------------------------
| Depictr Debug Mode
|--------------------------------------------------------------------------
|
| When Depictr is in debug mode, all included endpoints are rendered
| as if the request were coming from a crawler. When disabled, this
| only happens for the crawlers you've defined.
|
*/

'debug' => env('DEPICTR_DEBUG', false),

/*
|--------------------------------------------------------------------------
| Crawlers
Expand All @@ -11,7 +25,9 @@
| should be returned as static html or not.
|
*/

'crawlers' => [

/*
|--------------------------------------------------------------------------
| Search engines
Expand All @@ -22,6 +38,7 @@
| SEO.
|
*/

'googlebot', // Google
'duckduckbot', // DuckDuckGo
'bingbot', // Bing
Expand All @@ -38,6 +55,7 @@
| they link to your website on the social network websites.
|
*/

'facebookexternalhit', // Facebook
'twitterbot', // Twitter
'whatsapp', // WhatsApp
Expand All @@ -54,35 +72,40 @@
| generations may gaze upon your craftsmanship.
|
*/

'ia_archiver', // Alexa

],

/*
|--------------------------------------------------------------------------
| Excluded
|--------------------------------------------------------------------------
|
| URLs that should NOT be processed by Depictr. This is useful for plain
| text files like sitemap.txt where Panther will wrap it in a stripped
| URLs that should NOT be processed by Depictr. This is useful for plain
| text files such as sitemap.txt, which Depictr would otherwise wrap in a stripped
| down HTML file. Uses $request->is(), so using `*` for wildcard
| is permitted. The admin route and its sub-routes have
| been added to showacase the functionality.
| been added to showcase the functionality.
|
*/

'excluded' => [
'admin/*'
'admin/*',
],

/*
|--------------------------------------------------------------------------
| Environments
|--------------------------------------------------------------------------
|
| Which Laravel environments should depictr be active for.
| The application environments on which Depictr should be enabled.
|
*/

'environments' => [
'production',
'testing',
],

];
27 changes: 27 additions & 0 deletions src/Browsers/ChromeBrowser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?php

namespace Depictr\Browsers;

use Depictr\Contracts\Browser;
use Symfony\Component\Panther\Client as PantherClient;
use Throwable;

class ChromeBrowser implements Browser
{
    /**
     * Renders a HTML page by loading the URL in a Chrome client created
     * through Panther and returning the resulting page source.
     *
     * The client is always closed before this method returns, including
     * when the request or the page-source lookup throws, so a failed
     * render does not leave the client open.
     *
     * @param string $url The URL to load.
     * @return string The page source reported by the client.
     * @throws Throwable Anything raised while creating, driving or closing the client.
     */
    public function render(string $url): string
    {
        $client = PantherClient::createChromeClient();

        try {
            $client->request('GET', $url);

            return $client->getPageSource();
        } finally {
            // Runs on both the success and the failure path.
            // NOTE(review): close() is kept from the original code; confirm
            // whether quit() is required to also stop the driver process.
            $client->close();
        }
    }
}
17 changes: 17 additions & 0 deletions src/Contracts/Browser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

namespace Depictr\Contracts;

use Throwable;

/**
* Contract for a browser that can turn a URL into rendered HTML.
*
* Implementations are type-hinted wherever a renderer is needed, so the
* concrete browser can be swapped (for example with a fake in tests).
*/
interface Browser
{
/**
* Renders a HTML page.
*
* Callers should be prepared for any Throwable: the contract does not
* narrow the kind of failure an implementation may raise.
*
* @param string $url The URL of the page to render.
* @return string The rendered page as a string.
* @throws Throwable When the page cannot be rendered.
*/
public function render(string $url): string;
}
131 changes: 73 additions & 58 deletions src/Middleware.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,107 +3,122 @@
namespace Depictr;

use Closure;
use RuntimeException;
use Illuminate\Support\Str;
use Depictr\Contracts\Browser;
use Illuminate\Http\Request;
use Illuminate\Support\Facades\Log;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Panther\Client as PantherClient;
use Throwable;

class Middleware
{
    /**
     * The browser used to render pages.
     *
     * @var Browser
     */
    protected $browser;

    /**
     * Create a new middleware instance.
     *
     * @param Browser $browser
     */
    public function __construct(Browser $browser)
    {
        $this->browser = $browser;
    }

    /**
     * Handle an incoming request.
     *
     * When the request qualifies, the full URL is rendered through the
     * browser and returned as a 200 response carrying an `X-Depicted`
     * header. If rendering fails for any reason the error is logged and
     * the request falls through to the next middleware.
     *
     * @param Request $request
     * @param Closure $next
     *
     * @return Response
     */
    public function handle($request, Closure $next): Response
    {
        if (! $this->shouldDepict($request)) {
            return $next($request);
        }

        try {
            $contents = $this->browser->render($request->fullUrl());
        } catch (Throwable $exception) {
            // Rendering is best-effort: record the failure and serve the
            // regular response instead of failing the request.
            Log::error($exception);

            return $next($request);
        }

        return new Response(
            $contents,
            200,
            ['X-Depicted' => now()->toString()]
        );
    }

    /**
     * Determine whether the request should be answered with a rendered page.
     *
     * Only GET requests that are not Inertia requests and do not match an
     * excluded pattern are considered. In debug mode every such request is
     * rendered; otherwise the environment must be enabled and the request
     * must come from a configured crawler.
     *
     * @param Request $request
     *
     * @return bool
     */
    protected function shouldDepict(Request $request): bool
    {
        if (! $request->isMethod('GET')
            || $request->header('X-Inertia')
            || $this->isExcluded($request)
        ) {
            return false;
        }

        // NOTE(review): nothing visible here sets an `X-Depictr` request
        // header; presumably the rendering browser is meant to send it so
        // debug mode does not re-render its own request - confirm.
        if (config('depictr.debug') && ! $request->header('X-Depictr')) {
            return true;
        }

        return $this->environmentEnabled()
            && $this->isFromCrawler($request);
    }

    /**
     * Determine whether the request is made by a configured crawler.
     *
     * The match is a case-insensitive substring test: a crawler token such
     * as `googlebot` matches any user agent that contains it.
     *
     * @param Request $request
     *
     * @return bool
     */
    protected function isFromCrawler(Request $request): bool
    {
        if (empty($userAgent = $request->userAgent())) {
            return false;
        }

        $userAgent = strtolower($userAgent);

        return collect(config('depictr.crawlers'))
            ->contains(function ($crawler) use ($userAgent) {
                $crawler = strtolower((string) $crawler);

                // An empty token would match everything (or warn on
                // PHP 7.4), so it is never treated as a crawler.
                return $crawler !== ''
                    && strpos($userAgent, $crawler) !== false;
            });
    }

    /**
     * Determine whether the Request is for an excluded page.
     *
     * Patterns are passed to Request::is(), so `*` wildcards are allowed.
     *
     * @param Request $request
     *
     * @return bool
     */
    private function isExcluded(Request $request): bool
    {
        // Normalise to a plain list so a null value or a string-keyed
        // array in the configuration cannot break argument unpacking.
        $patterns = array_values((array) config('depictr.excluded', []));

        return $request->is(...$patterns);
    }

    /**
     * Determine whether Depictr is enabled
     * for this environment.
     *
     * @return bool
     */
    protected function environmentEnabled(): bool
    {
        return app()->environment(
            config('depictr.environments', [])
        );
    }
}
Loading