Skip to content

Commit

Permalink
update dependencies & fix cs
Browse files Browse the repository at this point in the history
  • Loading branch information
RobinDev committed Jul 17, 2023
1 parent 8b8b8c4 commit beaf2c8
Show file tree
Hide file tree
Showing 19 changed files with 69 additions and 57 deletions.
30 changes: 12 additions & 18 deletions .github/workflows/split-monorepo.yaml
Expand Up @@ -18,18 +18,12 @@ jobs:
fail-fast: false
matrix:
package:
-
package: 'crawler'
-
package: 'curl'
-
package: 'extractor'
-
package: 'google'
-
package: 'google-spreedsheet-seo-scraper'
-
package: 'text-analyzer'
- crawler
- curl
- extractor
- google
- google-spreedsheet-seo-scraper
- text-analyzer

steps:
-
Expand All @@ -39,29 +33,29 @@ jobs:
-
if: "!startsWith(github.ref, 'refs/tags/')"
# Uses an action in the root directory
name: Monorepo Split of ${{ matrix.package.package }}
name: Monorepo Split of ${{ matrix.package }}
uses: danharrin/monorepo-split-github-action@v2.3.0
env:
GITHUB_TOKEN: ${{ secrets.ACCESS_TOKEN }}
with:
package_directory: 'packages/${{ matrix.package.package }}'
package_directory: 'packages/${{ matrix.package }}'
repository_organization: 'piedweb'
repository_name: '${{ matrix.package.package }}'
repository_name: '${{ matrix.package }}'
user_name: "GitHub Action"
user_email: "action@github.com"

# with tag
-
if: "startsWith(github.ref, 'refs/tags/')"
# Uses an action in the root directory
name: Monorepo Tagged Split of ${{ matrix.package.package }}
name: Monorepo Tagged Split of ${{ matrix.package }}
uses: danharrin/monorepo-split-github-action@v2.3.0
env:
GITHUB_TOKEN: ${{ secrets.ACCESS_TOKEN }}
with:
tag: ${GITHUB_REF#refs/tags/}
package_directory: 'packages/${{ matrix.package.package }}'
package_directory: 'packages/${{ matrix.package }}'
repository_organization: 'piedweb'
repository_name: '${{ matrix.package.package }}'
repository_name: '${{ matrix.package }}'
user_name: "GitHub Action"
user_email: "action@github.com"
11 changes: 6 additions & 5 deletions composer.json
Expand Up @@ -18,7 +18,7 @@
"symfony/cache": "^6.1",
"symfony/console": "^6.1",
"symfony/css-selector": "^6.1",
"symfony/dom-crawler": "^6.2",
"symfony/dom-crawler": "^6.3",
"symfony/serializer": "^6.2",
"thecodingmachine/safe": "^2.1",
"vierbergenlars/php-semver": "^3.0.3",
Expand Down Expand Up @@ -49,7 +49,8 @@
"PiedWeb\\GoogleSpreadsheetSeoScraper\\": "packages/google-spreadsheet-seo-scraper/src",
"PiedWeb\\Extractor\\": "packages/extractor/src",
"PiedWeb\\Crawler\\": "packages/crawler/src",
"PiedWeb\\TextAnalyzer\\": "packages/text-analyzer/src"
"PiedWeb\\TextAnalyzer\\": "packages/text-analyzer/src",
"PiedWeb\\Perso\\": "packages/perso"
}
},
"autoload-dev": {
Expand All @@ -63,11 +64,11 @@
},
"scripts": {
"stan": "vendor/bin/phpstan analyze packages/*/src",
"format": "vendor/bin/php-cs-fixer fix && vendor/bin/php-cs-fixer fix rector.php",
"psalm": "vendor/bin/psalm --no-suggestions --no-cache",
"rector": "vendor/bin/rector process",
"test": "vendor/bin/phpunit --stop-on-failure",
"test-google": "vendor/bin/phpunit --stop-on-failure --testsuite google",
"format": "vendor/bin/php-cs-fixer fix && vendor/bin/php-cs-fixer fix rector.php"
"test-google": "vendor/bin/phpunit --stop-on-failure --testsuite google"
},
"config": {
"sort-packages": true,
Expand All @@ -80,7 +81,7 @@
"repositories": [
{
"type": "vcs",
"url": "https://github.com/zoonru/puphpeteer.git"
"url": "https://github.com/RobinDev/puphpeteer.git"
},
{
"type": "vcs",
Expand Down
5 changes: 4 additions & 1 deletion package.json
@@ -1,5 +1,8 @@
{
"dependencies": {
"@nesk/puphpeteer": "https://github.com/zoonru/puphpeteer"
"@nesk/puphpeteer": "https://github.com/zoonru/puphpeteer",
"puppeteer": "^20",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2"
}
}
6 changes: 3 additions & 3 deletions packages/crawler/src/Crawler.php
Expand Up @@ -27,7 +27,7 @@ final class Crawler

private bool $nothingUpdated = true;

public readonly CrawlerConfig $config;
public readonly CrawlerConfig $config;

public function __construct(
CrawlerConfig|string $config,
Expand All @@ -39,7 +39,7 @@ public function __construct(
public static function continue(
string $id,
bool $debug = true,
?string $dataDirectory = null
string $dataDirectory = null
): self {
$config = CrawlerConfig::loadFrom($id, $dataDirectory);
$current = new self($config, $debug);
Expand All @@ -56,7 +56,7 @@ public static function restart(
string $id,
bool $fromCache = false,
bool $debug = true,
?string $dataDirectory = null
string $dataDirectory = null
): self {
$config = CrawlerConfig::loadFrom($id, $dataDirectory);
$current = new self($config, $debug);
Expand Down
16 changes: 8 additions & 8 deletions packages/crawler/src/CrawlerConfig.php
Expand Up @@ -42,12 +42,12 @@ final class CrawlerConfig
*/
public function __construct(
public readonly int $depthLimit = 0,
?string $userAgent = null,
?int $cacheMethod = null,
?int $sleepBetweenReqInMs = null, // ms
?string $virtualRobotsTxtRules = null,
?array $toHarvest = null,
?string $dataDirectory = null,
string $userAgent = null,
int $cacheMethod = null,
int $sleepBetweenReqInMs = null, // ms
string $virtualRobotsTxtRules = null,
array $toHarvest = null,
string $dataDirectory = null,
public readonly int $autosave = 500 // number of URLs that can be crawled before saving (0 = autosaving disabled)
) {
$this->userAgent = $userAgent ?? 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)';
Expand Down Expand Up @@ -78,7 +78,7 @@ public function getStartUrl(): UrlManipuler
return $this->startUrl;
}

public static function dataDirectory(?string $dataDirectory = null): string
public static function dataDirectory(string $dataDirectory = null): string
{
$dataDirectory = (string) $dataDirectory;

Expand Down Expand Up @@ -110,7 +110,7 @@ public static function getLastCrawl(string $dataDirectory): string
return $lastCrawl;
}

public static function loadFrom(string $crawlId, ?string $dataDirectory = null): self
public static function loadFrom(string $crawlId, string $dataDirectory = null): self
{
$dataDirectory = self::dataDirectory($dataDirectory);

Expand Down
2 changes: 1 addition & 1 deletion packages/crawler/src/ExtractExternalLinks.php
Expand Up @@ -15,7 +15,7 @@ final class ExtractExternalLinks

public function __construct(
string $id,
?string $dataDirectory = null
string $dataDirectory = null
) {
$this->config = CrawlerConfig::loadFrom($id, $dataDirectory);
$this->dir = $this->config->getDataFolder().'/links';
Expand Down
2 changes: 1 addition & 1 deletion packages/crawler/src/LinksVisualizer.php
Expand Up @@ -16,7 +16,7 @@ class LinksVisualizer
*/
protected array $results = ['nodes' => [], 'links' => []];

public function __construct(string $id, ?string $dataDirectory = null)
public function __construct(string $id, string $dataDirectory = null)
{
$this->config = CrawlerConfig::loadFrom($id, $dataDirectory);

Expand Down
2 changes: 1 addition & 1 deletion packages/crawler/src/SimplePageRankCalculator.php
Expand Up @@ -32,7 +32,7 @@ final class SimplePageRankCalculator

private float $dampingFactor = 0.85;

public function __construct(string $id, ?string $dataDirectory = null)
public function __construct(string $id, string $dataDirectory = null)
{
$this->config = CrawlerConfig::loadFrom($id, $dataDirectory);

Expand Down
4 changes: 2 additions & 2 deletions packages/curl/src/Client.php
Expand Up @@ -14,7 +14,7 @@ class Client

protected Response $response;

public function __construct(?string $target = null)
public function __construct(string $target = null)
{
if (null !== $target) {
$this->setTarget($target);
Expand Down Expand Up @@ -133,7 +133,7 @@ public function resetError(): void
$this->errorMessage = '';
}

public function request(?string $target = null): bool
public function request(string $target = null): bool
{
if (null !== $target) {
$this->setTarget($target);
Expand Down
4 changes: 2 additions & 2 deletions packages/curl/src/ExtendedClient.php
Expand Up @@ -13,7 +13,7 @@ class ExtendedClient extends Client
*/
final public const DEFAULT_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.104 Safari/537.36';

private bool $fakeBrowserHeader = false;
private bool $fakeBrowserHeader = false;

public ?string $referer = null;

Expand Down Expand Up @@ -262,7 +262,7 @@ public function checkHeader(\CurlHandle $handle, string $line): int
/**
* Execute the request.
*/
public function request(?string $target = null, bool $updateRefererAndCookies = true): bool
public function request(string $target = null, bool $updateRefererAndCookies = true): bool
{
if ($this->fakeBrowserHeader) {
$this->setBrowserHeader();
Expand Down
2 changes: 1 addition & 1 deletion packages/curl/src/Response.php
Expand Up @@ -146,7 +146,7 @@ public function getCookies(): ?string
*
* @return int|string|array<string, string|int>|null
*/
public function getInfo(?string $key = null): int|string|array|null
public function getInfo(string $key = null): int|string|array|null
{
return null !== $key && '' !== $key ? ($this->info[$key] ?? null) : $this->info;

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.170214 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.170214 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.001994 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.016137 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.001849 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.001773 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.002576 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.001974 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.002301 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.006009 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.306656 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.306656 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.011895 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.087053 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.006513 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.004231 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.002461 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.002843 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.006843 to int loses precision

Check warning on line 151 in packages/curl/src/Response.php

View workflow job for this annotation

GitHub Actions / P8.1 - prefer-stable - ubuntu-latest

Implicit conversion from float 0.004687 to int loses precision
}
Expand Down
2 changes: 1 addition & 1 deletion packages/curl/src/ResponseFromCache.php
Expand Up @@ -12,7 +12,7 @@ class ResponseFromCache extends Response
*/
public function __construct(
string $filePathOrContent,
?string $url = null,
string $url = null,
array $info = [],
mixed $headersSeparator = \PHP_EOL.\PHP_EOL
) {
Expand Down
2 changes: 1 addition & 1 deletion packages/extractor/src/Link.php
Expand Up @@ -64,7 +64,7 @@ public static function initialize(
string $url,
Url $parentUrl,
bool $parentMayFollow = true,
?\DOMElement $element = null
\DOMElement $element = null
): self {
$self = new self();
$self->element = $element;
Expand Down
5 changes: 4 additions & 1 deletion packages/google/package.json
@@ -1,5 +1,8 @@
{
"dependencies": {
"@nesk/puphpeteer": "https://github.com/zoonru/puphpeteer.git"
"@nesk/puphpeteer": "https://github.com/zoonru/puphpeteer.git",
"puppeteer": "^20",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2"
}
}
2 changes: 1 addition & 1 deletion packages/google/src/Extractor/SERPExtractor.php
Expand Up @@ -307,7 +307,7 @@ public function getRelatedSearches(): array
/**
* @param string[] $xpaths
*/
public function exists(array $xpaths, ?\DOMNode &$node = null): bool
public function exists(array $xpaths, \DOMNode &$node = null): bool
{
try {
$node = $this->getNode($xpaths);
Expand Down
12 changes: 7 additions & 5 deletions packages/google/src/GoogleRequester.php
Expand Up @@ -37,7 +37,7 @@ public function getPuppeteerClient(): Puphpeteer
return $this->puppeteerClient;
}

public function requestGoogleWithCurl(GoogleSERPManager $Google, ?callable $manageProxy = null): string
public function requestGoogleWithCurl(GoogleSERPManager $Google, callable $manageProxy = null): string
{
$this->getCurlClient()->setLanguage($Google->language.';q=0.9');

Expand All @@ -54,11 +54,13 @@ public function requestGoogleWithCurl(GoogleSERPManager $Google, ?callable $mana
return $this->getCurlClient()->getResponse()->getBody();
}

public function requestGoogleWithPuppeteer(GoogleSERPManager $manager, ?callable $manageProxy = null): string
public function requestGoogleWithPuppeteer(GoogleSERPManager $manager, callable $manageProxy = null): string
{
$this->getPuppeteerClient()
->instantiate(Puphpeteer::EMULATE_OPTIONS_MOBILE, $manager->language)
->setCookie('CONSENT', 'YES+', '.google.fr');
$pClient = $this->getPuppeteerClient();

$pClient->instantiate(Puphpeteer::EMULATE_OPTIONS_MOBILE, $manager->language);

$pClient->setCookie('CONSENT', 'YES+', '.google.fr');

if (null !== $manageProxy) {
\call_user_func($manageProxy, $this->getPuppeteerClient());
Expand Down
15 changes: 12 additions & 3 deletions packages/google/src/Helper/Puphpeteer.php
Expand Up @@ -51,6 +51,7 @@ public function getLogger(): PuppeteerLogger
'isLandscape' => false,
],
'userAgent' => 'Mozilla/5.0 (Linux; Android 10; SM-A305N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.210 Mobile Safari/537.36',
'headless' => 'new',
];

/**
Expand Down Expand Up @@ -78,7 +79,7 @@ public function instantiate(
array $emulateOptions = [],
string $language = '',
array $userOptions = [
'headless' => true,
'headless' => 'new',
'slowMo' => 250,
'read_timeout' => 9000,
'idle_timeout' => 9000, ]
Expand All @@ -97,14 +98,23 @@ public function instantiate(
return $this;
}

$userOptions['js_extra'] = "
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());
instruction.setDefaultResource(puppeteer);
";

Logger::log('launching new Puppeteer instance `'.self::$currentKey.'`');

self::$puppeteer[self::$currentKey] = new Puppeteer($userOptions);
self::$browser[self::$currentKey] = self::$puppeteer[self::$currentKey]->launch(
array_merge(
[] !== $emulateOptions ? $emulateOptions : self::EMULATE_OPTIONS_MOBILE,
// ['executablePath' => '/snap/bin/chromium',]
)
);

self::$browserPage[self::$currentKey] = $this->getBrowserPage(true);
self::$browserPage[self::$currentKey]->emulate([] !== $emulateOptions ? $emulateOptions : self::EMULATE_OPTIONS_MOBILE);

Expand All @@ -131,8 +141,7 @@ public function getBrowserPage(bool $new = false): Page
if (self::$browserPage[self::$currentKey]::class === BasicResource::class) {
dump($new);
dump(self::$browser[self::$currentKey]::class);
dump(self::$browserPage[self::$currentKey]::class);
// self::$browserPage[self::$currentKey] = self::$browserPage[self::$currentKey]->newPage();
// self::$browserPage[self::$currentKey] = self::$browser[self::$currentKey]->newPage();
dd(self::$browserPage[self::$currentKey]::class);
}

Expand Down
2 changes: 1 addition & 1 deletion packages/google/tests/GoogleSerpTest.php
Expand Up @@ -34,7 +34,7 @@ public function testPuphpeteerMobile(): void
$manager = $this->getSerpManager();

$googleRequester = new GoogleRequester();
$rawHtml = $manager->getCache() ?? $manager->setCache($googleRequester->requestGoogleWithPuppeteer($manager));
$rawHtml = $googleRequester->requestGoogleWithPuppeteer($manager); // $manager->getCache() ?? $manager->setCache($googleRequester->requestGoogleWithPuppeteer($manager));
file_put_contents('debug.html', $rawHtml);
$googleRequester->getPuppeteerClient()->getBrowserPage()->screenshot(['path' => 'debug.png']);

Expand Down
2 changes: 1 addition & 1 deletion packages/text-analyzer/src/Analysis.php
Expand Up @@ -34,7 +34,7 @@ public function getWordNumber(): int
/**
* @return array<string, int>
*/
public function getExpressions(?int $minFound = null): array
public function getExpressions(int $minFound = null): array
{
return $minFound ? array_filter(
$this->getExpressions(),
Expand Down

0 comments on commit beaf2c8

Please sign in to comment.