Skip to content

Commit

Permalink
Remove unnecessary regexes (#168)
Browse files Browse the repository at this point in the history
* Remove regexes where user agents already contain bot

* Remove regexes already caught with github.com/ regex
  • Loading branch information
MaxGiting committed Jan 26, 2017
1 parent be0d602 commit d2dbf5e
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 15 deletions.
16 changes: 1 addition & 15 deletions src/Fixtures/Crawlers.php
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ class Crawlers extends AbstractProvider
'Arachmo',
'arachnode',
'Arachnophilia',
-'archive-com',
'aria2',
'asafaweb.com',
'AskQuickly',
Expand All @@ -99,7 +98,6 @@ class Crawlers extends AbstractProvider
'BCKLINKS',
'BDFetch',
'BegunAdvertising\/',
-'bibnum\.bnf',
'BigBozz',
'biglotron',
'BingLocalSearch',
Expand Down Expand Up @@ -150,7 +148,6 @@ class Crawlers extends AbstractProvider
'contactbigdatafr',
'convera',
'copyright sheriff',
-'cosmos\/[0-9]',
'Covario-IDS',
'CrawlForMe\/[0-9]',
'cron-job\.org',
Expand All @@ -159,7 +156,6 @@ class Crawlers extends AbstractProvider
'Curious George',
'curl',
'cuwhois\/[0-9]',
-'CyberPatrol',
'cybo\.com',
'DareBoost',
'DataparkSearch',
Expand Down Expand Up @@ -226,7 +222,6 @@ class Crawlers extends AbstractProvider
'Fever\/[0-9]',
'findlink',
'findthatfile',
-'Flamingo_SearchEngine',
'FlipboardBrowserProxy',
'FlipboardProxy',
'FlipboardRSS',
Expand All @@ -247,6 +242,7 @@ class Crawlers extends AbstractProvider
'getprismatic\.com',
'GetURLInfo\/[0-9]',
'GigablastOpenSource',
+'github\.com\/',
'Go [\d\.]* package http',
'Go-http-client',
'gofetch',
Expand Down Expand Up @@ -400,12 +396,10 @@ class Crawlers extends AbstractProvider
'MailChimp\.com',
'Mandrill',
'marketinggrader',
-'masscan\/',
'Mediapartners-Google',
'MegaIndex\.ru',
'Melvil Rawi\/',
'MergeFlow-PageReader',
-'MetaInspector',
'Metaspinner',
'MetaURI',
'Microsearch',
Expand Down Expand Up @@ -539,7 +533,6 @@ class Crawlers extends AbstractProvider
'Redirect Checker Tool',
'ReederForMac',
'request\.js',
-'resolver\/[0-9]',
'ResponseCodeTest\/[0-9]',
'RestSharp',
'RetrevoPageAnalyzer',
Expand Down Expand Up @@ -571,7 +564,6 @@ class Crawlers extends AbstractProvider
'SEOCentro',
'SeoCheck',
'SeopultContentAnalyzer',
-'SEOstats',
'Server Density Service Monitoring',
'servernfo\.com',
'Seznam screenshot-generator',
Expand Down Expand Up @@ -652,11 +644,9 @@ class Crawlers extends AbstractProvider
'Tweetminster',
'Twikle',
'Twingly',
-'Typhoeus',
'ubermetrics-technologies',
'uclassify',
'UdmSearch',
-'unfurlist',
'UnwindFetchor',
'updated',
'Upflow',
Expand All @@ -672,7 +662,6 @@ class Crawlers extends AbstractProvider
'visionutils',
'vkShare',
'voltron',
-'Vortex\/[0-9]',
'voyager\/',
'VSAgent\/[0-9]',
'VSB-TUO\/[0-9]',
Expand All @@ -683,7 +672,6 @@ class Crawlers extends AbstractProvider
'W3C_I18n-Checker',
'W3C_Unicorn',
'wangling',
-'Wappalyzer',
'WatchMouse',
'WbSrch\/',
'web-capture\.net',
Expand All @@ -707,7 +695,6 @@ class Crawlers extends AbstractProvider
'Website Analyzer\/',
'websitepulse[+ ]checker',
'Websnapr\/',
-'Websquash\.com',
'Webthumb\/[0-9]',
'WebThumbnail',
'WeCrawlForThePeace',
Expand All @@ -727,7 +714,6 @@ class Crawlers extends AbstractProvider
'wkhtmlto',
'wmtips',
'Woko',
-'WomlpeFactory',
'Word\/',
'WordPress\/',
'wotbox',
Expand Down
2 changes: 2 additions & 0 deletions tests/crawlers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2889,3 +2889,5 @@ Mozilla/5.0 (compatible; Semiocast HTTP client; http://semiocast.com/)
Mozilla/5.0 (compatible; The Drop Reaper; +http://tag1consulting.com/dropreaper.html)
/n software IPWorks HTTP/S Component - www.nsoftware.com
safe-agent-scanner
+Mozilla/5.0 (compatible; ACHE/Unknown Version; +https://github.com/ViDA-NYU/ache; )
+ANAM (github.com/dutchcoders/anam)

0 comments on commit d2dbf5e

Please sign in to comment.