diff --git a/src/Fixtures/Crawlers.php b/src/Fixtures/Crawlers.php index 8c32a92..77098d9 100644 --- a/src/Fixtures/Crawlers.php +++ b/src/Fixtures/Crawlers.php @@ -22,6 +22,7 @@ class Crawlers extends AbstractProvider '.*Java.*outbrain', '008\/', '192.comAgent', + '404checker', '^bluefish ', '^FDM ', '^Java\/', @@ -29,7 +30,9 @@ class Crawlers extends AbstractProvider '^NING\/', '^PHP\/[0-9]', '^Ruby|Ruby\/[0-9]', + '^VSE\/[0-9]', '^WordPress\.com', + '^XRL\/[0-9]', 'a3logics\.in', 'A6-Indexer', 'a\.pr-cy\.ru', @@ -51,6 +54,7 @@ class Crawlers extends AbstractProvider 'alertra', 'alexa site audit', 'alyze\.info', + 'amagit', 'Anemone', 'Ant\.com', 'Anturis Agent', @@ -62,6 +66,7 @@ class Crawlers extends AbstractProvider 'Arachnophilia', 'archive-com', 'aria2', + 'asafaweb.com', 'AskQuickly', 'Astute', 'autocite', @@ -70,6 +75,7 @@ class Crawlers extends AbstractProvider 'Backlink-Ceck\.de', 'Bad-Neighborhood', 'baidu\.com', + 'baypup\/[0-9]', 'baypup\/colbert', 'BazQux', 'BCKLINKS', @@ -83,6 +89,7 @@ class Crawlers extends AbstractProvider 'binlar', 'Blackboard Safeassign', 'Bloglovin', + 'BlogPulseLive', 'BlogSearch', 'Blogtrottr', 'BoardReader Favicon Fetcher', @@ -194,6 +201,7 @@ class Crawlers extends AbstractProvider 'flynxapp', 'forensiq', 'FoundSeoTool\/[0-9]', + 'free thumbnails', 'FreeWebMonitoring SiteChecker', 'Funnelback', 'g00g1e\.net', @@ -276,6 +284,7 @@ class Crawlers extends AbstractProvider 'igdeSpyder', 'IlTrovatore', 'ImageEngine\/', + 'ImageFetcher\/[0-9]', 'InAGist', 'inbound\.li parser', 'InDesign%20CC', @@ -292,9 +301,11 @@ class Crawlers extends AbstractProvider 'ips-agent', 'ipv6-test.com validator', 'iqdb\/', + 'Irokez', 'isitup\.org', 'iskanie', 'iZSearch', + 'janforman', 'Jigsaw', 'Jobboerse', 'jobo', @@ -455,6 +466,7 @@ class Crawlers extends AbstractProvider 'RebelMouse', 'redback\/', 'ReederForMac', + 'ResponseCodeTest\/[0-9]', 'RestSharp', 'RetrevoPageAnalyzer', 'Riddler', @@ -478,6 +490,7 @@ class Crawlers extends AbstractProvider 'SEO Browser', 'Seo Servis', 'seo-nastroj.cz', + 'SEOCentro', 'SeoCheck', 'SEOstats', 'Server Density Service Monitoring', @@ -533,6 +546,7 @@ class Crawlers extends AbstractProvider 'SwiteScraper', 'Symfony2 BrowserKit', 'Sysomos', + 'Tarantula\/', 'teoma', 'terrainformatica\.com', 'theinternetrules', @@ -556,6 +570,7 @@ class Crawlers extends AbstractProvider 'updated', 'Upflow', 'URLChecker', + 'URLitor.com', 'urlresolver', 'Urlstat', 'UrlTrends Ranking Updater', @@ -567,6 +582,7 @@ class Crawlers extends AbstractProvider 'voltron', 'Vortex\/[0-9]', 'voyager\/', + 'VSAgent\/[0-9]', 'VSB-TUO\/[0-9]', 'VYU2', 'w3af\.org', diff --git a/tests/crawlers.txt b/tests/crawlers.txt index fa40841..d8d1218 100644 --- a/tests/crawlers.txt +++ b/tests/crawlers.txt @@ -1721,4 +1721,20 @@ nWormFeedFinder (http://www.nworm.com) Ocelli/1.4 (http://www.globalspec.com/Ocelli) Mozilla/5.0 (compatible; AportWorm/3.2; +http://www.aport.ru/help) Surphace Scout&v4.0 - scout at surphace dot com -WebImages 0.3 ( http://herbert.groot.jebbink.nl/?app=WebImages ) \ No newline at end of file +WebImages 0.3 ( http://herbert.groot.jebbink.nl/?app=WebImages ) +Irokez.cz monitoring v1.2 - (http://www.irokez.cz, Irokez.cz, crawl) +Mozilla/5.0 (compatible; janforman.com/2.4; +http://janforman.com/) +Mozilla/5.0 (compatible; ImageFetcher/5.6; +http://images.weserv.nl/) +asafaweb.com +URLitor.com +VSE/1.0 (rabraham@multiview.com) +Mozilla/5.0 (compatible; VSAgent/1.2) +ResponseCodeTest/1.1 +404 Checker [http://www.404checker.com/user-agent] +SEOCentro Page Keyword Analyzer v1.2 +Tarantula/BETA-0.5 (Tarantula de la Enlle, el mejor buscador en la red; http://www.enlle.com; tarantula@enlle.com) +XRL/3.00 (Linux; i686; en-us) (+http://metamark.net/about) +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535. Safari/535.22+; (+http://immediatenet.com/thumbnail_api.html; free thumbnails) +BlogPulseLive (support@blogpulse.com) +http://www.amagit.com/ +baypup/1.1 (Baypup; http://www.baypup.com/; jason@baypup.com) \ No newline at end of file