Skip to content

Commit

Permalink
Merge 2fce15a into 67a6b30
Browse files Browse the repository at this point in the history
  • Loading branch information
JayBizzle committed Jan 31, 2017
2 parents 67a6b30 + 2fce15a commit 053a2b7
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 2 deletions.
7 changes: 6 additions & 1 deletion src/Fixtures/Crawlers.php
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ class Crawlers extends AbstractProvider
'Digg',
'Dispatch\/',
'dlvr',
'DMBrowser-UV',
'DNS-Tools Header-Analyzer',
'DNSPod-reporting',
'docoloc',
Expand Down Expand Up @@ -237,6 +238,7 @@ class Crawlers extends AbstractProvider
'Funnelback',
'g00g1e\.net',
'GAChecker',
'ganarvisitas\/[0-9]',
'geek-tools',
'Genderanalyzer',
'Genieo',
Expand Down Expand Up @@ -357,6 +359,7 @@ class Crawlers extends AbstractProvider
'Jobrapido',
'JS-Kit',
'KeepRight OpenStreetMap Checker',
'KeyCDN Perf Test',
'Keywords Research',
'KickFire',
'KimonoLabs\/',
Expand Down Expand Up @@ -491,7 +494,7 @@ class Crawlers extends AbstractProvider
'phpcrawl',
'phpservermon',
'Pi-Monster',
'Pingdom\.com',
'Pingdom',
'Pingoscope',
'PingSpot',
'pinterest\.com',
Expand Down Expand Up @@ -628,6 +631,7 @@ class Crawlers extends AbstractProvider
'Sysomos',
'T0PHackTeam',
'Tarantula\/',
'Taringa UGC',
'teoma',
'terrainformatica\.com',
'Test Certificate Info',
Expand Down Expand Up @@ -744,6 +748,7 @@ class Crawlers extends AbstractProvider
'yandex',
'yanga',
'yeti',
' YLT',
'Yo-yo',
'Yoleo Consumer',
'yoogliFetchAgent',
Expand Down
160 changes: 159 additions & 1 deletion tests/crawlers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2788,7 +2788,6 @@ boitho.com-dc/0.81 ( http://www.boitho.com/dcbot.html )
boitho.com-dc/0.82 ( http://www.boitho.com/dcbot.html )
boitho.com-dc/0.83 ( http://www.boitho.com/dcbot.html )
boitho.com-dc/0.85 ( http://www.boitho.com/dcbot.html )
boitho.com-robot/1.0
boitho.com-robot/1.1
Blackboard Safeassign
Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36 (BingLocalSearch)
Expand Down Expand Up @@ -2897,3 +2896,162 @@ Calypso v/0.01
-Mozilla/4.0 (compatible; http://search.thunderstone.com/texis/websearch/about.html)
Mozilla/5.0 (compatible; Tweezler/2.0; http://tweezler.com)
Jeode/1.x.x
KeyCDN Perf Test
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) YLT Chrome/27.0.1453.110 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36 DMBrowser-UV
Taringa UGC www.taringa.net/shared-link.php
Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) PingdomTMS/0.8.5 Safari/534.34
ganarvisitas/1.0 (+http://www.ganarvisitas.com/)
FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)
Mozilla/5.0 (compatible) Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)
Mozilla/5.0 (compatible; Exabot PyExalead/3.0; +http://www.exabot.com/go/robot)
Mozilla/5.0 PhantomJS (compatible; Seznam screenshot-generator 2.1; +http://fulltext.sblog.cz/screenshot/)
Mozilla/5.0 (compatible; SeznamBot/3.2-test4; +http://napoveda.seznam.cz/en/seznambot-intro/)
Mozilla/5.0 (compatible; SeznamBot/3.2-test2; +http://napoveda.seznam.cz/en/seznambot-intro/)
Mozilla/5.0 (compatible; SeznamBot/3.2-test1; +http://fulltext.sblog.cz/)
Mozilla/5.0 (compatible; SeznamBot/3.2-test4; +http://fulltext.sblog.cz/)
Mozilla/5.0 (compatible; SeznamBot/3.2; +http://fulltext.sblog.cz/)
Seznam-Zbozi-robot/3.2.1
SeznamBot/2.0 (+http://fulltext.sblog.cz/robot/)
Seznam-Zbozi-robot/3.2.2
Mozilla/5.0 (compatible; SeznamBot/3.2-test2; +http://fulltext.sblog.cz/)
Mozilla/5.0 (compatible; SeznamBot/3.2-test3; +http://fulltext.sblog.cz/)
Seznam-Zbozi-robot/3.3
SklikBot/2.0 (sklik@firma.seznam.cz;+http://napoveda.sklik.cz/)
SeznamBot/3.0 (+http://fulltext.sblog.cz/)
Mozilla/5.0 (compatible; SeznamBot/3.1-test1; +http://fulltext.sblog.cz/)
Mozilla/5.0 (Linux; U; Android 4.1.2; cs-cz; Seznam screenshot-generator Build/Q3) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30
SeznamBot/3.0-test (+http://fulltext.sblog.cz/), I
SeznamBot/3.0 (HaF+http://fulltext.sblog.cz/)
SeznamBot/3.0-test (+http://fulltext.sblog.cz/)
SeznamBot/3.0-beta (+http://fulltext.sblog.cz/), I
SeznamBot/3.0-beta (+http://fulltext.sblog.cz/)
SeznamBot/3.0-alpha (+http://fulltext.sblog.cz/)
SeznamBot/2.0 (+http://fulltext.seznam.cz/)
SeznamBot/2.0-Test (+http://fulltext.sblog.cz/robot/)
Mozilla/5.0 (compatible; Seznam screenshot-generator 2.0; +http://fulltext.sblog.cz/screenshot/)
Mozilla/5.0 (compatible; FlipboardRSS/1.2; +http://flipboard.com/browserproxy)
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0 (FlipboardProxy/1.2; +http://flipboard.com/browserproxy)
Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7 AppEngine-Google; (+http://code.google.com/appengine; appid: s~feedly-social)
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/23.0.912.77 Safari/535.7 AppEngine-Google; (+http://code.google.com/appengine; appid: s~feedly-nikon3)
Mozilla/5.0 (compatible; SemrushBot/1.2~bl; +http://www.semrush.com/bot.html)
Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots mtmon01it.mtrs.yandex.ru)
Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexBot/3.0; +http://yandex.com/bots)
Mozilla/5.0 (compatible; memoryBot/1.24.61 +http://internetmemory.org/en/)
Mozilla/5.0 (compatible; memoryBot/1.24.54 +http://internetmemory.org/en/)
BUbiNG - Research at Brno University of Technology - KNOT group - http://knot.fit.vutbr.cz/crawling/ - Stop? http://law.di.unimi.it/BUbiNG.html
Mozilla/5.0 (compatible; MSIE 9.0; Firefox/50.1) Daum/4.1
Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/534.34 (KHTML, like Gecko) PhantomJS/1.9.7 Daum Raam Edition b20140820 Safari/534.34
Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)
yacybot (/global; amd64 Linux 4.4.0-59-generic; java 1.8.0_121; Europe/de) http://yacy.net/bot.html
yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html
yacybot (/global; amd64 Windows 8.1 6.3; java 1.8.0_111; Europe/de) http://yacy.net/bot.html
yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.8.0_111; Europe/en) http://yacy.net/bot.html
yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html
yacybot (webportal-global; amd64 Linux 4.4.0-46-generic; java 1.8.0_91; Europe/en) http://yacy.net/bot.html
yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html
Mozilla/5.0 (compatible; special_archiver/3.3.0 bot@archive.org +https://archive.org/details/archive.org_bot)
Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20140702-2247 +http://archive.org/details/archive.org_bot)
Mozilla/5.0 (compatible; archive.org_bot/heritrix-1.15.4 +http://www.archive.org)
SeobilityBot (SEO-Check; http://bit.ly/1dJuuzs)
Testomatobot/1.0 (Linux x86_64; +http://www.testomato.com/testomatobot) minicrawler/5.0.4
Testomatobot/1.0 (Linux x86_64; +http://www.testomato.com/testomatobot) minicrawler/5.0.3
Testomatobot/1.0 (Linux x86_64; +http://www.testomato.com/testomatobot) minicrawler/5.0.2
Testomatobot/1.0 (Linux x86_64; +http://www.testomato.com/testomatobot) minicrawler/5.0.1
Mozilla/5.0 (compatible; AhrefsBot/5.2; +http://ahrefs.com/robot/)
Heurekabot-Feed/1.0 (+https://sluzby.heureka.cz/napoveda/heurekabot/)
Heurekabot-ImageFullText/1.0 (+https://sluzby.heureka.cz/napoveda/heurekabot/)
Heurekabot-ImageFullText/1.0 (+http://sluzby.heureka.cz/napoveda/heurekabot/)
Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 QIHU 360SE; 360Spider
ZoomBot (Linkbot 1.0 http://suite.seozoom.it/bot.html)
Mozilla/5.0 (compatible; SOLOFIELD/1.0 +http://solofield.net/bot.html)
Qwantify/1.0
Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/bot)
Scrapy/1.3.0 (+http://scrapy.org)
Scrapy/1.0.5.post4+g4b324a8 (+http://scrapy.org)
Scrapy/1.2.1 (+http://scrapy.org)
Scrapy/0.16.5 (+http://scrapy.org)
Mozilla/5.0 (compatible; Dataprovider.com;)
Mozilla/5.0 (compatible; Dataprovider; https://www.dataprovider.com/spider/)
Mozilla/5.0 (compatible; SiteExplorer/1.1b; +http://siteexplorer.info/Backlink-Checker-Spider/)
ICC-Crawler/2.0 (Mozilla-compatible; ; http://ucri.nict.go.jp/en/icccrawler.html)
DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)
Mozilla/5.0 (compatible; SMTBot/1.0; http://www.similartech.com/smtbot)
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) Chromium/31.0.1650.63 Site-Shot/2.1 (http://www.site-shot.com/) Safari/538.1
Mozilla/5.0 (Windows NT 6.1; WOW64; Neustar WPM) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36
Mozilla/5.0 (iPad; CPU OS 6_1_2 like Mac OS X; Neustar WPM) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25
Mozilla/5.0 (Windows NT 6.0; Neustar WPM) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Neustar WPM) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.82 Safari/537.36
Mozilla/5.0 (Windows NT 6.0; rv:38.0; Neustar WPM) Gecko/20100101 Firefox/38.0
Mozilla/5.0 (Windows NT 6.1; rv:38.0; Neustar WPM) Gecko/20100101 Firefox/38.0
Mozilla/5.0 (Windows NT 6.1; Neustar WPM) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36
Mozilla/5.0 (Linux; Android 4.1.1; HTC6435LVW Build/JRO03C; Neustar WPM) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.169 Mobile Safari/537.22
Mozilla/5.0 (compatible; Whoiswebsitebot/0.1; +http://www.whoiswebsite.net)
Mozilla/5.0 (Linux; Android 5.0.2; SM-G920T Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840 Mobile Safari/537.36 DareBoost
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840 Safari/537.36 DareBoost
Mozilla/5.0 (Linux; Android 5.0; LG-D855 Build/LRX21R.A1422018487) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840 Mobile Safari/537.36 DareBoost
Mozilla/5.0 (iPhone; CPU iPhone OS 10_0_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Mobile/14A403 Safari/602.1 DareBoost
Mozilla/5.0 (Linux; Android 4.4.3; HTC One Build/KTU84L) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840 Mobile Safari/537.36 DareBoost
Mozilla/5.0 (Linux; Android 4.4; Nexus 5 Build/KRT16M) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/54.0.2840 Mobile Safari/537.36 DareBoost
HubSpot Website Grader (web-crawlers@hubspot.com)
HubSpot Marketing Grader, HubSpot Marketing Grader
com.hubspot.spooks.core.UserAgent@753535c0, HubSpot Marketing Grader
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 (http://www.shrinktheweb.com)
scrapy-redis (+https://github.com/rolando/scrapy-redis)
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 PTST/348
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36 PTST/345
TeamSpeak3-ImageFetcher-1.0
wf_crawler (http://www.websitefigures.com)
wf_crawler (http://websitefigures.com)
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 TinEye/1.0 (via http://www.tineye.com/)
TinEye-bot/0.61 (see http://www.tineye.com/crawler.html)
Mozilla/5.0 (compatible; Googlebot/2.1; https://deepcrawl.com/bot)
Googlebot deepcrawl
Mozilla/5.0 (compatible; evc-batch/2.0.20170110125844)
Mozilla/5.0 (compatible; evc-batch/2.0.20161220184139)
Mozilla/5.0 (compatible; evc-batch/2.0.20161122223033)
Mozilla/5.0 (compatible; evc-batch/2.0.20161026010401)
Mozilla/5.0 (X11; Linux x86_64; rv:49.0; GTmetrix https://gtmetrix.com/) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Linux; Android 4.3; Galaxy Nexus Build/JWR66Y; GTmetrix https://gtmetrix.com/) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.68 Mobile Safari/537.36
Mozilla/5.0 (X11; Linux x86_64; GTmetrix https://gtmetrix.com/) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36
Mediumbot-MetaTagFetcher/0.3 (+https://medium.com/)
Mediumbot-ProxyFetcher/0.1 (+https://medium.com/)
YOOBI.de - WebBot 1.0
YOOBI - WebBot 1.0
Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20160309-0050; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)
IAS crawler (page scorer; http://integralads.com/site-indexing-policy/)
Mozilla/5.0 (compatible; JobKeresoBot; +https://www.kozvetlen-allasok.hu/help.jsp; info@kozvetlen-allasok.hu; 5.0
Mozilla/5.0 (compatible; JobKereso; +https://www.kozvetlen-allasok.hu/robot.jsp info@kozvetlen-allasok.hu)
pyspider/0.3.9 (+http://pyspider.org/)
Mozilla/5.0 (X11; U; Linux x86_64; de-DE; rv:1.9.0.19) Gecko/2010120923 ThumbShotsBot (KFSW 3.0.6-3)
Mozilla/5.0 (compatible; Discordbot/1.0; +https://discordapp.com)
Mozilla/5.0 (compatible; Dead Link Checker; http://www.dead-link-checker.com/)
Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0) CrawlerProcess (http://www.PowerMapper.com) /5.24.776.0
Mozilla/4.0 (compatible; BOTW Spider; +http://botw.org)
TwengaBot-2.0
Iframely/1.0.0 (+https://iframely.com/;)
Iframely/0.8.5 (+http://iframely.com/;)
rogerbot/1.2 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+phaser-testing-crawler-01@moz.com)
rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com
Mozilla/5.0 (compatible; Sitemap Generator/1.3; http://www.check-domains.com/sitemap/index.php) Gecko Check-domains)
mindUpBot
Mozilla/5.0 (Windows NT 6.1; compatible; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36
Contacts Crawler (+http://www.scrapinghub.com)
TA SEO Crawler v0.1 URLData.pm (smcquaker@tripadvisor.com)
Mozilla/5.0 (compatible; scanbot/1.0; +http://dazzlepod.com/ip/)
ArielisBot/1.1 (+http://arielis.com/
RedesScrapy/0.24.1 (+http://g2pi.tsc.uc3m.es/es)
Blackboard Safeassign/0.1 (a Storm-based Blackboard Safeassign web-crawler; https://github.com/DigitalPebble/storm-crawler; stormcrawler@digitalpebble.com)
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:48.0) Gecko/20100101 Firefox/48.0 glindahl-cocrawler/0.01 (+http://www.pbm.com/~lindahl/glindahl-cocrawler.html)
botrobin/Nutch-1.13-SNAPSHOT (http://smarter.codes/bot-robin/; botrobin@smarter.codes)
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36 (compatible; aylienbot/0.2; +http://www.aylien.com/bot.html)
GloomarBot/1.1 (https://www.gloomar.com/bot)
Hatena::Bookmark/2.00 (Hatena::Bookmark; master;)
Opendi Screenshot Bot bot@opendi.com
Mozilla/5.0 (compatible; YaK/1.0; http://linkfluence.com/; bot@linkfluence.com)
Mozilla/5.0 (compatible; KAZ.KZ_Bot/3.0)
BuckyOHare/1.3 (Googlebot/2.1; +https://hypefactors.com/webcrawler)
uipbot/1.0 (uipbot@semasio.net)
gMozilla/5.0 (compatible; Infohelfer/1.4.3; +http://www.infohelfer.de/crawler.php)
Mozilla/5.0 (compatible; lincobot/0.3; http://www.linksco.com/about/robot.html) AppEngine-Google; (+http://code.google.com/appengine)

0 comments on commit 053a2b7

Please sign in to comment.