Skip to content

Commit

Permalink
Merge pull request #120 from JayBizzle/develop
Browse files — browse the repository at this point in the history
Added a load of new bot regexes
  • Loading branch information
JayBizzle committed Aug 19, 2016
2 parents 3cf4b25 + 06cd937 commit 82fa7ac
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 23 deletions.
78 changes: 61 additions & 17 deletions src/Fixtures/Crawlers.php
Expand Up @@ -23,39 +23,51 @@ class Crawlers extends AbstractProvider
'008\/',
'^Java\/',
'^NING\/',
'^PHP\/[0-9]',
'^Ruby|Ruby\/[0-9]',
'^WordPress\.com',
'a3logics\.in',
'A6-Indexer',
'a\.pr-cy\.ru',
'Aboundex',
'aboutthedomain',
'Accoona-AI-Agent',
'acoon',
'adbeat',
'AddThis',
'ADmantX',
'adressendeutschland',
'Advanced Email Extractor v',
'agentslug',
'AHC',
'aihit',
'Airmail',
'akula\/k311',
'alertra',
'alexa site audit',
'alyze\.info',
'Anemone',
'Ant\.com',
'Anturis Agent',
'Apache-HttpClient\/',
'AppEngine-Google',
'Arachmo',
'arachnode',
'Arachnophilia',
'archive-com',
'aria2',
'AskQuickly',
'Astute',
'autocite',
'Autonomy',
'B-l-i-t-z-B-O-T',
'Backlink-Ceck\.de',
'Bad-Neighborhood',
'baidu\.com',
'baypup\/colbert',
'BazQux',
'BDFetch',
'BegunAdvertising\/',
'bibnum\.bnf',
'BigBozz',
'biglotron',
Expand All @@ -75,22 +87,24 @@ class Crawlers extends AbstractProvider
'BuzzSumo',
'CakePHP',
'CapsuleChecker',
'CaretNail',
'cb crawl',
'CC Metadata Scaper',
'Cerberian Drtrs',
'cg-eye',
'changedetection',
'Charlotte',
'CheckHost',
'CirrusExplorer\/',
'CISPA Vulnerability Notification',
'clips\.ua\.ac\.be',
'CloudFlare-AlwaysOnline',
'cmcm\.com',
'coccoc',
'contactbigdatafr',
'CommaFeed',
'Commons-HttpClient',
'Comodo SSL Checker',
'contactbigdatafr',
'convera',
'copyright sheriff',
'cosmos\/[0-9]',
Expand Down Expand Up @@ -144,6 +158,7 @@ class Crawlers extends AbstractProvider
'ezooms',
'facebookexternalhit',
'facebookplatform',
'fairshare',
'Faraday v',
'Faveeo',
'FavOrg',
Expand All @@ -157,6 +172,7 @@ class Crawlers extends AbstractProvider
'Feedly',
'Feedspot',
'FeedValidator',
'feeltiptop',
'Fetch API',
'Fever\/[0-9]',
'findlink',
Expand Down Expand Up @@ -203,6 +219,7 @@ class Crawlers extends AbstractProvider
'GoSpotCheck',
'GoSquared-Status-Checker',
'gosquared-thumbnailer',
'GotSiteMonitor',
'Grammarly',
'grouphigh',
'grub-client',
Expand All @@ -213,6 +230,7 @@ class Crawlers extends AbstractProvider
'HeartRails_Capture',
'heritrix',
'Holmes',
'HostTracker',
'ht:\/\/check',
'htdig',
'HTMLParser\/',
Expand All @@ -224,10 +242,10 @@ class Crawlers extends AbstractProvider
'http_requester',
'httphr',
'HTTPMon',
'httpunit',
'HttpUrlConnection',
'httpscheck',
'httpssites_power',
'httpunit',
'HttpUrlConnection',
'httrack',
'huaweisymantec',
'HubPages.*crawlingpolicy',
Expand All @@ -240,6 +258,7 @@ class Crawlers extends AbstractProvider
'Iframely',
'igdeSpyder',
'IlTrovatore',
'ImageEngine\/',
'InAGist',
'inbound\.li parser',
'InDesign%20CC',
Expand All @@ -254,11 +273,14 @@ class Crawlers extends AbstractProvider
'IODC',
'IOI',
'ips-agent',
'isitup\.org',
'iZSearch',
'Jigsaw',
'Jobboerse',
'jobo',
'Jobrapido',
'KeepRight OpenStreetMap Checker',
'KimonoLabs\/',
'knows\.is',
'kouio',
'KrOWLer',
Expand All @@ -274,15 +296,18 @@ class Crawlers extends AbstractProvider
'linkCheck',
'linkdex',
'LinkExaminer',
'linkfluence',
'LinkTiger',
'LinkWalker',
'Lipperhey',
'livedoor ScreenShot',
'LoadImpactPageAnalyzer',
'LoadImpactRload',
'LongURL API',
'ltx71',
'lwp-trivial',
'lycos',
'LYT\.SR',
'mabontland',
'MagpieRSS',
'Mail.Ru',
Expand Down Expand Up @@ -313,9 +338,9 @@ class Crawlers extends AbstractProvider
'Mrcgiguy',
'mShots',
'MVAClient',
'Netcraft SSL Server Survey',
'Netcraft Web Server Survey',
'NetcraftSurveyAgent',
'nagios',
'Najdi\.si\/',
'NETCRAFT',
'NetLyzer FastProbe',
'netresearch',
'NetShelter ContentScan',
Expand All @@ -325,7 +350,6 @@ class Crawlers extends AbstractProvider
'NeutrinoAPI',
'NewsBlur .*(Fetcher|Finder)',
'NewsGator',
'nagios',
'newsme',
'newspaper\/',
'NG-Search',
Expand All @@ -346,23 +370,26 @@ class Crawlers extends AbstractProvider
'omgili',
'Online Domain Tools',
'OpenCalaisSemanticProxy',
'Openstat\/',
'OpenVAS',
'Optimizer',
'Orbiter',
'ow\.ly',
'ownCloud News',
'Page Analyzer',
'Page Valet',
'page2rss',
'page_verifier',
'PagePeeker',
'panscient',
'Panopta',
'panscient',
'parsijoo',
'PayPal IPN',
'Pcore-HTTP',
'peerindex',
'Peew',
'PhantomJS\/',
'Photon\/',
'^PHP\/[0-9]',
'phpcrawl',
'phpservermon',
'Pi-Monster',
Expand All @@ -374,17 +401,23 @@ class Crawlers extends AbstractProvider
'Plukkie',
'PocketParser',
'Pompos',
'Port Monitor',
'postano',
'PostPost',
'postrank',
'Priceonomics Analysis Engine',
'Prlog',
'Project 25499',
'prospectb2b',
'Protopage',
'proximic',
'PTST ',
'PTST\/[0-9]+',
'Pulsepoint XT3 web scraper',
'Python-httplib2',
'python-requests',
'Python-urllib',
'Qirina Hurdler',
'Qseero',
'Qualidator.com SiteAnalyzer',
'Quora Link Preview',
Expand All @@ -401,8 +434,8 @@ class Crawlers extends AbstractProvider
'Rival IQ',
'Robosourcer',
'ROI Hunter',
'^Ruby|Ruby\/[0-9]',
'SalesIntelligent',
'SauceNAO',
'SBIder',
'Scoop',
'scooter',
Expand All @@ -412,7 +445,9 @@ class Crawlers extends AbstractProvider
'Scrubby',
'SearchSight',
'semanticdiscovery',
'semanticjuice',
'SEO Browser',
'Seo Servis',
'SEOstats',
'Server Density Service Monitoring',
'servernfo\.com',
Expand All @@ -424,14 +459,14 @@ class Crawlers extends AbstractProvider
'SilverReader',
'SimplePie',
'SimplyFast',
'Site-Shot\/',
'Site24x7',
'SiteBar',
'siteexplorer\.info',
'SiteGuardian',
'Siteimprove\.com',
'Sitemap(s)? Generator',
'SiteTruth',
'SiteUptime',
'SkypeUriPreview',
'slider\.com',
'slurp',
Expand All @@ -443,14 +478,17 @@ class Crawlers extends AbstractProvider
'Snoopy',
'sogou web',
'SortSite',
'spaziodati',
'Specificfeeds',
'speedy',
'SPEng',
'Spinn3r',
'spray-can',
'spyonweb',
'Sqworm',
'SSL Labs',
'StackRambler',
'Statastico\/',
'StatusCake',
'Stratagems Kumo',
'StudioFACA',
Expand All @@ -475,11 +513,11 @@ class Crawlers extends AbstractProvider
'Twingly',
'Typhoeus',
'ubermetrics-technologies',
'uclassify',
'UdmSearch',
'UnwindFetchor',
'updated',
'Upflow',
'uptimedoctor',
'URLChecker',
'urlresolver',
'Urlstat',
Expand All @@ -503,7 +541,9 @@ class Crawlers extends AbstractProvider
'wangling',
'Wappalyzer',
'WatchMouse',
'WbSrch\/',
'web-capture\.net',
'Web-Monitoring',
'Web-sniffer',
'Webauskunft',
'WebCapture',
Expand All @@ -515,10 +555,13 @@ class Crawlers extends AbstractProvider
'webkit2png',
'webmastercoffee',
'webmon ',
'webnumbrFetcher',
'weborama-fetcher',
'webscreenie',
'Webshot',
'Website Analyzer\/',
'websitepulse[+ ]checker',
'Websnapr\/',
'Websquash\.com',
'WebThumbnail',
'WeCrawlForThePeace',
Expand All @@ -530,15 +573,15 @@ class Crawlers extends AbstractProvider
'WhatsApp',
'WhatWeb',
'Whibse',
'wkhtmlto',
'wmtips',
'Whynder Magnet',
'Windows-RSS-Platform',
'WinHttpRequest',
'wkhtmlto',
'wmtips',
'Woko',
'WomlpeFactory',
'Word\/',
'WordPress\/',
'^WordPress\.com',
'wotbox',
'WP Engine Install Performance API',
'WPScan',
Expand All @@ -557,6 +600,7 @@ class Crawlers extends AbstractProvider
'YahooCacheSystem',
'YahooSeeker',
'YahooYSMcm',
'YandeG',
'yandex',
'yanga',
'yeti',
Expand All @@ -567,11 +611,11 @@ class Crawlers extends AbstractProvider
'yourls\.org',
'Zao',
'Zemanta Aggregator',
'Zend_Http_Client',
'Zend\\\\Http\\\\Client',
'Zend_Http_Client',
'zgrab',
'ZnajdzFoto',
'ZyBorg',
'[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)',
'[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider|uptime)',
);
}

0 comments on commit 82fa7ac

Please sign in to comment.