From 0ffea34489b258a2709bfe93a9553e1efa5d1904 Mon Sep 17 00:00:00 2001 From: MaxGiting Date: Tue, 25 Aug 2020 22:35:55 +0100 Subject: [PATCH] Add 27 crawlers (#408) --- raw/Crawlers.json | 2 +- raw/Crawlers.txt | 29 +++++++++++++++++++++++++++-- src/Fixtures/Crawlers.php | 29 +++++++++++++++++++++++++++-- tests/crawlers.txt | 29 ++++++++++++++++++++++++++++- 4 files changed, 83 insertions(+), 6 deletions(-) diff --git a/raw/Crawlers.json b/raw/Crawlers.json index 9076d69..9487ce4 100644 --- a/raw/Crawlers.json +++ b/raw/Crawlers.json @@ -1 +1 @@ -[" YLT","^b0t$","^bluefish ","^Calypso v\\\/","^COMODO DCV","^DangDang","^DavClnt","^DHSH","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^HTTPClient\\\/","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL Control","^NG\\\/[0-9\\.]","^NING\\\/","^PHP\\\/[0-9]","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2ip\\.ru","404enemy","7Siters","80legs","a\\.pr-cy\\.ru","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acebookexternalhit\\\/","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adreview\\\/","adscanner","Adstxtaggregator","adstxt-worker","adstxt\\.com","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","^Amazon CloudFront$","^Amazon Simple Notification Service Agent$","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Ant\\.com","Anturis Agent","AnyEvent-HTTP\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppleSyndication","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","AskQuickly","Ask Jeeves","ASPSeek","Asterias","Astute","asynchttp","Attach","attohttpc","autocite","AutomaticWPTester","Autonomy","axios\\\/","AWS Security Scanner","B-l-i-t-z-B-O-T","Backlink-Ceck","backlink-check","BacklinkHttpStatus","BackStreet","BackupLand","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","basicstate","BatchFTP","Battleztar Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bewica-security-scan","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","biz_Directory","Black Hole","Blackboard Safeassign","BlackWidow","BlockNote\\.Net","BlogBridge","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","Boost\\.Beast","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","BrandVerity","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","centuryb.o.t9[at]gmail.com","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cg-eye","changedetection","ChangesMeter","Charlotte","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudflare-Healthchecks","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","colly -","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","convera","CookieReports","copyright sheriff","CopyRightCheck","Copyscape","cortex\\\/","Cosmos4j\\.feedback","Covario-IDS","Craw\\\/","Crescent","Crowsnest","Criteo","CSHttp","CSSCheck","Cula\\\/","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ \\\/][0-9]","dBpoweramp","ddline","deeris","delve\\.ai","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Disqus\\\/","Dispatch\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","DomainLabz","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS Agent","dubaiindex","DuplexWeb-Google","DynatraceSynthetic","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","ExoRank","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookexternalhit","facebookexternalua","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconkit","faviconarchive","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","FeedshowOnline","Feedspot","FeedViewer\\\/","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fiery%20Feeds","Filestack","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","fluffy","Flunky","flynxapp","forensiq","FoundSeoTool","http:\\\/\\\/www.neomo.de\\\/","free thumbnails","Freeuploader","FreshRSS","Funnelback","Fuzz Faster U Fool","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Geziyor","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","GitHub-Hookshot","github\\.com","Goldfire Server","Go [\\d\\.]* package http","Go http package","Go-Ahead-Got-It","Go-http-client","Go!Zilla","gobyus","Gofeed","gofetch","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google-Podcast","Google PP Default","Google Search Console","Google Web Preview","Google-Ads-Overview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-Publisher-Plugin","Google-Read-Aloud","Google-SearchByImage","Google-Site-Verification","Google-speakr","Google-Structured-Data-Testing-Tool","Google-Youtube-Links","google-xrawler","GoogleDocs","GoogleHC\\\/","GoogleProducer","GoogleSites","Google-Transparency-Report","Gookey","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi Agent","HappyApps-WebCheck","Hatena","Havij","HaxerMen","HeadlessChrome","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","Hexometer","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner Detection","HTTP_Compression_Test","http_request2","http_requester","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http\\.rb\\\/","http_get","HttpComponents","httphr","HTTPMon","HTTPie","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","httrack","huaweisymantec","HubSpot ","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","iGooglePortal","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","internet_archive","Internet Ninja","InternetSeer","internetVista monitor","internetwache","intraVnews","IODC","IOI","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","IZaBEE","iZSearch","JAHHO","janforman","Jaunt\\\/","Java.*outbrain","javelin\\.io","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords Research","khttp\\\/","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kubectl","kube-probe","kulturarw3","KumKie","L\\.webis","Larbin","Lavf\\\/","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lightspeedsystems","Lighthouse","Likse","limber\\.io","Link Valet","link_thumbnailer","LinkAlarm\\\/","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreviewGenerator","LinkScan","LinksManager","LinkTiger","LinkWalker","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL API","longurl-r-package","looid\\.com","looksystems\\.net","ltx71","lua-resty-http","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","mabontland","Mag-Net","MagpieRSS","Mail\\.Ru","MailChimp","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","Mediametric","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft Office ","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft Data Access","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","Mister PiX","mixdata dot com","mixed-content-scan","Mixmax-LinkPreview","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Monit\\\/","monitis","Monitority\\\/","montastic","MonTools","Moreover","Morfeus Fucking Scanner","Morning Paper","MovableType","mowser","Mr\\.4x3 Powered","Mrcgiguy","MS Web Services Client Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","nagios","Najdi\\.si","Name Intelligence","Nameprotect","Navroad","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","NetSystemsResearch","Nexgate Ruby Client","nghttp2","NG-Search","Nibbler","NICErsPRO","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-superagent","node-urllib","node\\.io","Nodemeter","NodePing","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","NotionEmbedder","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","OgScrper","okhttp","omgili","OMSC","Online Domain Tools","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","OPPO A33","Optimizer","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","ow\\.ly","Owler","Owlin","ownCloud News","OxfordCloudService","Page Valet","page_verifier","page scorer","page2rss","PageFreezer","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","phpservermon","php-requests","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","ping\\.blo\\.gs","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Ploetz \\+ Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port Monitor","postano","PostmanRuntime","PostPost","postrank","PowerPoint\\\/","Prebid","Priceonomics Analysis Engine","PrintFriendly","PritTorrent","Prlog","probethenet","Project ?25499","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pump","python-httpx","Python-httplib2","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","QuiteRSS","Quora Link Preview","Qwantify","Radian6","RankActive","RankFlex","RankSonicSiteAuditor","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","Reeder\\\/","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI Hunter","RPT-HTTPClient","RSSOwl","RyowlEngine","safe-agent-scanner","SalesIntelligent","Saleslift","Sendsay\\.Ru","SauceNAO","SBIder","sc-downloader","scalaj-http","Scamadviser-Frontend","scan\\.lol","ScanAlert","Scoop","scooter","ScoutJet","ScoutURLMonitor","ScrapeBox Page Scanner","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","search\\.thunderstone","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","SEOkicks","SEOlizer","Seomoz","SEOprofiler","SEOsearch","seoscanners","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","shortURL lengthener","ShortLinkTranslate","shrinktheweb","Sideqik","Siege","SimplePie","SimplyFast","Siphon","SISTRIX","Site-Shot\\\/","Site Sucker","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","slider\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","spaziodati","SPDYCheck","Specificfeeds","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","StatusCake","Steeler","Stratagems Kumo","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","swcd ","Symfony BrowserKit","Symfony2 BrowserKit","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","TextRazor Downloader","The Drop Reaper","The Expert HTML Source Viewer","The Knowledge AI","The Intraformant","theinternetrules","TheNomad","Thinklab","Thumbshots","ThumbSniper","Thumbor","timewe\\.net","TinEye","Tiny Tiny RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","unchaos","unirest-java","UniversalFeedParser","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","Urlcheckr","URL Verifier","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","Vacuum","Vagabondo","VB Project","vBSEO","VCI","via ggpht\\.com GoogleImageProxy","Virusdie","visionutils","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C_Unicorn","W3C-checklink","W3C-mobileOK","WAC-OFU","Wallpapers\\\/[0-9]+","WallpapersHD","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","web-capture\\.net","Web-sniffer","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web spyder","Web Sucker","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","Webthumb\\\/","WebThumbnail","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatCMS","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoAPI\\\/","WhoRunsCoinHive","Whynder Magnet","WinHTTP\\\/","WinHttp-Autoproxy-Service","Windows-RSS-Platform","WinPodder","wkhtmlto","wmtips","Woko","Wolfram HTTPClient","woorankreview","Word\\\/","WordPress\\\/","worldping-api","WordupinfoSearch","wotbox","WP Engine Install Performance API","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","www\\.monitor\\.us","WWWOFFLE","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-(ASR|BSC)","Y\\!J-BRW","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooMailProxy","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zauba","Zemanta Aggregator","Zend_Http_Client","Zend\\\\Http\\\\Client","Zermelo","Zeus ","zgrab","ZnajdzFoto","ZnHTTP","Zombie\\.js","Zoom\\.Mac","ZyBorg","[a-z0-9\\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)"] \ No newline at end of file +[" YLT","^b0t$","^bluefish ","^Calypso v\\\/","^Corax","^COMODO DCV","^DangDang","^DavClnt","^DHSH","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^HTTPClient\\\/","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL Control","^NG\\\/[0-9\\.]","^NING\\\/","^Nuclei","^PHP\\\/[0-9]","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^Swurl ","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2GDPR\\\/","2ip\\.ru","404enemy","7Siters","80legs","a\\.pr-cy\\.ru","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acebookexternalhit\\\/","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adreview\\\/","adscanner","Adstxtaggregator","adstxt-worker","adstxt\\.com","AffiliateLabz\\\/","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","^Amazon CloudFront$","^Amazon Simple Notification Service Agent$","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Ant\\.com","Anturis Agent","AnyEvent-HTTP\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","apimon\\.de","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppleSyndication","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","AskQuickly","Ask Jeeves","ASPSeek","Asterias","Astute","asynchttp","Attach","attohttpc","autocite","AutomaticWPTester","Autonomy","axios\\\/","AWS Security Scanner","B-l-i-t-z-B-O-T","Backlink-Ceck","backlink-check","BacklinkHttpStatus","BackStreet","BackupLand","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","basicstate","BatchFTP","Battleztar Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bewica-security-scan","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","biz_Directory","Black Hole","Blackboard Safeassign","BlackWidow","BlockNote\\.Net","BlogBridge","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","Boost\\.Beast","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","BrandVerity","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","centuryb.o.t9[at]gmail.com","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cg-eye","changedetection","ChangesMeter","Charlotte","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudflare-Healthchecks","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","colly -","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","Contextual Code Sites Explorer","convera","CookieReports","copyright sheriff","CopyRightCheck","Copyscape","cortex\\\/","Cosmos4j\\.feedback","Covario-IDS","Craw\\\/","Crescent","Crowsnest","Criteo","CSHttp","CSSCheck","Cula\\\/","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ \\\/][0-9]","dBpoweramp","ddline","deeris","delve\\.ai","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Disqus\\\/","Dispatch\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","DomainLabz","Domains Project\\\/","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS Agent","dubaiindex","DuplexWeb-Google","DynatraceSynthetic","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","ExoRank","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookexternalhit","facebookexternalua","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconkit","faviconarchive","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","Feedshow\\\/","FeedshowOnline","Feedspot","FeedViewer\\\/","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fiery%20Feeds","Filestack","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","fluffy","Flunky","flynxapp","forensiq","FoundSeoTool","http:\\\/\\\/www.neomo.de\\\/","free thumbnails","Freeuploader","FreshRSS","Funnelback","Fuzz Faster U Fool","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","gdnplus\\.com","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Geziyor","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","GitHub-Hookshot","github\\.com","Goldfire Server","Go [\\d\\.]* package http","Go http package","Go-Ahead-Got-It","Go-http-client","Go!Zilla","gobyus","Gofeed","gofetch","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google-Podcast","Google PP Default","Google Search Console","Google Web Preview","Google-Ads-Overview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-Publisher-Plugin","Google-Read-Aloud","Google-SearchByImage","Google-Site-Verification","Google-speakr","Google-Structured-Data-Testing-Tool","Google-Youtube-Links","google-xrawler","GoogleDocs","GoogleHC\\\/","GoogleProducer","GoogleSites","Google-Transparency-Report","Gookey","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi Agent","HappyApps-WebCheck","Hatena","Havij","HaxerMen","HeadlessChrome","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","Hexometer","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner Detection","HTTP_Compression_Test","http_request2","http_requester","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http\\.rb\\\/","http_get","HttpComponents","httphr","HTTPMon","HTTPie","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","httrack","huaweisymantec","HubSpot ","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","iGooglePortal","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","internet_archive","Internet Ninja","InternetSeer","internetVista monitor","internetwache","intraVnews","IODC","IOI","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","IZaBEE","iZSearch","JAHHO","janforman","Jaunt\\\/","Java.*outbrain","javelin\\.io","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords Research","khttp\\\/","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kubectl","kube-probe","kulturarw3","KumKie","L\\.webis","Larbin","Lavf\\\/","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lightspeedsystems","Lighthouse","Likse","limber\\.io","Link Valet","link_thumbnailer","LinkAlarm\\\/","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkScan","LinksManager","LinkPreview","LinkTiger","LinkWalker","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL API","longurl-r-package","looid\\.com","looksystems\\.net","ltx71","lua-resty-http","Lush Http Client","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","mabontland","Mag-Net","MagpieRSS","Mail\\.Ru","MailChimp","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","Mediametric","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft\\.Data\\.Mashup","Microsoft Office ","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft Data Access","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","Mister PiX","mixdata dot com","mixed-content-scan","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Monit\\\/","monitis","Monitority\\\/","montastic","MonTools","Moreover","Morfeus Fucking Scanner","Morning Paper","MovableType","mowser","Mr\\.4x3 Powered","Mrcgiguy","MS Web Services Client Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","myseosnapshot","nagios","Najdi\\.si","Name Intelligence","NameFo\\.com","Nameprotect","nationalarchives","Navroad","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","NetSystemsResearch","Nexgate Ruby Client","nghttp2","NG-Search","Nibbler","NICErsPRO","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-superagent","node-urllib","node\\.io","Nodemeter","NodePing","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","NotionEmbedder","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","OgScrper","okhttp","omgili","OMSC","Online Domain Tools","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","OPPO A33","Optimizer","Open Source RSS","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","ow\\.ly","Owler","Owlin","ownCloud News","OxfordCloudService","Page Valet","page_verifier","page scorer","page2rss","PageFreezer","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","phpservermon","php-requests","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","ping\\.blo\\.gs","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Ploetz \\+ Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port Monitor","postano","PostmanRuntime","postplanner\\.com","PostPost","postrank","PowerPoint\\\/","Prebid","Priceonomics Analysis Engine","PrintFriendly","PritTorrent","Prlog","probethenet","Project ?25499","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pump","python-httpx","Python-httplib2","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","QuiteRSS","Quora Link Preview","Qwantify","Radian6","RankActive","RankFlex","RankSonicSiteAuditor","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","Reeder\\\/","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI Hunter","RPT-HTTPClient","RSSOwl","RyowlEngine","safe-agent-scanner","SalesIntelligent","Saleslift","Sendsay\\.Ru","SauceNAO","SBIder","sc-downloader","scalaj-http","Scamadviser-Frontend","scan\\.lol","ScanAlert","Scoop","scooter","ScoutJet","ScopeContentAG-HTTP-Client","ScoutURLMonitor","ScrapeBox Page Scanner","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","search\\.thunderstone","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","SEOkicks","SEOlizer","Seomoz","SEOprofiler","SEOsearch","seoscanners","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","shortURL lengthener","ShortLinkTranslate","shrinktheweb","Sideqik","Siege","SimplePie","SimplyFast","Siphon","SISTRIX","Site-Shot\\\/","Site Sucker","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","slider\\.com","sli-systems\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","spaziodati","SPDYCheck","Specificfeeds","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","Statically-Screenshot","StatusCake","Steeler","Stratagems Kumo","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","swcd ","Symfony BrowserKit","Symfony2 BrowserKit","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","techiaith\\.cymru","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","TextRazor Downloader","The Drop Reaper","The Expert HTML Source Viewer","The Knowledge AI","The Intraformant","theinternetrules","TheNomad","Thinklab","Thumbshots","ThumbSniper","Thumbor","timewe\\.net","TinEye","Tiny Tiny RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","unchaos","unirest-java","ultimate_sitemap_parser","UniversalFeedParser","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","Urlcheckr","URL Verifier","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","Vacuum","Vagabondo","VB Project","vBSEO","VCI","via ggpht\\.com GoogleImageProxy","Virusdie","visionutils","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C_Unicorn","W3C-checklink","W3C-mobileOK","WAC-OFU","Wallpapers\\\/[0-9]+","WallpapersHD","WakeletLinkExpander","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","web-capture\\.net","Web-sniffer","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web spyder","Web Sucker","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","Webthumb\\\/","WebThumbnail","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatCMS","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoAPI\\\/","WhoRunsCoinHive","Whynder Magnet","WinHTTP\\\/","WinHttp-Autoproxy-Service","Windows-RSS-Platform","WinPodder","wkhtmlto","wmtips","Woko","Wolfram HTTPClient","woorankreview","Word\\\/","WordPress\\\/","worldping-api","WordupinfoSearch","wotbox","WP Engine Install Performance API","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","www\\.monitor\\.us","WWWOFFLE","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","XenForo\\\/","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-(ASR|BSC)","Y\\!J-BRW","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooMailProxy","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yomins\\.com","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zauba","Zemanta Aggregator","Zend_Http_Client","Zend\\\\Http\\\\Client","Zermelo","Zeus ","zgrab","ZnajdzFoto","ZnHTTP","Zombie\\.js","Zoom\\.Mac","ZoteroTranslationServer","ZyBorg","[a-z0-9\\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)"] \ No newline at end of file diff --git a/raw/Crawlers.txt b/raw/Crawlers.txt index dc81b94..8be4ff2 100644 --- a/raw/Crawlers.txt +++ b/raw/Crawlers.txt @@ -2,6 +2,7 @@ ^b0t$ ^bluefish ^Calypso v\/ +^Corax ^COMODO DCV ^DangDang ^DavClnt @@ -19,9 +20,11 @@ ^Microsoft URL Control ^NG\/[0-9\.] ^NING\/ +^Nuclei ^PHP\/[0-9] ^RMA\/ ^Ruby|Ruby\/[0-9] +^Swurl ^VSE\/[0-9] ^WordPress\.com ^XRL\/[0-9] @@ -29,6 +32,7 @@ 008\/ 13TABS 192\.comAgent +2GDPR\/ 2ip\.ru 404enemy 7Siters @@ -55,6 +59,7 @@ adscanner Adstxtaggregator adstxt-worker adstxt\.com +AffiliateLabz\/ agentslug AHC aihit @@ -86,6 +91,7 @@ Apache-HttpAsyncClient Apache-HttpClient ApacheBench Apexoo +apimon\.de APIs-Google AportWorm\/ AppBeat\/ @@ -217,6 +223,7 @@ Commons-HttpClient commonscan contactbigdatafr contentkingapp +Contextual Code Sites Explorer convera CookieReports copyright sheriff @@ -270,6 +277,7 @@ docoloc Dolphin http client DomainAppender DomainLabz +Domains Project\/ Donuts Content Explorer dotMailer content retrieval dotSemantic @@ -340,6 +348,7 @@ FeedBunch\/ FeedBurner feeder Feedly +Feedshow\/ FeedshowOnline Feedspot FeedViewer\/ @@ -374,6 +383,7 @@ Fuzz Faster U Fool G-i-g-a-b-o-t g00g1e\.net ganarvisitas +gdnplus\.com geek-tools Genieo GentleSource @@ -626,9 +636,9 @@ linkdex LinkExaminer linkfluence linkpeek -LinkPreviewGenerator LinkScan LinksManager +LinkPreview LinkTiger LinkWalker Lipperhey @@ -642,6 +652,7 @@ looid\.com looksystems\.net ltx71 lua-resty-http +Lush Http Client lwp-request lwp-trivial LWP::Simple @@ -673,6 +684,7 @@ Metaspinner MetaURI MFC_Tear_Sample Microsearch +Microsoft\.Data\.Mashup Microsoft Office Microsoft Outlook Microsoft Windows Network Diagnostics @@ -686,7 +698,6 @@ Miniflux Mister PiX mixdata dot com mixed-content-scan -Mixmax-LinkPreview mixnode Mnogosearch mogimogi @@ -711,10 +722,13 @@ MuckRack\/ muhstik-scan MVAClient MxToolbox\/ +myseosnapshot nagios Najdi\.si Name Intelligence +NameFo\.com Nameprotect +nationalarchives Navroad NearSite Needle @@ -784,6 +798,7 @@ Openstat\/ OpenVAS OPPO A33 Optimizer +Open Source RSS Orbiter OrgProbe\/ orion-semantics @@ -850,6 +865,7 @@ Porkbun Port Monitor postano PostmanRuntime +postplanner\.com PostPost postrank PowerPoint\/ @@ -926,6 +942,7 @@ ScanAlert Scoop scooter ScoutJet +ScopeContentAG-HTTP-Client ScoutURLMonitor ScrapeBox Page Scanner Scrapy @@ -999,6 +1016,7 @@ sitexy\.com SkypeUriPreview Slack\/ slider\.com +sli-systems\.com slurp SlySearch SmartDownload @@ -1036,6 +1054,7 @@ SSL Labs ssl-tools StackRambler Statastico\/ +Statically-Screenshot StatusCake Steeler Stratagems Kumo @@ -1060,6 +1079,7 @@ tAkeOut Tarantula\/ Taringa UGC TarmotGezgin +techiaith\.cymru Teleport Telesoft Telesphoreo @@ -1114,6 +1134,7 @@ uclassify UdmSearch unchaos unirest-java +ultimate_sitemap_parser UniversalFeedParser Unshorten\.It Untiny @@ -1155,6 +1176,7 @@ W3C-mobileOK WAC-OFU Wallpapers\/[0-9]+ WallpapersHD +WakeletLinkExpander wangling Wappalyzer WatchMouse @@ -1256,6 +1278,7 @@ WWWOFFLE x09Mozilla x22Mozilla XaxisSemanticsClassifier +XenForo\/ Xenu Link Sleuth XING-contenttabreceiver xpymep([0-9]?)\.exe @@ -1273,6 +1296,7 @@ yanga yeti Yo-yo Yoleo Consumer +yomins\.com yoogliFetchAgent YottaaMonitor Your-Website-Sucks @@ -1293,5 +1317,6 @@ ZnajdzFoto ZnHTTP Zombie\.js Zoom\.Mac +ZoteroTranslationServer ZyBorg [a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper) \ No newline at end of file diff --git a/src/Fixtures/Crawlers.php b/src/Fixtures/Crawlers.php index 267803f..ac991de 100644 --- a/src/Fixtures/Crawlers.php +++ b/src/Fixtures/Crawlers.php @@ -23,6 +23,7 @@ class Crawlers extends AbstractProvider '^b0t$', '^bluefish ', '^Calypso v\/', + '^Corax', '^COMODO DCV', '^DangDang', '^DavClnt', @@ -40,9 +41,11 @@ class Crawlers extends AbstractProvider '^Microsoft URL Control', '^NG\/[0-9\.]', '^NING\/', + '^Nuclei', '^PHP\/[0-9]', '^RMA\/', '^Ruby|Ruby\/[0-9]', + '^Swurl ', '^VSE\/[0-9]', '^WordPress\.com', '^XRL\/[0-9]', @@ -50,6 +53,7 @@ class Crawlers extends AbstractProvider '008\/', '13TABS', '192\.comAgent', + '2GDPR\/', '2ip\.ru', '404enemy', '7Siters', @@ -76,6 +80,7 @@ class Crawlers extends AbstractProvider 'Adstxtaggregator', 'adstxt-worker', 'adstxt\.com', + 'AffiliateLabz\/', 'agentslug', 'AHC', 'aihit', @@ -107,6 +112,7 @@ class Crawlers extends AbstractProvider 'Apache-HttpClient', 'ApacheBench', 'Apexoo', + 'apimon\.de', 'APIs-Google', 'AportWorm\/', 'AppBeat\/', @@ -238,6 +244,7 @@ class Crawlers extends AbstractProvider 'commonscan', 'contactbigdatafr', 'contentkingapp', + 'Contextual Code Sites Explorer', 'convera', 'CookieReports', 'copyright sheriff', @@ -291,6 +298,7 @@ class Crawlers extends AbstractProvider 'Dolphin http client', 'DomainAppender', 'DomainLabz', + 'Domains Project\/', 'Donuts Content Explorer', 'dotMailer content retrieval', 'dotSemantic', @@ -361,6 +369,7 @@ class Crawlers extends AbstractProvider 'FeedBurner', 'feeder', 'Feedly', + 'Feedshow\/', 'FeedshowOnline', 'Feedspot', 'FeedViewer\/', @@ -395,6 +404,7 @@ class Crawlers extends AbstractProvider 'G-i-g-a-b-o-t', 'g00g1e\.net', 'ganarvisitas', + 'gdnplus\.com', 'geek-tools', 'Genieo', 'GentleSource', @@ -647,9 +657,9 @@ class Crawlers extends AbstractProvider 'LinkExaminer', 'linkfluence', 'linkpeek', - 'LinkPreviewGenerator', 'LinkScan', 'LinksManager', + 'LinkPreview', 'LinkTiger', 'LinkWalker', 'Lipperhey', @@ -663,6 +673,7 @@ class Crawlers extends AbstractProvider 'looksystems\.net', 'ltx71', 'lua-resty-http', + 'Lush Http Client', 'lwp-request', 'lwp-trivial', 'LWP::Simple', @@ -694,6 +705,7 @@ class Crawlers extends AbstractProvider 'MetaURI', 'MFC_Tear_Sample', 'Microsearch', + 'Microsoft\.Data\.Mashup', 'Microsoft Office ', 'Microsoft Outlook', 'Microsoft Windows Network Diagnostics', @@ -707,7 +719,6 @@ class Crawlers extends AbstractProvider 'Mister PiX', 'mixdata dot com', 'mixed-content-scan', - 'Mixmax-LinkPreview', 'mixnode', 'Mnogosearch', 'mogimogi', @@ -732,10 +743,13 @@ class Crawlers extends AbstractProvider 'muhstik-scan', 'MVAClient', 'MxToolbox\/', + 'myseosnapshot', 'nagios', 'Najdi\.si', 'Name Intelligence', + 'NameFo\.com', 'Nameprotect', + 'nationalarchives', 'Navroad', 'NearSite', 'Needle', @@ -805,6 +819,7 @@ class Crawlers extends AbstractProvider 'OpenVAS', 'OPPO A33', 'Optimizer', + 'Open Source RSS', 'Orbiter', 'OrgProbe\/', 'orion-semantics', @@ -871,6 +886,7 @@ class Crawlers extends AbstractProvider 'Port Monitor', 'postano', 'PostmanRuntime', + 'postplanner\.com', 'PostPost', 'postrank', 'PowerPoint\/', @@ -947,6 +963,7 @@ class Crawlers extends AbstractProvider 'Scoop', 'scooter', 'ScoutJet', + 'ScopeContentAG-HTTP-Client', 'ScoutURLMonitor', 'ScrapeBox Page Scanner', 'Scrapy', @@ -1020,6 +1037,7 @@ class Crawlers extends AbstractProvider 'SkypeUriPreview', 'Slack\/', 'slider\.com', + 'sli-systems\.com', 'slurp', 'SlySearch', 'SmartDownload', @@ -1057,6 +1075,7 @@ class Crawlers extends AbstractProvider 'ssl-tools', 'StackRambler', 'Statastico\/', + 'Statically-Screenshot', 'StatusCake', 'Steeler', 'Stratagems Kumo', @@ -1081,6 +1100,7 @@ class Crawlers extends AbstractProvider 'Tarantula\/', 'Taringa UGC', 'TarmotGezgin', + 'techiaith\.cymru', 'Teleport', 'Telesoft', 'Telesphoreo', @@ -1135,6 +1155,7 @@ class Crawlers extends AbstractProvider 'UdmSearch', 'unchaos', 'unirest-java', + 'ultimate_sitemap_parser', 'UniversalFeedParser', 'Unshorten\.It', 'Untiny', @@ -1176,6 +1197,7 @@ class Crawlers extends AbstractProvider 'WAC-OFU', 'Wallpapers\/[0-9]+', 'WallpapersHD', + 'WakeletLinkExpander', 'wangling', 'Wappalyzer', 'WatchMouse', @@ -1277,6 +1299,7 @@ class Crawlers extends AbstractProvider 'x09Mozilla', 'x22Mozilla', 'XaxisSemanticsClassifier', + 'XenForo\/', 'Xenu Link Sleuth', 'XING-contenttabreceiver', 'xpymep([0-9]?)\.exe', @@ -1294,6 +1317,7 @@ class Crawlers extends AbstractProvider 'yeti', 'Yo-yo', 'Yoleo Consumer', + 'yomins\.com', 'yoogliFetchAgent', 'YottaaMonitor', 'Your-Website-Sucks', @@ -1314,6 +1338,7 @@ class Crawlers extends AbstractProvider 'ZnHTTP', 'Zombie\.js', 'Zoom\.Mac', + 'ZoteroTranslationServer', 'ZyBorg', '[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)', ); diff --git a/tests/crawlers.txt b/tests/crawlers.txt index 428b0f2..3864f84 100644 --- a/tests/crawlers.txt +++ b/tests/crawlers.txt @@ -3533,4 +3533,31 @@ DHSH Ads Check DHSH Statistik Chec DHSH Law Check DHSH User Check -Mozilla/5.0+(compatible; Cula/2.0; https://cula.io/) \ No newline at end of file +Mozilla/5.0+(compatible; Cula/2.0; https://cula.io/) +Winds: Open Source RSS & Podcast app: https://getstream.io/winds/ +XenForo/2.x (https://www.recreationalflying.com) +TECHIAITH_CY (+https://techiaith.cymru) +ScopeContentAG-HTTP-Client www.thescope.com/0.1 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Lush Http Client +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36 The National Archives UK Government Web Archive: http://www.nationalarchives.gov.uk/webarchive/; webarchive@nationalarchives.gov.uk +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36 (compatible; NameFo; https://namefo.com/) +Mozilla/5.0 (Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36 (compatible; Statically-Screenshot; +https://statically.io/screenshot) +Mozilla/5.0 (compatible; Domains Project/1.0.8; +https://domainsproject.org) +Mozilla/5.0 (compatible; AffiliateLabz/1.0; +https://affiliatelabz.com/) +Mozilla/5.0 (compatible; 2GDPR/1.2; https://2gdpr.com) +Mozilla/5.0 (+http://www.sli-systems.com/) Mammoth/0.1 +Microsoft.Data.Mashup (https://go.microsoft.com/fwlink/?LinkID=304225) +LinkPreview/1.5 (http://linkpreview.net) +https://gdnplus.com:Gather Analyze Provide. +Feedshow/x.0 (http://www.feedshow.com; 1 subscriber) +apimon.de/http-apis#redirect +ZoteroTranslationServer/WMF (mailto:services@lists.wikimedia.org) +yomins.com/0.3 +WakeletLinkExpander/0.1 +ultimate_sitemap_parser/0.5 +Swurl Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36 +postplanner.com/site-scraping +Nuclei (@pdiscoveryio) +myseosnapshot/1.0 +Contextual Code Sites Explorer +Corax - support@coraxcyber.com \ No newline at end of file