Skip to content

Commit

Permalink
Validate BingPreview the same way as BingBot.
Browse files Browse the repository at this point in the history
Also, skip all fake-browser tests for already-validated bots since they are pointless.
  • Loading branch information
Flameeyes committed Aug 23, 2017
1 parent 5b2a34c commit 688068d
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 12 deletions.
4 changes: 2 additions & 2 deletions rules/flameeyes_15_robots_validation.conf
Expand Up @@ -65,8 +65,8 @@ SecRule REQUEST_HEADERS:User-Agent "@contains googlebot" \
SecRule REMOTE_HOST "!@endsWith .googlebot.com" "chain"
SecRule REQUEST_HEADERS:User-Agent "!^Googlebot-richsnippets"

SecRule REQUEST_HEADERS:User-Agent "@pm msnbot bingbot" \
"id:431501,t:lowercase,chain,deny,status:403,msg:'Fake msnbot/bingbot crawler.',phase:2"
SecRule REQUEST_HEADERS:User-Agent "@pm msnbot bingbot bingpreview" \
"id:431501,t:lowercase,chain,deny,status:403,msg:'Fake msnbot/bingbot/bingpreview crawler.',phase:2"
SecRule REMOTE_HOST "!(msnbot-\d+-\d+-\d+-\d+\.search\.msn\.com|\.msn\.net)$"

SecRule REQUEST_HEADERS:User-Agent "@contains yahoo! slurp" \
Expand Down
29 changes: 19 additions & 10 deletions rules/flameeyes_60_fake_browsers.conf
Expand Up @@ -12,6 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

# Checking if the browser provides all HTTP/1.1-compliant headers is
# usually very helpful; unfortunately a number of transparent proxies
# seem to mangle this badly, causing false positives. If using a
# proxy, bypass this section.

# A number of spambots try to mimic known good User-Agent values,
# harvested either on honeypot sites, or through statistics viewers
# such as awstats. Often enough, they get them slightly wrong,
Expand Down Expand Up @@ -92,14 +97,16 @@ SecRule REQUEST_HEADERS:User-Agent "!@pm newsblur linkedinbot" "t:lowercase"
# HEADER CHECKING STARTS HERE #
###############################

# Checking if the browser provides all HTTP/1.1-compliant headers is
# usually very helpful; unfortunately a number of transparent proxies
# seem to mangle this badly, causing false positives. If using a
# proxy, bypass this section.

SecRule IP:FLAMEEYES_IS_PROXY "@eq 1" \
"id:436080,phase:2,skipAfter:FLAMEEYES_END_FAKE_BROWSERS_HEADERS,nolog"

# Don't try to validate GoogleBot, BingBot and other similar user
# agents, because they will try to pass for browsers even when they
# are not. These bots are already validated through forward-confirmed
# reverse DNS (FCrDNS), so no further validation is needed here.
#
# This rule has to run in phase 2: skipAfter only skips rules that
# belong to the phase in which it fires, and the header checks being
# bypassed are all phase 2 rules.
SecRule REQUEST_HEADERS:User-Agent "@pm googlebot bingbot bingpreview" \
"id:436998,phase:2,skipAfter:FLAMEEYES_END_FAKE_BROWSERS_HEADERS,nolog"

# Unfortunately some of Bing's verification features try to pass for
# MSIE8 even though the request does not look like it.
SecRule REMOTE_HOST "^msnbot(?:-[0-9]+){4}.search.msn.com$" \
Expand Down Expand Up @@ -132,11 +139,13 @@ SecRule &REQUEST_HEADERS:Accept-Language "@eq 0" \
"id:436083,chain,phase:2,msg:'Missing Accept-Language header when passing as a browser',deny,status:403"
SecRule REQUEST_HEADERS:User-Agent "@pm safari opera" \
"t:lowercase,chain"
# Epiphany browser reports itself as Safari (and Chrome!) but has a
# much rougher HTTP implementation. Similarly dwb. GoogleBot reports
# itself as Mobile Safari, but uses a much reduced request, newsblur
# is also reporting itself as Safari.
SecRule REQUEST_HEADERS:User-Agent "!@pm dwb epiphany google newsblur s~feedly-social vienna bingbot" \
# A number of browsers appear to report themselves as Safari/Chrome,
# but with significantly rougher HTTP clients, namely Epiphany and
# dwb.
#
# Feed readers including NewsBlur, Feedly and Vienna appear to have
# similar bugs.
SecRule REQUEST_HEADERS:User-Agent "!@pm dwb epiphany newsblur s~feedly-social vienna" \
"t:lowercase"

# Sony PlayStation 3 systems will provide a further header that stay
Expand Down

0 comments on commit 688068d

Please sign in to comment.