Permalink
Cannot retrieve contributors at this time
#!/usr/bin/ruby | |
# | |
# Copyright (c) 2015 netnea, AG. (https://www.netnea.com/) | |
# | |
# A ruby script to analyse modsecurity core rules anomaly scores in | |
# STDIN. | |
# The data is then extracted and summarized in a statistical table | |
# that gives an overview over the anomaly scores in the data. | |
# | |
# This script was written by Christian Folini. Feel free to use it | |
# and to adopt it to your needs. | |
# | |
# Run with --help to get an usage overview. | |
# | |
# TODO | |
# - option to suppress lines / scores with 0 results / requests | |
# ----------------------------------------------------------- | |
# INIT | |
# ----------------------------------------------------------- | |
require "optparse" | |
require "date" | |
require "pp" | |
require "rubygems" | |
$params = Hash.new | |
$params[:verbose] = false | |
$params[:debug] = false | |
$params[:incoming] = true; | |
$params[:outgoing] = true; | |
$params[:headers] = true; | |
$params[:totals] = true; | |
$params[:empty] = true; | |
$params[:summary] = true; | |
$params[:baseline] = 0; # Number of requests with scores 0/0 to be added to stats | |
# ----------------------------------------------------------- | |
# SUB-FUNCTIONS (those that are specific to this script) | |
# ----------------------------------------------------------- | |
def read_stdin() | |
# Purpose: import data out of STDIN | |
# Input : none | |
# Output : Array of inbound scores, array ouf outbound scores | |
# Remarks: Empty values are mapped to nil, non-integer values are mapped to 0 | |
vprint "Starting to read STDIN" | |
def map_data(data,nils,stats) | |
if data == "-" or data == "" | |
nils = nils + 1 | |
else | |
stats << data.to_i | |
end | |
return nils, stats | |
end | |
stats_in = Array.new() | |
stats_out = Array.new() | |
nils_in = 0 | |
nils_out = 0 | |
n = 0 | |
formatcheck_ok = false | |
STDIN.each do |line| # we checked for STDIN in check parameter phase | |
n = n + 1 | |
dprint "Processing line ##{n}: #{line.chomp}" | |
begin | |
in_data, out_data = line.chomp.split(";") | |
unless formatcheck_ok | |
if in_data == "-" | |
# we accept the empty value "-" | |
formatcheck_ok = true | |
elsif in_data != in_data.to_i.to_s | |
puts_error("Input's first line indicates, input is not in CSV format as") | |
puts_error("explained by help text. This is fatal. Aborting.") | |
exit 1 | |
else | |
formatcheck_ok = true | |
end | |
end | |
nils_in, stats_in = map_data(in_data, nils_in, stats_in) | |
nils_out, stats_out = map_data(out_data, nils_out, stats_out) | |
rescue => detail | |
puts_error("Could not read line ##{n}: \"#{line.chomp}\". Ignoring.") | |
end | |
end | |
1.upto($params[:baseline]) do | |
stats_in << 0 | |
stats_out << 0 | |
end | |
vprint "Done reading STDIN (imported #{n} lines of data)" | |
return nils_in, stats_in, nils_out, stats_out | |
end | |
def print_stats_wrapper(nils_in, stats_in, nils_out, stats_out) | |
# Purpose: print statistics about anomaly score data | |
# Input : stats arrays, number of nil values | |
# Output : statistics to STDOUT | |
# Remarks: none | |
vprint "Starting to calculate and print statistics" | |
def avg(arr) | |
return arr.inject(0.0){ |sum, el| sum + el } / arr.size | |
end | |
def median(arr) | |
sorted = arr.sort | |
len = sorted.length | |
return (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0 | |
end | |
def sample_variance(arr) | |
avg=avg(arr) | |
sum=arr.inject(0){ |acc,i| acc + (i - avg)**2 } | |
return(1/arr.length.to_f*sum) | |
end | |
def standard_deviation(arr) | |
return Math.sqrt(sample_variance(arr)) | |
end | |
def print_stats(verb, nils, stats) | |
def round(f, n) | |
# The ruby "round" function before ruby 1.9 works differently, so we implement it ourselves | |
return (f * 10 ** n).to_i.to_f / 10 ** n | |
end | |
total = stats.length + nils | |
max = stats.max {|a,b| a <=> b } | |
if max | |
freq = Hash.new | |
0.upto(max) { |n| | |
freq[n] = stats.select{|x| x == n }.length | |
} | |
puts "#{verb.upcase} Num of req. | % of req. | Sum of % | Missing %" if $params[:headers] | |
printf("Number of %s req. (total) |%7i | %8.4f%% | %8.4f%% | %8.4f%%\n\n", verb, total, 100, 100, 0) if $params[:totals] | |
sum_perc = 0.0 | |
if $params[:empty] | |
perc = nils / total.to_f * 100 | |
sum_perc = sum_perc + perc | |
printf("Empty or miss. #{verb} score | %6d | %8.4f%% | %8.4f%% | %8.4f%%\n", nils, perc, sum_perc, 100 - sum_perc) | |
end | |
freq.sort_by{ |key, value| key }.each { |key, value| | |
perc = value / total.to_f * 100 | |
sum_perc = sum_perc + perc | |
printf("Reqs with #{verb} score of %3d | %6d | %8.4f%% | %8.4f%% | %8.4f%%\n", key, value, round(perc, 4), round(sum_perc, 4), 100 - round(sum_perc, 4)) | |
} | |
printf("\n#{verb.capitalize} average: %8.4f Median %8.4f Standard deviation %8.4f\n", avg(stats), median(stats), standard_deviation(stats)) if $params[:summary] | |
else | |
# Variable max is empty. This happens when no values were received for incoming or outgoing. | |
# So there is nothing to print really. But this is quite rare. | |
puts "No values received for #{verb.upcase}, only empty entries. Thus nothing to print." | |
puts "Please check your input." | |
end | |
puts if ($params[:incoming] && $params[:outgoing] && verb == "incoming" && $params[:outgoing]) | |
puts if ($params[:incoming] && $params[:outgoing] && verb == "incoming" && $params[:outgoing] && ($params[:header] || $params[:summary])) | |
end | |
print_stats("incoming", nils_in, stats_in) if $params[:incoming] | |
print_stats("outgoing", nils_out, stats_out) if $params[:outgoing] | |
vprint "Done printing statistics" | |
end | |
# ----------------------------------------------------------- | |
# GENERIC SUB-FUNCTIONS (those that come with every script) | |
# ----------------------------------------------------------- | |
def vprint(text) | |
# Purpose: output text if global variable $verbose is set. | |
# Input : String input | |
# Output : stdout | |
# Remarks: none | |
if $params[:verbose] | |
puts text + "\n" | |
end | |
end | |
def dprint(text) | |
# Purpose: output text if global variable $debug is set. | |
# Input : String input | |
# Output : stdout | |
# Remarks: none | |
if $params[:debug] | |
puts text + "\n" | |
end | |
end | |
def check_stdin () | |
# Purpose: Check for access to STDIN | |
# Input : none | |
# Output : bool | |
# Remarks: none | |
if STDIN.tty? | |
# no stdin | |
return false | |
else | |
# stdin | |
return true | |
end | |
end | |
def check_parameters() | |
# Purpose: check parameters | |
# Input : global variable params | |
# Output : stderr in case there is a problem with one of the parameters | |
# Return : true if there is an error with one of the parameters; or false in absence of errors | |
# Remarks: None | |
err_status = false | |
# unless /^foo$/.match($params["x"]) | |
# $stderr.puts "Error in parameter x ..." | |
# err_status = true | |
# end | |
unless check_stdin() | |
puts_error("No STDIN available. This is fatal. Aborting.", nil) | |
exit 1 | |
end | |
unless $params[:baseline].to_s.to_i == $params[:baseline] | |
puts_error("Baseline parameter is not integer. This is fatal. Aborting.", nil) | |
exit 1 | |
end | |
return err_status | |
end | |
def puts_error(msg, detail=nil) | |
# Purpose: Print error message | |
# Input : string msg and detail exception object | |
# Output : $stderr | |
# Return : None | |
# Remarks: There is a ruby exception class hierarchy. | |
# See http://makandracards.com/makandra/4851-ruby-exception-class-hierarchy | |
err_status = false | |
$stderr.puts msg | |
$stderr.puts "Error: #{detail.message}" if detail | |
$stderr.puts "Backtrace:" if detail | |
$stderr.puts detail.backtrace.join("\n") if detail | |
$stderr.puts "--------------------------" | |
end | |
# ----------------------------------------------------------- | |
# COMMAND LINE PARAMETER EXTRACTION | |
# ----------------------------------------------------------- | |
# | |
begin | |
parser = OptionParser.new do|opts| | |
opts.banner = <<EOF | |
A ruby script to analyse modsecurity anomaly scores in STDIN. | |
The data is extracted and summarized in a statistical table | |
that gives an overview over the anomaly scores in the data. | |
This script was written by Christian Folini and put into the | |
public domain. Feel free to use it and to adopt it to your needs. | |
Usage: #{__FILE__} [options] | |
EOF | |
opts.banner.gsub!(/^\t/, "") | |
opts.separator "" | |
opts.separator "Options:" | |
opts.on('-d', '--debug', 'Display debugging infos') do |none| | |
$params[:debug] = true; | |
end | |
opts.on('-b', '--baseline MAN', 'Indicate baseline of additional requests with score 0/0') do |baseline| | |
$params[:baseline_str] = baseline; | |
end | |
opts.on('-v', '--verbose', 'Be verbose') do |none| | |
$params[:verbose] = true; | |
end | |
opts.on('-i', '--incoming', 'Display only incoming statistics') do |none| | |
$params[:outgoing] = false; | |
end | |
opts.on('-o', '--outgoing', 'Display only outgoing statistics') do |none| | |
$params[:incoming] = false; | |
end | |
opts.on('-H', '--noheaders', 'Do not display column headers') do |none| | |
$params[:headers] = false; | |
end | |
opts.on('-T', '--nototal', 'Do not display total number of requests') do |none| | |
$params[:totals] = false; | |
end | |
opts.on('-E', '--noempty', 'Do not display number of empty values') do |none| | |
$params[:empty] = false; | |
end | |
opts.on('-S', '--nosummary', 'Do not display stat. summary (avg, median, etc.)') do |none| | |
$params[:summary] = false; | |
end | |
opts.on('-h', '--help', 'Displays Help') do | |
puts opts | |
exit | |
end | |
# Usage notes (to be printed in help text after cli options) | |
notes = <<EOF | |
Notes: | |
Input is supposed to by in CSV format, | |
one request with the two scores per line: | |
<incoming_anomaly_score>;<outgoing_anomaly_score> | |
I.e.: | |
0;0 | |
1;0 | |
0;0 | |
0;2 | |
12;3 | |
0;0 | |
2;0 | |
0;1 | |
2;5 | |
... | |
ATTENTION: Missing anomaly scores are excluded from the calculation | |
of the average, the median and the standard deviation. | |
You get this stream of scores by defining the webserver's access log | |
accordingly and then extract the data out of that format. | |
Note that you can add an additional baseline of STR requests to the | |
statistics. This makes sense if your STDIN comes without the requests | |
which did not trigger any rules, but you want to include them in | |
the calculation. | |
Example: | |
LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \\ | |
%v %A %p %R %{BALANCER_WORKER_ROUTE}e \ %X \"%{cookie}n\" \\ | |
%{UNIQUE_ID}e %I %O %{ratio}n%% %D \\ | |
%{TX.perf_modsecinbound}M %{TX.perf_application}M %{TX.perf_modsecoutbound}M \\ | |
%{TX.INBOUND_ANOMALY_SCORE}M %{TX.OUTBOUND_ANOMALY_SCORE}M" extended | |
$> cat access.log | egrep -o "[0-9]+ [0-9]+$" | tr " " ";" | modsec-positive-stats.rb | |
INCOMING Num of req. | % of req. | Sum of % | Missing % | |
Number of incoming req. (total) | 10000 | 100.0000% | 100.0000% | 0.0000% | |
Empty or miss. incoming score | 0 | 0.0000% | 0.0000% | 100.0000% | |
Reqs with incoming score of 0 | 9970 | 99.7000% | 99.7000% | 0.3000% | |
Reqs with incoming score of 1 | 4 | 0.0400% | 99.7400% | 0.2600% | |
Reqs with incoming score of 2 | 21 | 0.2100% | 99.9500% | 0.0500% | |
Reqs with incoming score of 3 | 0 | 0.0000% | 99.9500% | 0.0500% | |
Reqs with incoming score of 4 | 4 | 0.0400% | 99.9900% | 0.0100% | |
Reqs with incoming score of 5 | 1 | 0.0100% | 100.0000% | 0.0000% | |
Incoming average: 0.0067 Median 0.0000 Standard deviation 0.1329 | |
OUTGOING Num of req. | % of req. | Sum of % | Missing % | |
Number of outgoing req. (total) | 10000 | 100.0000% | 100.0000% | 0.0000% | |
Empty or miss. outgoing score | 0 | 0.0000% | 0.0000% | 100.0000% | |
Reqs with outgoing score of 0 | 9997 | 99.9700% | 99.9700% | 0.0300% | |
Reqs with outgoing score of 1 | 0 | 0.0000% | 99.9700% | 0.0300% | |
Reqs with outgoing score of 2 | 0 | 0.0000% | 99.9700% | 0.0300% | |
Reqs with outgoing score of 3 | 0 | 0.0000% | 99.9700% | 0.0300% | |
Reqs with outgoing score of 4 | 2 | 0.0200% | 99.9900% | 0.0100% | |
Reqs with outgoing score of 5 | 1 | 0.0100% | 100.0000% | 0.0000% | |
Outgoing average: 0.0013 Median 0.0000 Standard deviation 0.0755 | |
Note that you can add an additional baseline of STR requests to the | |
statistics. This makes sense if your STDIN comes without the requests | |
with a score of 0/0, but you want to include them in the calculation | |
for the sake of a clean statistic. | |
Filtering out scores of 0/0 is very useful on big logfiles where script | |
takes a long time to run. | |
EOF | |
opts.on_tail(notes) | |
end | |
parser.parse! | |
unless $params[:baseline_str].nil? | |
if $params[:baseline_str].to_i.to_s != $params[:baseline_str] | |
$stderr.puts "Baseline parameter is not integer. This is fatal. Aborting." | |
exit 1 | |
else | |
$params[:baseline] = $params[:baseline_str].to_i | |
end | |
end | |
rescue OptionParser::InvalidOption => detail | |
puts_error("Invalid Option in command line parameter extraction. This is fatal. Aborting.", detail) | |
exit 1 | |
rescue => detail | |
puts_error("Unknown error in command line parameter extraction. This is fatal. Aborting.", detail) | |
exit 1 | |
end | |
# ----------------------------------------------------------- | |
# MAIN | |
# ----------------------------------------------------------- | |
vprint "Starting parameter checking" | |
exit 1 if (check_parameters) | |
vprint "Starting main program" | |
nils_in, stats_in, nils_out, stats_out = read_stdin() | |
print_stats_wrapper(nils_in, stats_in, nils_out, stats_out) | |
vprint "Done. Bailing out." | |