Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiment ac rust/v11 #9911

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 61 additions & 0 deletions benches/profile-mpm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
#
# Run Suricata once for every combination of MPM algorithm, pcap file and
# rule file, logging each run into its own profile-mpm-<pcap>-<rules>-<algo>
# directory, then grep the DETECT_PF_PAYLOAD lines out of the resulting
# packet_stats.log files.
#
# Run from the Suricata source directory (the script executes src/suricata)
# with a binary that was compiled with --enable-profiling.

set -e
set -x

while getopts ":c:p:r:a:h" option; do
    case $option in
        h)
            echo "$0 -c <suricata yaml> -p <pcap file> -r <rule file> -a <mpm algo(s)>"
            echo
            echo "Example:"
            echo "$0 -c suricata.yaml -p /path/to/pcapfile -r /path/to/rulefile -a \"hs ac ac-ks\""
            echo
            echo "Run from your Suricata directory, with Suricata compiled with --enable-profiling"
            exit 0
            ;;
        a)
            ALGOS="$OPTARG"
            ;;
        c)
            YAML="$OPTARG"
            ;;
        p)
            PCAP="$OPTARG"
            ;;
        r)
            RULES="$OPTARG"
            ;;
        :)
            # The leading ':' in the optstring puts getopts in silent mode, so
            # a missing argument has to be reported here.
            echo "ERROR option -$OPTARG requires an argument" >&2
            exit 1
            ;;
        \?)
            echo "ERROR unknown option -$OPTARG" >&2
            exit 1
            ;;
    esac
done
YAML=${YAML:-"suricata.yaml"}

if [ ! -f src/suricata ]; then
    echo "ERROR src/suricata not found"
    exit 1
fi
# Number of --build-info lines mentioning PROFILING. The pipeline ends in wc,
# so a grep without matches does not trip `set -e`.
HAS_PROFILING=$(src/suricata --build-info | grep PROFILING | wc -l)
if [ "$HAS_PROFILING" -ne 1 ]; then
    echo "ERROR suricata should be built with --enable-profiling"
    exit 1
fi

# Quoted so that a multi-word value such as "hs ac ac-ks" stays one argument
# to the test instead of producing "too many arguments".
if [ -z "$ALGOS" ] || [ -z "$PCAP" ] || [ -z "$RULES" ]; then
    echo "ERROR need -a -p and -r"
    exit 1
fi

# ALGOS, PCAP and RULES are deliberately left unquoted in the loop headers:
# each may hold a whitespace separated list.
for A in $ALGOS; do
    for P in $PCAP; do
        for R in $RULES; do
            PCAP_BASE=$(basename "$P")
            RULE_BASE=$(basename "$R")
            DIRBASE="profile-mpm-$PCAP_BASE-$RULE_BASE"
            DIRNAME="$DIRBASE-$A"
            mkdir -p "$DIRNAME"
            src/suricata -c "$YAML" -r "$P" -S "$R" --set mpm-algo="$A" -l "$DIRNAME" --runmode=single -v --set profiling.packets.append=no --set profiling.prefilter.append=no
        done
    done
done

# Print the DETECT_PF_PAYLOAD rows that contain a whitespace delimited "6"
# from every run (presumably the TCP / IP protocol 6 rows -- confirm against
# the packet_stats.log layout).
grep -E "DETECT_PF_PAYLOAD.*\s6\s " profile-mpm-*/packet_stats.log
1 change: 1 addition & 0 deletions rust/Cargo.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ brotli = "~3.4.0"
hkdf = "~0.12.3"
aes = "~0.7.5"
aes-gcm = "~0.9.4"
aho-corasick = "~1.1.2"

der-parser = "~8.2.0"
kerberos-parser = { version = "~0.7.1", default_features = false }
Expand Down
17 changes: 17 additions & 0 deletions rust/src/common.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
/* Copyright (C) 2023 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

//! Utility library module for commonly used strings, hexadecimals and other elements.

use super::build_slice;
Expand Down
1 change: 1 addition & 0 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,4 @@ pub mod plugin;
pub mod lzma;
pub mod util;
pub mod ffi;
pub mod mpm_ac_rs;
157 changes: 157 additions & 0 deletions rust/src/mpm_ac_rs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* Copyright (C) 2023 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

/* Wrapper around the aho-corasick crate that exposes it through Suricata's
 * MPM API. */

use aho_corasick::AhoCorasick;

/// Settings kept for a single pattern, next to the pattern bytes themselves.
#[derive(Debug, Clone)]
struct AhoCorasickPatternData {
    /// The pattern bytes as they were registered.
    pat: Vec<u8>,
    /// Whether the pattern is case insensitive.
    ci: bool,
    /// A match starting before this position is rejected by the search.
    offset: u16,
    /// When non-zero, a match ending after this position is rejected by the search.
    depth: u16,
    /// Signature ids handed to the match callback for this pattern.
    sids: Vec<u32>,
}

impl AhoCorasickPatternData {
    /// Bundle a pattern with its settings. Note that `sids` comes before
    /// `offset` and `depth` in the argument list.
    fn new(pat: Vec<u8>, ci: bool, sids: Vec<u32>, offset: u16, depth: u16) -> Self {
        Self {
            pat,
            ci,
            offset,
            depth,
            sids,
        }
    }
}

/// Collects patterns and their settings before the matcher is built.
#[derive(Default)]
pub struct AhoCorasickStateBuilder {
    /// Vector of patterns. The final pattern id will depend on the position in this
    /// vector, starting at 0.
    patterns: Vec<Vec<u8>>,
    /// Vector of pattern settings, stored at the same index as the pattern in
    /// `patterns`. Will be copied to `AhoCorasickState` in the prepare step.
    pattern_data: Vec<AhoCorasickPatternData>,
    /// Track if we have case insensitive patterns. If so, we need to tell AC and
    /// do a bit more work in validation.
    has_ci: bool,
}

impl AhoCorasickStateBuilder {
    /// Create an empty builder.
    fn new() -> Self {
        Self::default()
    }

    /// Append a pattern together with its settings.
    ///
    /// The pattern id is the index at which the pattern lands in `patterns`;
    /// `pattern_data` receives the matching entry at the same index.
    fn add_pattern(&mut self, pat: Vec<u8>, ci: bool, sids: Vec<u32>, offset: u16, depth: u16) {
        self.has_ci |= ci;
        // One copy goes into the pattern list; the owned buffer itself is
        // moved into the pattern data instead of being cloned a second time.
        self.patterns.push(pat.clone());
        self.pattern_data
            .push(AhoCorasickPatternData::new(pat, ci, sids, offset, depth));
    }
}

/// Allocate an empty `AhoCorasickStateBuilder` on the heap and return it as an
/// opaque pointer.
///
/// The pointer comes from `Box::into_raw`, so the builder stays allocated
/// until it is turned back into a `Box` (see `rs_mpm_acrs_free_builder`).
#[no_mangle]
pub extern "C" fn rs_mpm_acrs_new_builder() -> *mut std::os::raw::c_void {
    let builder = Box::new(AhoCorasickStateBuilder::new());
    Box::into_raw(builder) as *mut _
}

/// Free a builder that was handed out as a raw pointer.
///
/// A NULL pointer is ignored.
///
/// # Safety
///
/// `state` must be NULL or a pointer to a heap allocated
/// `AhoCorasickStateBuilder` (as returned by `rs_mpm_acrs_new_builder`) that
/// has not been freed yet. The pointer must not be used after this call.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_free_builder(state: *mut std::os::raw::c_void) {
    // Box::from_raw on a NULL pointer is undefined behaviour.
    if state.is_null() {
        return;
    }
    // Take ownership back so the builder is dropped right here.
    drop(Box::from_raw(state as *mut AhoCorasickStateBuilder));
}

/// Add one pattern, with its sids and settings, to the builder.
///
/// The pattern bytes and the sids are copied into vectors owned by the
/// builder. Always returns 0.
///
/// # Safety
///
/// `pat` and `sids` are turned into slices of `pat_len` and `sids_len`
/// elements through `build_slice!`; presumably both must be valid for reads of
/// that many elements -- confirm against the macro definition.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_add_pattern(
    state: &mut AhoCorasickStateBuilder, pat: *mut u8, pat_len: u16, sids: *mut u32,
    sids_len: u32, ci: bool, offset: u16, depth: u16,
) -> i32 {
    let pattern_bytes = build_slice!(pat, pat_len as usize);
    let sid_list = build_slice!(sids, sids_len as usize);
    state.add_pattern(pattern_bytes.to_vec(), ci, sid_list.to_vec(), offset, depth);
    0
}

/// The built matcher together with the per-pattern settings the search needs.
pub struct AhoCorasickState {
    /// Number of patterns the matcher was built from.
    pattern_cnt: u32,
    /// The matcher from the aho-corasick crate.
    ac: AhoCorasick,
    /// Copy of the builder's per-pattern settings.
    pattern_data: Vec<AhoCorasickPatternData>,
    /// Copy of the builder's case-insensitivity flag.
    has_ci: bool,
}

impl AhoCorasickState {
    /// Build the AC state from the builder.
    ///
    /// The matcher is made ASCII case insensitive when the builder saw at
    /// least one case insensitive pattern.
    ///
    /// # Panics
    ///
    /// Panics when the aho-corasick crate reports an error while building.
    fn prepare(builder: &AhoCorasickStateBuilder) -> Self {
        let mut ac_builder = AhoCorasick::builder();
        ac_builder.ascii_case_insensitive(builder.has_ci);
        let ac = ac_builder.build(&builder.patterns).unwrap();
        Self {
            pattern_cnt: builder.patterns.len() as u32,
            ac,
            pattern_data: builder.pattern_data.clone(),
            has_ci: builder.has_ci,
        }
    }
}

/// Build an `AhoCorasickState` from the builder and return it as an opaque
/// heap pointer.
///
/// The builder is only borrowed. The returned pointer comes from
/// `Box::into_raw`, so the state stays allocated until it is turned back into
/// a `Box` (see `rs_mpm_acrs_state_free`).
#[no_mangle]
pub extern "C" fn rs_mpm_acrs_prepare_builder(builder: &AhoCorasickStateBuilder) -> *mut std::os::raw::c_void {
    let prepared = Box::new(AhoCorasickState::prepare(builder));
    Box::into_raw(prepared) as *mut _
}
/// Free a state that was handed out as a raw pointer.
///
/// A NULL pointer is ignored.
///
/// # Safety
///
/// `state` must be NULL or a pointer to a heap allocated `AhoCorasickState`
/// (as returned by `rs_mpm_acrs_prepare_builder`) that has not been freed yet.
/// The pointer must not be used after this call.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_state_free(state: *mut std::os::raw::c_void) {
    // Box::from_raw on a NULL pointer is undefined behaviour.
    if state.is_null() {
        return;
    }
    // Take ownership back so the state is dropped right here.
    drop(Box::from_raw(state as *mut AhoCorasickState));
}

/// Search for the patterns. Returns number of matches.
/// Per pattern found sids are only appended once.
#[no_mangle]
pub unsafe extern "C" fn rs_mpm_acrs_search(state: &AhoCorasickState, data: *const u8, data_len: u32,
cb: unsafe extern "C" fn(*mut std::os::raw::c_void, *const u32, u32),
cbdata: *mut std::os::raw::c_void) -> u32
{
let haystack = build_slice!(data, data_len as usize);
let mut match_cnt : u32 = 0;
// array of bools for patterns we found
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment is wrong, it's used as a bitarray now

let mut matches = vec![0u8; (state.pattern_cnt as usize / 8) + 1];
for mat in state.ac.find_overlapping_iter(haystack) {
let pat_id = mat.pattern().as_u32();
/* bail if we found this pattern before */
if matches[(pat_id / 8) as usize] & (1 << (pat_id % 8) as usize) != 0 {
SCLogDebug!("pattern {:?} already found", pat_id);
continue;
}

let pattern = &state.pattern_data[mat.pattern()];
if state.has_ci && !pattern.ci {
let found = &haystack[mat.start()..mat.end()];
if found != pattern.pat {
SCLogDebug!("pattern {:?} failed: not an exact match", pat_id);
continue;
}
}

/* enforce offset and depth */
if pattern.offset as usize > mat.start() {
SCLogDebug!("pattern {:?} failed: found before offset", pat_id);
continue;
}
if pattern.depth != 0 && mat.end() > pattern.depth as usize {
SCLogDebug!("pattern {:?} failed: after depth", pat_id);
continue;
}
matches[(pat_id / 8) as usize] |= 1 << (pat_id % 8) as usize;
SCLogDebug!("match! {:?}: {:?}", pat_id, pattern);
cb(cbdata, pattern.sids.as_ptr(), pattern.sids.len() as u32);
match_cnt += 1;
}
match_cnt
}
4 changes: 2 additions & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -578,9 +578,9 @@ noinst_HEADERS = \
util-mem.h \
util-memrchr.h \
util-misc.h \
util-mpm-ac-bs.h \
util-mpm-ac.h \
util-mpm-ac-ks.h \
util-mpm-ac-rs.h \
util-mpm.h \
util-mpm-hs.h \
util-napatech.h \
Expand Down Expand Up @@ -1177,10 +1177,10 @@ libsuricata_c_a_SOURCES = \
util-memcmp.c \
util-memrchr.c \
util-misc.c \
util-mpm-ac-bs.c \
util-mpm-ac.c \
util-mpm-ac-ks.c \
util-mpm-ac-ks-small.c \
util-mpm-ac-rs.c \
util-mpm.c \
util-mpm-hs.c \
util-napatech.c \
Expand Down
3 changes: 3 additions & 0 deletions src/detect-engine-mpm.c
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,9 @@ uint8_t PatternMatchDefaultMatcher(void)
#endif
if (strcmp("auto", mpm_algo) == 0) {
goto done;
} else if (strcmp("ac-bs", mpm_algo) == 0) {
SCLogWarning("mpm-algo \"ac-bs\" has been removed. See ticket #6586.");
goto done;
}
for (uint8_t u = 0; u < MPM_TABLE_SIZE; u++) {
if (mpm_table[u].name == NULL)
Expand Down
5 changes: 1 addition & 4 deletions src/detect-engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -2744,10 +2744,7 @@ static int DetectEngineCtxLoadConf(DetectEngineCtx *de_ctx)
/* for now, since we still haven't implemented any intelligence into
* understanding the patterns and distributing mpm_ctx across sgh */
if (de_ctx->mpm_matcher == MPM_AC || de_ctx->mpm_matcher == MPM_AC_KS ||
#ifdef BUILD_HYPERSCAN
de_ctx->mpm_matcher == MPM_HS ||
#endif
de_ctx->mpm_matcher == MPM_AC_BS) {
de_ctx->mpm_matcher == MPM_HS || de_ctx->mpm_matcher == MPM_AC_RS) {
de_ctx->sgh_mpm_ctx_cnf = ENGINE_SGH_MPM_FACTORY_CONTEXT_SINGLE;
} else {
de_ctx->sgh_mpm_ctx_cnf = ENGINE_SGH_MPM_FACTORY_CONTEXT_FULL;
Expand Down