diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 0f04045694..ce796d60ba 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -114,6 +114,7 @@ jobs: with: profile: minimal toolchain: stable + components: llvm-tools - name: Install and cache deps uses: awalsh128/cache-apt-pkgs-action@v1.1.0 with: diff --git a/.gitignore b/.gitignore index 47c99960d0..73b176e72d 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,8 @@ perf.data.old .vscode test.dict +.idea/ + # Ignore all built fuzzers fuzzer_* AFLplusplus diff --git a/Cargo.toml b/Cargo.toml index 82bda6dcfb..1f67e09493 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ members = [ "libafl_concolic/test/runtime_test", "libafl_derive", "libafl_frida", + "libafl_libfuzzer", "libafl_nyx", "libafl_qemu", "libafl_sugar", diff --git a/Dockerfile b/Dockerfile index 0ffa0438e3..9c0dcc9c31 100644 --- a/Dockerfile +++ b/Dockerfile @@ -78,6 +78,10 @@ COPY scripts/dummy.rs libafl_nyx/src/lib.rs COPY libafl_tinyinst/Cargo.toml libafl_tinyinst/ COPY scripts/dummy.rs libafl_tinyinst/src/lib.rs +# avoid pulling in the runtime, as this is quite an expensive build, until later +COPY libafl_libfuzzer/Cargo.toml libafl_libfuzzer/ +COPY scripts/dummy.rs libafl_libfuzzer/src/lib.rs + COPY utils utils RUN cargo build && cargo build --release @@ -117,6 +121,10 @@ COPY libafl_concolic/symcc_runtime libafl_concolic/symcc_runtime COPY libafl_concolic/test libafl_concolic/test COPY libafl_nyx/src libafl_nyx/src RUN touch libafl_nyx/src/lib.rs +COPY libafl_libfuzzer/src libafl_libfuzzer/src +COPY libafl_libfuzzer/libafl_libfuzzer_runtime libafl_libfuzzer/libafl_libfuzzer_runtime +COPY libafl_libfuzzer/build.rs libafl_libfuzzer/build.rs +RUN touch libafl_libfuzzer/src/lib.rs RUN cargo build && cargo build --release # Copy fuzzers over diff --git a/fuzzers/baby_fuzzer_wasm/pkg/package.json b/fuzzers/baby_fuzzer_wasm/pkg/package.json index 
9a87bd8141..cdfba2f3b8 100644 --- a/fuzzers/baby_fuzzer_wasm/pkg/package.json +++ b/fuzzers/baby_fuzzer_wasm/pkg/package.json @@ -11,5 +11,7 @@ ], "module": "baby_fuzzer_wasm.js", "types": "baby_fuzzer_wasm.d.ts", - "sideEffects": false + "sideEffects": [ + "./snippets/*" + ] } \ No newline at end of file diff --git a/fuzzers/baby_fuzzer_wasm/src/lib.rs b/fuzzers/baby_fuzzer_wasm/src/lib.rs index 5c4db77cd3..434dcb7329 100644 --- a/fuzzers/baby_fuzzer_wasm/src/lib.rs +++ b/fuzzers/baby_fuzzer_wasm/src/lib.rs @@ -4,7 +4,7 @@ use libafl::{ corpus::{Corpus, InMemoryCorpus}, events::SimpleEventManager, executors::{ExitKind, InProcessExecutor}, - feedbacks::{CrashFeedback, MaxMapFeedback}, + feedbacks::{CrashFeedback, MapFeedbackMetadata, MaxMapFeedback}, generators::RandPrintablesGenerator, inputs::{BytesInput, HasTargetBytes}, monitors::SimpleMonitor, @@ -15,7 +15,9 @@ use libafl::{ state::{HasSolutions, StdState}, Fuzzer, StdFuzzer, }; -use libafl_bolts::{current_nanos, rands::StdRand, tuples::tuple_list, AsSlice}; +use libafl_bolts::{ + current_nanos, rands::StdRand, serdeany::RegistryBuilder, tuples::tuple_list, AsSlice, +}; use wasm_bindgen::prelude::*; use web_sys::{Performance, Window}; @@ -37,6 +39,10 @@ pub extern "C" fn external_current_millis() -> u64 { pub fn fuzz() { set_panic_hook(); + unsafe { + RegistryBuilder::register::>(); + } + let mut signals = [0u8; 64]; let signals_ptr = signals.as_mut_ptr(); let signals_set = |i: usize| unsafe { diff --git a/fuzzers/fuzzbench/src/lib.rs b/fuzzers/fuzzbench/src/lib.rs index e92350ba6b..6c29839798 100644 --- a/fuzzers/fuzzbench/src/lib.rs +++ b/fuzzers/fuzzbench/src/lib.rs @@ -60,7 +60,7 @@ use nix::{self, unistd::dup}; pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let res = match Command::new(env!("CARGO_PKG_NAME")) .version(env!("CARGO_PKG_VERSION")) diff --git 
a/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs b/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs index dc771b9dac..9f3b940f39 100644 --- a/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs +++ b/fuzzers/fuzzbench_fork_qemu/src/fuzzer.rs @@ -60,7 +60,7 @@ use nix::{self, unistd::dup}; pub fn main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let res = match Command::new(env!("CARGO_PKG_NAME")) .version(env!("CARGO_PKG_VERSION")) diff --git a/fuzzers/fuzzbench_qemu/src/fuzzer.rs b/fuzzers/fuzzbench_qemu/src/fuzzer.rs index 0a8b9363cb..ce0a304186 100644 --- a/fuzzers/fuzzbench_qemu/src/fuzzer.rs +++ b/fuzzers/fuzzbench_qemu/src/fuzzer.rs @@ -69,7 +69,7 @@ pub const MAX_INPUT_SIZE: usize = 1048576; // 1MB pub fn main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let res = match Command::new(env!("CARGO_PKG_NAME")) .version(env!("CARGO_PKG_VERSION")) diff --git a/fuzzers/fuzzbench_text/src/lib.rs b/fuzzers/fuzzbench_text/src/lib.rs index fb04caa021..7f32121773 100644 --- a/fuzzers/fuzzbench_text/src/lib.rs +++ b/fuzzers/fuzzbench_text/src/lib.rs @@ -67,7 +67,7 @@ use nix::{self, unistd::dup}; pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let res = match Command::new(env!("CARGO_PKG_NAME")) .version(env!("CARGO_PKG_VERSION")) diff --git a/fuzzers/libafl_atheris/src/lib.rs b/fuzzers/libafl_atheris/src/lib.rs index b284d4fc9a..0ed39741e3 100644 --- a/fuzzers/libafl_atheris/src/lib.rs +++ b/fuzzers/libafl_atheris/src/lib.rs @@ -110,7 +110,7 @@ pub extern "C" fn LLVMFuzzerRunDriver( ) { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { 
RegistryBuilder::register::(); } assert!(harness_fn.is_some(), "No harness callback provided"); let harness_fn = harness_fn.unwrap(); diff --git a/fuzzers/libfuzzer_libmozjpeg/src/lib.rs b/fuzzers/libfuzzer_libmozjpeg/src/lib.rs index 24772b5e5a..458aab5cbd 100644 --- a/fuzzers/libfuzzer_libmozjpeg/src/lib.rs +++ b/fuzzers/libfuzzer_libmozjpeg/src/lib.rs @@ -45,7 +45,7 @@ extern "C" { pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/fuzzers/libfuzzer_libpng/src/lib.rs b/fuzzers/libfuzzer_libpng/src/lib.rs index c229129b0a..0605ec288d 100644 --- a/fuzzers/libfuzzer_libpng/src/lib.rs +++ b/fuzzers/libfuzzer_libpng/src/lib.rs @@ -44,7 +44,7 @@ use libafl_targets::{libfuzzer_initialize, libfuzzer_test_one_input, EDGES_MAP, pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/fuzzers/libfuzzer_libpng_accounting/src/lib.rs b/fuzzers/libfuzzer_libpng_accounting/src/lib.rs index 58bdbd92ea..c91ec89663 100644 --- a/fuzzers/libfuzzer_libpng_accounting/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_accounting/src/lib.rs @@ -115,7 +115,7 @@ struct Opt { pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let opt = Opt::parse(); let broker_port = opt.broker_port; diff --git a/fuzzers/libfuzzer_libpng_centralized/src/lib.rs b/fuzzers/libfuzzer_libpng_centralized/src/lib.rs index a1759c7cd7..4a1bbe5360 100644 --- a/fuzzers/libfuzzer_libpng_centralized/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_centralized/src/lib.rs @@ -115,7 +115,7 @@ pub extern "C" fn libafl_main() { // Registry the metadata 
types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let opt = Opt::parse(); let broker_port = opt.broker_port; diff --git a/fuzzers/libfuzzer_libpng_cmin/src/lib.rs b/fuzzers/libfuzzer_libpng_cmin/src/lib.rs index 6c58d8625a..ad88f42e47 100644 --- a/fuzzers/libfuzzer_libpng_cmin/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_cmin/src/lib.rs @@ -47,7 +47,7 @@ use libafl_targets::{libfuzzer_initialize, libfuzzer_test_one_input, std_edges_m pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/fuzzers/libfuzzer_libpng_ctx/src/lib.rs b/fuzzers/libfuzzer_libpng_ctx/src/lib.rs index bdb22db1e8..01e897a955 100644 --- a/fuzzers/libfuzzer_libpng_ctx/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_ctx/src/lib.rs @@ -110,7 +110,7 @@ struct Opt { pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let opt = Opt::parse(); let broker_port = opt.broker_port; diff --git a/fuzzers/libfuzzer_libpng_launcher/src/lib.rs b/fuzzers/libfuzzer_libpng_launcher/src/lib.rs index 616341b9da..60ef106ba8 100644 --- a/fuzzers/libfuzzer_libpng_launcher/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_launcher/src/lib.rs @@ -113,7 +113,7 @@ struct Opt { pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let opt = Opt::parse(); let broker_port = opt.broker_port; diff --git a/fuzzers/libfuzzer_libpng_norestart/src/lib.rs b/fuzzers/libfuzzer_libpng_norestart/src/lib.rs index 1268009c9c..1aeb3cd914 100644 --- a/fuzzers/libfuzzer_libpng_norestart/src/lib.rs +++ 
b/fuzzers/libfuzzer_libpng_norestart/src/lib.rs @@ -131,7 +131,7 @@ struct Opt { pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let opt = Opt::parse(); let broker_port = opt.broker_port; diff --git a/fuzzers/libfuzzer_libpng_tcp_manager/src/lib.rs b/fuzzers/libfuzzer_libpng_tcp_manager/src/lib.rs index 6c19c485b3..3125b062ac 100644 --- a/fuzzers/libfuzzer_libpng_tcp_manager/src/lib.rs +++ b/fuzzers/libfuzzer_libpng_tcp_manager/src/lib.rs @@ -43,7 +43,7 @@ use libafl_targets::{libfuzzer_initialize, libfuzzer_test_one_input, EDGES_MAP, pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/fuzzers/libfuzzer_reachability/src/lib.rs b/fuzzers/libfuzzer_reachability/src/lib.rs index d1f4de5e41..1093888e3b 100644 --- a/fuzzers/libfuzzer_reachability/src/lib.rs +++ b/fuzzers/libfuzzer_reachability/src/lib.rs @@ -35,7 +35,7 @@ extern "C" { pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/fuzzers/libfuzzer_stb_image/src/main.rs b/fuzzers/libfuzzer_stb_image/src/main.rs index cb395dfca4..913eeaf7ad 100644 --- a/fuzzers/libfuzzer_stb_image/src/main.rs +++ b/fuzzers/libfuzzer_stb_image/src/main.rs @@ -33,7 +33,7 @@ use libafl_targets::{ pub fn main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/fuzzers/libfuzzer_stb_image_concolic/fuzzer/src/main.rs b/fuzzers/libfuzzer_stb_image_concolic/fuzzer/src/main.rs index 
5c09379419..faa6b74014 100644 --- a/fuzzers/libfuzzer_stb_image_concolic/fuzzer/src/main.rs +++ b/fuzzers/libfuzzer_stb_image_concolic/fuzzer/src/main.rs @@ -63,7 +63,7 @@ struct Opt { pub fn main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let opt = Opt::parse(); diff --git a/fuzzers/libfuzzer_stb_image_sugar/src/main.rs b/fuzzers/libfuzzer_stb_image_sugar/src/main.rs index c9c1e65ab7..ef4f169fd1 100644 --- a/fuzzers/libfuzzer_stb_image_sugar/src/main.rs +++ b/fuzzers/libfuzzer_stb_image_sugar/src/main.rs @@ -13,7 +13,7 @@ use libafl_targets::{libfuzzer_initialize, libfuzzer_test_one_input}; pub fn main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/fuzzers/nautilus_sync/src/lib.rs b/fuzzers/nautilus_sync/src/lib.rs index 54d7bb3ab1..e4f8c95c3c 100644 --- a/fuzzers/nautilus_sync/src/lib.rs +++ b/fuzzers/nautilus_sync/src/lib.rs @@ -102,7 +102,7 @@ struct Opt { pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } let opt = Opt::parse(); let broker_port = opt.broker_port; diff --git a/fuzzers/tutorial/src/lib.rs b/fuzzers/tutorial/src/lib.rs index 10c7ff9a76..53552376f5 100644 --- a/fuzzers/tutorial/src/lib.rs +++ b/fuzzers/tutorial/src/lib.rs @@ -39,7 +39,7 @@ use metadata::{PacketLenFeedback, PacketLenMinimizerScheduler}; pub extern "C" fn libafl_main() { // Registry the metadata types used in this fuzzer // Needed only on no_std - //RegistryBuilder::register::(); + // unsafe { RegistryBuilder::register::(); } println!( "Workdir: {:?}", diff --git a/libafl/src/corpus/cached.rs b/libafl/src/corpus/cached.rs index fcfe09fe3a..c960a58a23 100644 --- 
a/libafl/src/corpus/cached.rs +++ b/libafl/src/corpus/cached.rs @@ -1,6 +1,6 @@ //! The [`CachedOnDiskCorpus`] stores [`Testcase`]s to disk, keeping a subset of them in memory/cache, evicting in a FIFO manner. -use alloc::collections::vec_deque::VecDeque; +use alloc::{collections::vec_deque::VecDeque, string::String}; use core::cell::RefCell; use std::path::Path; @@ -193,7 +193,7 @@ where pub fn with_meta_format

( dir_path: P, cache_max_len: usize, - meta_format: OnDiskMetadataFormat, + meta_format: Option, ) -> Result where P: AsRef, @@ -204,6 +204,31 @@ where ) } + /// Creates the [`CachedOnDiskCorpus`] specifying the metadata format and the prefix to prepend + /// to each testcase. + /// + /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. + pub fn with_meta_format_and_prefix

( + dir_path: P, + cache_max_len: usize, + meta_format: Option, + prefix: Option, + locking: bool, + ) -> Result + where + P: AsRef, + { + Self::_new( + InMemoryOnDiskCorpus::with_meta_format_and_prefix( + dir_path, + meta_format, + prefix, + locking, + )?, + cache_max_len, + ) + } + /// Internal constructor `fn` fn _new(on_disk_corpus: InMemoryOnDiskCorpus, cache_max_len: usize) -> Result { if cache_max_len == 0 { @@ -217,6 +242,11 @@ where cache_max_len, }) } + + /// Fetch the inner corpus + pub fn inner(&self) -> &InMemoryOnDiskCorpus { + &self.inner + } } /// ``CachedOnDiskCorpus`` Python bindings diff --git a/libafl/src/corpus/inmemory_ondisk.rs b/libafl/src/corpus/inmemory_ondisk.rs index 3fa7aecde8..73a28b4846 100644 --- a/libafl/src/corpus/inmemory_ondisk.rs +++ b/libafl/src/corpus/inmemory_ondisk.rs @@ -50,6 +50,8 @@ where inner: InMemoryCorpus, dir_path: PathBuf, meta_format: Option, + prefix: Option, + locking: bool, } impl UsesInput for InMemoryOnDiskCorpus @@ -209,7 +211,12 @@ where where P: AsRef, { - Self::_new(dir_path.as_ref(), Some(OnDiskMetadataFormat::JsonPretty)) + Self::_new( + dir_path.as_ref(), + Some(OnDiskMetadataFormat::JsonPretty), + None, + true, + ) } /// Creates the [`InMemoryOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. @@ -217,12 +224,28 @@ where /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. pub fn with_meta_format

( dir_path: P, - meta_format: OnDiskMetadataFormat, + meta_format: Option, ) -> Result where P: AsRef, { - Self::_new(dir_path.as_ref(), Some(meta_format)) + Self::_new(dir_path.as_ref(), meta_format, None, true) + } + + /// Creates the [`InMemoryOnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk + /// and the prefix for the filenames. + /// + /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. + pub fn with_meta_format_and_prefix

( + dir_path: P, + meta_format: Option, + prefix: Option, + locking: bool, + ) -> Result + where + P: AsRef, + { + Self::_new(dir_path.as_ref(), meta_format, prefix, locking) } /// Creates an [`InMemoryOnDiskCorpus`] that will not store .metadata files @@ -232,16 +255,27 @@ where where P: AsRef, { - Self::_new(dir_path.as_ref(), None) + Self::_new(dir_path.as_ref(), None, None, true) } /// Private fn to crate a new corpus at the given (non-generic) path with the given optional `meta_format` - fn _new(dir_path: &Path, meta_format: Option) -> Result { - fs::create_dir_all(dir_path)?; + fn _new( + dir_path: &Path, + meta_format: Option, + prefix: Option, + locking: bool, + ) -> Result { + match fs::create_dir_all(dir_path) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {} + Err(e) => return Err(e.into()), + } Ok(InMemoryOnDiskCorpus { inner: InMemoryCorpus::new(), dir_path: dir_path.into(), meta_format, + prefix, + locking, }) } @@ -265,19 +299,21 @@ where return Ok(()); } - let new_lock_filename = format!(".{new_filename}.lafl_lock"); + if self.locking { + let new_lock_filename = format!(".{new_filename}.lafl_lock"); - // Try to create lock file for new testcases - if OpenOptions::new() - .create(true) - .write(true) - .open(self.dir_path.join(new_lock_filename)) - .is_err() - { - *testcase.filename_mut() = Some(old_filename); - return Err(Error::illegal_state( - "unable to create lock file for new testcase", - )); + // Try to create lock file for new testcases + if OpenOptions::new() + .create(true) + .write(true) + .open(self.dir_path.join(new_lock_filename)) + .is_err() + { + *testcase.filename_mut() = Some(old_filename); + return Err(Error::illegal_state( + "unable to create lock file for new testcase", + )); + } } let new_file_path = self.dir_path.join(&new_filename); @@ -311,18 +347,15 @@ where fn save_testcase(&self, testcase: &mut Testcase, idx: CorpusId) -> Result<(), Error> { let file_name_orig = 
testcase.filename_mut().take().unwrap_or_else(|| { // TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL) - testcase.input().as_ref().unwrap().generate_name(idx.0) }); - if testcase.file_path().is_some() { - // We already have a valid path, no need to do calculate anything - *testcase.filename_mut() = Some(file_name_orig); - } else { - // New testcase, we need to save it. - let mut file_name = file_name_orig.clone(); - let mut ctr = 2; - let file_name = loop { + // New testcase, we need to save it. + let mut file_name = file_name_orig.clone(); + + let mut ctr = 2; + let file_name = if self.locking { + loop { let lockfile_name = format!(".{file_name}.lafl_lock"); let lockfile_path = self.dir_path.join(lockfile_name); @@ -337,11 +370,19 @@ where file_name = format!("{file_name_orig}-{ctr}"); ctr += 1; - }; + } + } else { + file_name + }; + if testcase + .file_path() + .as_ref() + .map_or(true, |path| !path.starts_with(&self.dir_path)) + { *testcase.file_path_mut() = Some(self.dir_path.join(&file_name)); - *testcase.filename_mut() = Some(file_name); } + *testcase.filename_mut() = Some(file_name); if self.meta_format.is_some() { let metafile_name = format!(".{}.metadata", testcase.filename().as_ref().unwrap()); @@ -389,6 +430,12 @@ where } Ok(()) } + + /// Path to the corpus directory associated with this corpus + #[must_use] + pub fn dir_path(&self) -> &PathBuf { + &self.dir_path + } } #[cfg(feature = "python")] diff --git a/libafl/src/corpus/minimizer.rs b/libafl/src/corpus/minimizer.rs index 35dcb01ef4..cdea397290 100644 --- a/libafl/src/corpus/minimizer.rs +++ b/libafl/src/corpus/minimizer.rs @@ -8,16 +8,18 @@ use alloc::{ use core::{hash::Hash, marker::PhantomData}; use hashbrown::{HashMap, HashSet}; -use libafl_bolts::{tuples::MatchName, AsIter, Named}; +use libafl_bolts::{current_time, tuples::MatchName, AsIter, Named}; use num_traits::ToPrimitive; use z3::{ast::Bool, Config, Context, Optimize}; use crate::{ corpus::Corpus, + 
events::{Event, EventFirer, LogSeverity}, executors::{Executor, HasObservers}, + monitors::UserStats, observers::{MapObserver, ObserversTuple}, schedulers::{LenTimeMulTestcaseScore, RemovableScheduler, Scheduler, TestcaseScore}, - state::{HasCorpus, HasMetadata, UsesState}, + state::{HasCorpus, HasExecutions, HasMetadata, UsesState}, Error, HasScheduler, }; @@ -39,7 +41,7 @@ where where E: Executor + HasObservers, CS: Scheduler + RemovableScheduler, // schedulers that has on_remove/on_replace only! - EM: UsesState, + EM: EventFirer, Z: HasScheduler; } @@ -84,10 +86,11 @@ impl CorpusMinimizer for MapCorpusMinimizer where E: UsesState, for<'a> O: MapObserver + AsIter<'a, Item = T>, - E::State: HasMetadata + HasCorpus, + E::State: HasMetadata + HasCorpus + HasExecutions, T: Copy + Hash + Eq, TS: TestcaseScore, { + #[allow(clippy::too_many_lines)] fn minimize( &self, fuzzer: &mut Z, @@ -98,7 +101,7 @@ where where E: Executor + HasObservers, CS: Scheduler + RemovableScheduler, - EM: UsesState, + EM: EventFirer, Z: HasScheduler, { let cfg = Config::default(); @@ -109,6 +112,15 @@ where let mut cov_map = HashMap::new(); let mut cur_id = state.corpus().first(); + + manager.log( + state, + LogSeverity::Info, + "Executing each input...".to_string(), + )?; + + let total = state.corpus().count() as u64; + let mut curr = 0; while let Some(idx) = cur_id { let (weight, input) = { let mut testcase = state.corpus().get(idx)?.borrow_mut(); @@ -130,6 +142,29 @@ where .observers_mut() .post_exec_all(state, &input, &kind)?; + *state.executions_mut() += 1; + let executions = *state.executions(); + + curr += 1; + + manager.fire( + state, + Event::UpdateUserStats { + name: "minimisation exec pass".to_string(), + value: UserStats::Ratio(curr, total), + phantom: PhantomData, + }, + )?; + + manager.fire( + state, + Event::UpdateExecStats { + time: current_time(), + phantom: PhantomData, + executions, + }, + )?; + let seed_expr = Bool::fresh_const(&ctx, "seed"); let obs: &O = executor 
.observers() @@ -155,6 +190,12 @@ where cur_id = state.corpus().next(idx); } + manager.log( + state, + LogSeverity::Info, + "Preparing Z3 assertions...".to_string(), + )?; + for (_, cov) in cov_map { for (_, seeds) in cov { // At least one seed for each hit count of each coverage map index @@ -176,6 +217,7 @@ where opt.assert_soft(&!seed, *weight, None); } + manager.log(state, LogSeverity::Info, "Performing MaxSAT...".to_string())?; // Perform the optimization! opt.check(&[]); diff --git a/libafl/src/corpus/ondisk.rs b/libafl/src/corpus/ondisk.rs index 95145830cb..e7b7cccada 100644 --- a/libafl/src/corpus/ondisk.rs +++ b/libafl/src/corpus/ondisk.rs @@ -4,6 +4,7 @@ //! For any other occasions, consider using [`crate::corpus::CachedOnDiskCorpus`] //! which stores a certain number of testcases in memory and removes additional ones in a FIFO manner. +use alloc::string::String; use core::{cell::RefCell, time::Duration}; use std::path::{Path, PathBuf}; @@ -34,7 +35,6 @@ pub enum OnDiskMetadataFormat { } /// The [`Testcase`] metadata that'll be stored to disk -#[cfg(feature = "std")] #[derive(Debug, Serialize)] pub struct OnDiskMetadata<'a> { /// The dynamic metadata [`SerdeAnyMap`] stored to disk @@ -48,7 +48,6 @@ pub struct OnDiskMetadata<'a> { /// A corpus able to store [`Testcase`]s to disk, and load them from disk, when they are being used. /// /// Metadata is written to a `..metadata` file in the same folder by default. -#[cfg(feature = "std")] #[derive(Default, Serialize, Deserialize, Clone, Debug)] #[serde(bound = "I: serde::de::DeserializeOwned")] pub struct OnDiskCorpus @@ -188,7 +187,27 @@ where where P: AsRef, { - Self::_new(dir_path.as_ref(), OnDiskMetadataFormat::JsonPretty) + Self::with_meta_format_and_prefix( + dir_path.as_ref(), + Some(OnDiskMetadataFormat::JsonPretty), + None, + true, + ) + } + + /// Creates the [`OnDiskCorpus`] with a filename prefix. + /// + /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. + pub fn with_prefix

(dir_path: P, prefix: Option) -> Result + where + P: AsRef, + { + Self::with_meta_format_and_prefix( + dir_path.as_ref(), + Some(OnDiskMetadataFormat::JsonPretty), + prefix, + true, + ) } /// Creates the [`OnDiskCorpus`] specifying the format in which `Metadata` will be saved to disk. @@ -201,16 +220,45 @@ where where P: AsRef, { - Self::_new(dir_path.as_ref(), meta_format) + Self::with_meta_format_and_prefix(dir_path.as_ref(), Some(meta_format), None, true) + } + + /// Creates an [`OnDiskCorpus`] that will not store .metadata files + /// + /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. + pub fn no_meta

(dir_path: P) -> Result + where + P: AsRef, + { + Self::with_meta_format_and_prefix(dir_path.as_ref(), None, None, true) } - /// Private fn to crate a new corpus at the given (non-generic) path with the given optional `meta_format` - fn _new(dir_path: &Path, meta_format: OnDiskMetadataFormat) -> Result { + /// Creates a new corpus at the given (non-generic) path with the given optional `meta_format` + /// and `prefix`. + /// + /// Will error, if [`std::fs::create_dir_all()`] failed for `dir_path`. + pub fn with_meta_format_and_prefix( + dir_path: &Path, + meta_format: Option, + prefix: Option, + locking: bool, + ) -> Result { Ok(OnDiskCorpus { dir_path: dir_path.into(), - inner: CachedOnDiskCorpus::with_meta_format(dir_path, 1, meta_format)?, + inner: CachedOnDiskCorpus::with_meta_format_and_prefix( + dir_path, + 1, + meta_format, + prefix, + locking, + )?, }) } + + /// Path to the corpus directory associated with this corpus + pub fn dir_path(&self) -> &PathBuf { + &self.dir_path + } } #[cfg(feature = "python")] diff --git a/libafl/src/corpus/testcase.rs b/libafl/src/corpus/testcase.rs index c8c5368bc9..63f9eb0e56 100644 --- a/libafl/src/corpus/testcase.rs +++ b/libafl/src/corpus/testcase.rs @@ -204,7 +204,17 @@ where input.wrapped_as_testcase(); Self { input: Some(input), - ..Testcase::default() + filename: None, + #[cfg(feature = "std")] + file_path: None, + metadata: SerdeAnyMap::default(), + #[cfg(feature = "std")] + metadata_path: None, + exec_time: None, + cached_len: None, + executions: 0, + scheduled_count: 0, + parent_id: None, } } @@ -212,10 +222,19 @@ where /// that this [`Testcase`] was derived from on creation pub fn with_parent_id(mut input: I, parent_id: CorpusId) -> Self { input.wrapped_as_testcase(); - Self { + Testcase { input: Some(input), + filename: None, + #[cfg(feature = "std")] + file_path: None, + metadata: SerdeAnyMap::default(), + #[cfg(feature = "std")] + metadata_path: None, + exec_time: None, + cached_len: None, + executions: 0, + 
scheduled_count: 0, parent_id: Some(parent_id), - ..Testcase::default() } } @@ -226,7 +245,16 @@ where Self { input: Some(input), filename: Some(filename), - ..Testcase::default() + #[cfg(feature = "std")] + file_path: None, + metadata: SerdeAnyMap::default(), + #[cfg(feature = "std")] + metadata_path: None, + exec_time: None, + cached_len: None, + executions: 0, + scheduled_count: 0, + parent_id: None, } } @@ -236,8 +264,17 @@ where input.wrapped_as_testcase(); Self { input: Some(input), + filename: None, + #[cfg(feature = "std")] + file_path: None, + metadata: SerdeAnyMap::default(), + #[cfg(feature = "std")] + metadata_path: None, + exec_time: None, + cached_len: None, executions, - ..Testcase::default() + scheduled_count: 0, + parent_id: None, } } @@ -436,6 +473,21 @@ impl SchedulerTestcaseMetadata { libafl_bolts::impl_serdeany!(SchedulerTestcaseMetadata); +#[cfg(feature = "std")] +impl Drop for Testcase +where + I: Input, +{ + fn drop(&mut self) { + if let Some(filename) = &self.filename { + let mut path = PathBuf::from(filename); + let lockname = format!(".{}.lafl_lock", path.file_name().unwrap().to_str().unwrap()); + path.set_file_name(lockname); + let _ = std::fs::remove_file(path); + } + } +} + #[cfg(feature = "python")] #[allow(missing_docs)] /// `Testcase` Python bindings diff --git a/libafl/src/executors/inprocess.rs b/libafl/src/executors/inprocess.rs index 591dd9b9b6..d773ca7e62 100644 --- a/libafl/src/executors/inprocess.rs +++ b/libafl/src/executors/inprocess.rs @@ -847,8 +847,6 @@ mod unix_signal_handler { ExitKind::Timeout, ); - event_mgr.await_restart_safe(); - libc::_exit(55); } diff --git a/libafl/src/feedbacks/map.rs b/libafl/src/feedbacks/map.rs index c8d3c8ec85..681b982fe6 100644 --- a/libafl/src/feedbacks/map.rs +++ b/libafl/src/feedbacks/map.rs @@ -322,7 +322,7 @@ where libafl_bolts::impl_serdeany!( MapFeedbackMetadata, - ,,,,,,,,,,, + ,,,,,,,,,,,, ); impl MapFeedbackMetadata @@ -456,6 +456,10 @@ where .named_metadata_map_mut() 
.get_mut::>(&self.name) .unwrap(); + let len = observer.len(); + if map_state.history_map.len() < len { + map_state.history_map.resize(len, observer.initial()); + } let history_map = map_state.history_map.as_mut_slice(); if self.indexes { diff --git a/libafl/src/fuzzer/mod.rs b/libafl/src/fuzzer/mod.rs index 9717011593..174c70d27c 100644 --- a/libafl/src/fuzzer/mod.rs +++ b/libafl/src/fuzzer/mod.rs @@ -338,9 +338,9 @@ where /// Evaluate if a set of observation channels has an interesting state fn process_execution( &mut self, - state: &mut CS::State, + state: &mut Self::State, manager: &mut EM, - input: ::Input, + input: ::Input, observers: &OT, exit_kind: &ExitKind, send_events: bool, @@ -489,10 +489,10 @@ where #[inline] fn evaluate_input_events( &mut self, - state: &mut CS::State, + state: &mut Self::State, executor: &mut E, manager: &mut EM, - input: ::Input, + input: ::Input, send_events: bool, ) -> Result<(ExecuteInputResult, Option), Error> { self.evaluate_input_with_observers(state, executor, manager, input, send_events) @@ -501,26 +501,46 @@ where /// Adds an input, even if it's not considered `interesting` by any of the executors fn add_input( &mut self, - state: &mut CS::State, + state: &mut Self::State, executor: &mut E, manager: &mut EM, - input: ::Input, + input: ::Input, ) -> Result { let exit_kind = self.execute_input(state, executor, manager, &input)?; let observers = executor.observers(); // Always consider this to be "interesting" + let mut testcase = Testcase::with_executions(input.clone(), *state.executions()); - // However, we still want to trigger the side effects of objectives and feedbacks. 
+ // Maybe a solution #[cfg(not(feature = "introspection"))] - let _is_solution = self + let is_solution = self .objective_mut() .is_interesting(state, manager, &input, observers, &exit_kind)?; #[cfg(feature = "introspection")] - let _is_solution = self + let is_solution = self .objective_mut() .is_interesting_introspection(state, manager, &input, observers, &exit_kind)?; + if is_solution { + self.objective_mut() + .append_metadata(state, observers, &mut testcase)?; + let idx = state.solutions_mut().add(testcase)?; + + manager.fire( + state, + Event::Objective { + objective_size: state.solutions().count(), + }, + )?; + return Ok(idx); + } + + // Not a solution + self.objective_mut().discard_metadata(state, &input)?; + + // several is_interesting implementations collect some data about the run, later used in + // append_metadata; we *must* invoke is_interesting here to collect it #[cfg(not(feature = "introspection"))] let _is_corpus = self .feedback_mut() @@ -531,17 +551,7 @@ where .feedback_mut() .is_interesting_introspection(state, manager, &input, observers, &exit_kind)?; - // Not a solution - self.objective_mut().discard_metadata(state, &input)?; - - // several is_interesting implementations collect some data about the run, later used in - // append_metadata; we *must* invoke is_interesting here to collect it - let _: bool = self - .feedback_mut() - .is_interesting(state, manager, &input, observers, &exit_kind)?; - // Add the input to the main corpus - let mut testcase = Testcase::with_executions(input.clone(), *state.executions()); self.feedback_mut() .append_metadata(state, observers, &mut testcase)?; let idx = state.corpus_mut().add(testcase)?; diff --git a/libafl/src/lib.rs b/libafl/src/lib.rs index 0f6de21f8d..66850226c9 100644 --- a/libafl/src/lib.rs +++ b/libafl/src/lib.rs @@ -26,7 +26,8 @@ Welcome to `LibAFL` clippy::missing_panics_doc, clippy::missing_docs_in_private_items, clippy::module_name_repetitions, - clippy::ptr_cast_constness + 
clippy::ptr_cast_constness, + clippy::unsafe_derive_deserialize )] #![cfg_attr(not(test), warn( missing_debug_implementations, diff --git a/libafl/src/monitors/mod.rs b/libafl/src/monitors/mod.rs index de8268ed3f..95a0f8027a 100644 --- a/libafl/src/monitors/mod.rs +++ b/libafl/src/monitors/mod.rs @@ -371,8 +371,14 @@ impl Monitor for SimplePrintingMonitor { } fn display(&mut self, event_msg: String, sender_id: ClientId) { + let mut userstats = self.client_stats()[sender_id.0 as usize] + .user_monitor + .iter() + .map(|(key, value)| format!("{key}: {value}")) + .collect::>(); + userstats.sort(); println!( - "[{} #{}] run time: {}, clients: {}, corpus: {}, objectives: {}, executions: {}, exec/sec: {}", + "[{} #{}] run time: {}, clients: {}, corpus: {}, objectives: {}, executions: {}, exec/sec: {}, {}", event_msg, sender_id.0, format_duration_hms(&(current_time() - self.start_time)), @@ -380,7 +386,8 @@ impl Monitor for SimplePrintingMonitor { self.corpus_size(), self.objective_size(), self.total_execs(), - self.execs_per_sec_pretty() + self.execs_per_sec_pretty(), + userstats.join(", ") ); // Only print perf monitor if the feature is enabled diff --git a/libafl/src/monitors/multi.rs b/libafl/src/monitors/multi.rs index 30837b6363..7fd6edafcf 100644 --- a/libafl/src/monitors/multi.rs +++ b/libafl/src/monitors/multi.rs @@ -3,14 +3,17 @@ #[cfg(feature = "introspection")] use alloc::string::ToString; use alloc::{string::String, vec::Vec}; -use core::{fmt::Write, time::Duration}; +use core::{ + fmt::{Debug, Formatter, Write}, + time::Duration, +}; use libafl_bolts::{current_time, format_duration_hms, ClientId}; use crate::monitors::{ClientStats, Monitor}; /// Tracking monitor during fuzzing and display both per-client and cumulative info. 
-#[derive(Clone, Debug)] +#[derive(Clone)] pub struct MultiMonitor where F: FnMut(String), @@ -20,6 +23,18 @@ where client_stats: Vec, } +impl Debug for MultiMonitor +where + F: FnMut(String), +{ + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + f.debug_struct("MultiMonitor") + .field("start_time", &self.start_time) + .field("client_stats", &self.client_stats) + .finish_non_exhaustive() + } +} + impl Monitor for MultiMonitor where F: FnMut(String), diff --git a/libafl/src/mutators/scheduled.rs b/libafl/src/mutators/scheduled.rs index 9ac7e43c5d..05a5e2ca75 100644 --- a/libafl/src/mutators/scheduled.rs +++ b/libafl/src/mutators/scheduled.rs @@ -8,7 +8,7 @@ use core::{ use libafl_bolts::{ rands::Rand, - tuples::{tuple_list, tuple_list_type, NamedTuple}, + tuples::{tuple_list, tuple_list_type, Merge, NamedTuple}, AsMutSlice, AsSlice, Named, }; use serde::{Deserialize, Serialize}; @@ -218,6 +218,38 @@ where } } +/// Tuple type of the mutations that compose the Havoc mutator without crossover mutations +pub type HavocMutationsNoCrossoverType = tuple_list_type!( + BitFlipMutator, + ByteFlipMutator, + ByteIncMutator, + ByteDecMutator, + ByteNegMutator, + ByteRandMutator, + ByteAddMutator, + WordAddMutator, + DwordAddMutator, + QwordAddMutator, + ByteInterestingMutator, + WordInterestingMutator, + DwordInterestingMutator, + BytesDeleteMutator, + BytesDeleteMutator, + BytesDeleteMutator, + BytesDeleteMutator, + BytesExpandMutator, + BytesInsertMutator, + BytesRandInsertMutator, + BytesSetMutator, + BytesRandSetMutator, + BytesCopyMutator, + BytesInsertCopyMutator, + BytesSwapMutator, +); + +/// Tuple type of the mutations that compose the Havoc mutator's crossover mutations +pub type HavocCrossoverType = tuple_list_type!(CrossoverInsertMutator, CrossoverReplaceMutator); + /// Tuple type of the mutations that compose the Havoc mutator pub type HavocMutationsType = tuple_list_type!( BitFlipMutator, @@ -249,9 +281,9 @@ pub type HavocMutationsType = 
tuple_list_type!( CrossoverReplaceMutator, ); -/// Get the mutations that compose the Havoc mutator +/// Get the mutations that compose the Havoc mutator (only applied to single inputs) #[must_use] -pub fn havoc_mutations() -> HavocMutationsType { +pub fn havoc_mutations_no_crossover() -> HavocMutationsNoCrossoverType { tuple_list!( BitFlipMutator::new(), ByteFlipMutator::new(), @@ -278,11 +310,24 @@ pub fn havoc_mutations() -> HavocMutationsType { BytesCopyMutator::new(), BytesInsertCopyMutator::new(), BytesSwapMutator::new(), + ) +} + +/// Get the mutations that compose the Havoc mutator's crossover strategy +#[must_use] +pub fn havoc_crossover() -> HavocCrossoverType { + tuple_list!( CrossoverInsertMutator::new(), CrossoverReplaceMutator::new(), ) } +/// Get the mutations that compose the Havoc mutator +#[must_use] +pub fn havoc_mutations() -> HavocMutationsType { + havoc_mutations_no_crossover().merge(havoc_crossover()) +} + /// Get the mutations that uses the Tokens metadata #[must_use] pub fn tokens_mutations() -> tuple_list_type!(TokenInsert, TokenReplace) { diff --git a/libafl/src/schedulers/probabilistic_sampling.rs b/libafl/src/schedulers/probabilistic_sampling.rs index 8de52a80f7..84cf779c8f 100644 --- a/libafl/src/schedulers/probabilistic_sampling.rs +++ b/libafl/src/schedulers/probabilistic_sampling.rs @@ -191,6 +191,11 @@ mod tests { #[test] fn test_prob_sampling() { + #[cfg(any(not(feature = "serdeany_autoreg"), miri))] + unsafe { + super::ProbabilityMetadata::register(); + } + // the first 3 probabilities will be .69, .86, .44 let rand = StdRand::with_seed(12); diff --git a/libafl/src/schedulers/testcase_score.rs b/libafl/src/schedulers/testcase_score.rs index 8541fb1342..cbaf5d4783 100644 --- a/libafl/src/schedulers/testcase_score.rs +++ b/libafl/src/schedulers/testcase_score.rs @@ -112,7 +112,11 @@ where .as_nanos() as f64; let avg_exec_us = psmeta.exec_time().as_nanos() as f64 / psmeta.cycles() as f64; - let avg_bitmap_size = psmeta.bitmap_size() 
/ psmeta.bitmap_entries(); + let avg_bitmap_size = if psmeta.bitmap_entries() == 0 { + 1 + } else { + psmeta.bitmap_size() / psmeta.bitmap_entries() + }; let favored = entry.has_metadata::(); let tcmeta = entry.metadata::()?; diff --git a/libafl/src/stages/generalization.rs b/libafl/src/stages/generalization.rs index 1378ecd28d..6f0db3d577 100644 --- a/libafl/src/stages/generalization.rs +++ b/libafl/src/stages/generalization.rs @@ -100,6 +100,9 @@ where "MapNoveltiesMetadata needed for GeneralizationStage not found in testcase #{corpus_idx} (check the arguments of MapFeedback::new(...))" )) })?; + if meta.as_slice().is_empty() { + return Ok(()); // don't generalise inputs which don't have novelties + } (payload, original, meta.as_slice().to_vec()) }; diff --git a/libafl/src/state/mod.rs b/libafl/src/state/mod.rs index 8d8fa5eed9..6c31991d38 100644 --- a/libafl/src/state/mod.rs +++ b/libafl/src/state/mod.rs @@ -260,6 +260,9 @@ pub struct StdState { #[cfg(feature = "std")] /// Remaining initial inputs to load, if any remaining_initial_files: Option>, + #[cfg(feature = "std")] + /// Canonicalized directories already queued or entered; symlinks resolving into them are skipped + dont_reenter: Option>, /// The last time we reported progress (if available/used). /// This information is used by fuzzer `maybe_report_progress`. last_report_time: Option, @@ -458,30 +461,48 @@ where } /// List initial inputs from a directory. - fn visit_initial_directory(files: &mut Vec, in_dir: &Path) -> Result<(), Error> { - for entry in fs::read_dir(in_dir)? 
{ - let entry = entry?; - let path = entry.path(); - if path.file_name().unwrap().to_string_lossy().starts_with('.') { - continue; - } - - let attributes = fs::metadata(&path); + fn next_file(&mut self) -> Result { + loop { + if let Some(path) = self.remaining_initial_files.as_mut().and_then(Vec::pop) { + let filename = path.file_name().unwrap().to_string_lossy(); + if filename.starts_with('.') + // || filename + // .rsplit_once('-') + // .map_or(false, |(_, s)| u64::from_str(s).is_ok()) + { + continue; + } - if attributes.is_err() { - continue; - } + let attributes = fs::metadata(&path); - let attr = attributes?; + if attributes.is_err() { + continue; + } - if attr.is_file() && attr.len() > 0 { - files.push(path); - } else if attr.is_dir() { - Self::visit_initial_directory(files, &path)?; + let attr = attributes?; + + if attr.is_file() && attr.len() > 0 { + return Ok(path); + } else if attr.is_dir() { + let files = self.remaining_initial_files.as_mut().unwrap(); + path.read_dir()? + .try_for_each(|entry| entry.map(|e| files.push(e.path())))?; + } else if attr.is_symlink() { + let path = fs::canonicalize(path)?; + let dont_reenter = self.dont_reenter.get_or_insert_with(Default::default); + if dont_reenter.iter().any(|p| path.starts_with(p)) { + continue; + } + if path.is_dir() { + dont_reenter.push(path.clone()); + } + let files = self.remaining_initial_files.as_mut().unwrap(); + files.push(path); + } + } else { + return Err(Error::iterator_end("No remaining files to load.")); } } - - Ok(()) } /// Loads initial inputs from the passed-in `in_dirs`. 
@@ -506,11 +527,13 @@ where return Ok(()); } } else { - let mut files = vec![]; - for in_dir in in_dirs { - Self::visit_initial_directory(&mut files, in_dir)?; - } - + let files = in_dirs.iter().try_fold(Vec::new(), |mut res, file| { + file.canonicalize().map(|canonicalized| { + res.push(canonicalized); + res + }) + })?; + self.dont_reenter = Some(files.clone()); self.remaining_initial_files = Some(files); } @@ -562,20 +585,22 @@ where EM: EventFirer, Z: Evaluator, { - if self.remaining_initial_files.is_none() { - return Err(Error::illegal_state("No initial files were loaded, cannot continue loading. Call a `load_initial_input` fn first!")); - } - - while let Some(path) = self.remaining_initial_files.as_mut().unwrap().pop() { - log::info!("Loading file {:?} ...", &path); - let input = loader(fuzzer, self, &path)?; - if forced { - let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?; - } else { - let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?; - if res == ExecuteInputResult::None { - log::warn!("File {:?} was not interesting, skipped.", &path); + loop { + match self.next_file() { + Ok(path) => { + log::info!("Loading file {:?} ...", &path); + let input = loader(fuzzer, self, &path)?; + if forced { + let _: CorpusId = fuzzer.add_input(self, executor, manager, input)?; + } else { + let (res, _) = fuzzer.evaluate_input(self, executor, manager, input)?; + if res == ExecuteInputResult::None { + log::warn!("File {:?} was not interesting, skipped.", &path); + } + } } + Err(Error::IteratorEnd(_, _)) => break, + Err(e) => return Err(e), } } @@ -797,6 +822,8 @@ where introspection_monitor: ClientPerfMonitor::new(), #[cfg(feature = "std")] remaining_initial_files: None, + #[cfg(feature = "std")] + dont_reenter: None, last_report_time: None, phantom: PhantomData, }; diff --git a/libafl_bolts/src/anymap.rs b/libafl_bolts/src/anymap.rs index 3dc2572323..43e05c0b97 100644 --- a/libafl_bolts/src/anymap.rs +++ b/libafl_bolts/src/anymap.rs @@ 
-52,7 +52,11 @@ pub const fn pack_type_id(id: u128) -> TypeId { match size_of::() { 8 => { let id_64 = id as u64; - unsafe { *(addr_of!(id_64) as *const TypeId) } + // false positive: this branch only executes on 64 bit `TypeId`s + #[allow(clippy::cast_ptr_alignment)] + unsafe { + *(addr_of!(id_64) as *const TypeId) + } } 16 => unsafe { *(addr_of!(id) as *const TypeId) }, _ => { diff --git a/libafl_bolts/src/serdeany.rs b/libafl_bolts/src/serdeany.rs index f63e4f850e..bdb5269ee4 100644 --- a/libafl_bolts/src/serdeany.rs +++ b/libafl_bolts/src/serdeany.rs @@ -178,6 +178,7 @@ macro_rules! create_serde_registry_for_trait { /// A (de)serializable anymap containing (de)serializable trait objects registered /// in the registry + #[allow(clippy::unsafe_derive_deserialize)] #[derive(Debug, Serialize, Deserialize)] pub struct SerdeAnyMap { map: HashMap>, @@ -252,8 +253,7 @@ macro_rules! create_serde_registry_for_trait { where T: $trait_name, { - self.map - .insert(unpack_type_id(TypeId::of::()), Box::new(t)); + self.insert_boxed(Box::new(t)); } /// Insert a boxed element into the map. @@ -262,7 +262,21 @@ macro_rules! create_serde_registry_for_trait { where T: $trait_name, { - self.map.insert(unpack_type_id(TypeId::of::()), t); + let id = unpack_type_id(TypeId::of::()); + assert!( + unsafe { + REGISTRY + .deserializers + .as_ref() + .expect("Empty types registry") + .get(&id) + .is_some() + }, + "Type {} was inserted without registration! Call {}::register or use serde_autoreg.", + core::any::type_name::(), + core::any::type_name::() + ); + self.map.insert(id, t); } /// Returns the count of elements in this map. @@ -304,6 +318,7 @@ macro_rules! create_serde_registry_for_trait { } /// A serializable [`HashMap`] wrapper for [`SerdeAny`] types, addressable by name. + #[allow(clippy::unsafe_derive_deserialize)] #[allow(unused_qualifications)] #[derive(Debug, Serialize, Deserialize)] pub struct NamedSerdeAnyMap { @@ -518,6 +533,19 @@ macro_rules! 
create_serde_registry_for_trait { T: $trait_name, { let id = unpack_type_id(TypeId::of::()); + assert!( + unsafe { + REGISTRY + .deserializers + .as_ref() + .expect("Empty types registry") + .get(&id) + .is_some() + }, + "Type {} was inserted without registration! Call {}::register or use serde_autoreg.", + core::any::type_name::(), + core::any::type_name::() + ); if !self.map.contains_key(&id) { self.map.insert(id, HashMap::default()); } @@ -619,7 +647,7 @@ macro_rules! create_register { ($struct_type:ty) => { const _: () = { /// Automatically register this type - #[cfg(feature = "serdeany_autoreg")] + #[cfg(all(feature = "serdeany_autoreg", not(miri)))] #[$crate::ctor] fn register() { // # Safety diff --git a/libafl_libfuzzer/Cargo.toml b/libafl_libfuzzer/Cargo.toml new file mode 100644 index 0000000000..f387bbe15d --- /dev/null +++ b/libafl_libfuzzer/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "libafl_libfuzzer" +version.workspace = true +description = "libFuzzer shim which uses LibAFL with common defaults" +repository = "https://github.com/AFLplusplus/LibAFL/" +readme = "../README.md" +license = "MIT OR Apache-2.0" +keywords = ["fuzzing", "testing", "security"] +edition = "2021" +categories = ["development-tools::testing", "fuzzing"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[build-dependencies] +cc = "1.0" +rustversion = "1.0" + +[features] +arbitrary-derive = ["libfuzzer-sys/arbitrary-derive"] +introspection = [] +whole-archive = [] + +[dependencies] +libfuzzer-sys = { version = "0.4.7", default-features = false } diff --git a/libafl_libfuzzer/build.rs b/libafl_libfuzzer/build.rs new file mode 100644 index 0000000000..c670b73e6c --- /dev/null +++ b/libafl_libfuzzer/build.rs @@ -0,0 +1,116 @@ +use std::{path::PathBuf, process::Command}; + +fn main() { + if cfg!(feature = "cargo-clippy") { + return; // skip when clippy is running + } + if cfg!(not(target_os = "linux")) { + println!( + 
"cargo:error=The libafl_libfuzzer runtime may only be built for linux; failing fast." + ); + return; + } + println!("cargo:rerun-if-changed=libafl_libfuzzer_runtime/src"); + println!("cargo:rerun-if-changed=libafl_libfuzzer_runtime/Cargo.toml"); + println!("cargo:rerun-if-changed=libafl_libfuzzer_runtime/build.rs"); + + let custom_lib_dir = + PathBuf::from(std::env::var_os("OUT_DIR").unwrap()).join("libafl_libfuzzer"); + std::fs::create_dir_all(&custom_lib_dir) + .expect("Couldn't create the output directory for the fuzzer runtime build"); + + let mut lib_src = PathBuf::from(std::env::var_os("CARGO_MANIFEST_DIR").unwrap()); + lib_src.push("libafl_libfuzzer_runtime"); + + let mut command = Command::new(std::env::var_os("CARGO").unwrap()); + command + .env_remove("RUSTFLAGS") + .env_remove("CARGO_ENCODED_RUSTFLAGS"); + + for (var, _) in std::env::vars() { + if var.starts_with("CARGO_PKG_") || var.starts_with("CARGO_FEATURE_") { + command.env_remove(var); + } + } + + command + .env("PATH", std::env::var_os("PATH").unwrap()) + .current_dir(&lib_src); + + command.arg("build"); + + let mut features = vec!["serdeany_autoreg"]; + + if cfg!(any(feature = "fork")) { + features.push("fork"); + } + if cfg!(any(feature = "introspection")) { + features.push("libafl/introspection"); + } + + command + .arg("--release") + .arg("--no-default-features") + .arg("--target-dir") + .arg(&custom_lib_dir) + .arg("--features") + .arg(features.join(",")) + .arg("--target") + .arg(std::env::var_os("TARGET").unwrap()); + + assert!( + !command.status().map(|s| !s.success()).unwrap_or(true), + "Couldn't build runtime crate! Did you remember to use nightly?" + ); + + let mut lib_path = custom_lib_dir.join(std::env::var_os("TARGET").unwrap()); + lib_path.push("release"); + lib_path.push("libafl_libfuzzer_runtime.a"); + + // // TODO this is definitely not compat with macOS/Windows... 
+ if cfg!(feature = "whole-archive") { + use std::path::Path; + let target_libdir = Command::new("rustc") + .args(["--print", "target-libdir"]) + .output() + .expect("Couldn't find rustc's target-libdir"); + let target_libdir = String::from_utf8(target_libdir.stdout).unwrap(); + let target_libdir = Path::new(target_libdir.trim()); + + let rust_lld = target_libdir.join("../bin/rust-lld"); + let rust_ar = target_libdir.join("../bin/llvm-ar"); // NOTE: depends on llvm-tools + + let mut command = Command::new(rust_lld); + command + .args(["-flavor", "gnu"]) + .arg("-r") + .arg("--whole-archive") + .arg(lib_path) + .args(["-o", custom_lib_dir.join("libFuzzer.o").to_str().expect("Invalid path characters present in your current directory prevent us from linking to the runtime")]); + + assert!( + !command.status().map(|s| !s.success()).unwrap_or(true), + "Couldn't link runtime crate! Do you have the llvm-tools component installed?" + ); + + let mut command = Command::new(rust_ar); + command + .arg("cr") + .arg(custom_lib_dir.join("libFuzzer.a")) + .arg(custom_lib_dir.join("libFuzzer.o")); + + assert!( + !command.status().map(|s| !s.success()).unwrap_or(true), + "Couldn't create runtime archive!" 
+ ); + } else { + std::fs::copy(lib_path, custom_lib_dir.join("libFuzzer.a")).unwrap(); + } + + println!( + "cargo:rustc-link-search=native={}", + custom_lib_dir.to_str().unwrap() + ); + println!("cargo:rustc-link-lib=static=Fuzzer"); + println!("cargo:rustc-link-lib=stdc++"); +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/Cargo.toml b/libafl_libfuzzer/libafl_libfuzzer_runtime/Cargo.toml new file mode 100644 index 0000000000..474667974e --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/Cargo.toml @@ -0,0 +1,50 @@ +[package] +name = "libafl_libfuzzer_runtime" +version = "0.1.0" +edition = "2021" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +default = ["fork", "serdeany_autoreg"] +fork = ["libafl/fork"] # Enables forking mode for the LibAFL launcher (instead of starting new processes) + +serdeany_autoreg = [] # TODO: drop this when fixed in libafl proper + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +debug = true + + +[lib] +name = "afl_libfuzzer_runtime" # TODO fix name once cargo-fuzz stops stripping double-prefixes +path = "src/lib.rs" +crate-type = ["staticlib", "rlib"] + +[dependencies] +libafl = { path = "../../libafl", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "errors_backtrace", "regex", "serdeany_autoreg", "tui_monitor"] } +libafl_bolts = { path = "../../libafl_bolts", default-features = false, features = ["std", "derive", "llmp_compression", "rand_trait", "errors_backtrace"] } +libafl_targets = { path = "../../libafl_targets", features = ["sancov_8bit", "sancov_cmplog", "libfuzzer", "libfuzzer_oom", "libfuzzer_define_run_driver", "sanitizers_flags"] } + +ahash = { version = "0.8.3", default-features = false } +libc = "0.2.139" +log = "0.4.17" +mimalloc = { version = "0.1.34", default-features = false, optional = true } +num-traits = "0.2.15" +rand = "0.8.5" +serde = { version = 
"1.0", default-features = false, features = ["alloc", "derive"] } # serialization lib + +# clippy-suggested optimised byte counter +bytecount = "0.6.3" + +# for identifying if we can grimoire-ify +utf8-chars = "2.0.3" + +[build-dependencies] +bindgen = "0.65.1" +cc = { version = "1.0", features = ["parallel"] } + +[workspace] diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/build.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/build.rs new file mode 100644 index 0000000000..85b12f47bc --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/build.rs @@ -0,0 +1,24 @@ +use std::{env, path::Path}; + +fn main() { + let out_dir = env::var_os("OUT_DIR").unwrap(); + + println!("cargo:rerun-if-changed=src/harness_wrap.h"); + println!("cargo:rerun-if-changed=src/harness_wrap.cpp"); + + let build = bindgen::builder() + .header("src/harness_wrap.h") + .generate_comments(true) + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .expect("Couldn't generate the harness wrapper!"); + + build + .write_to_file(Path::new(&out_dir).join("harness_wrap.rs")) + .expect("Couldn't write the harness wrapper!"); + + cc::Build::new() + .cpp(true) + .file("src/harness_wrap.cpp") + .compile("harness_wrap"); +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/feedbacks.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/feedbacks.rs new file mode 100644 index 0000000000..0cbdd91fc2 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/feedbacks.rs @@ -0,0 +1,181 @@ +use alloc::rc::Rc; +use core::{cell::RefCell, fmt::Debug}; +use std::path::PathBuf; + +use libafl::{ + alloc, + corpus::Testcase, + events::EventFirer, + executors::ExitKind, + feedbacks::{Feedback, MinMapFeedback}, + inputs::{BytesInput, Input, UsesInput}, + observers::ObserversTuple, + state::{HasClientPerfMonitor, HasMetadata}, + Error, +}; +use libafl_bolts::{impl_serdeany, Named}; +use libafl_targets::OomFeedback; +use serde::{Deserialize, Serialize}; + +use 
crate::{observers::MappedEdgeMapObserver, options::ArtifactPrefix}; + +#[derive(Debug)] +pub struct LibfuzzerKeepFeedback { + keep: Rc>, +} + +impl LibfuzzerKeepFeedback { + pub fn new() -> Self { + Self { + keep: Rc::new(RefCell::new(false)), + } + } + + pub fn keep(&self) -> Rc> { + self.keep.clone() + } +} + +impl Named for LibfuzzerKeepFeedback { + fn name(&self) -> &str { + "libfuzzer-keep" + } +} + +impl Feedback for LibfuzzerKeepFeedback +where + S: UsesInput + HasClientPerfMonitor, +{ + fn is_interesting( + &mut self, + _state: &mut S, + _manager: &mut EM, + _input: &S::Input, + _observers: &OT, + _exit_kind: &ExitKind, + ) -> Result + where + EM: EventFirer, + OT: ObserversTuple, + { + Ok(*self.keep.borrow()) + } +} + +#[derive(Deserialize, Serialize, Debug)] +pub struct LibfuzzerCrashCauseMetadata { + kind: ExitKind, +} + +impl_serdeany!(LibfuzzerCrashCauseMetadata); + +impl LibfuzzerCrashCauseMetadata { + pub fn kind(&self) -> ExitKind { + self.kind + } +} + +#[derive(Debug)] +pub struct LibfuzzerCrashCauseFeedback { + artifact_prefix: Option, + exit_kind: ExitKind, +} + +impl LibfuzzerCrashCauseFeedback { + pub fn new(artifact_prefix: Option) -> Self { + Self { + artifact_prefix, + exit_kind: ExitKind::Ok, + } + } +} + +impl Named for LibfuzzerCrashCauseFeedback { + fn name(&self) -> &str { + "crash-cause" + } +} + +impl LibfuzzerCrashCauseFeedback { + fn set_filename(&self, prefix: &str, testcase: &mut Testcase) { + let base = if let Some(filename) = testcase.filename() { + filename.clone() + } else { + let name = testcase.input().as_ref().unwrap().generate_name(0); + name + }; + let file_path = if let Some(artifact_prefix) = self.artifact_prefix.as_ref() { + if let Some(filename_prefix) = artifact_prefix.filename_prefix() { + artifact_prefix + .dir() + .join(format!("{filename_prefix}{prefix}-{base}")) + } else { + artifact_prefix.dir().join(format!("{prefix}-{base}")) + } + } else { + PathBuf::from(format!("{prefix}-{base}")) + }; + 
*testcase.file_path_mut() = Some(file_path); + } +} + +impl Feedback for LibfuzzerCrashCauseFeedback +where + S: UsesInput + HasClientPerfMonitor, +{ + fn is_interesting( + &mut self, + _state: &mut S, + _manager: &mut EM, + _input: &S::Input, + _observers: &OT, + exit_kind: &ExitKind, + ) -> Result + where + EM: EventFirer, + OT: ObserversTuple, + { + self.exit_kind = *exit_kind; + Ok(false) + } + + fn append_metadata( + &mut self, + _state: &mut S, + _observers: &OT, + testcase: &mut Testcase, + ) -> Result<(), Error> + where + OT: ObserversTuple, + { + match self.exit_kind { + ExitKind::Crash | ExitKind::Oom if OomFeedback::oomed() => { + self.set_filename("oom", testcase); + testcase.add_metadata(LibfuzzerCrashCauseMetadata { + kind: ExitKind::Oom, + }); + } + ExitKind::Crash => { + self.set_filename("crash", testcase); + testcase.add_metadata(LibfuzzerCrashCauseMetadata { + kind: ExitKind::Crash, + }); + } + ExitKind::Timeout => { + self.set_filename("timeout", testcase); + testcase.add_metadata(LibfuzzerCrashCauseMetadata { + kind: ExitKind::Timeout, + }); + } + _ => { + self.set_filename("uncategorized", testcase); + testcase.add_metadata(LibfuzzerCrashCauseMetadata { + kind: self.exit_kind, + }); + } + } + Ok(()) + } +} + +pub type ShrinkMapFeedback = MinMapFeedback, S, usize>; diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/fuzz.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/fuzz.rs new file mode 100644 index 0000000000..b49bffd751 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/fuzz.rs @@ -0,0 +1,224 @@ +use core::ffi::c_int; +use std::{ + fmt::Debug, + fs::File, + net::TcpListener, + time::{SystemTime, UNIX_EPOCH}, +}; +#[cfg(unix)] +use std::{ + io::Write, + os::fd::{AsRawFd, FromRawFd, IntoRawFd}, +}; + +use libafl::{ + corpus::Corpus, + events::{ + launcher::Launcher, EventConfig, ProgressReporter, SimpleEventManager, + SimpleRestartingEventManager, + }, + executors::ExitKind, + inputs::UsesInput, + monitors::{ + 
tui::{ui::TuiUI, TuiMonitor}, + Monitor, MultiMonitor, SimpleMonitor, + }, + stages::StagesTuple, + state::{ + HasClientPerfMonitor, HasExecutions, HasLastReportTime, HasMetadata, HasSolutions, + UsesState, + }, + Error, Fuzzer, +}; +use libafl_bolts::{ + core_affinity::Cores, + shmem::{ShMemProvider, StdShMemProvider}, +}; + +use crate::{feedbacks::LibfuzzerCrashCauseMetadata, fuzz_with, options::LibfuzzerOptions}; + +fn do_fuzz( + options: &LibfuzzerOptions, + fuzzer: &mut F, + stages: &mut ST, + executor: &mut E, + state: &mut S, + mgr: &mut EM, +) -> Result<(), Error> +where + F: Fuzzer, + S: HasClientPerfMonitor + + HasMetadata + + HasExecutions + + UsesInput + + HasSolutions + + HasLastReportTime, + E: UsesState, + EM: ProgressReporter, + ST: StagesTuple, +{ + if let Some(solution) = state.solutions().last() { + let kind = state + .solutions() + .get(solution) + .expect("Last solution was not available") + .borrow() + .metadata::() + .expect("Crash cause not attached to solution") + .kind(); + let mut halt = false; + match kind { + ExitKind::Oom if !options.ignore_ooms() => halt = true, + ExitKind::Crash if !options.ignore_crashes() => halt = true, + ExitKind::Timeout if !options.ignore_timeouts() => halt = true, + _ => { + log::info!("Ignoring {kind:?} according to requested ignore rules."); + } + } + if halt { + log::info!("Halting; the error on the next line is actually okay. 
:)"); + return Err(Error::shutting_down()); + } + } + fuzzer.fuzz_loop(stages, executor, state, mgr)?; + Ok(()) +} + +fn fuzz_single_forking( + options: &LibfuzzerOptions, + harness: &extern "C" fn(*const u8, usize) -> c_int, + mut shmem_provider: StdShMemProvider, + monitor: M, +) -> Result<(), Error> +where + M: Monitor + Debug, +{ + fuzz_with!(options, harness, do_fuzz, |fuzz_single| { + let (state, mgr): ( + Option>, + SimpleRestartingEventManager<_, StdState<_, _, _, _>, _>, + ) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) { + // The restarting state will spawn the same process again as a child, then restart it each time it crashes. + Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {err}"); + } + }, + }; + #[cfg(unix)] + { + if options.close_fd_mask() != 0 { + let file_null = File::open("/dev/null")?; + unsafe { + if options.close_fd_mask() & 1 != 0 { + libc::dup2(file_null.as_raw_fd(), 1); + } + if options.close_fd_mask() & 2 != 0 { + libc::dup2(file_null.as_raw_fd(), 2); + } + } + } + } + crate::start_fuzzing_single(fuzz_single, state, mgr) + }) +} + +fn fuzz_many_forking( + options: &LibfuzzerOptions, + harness: &extern "C" fn(*const u8, usize) -> c_int, + shmem_provider: StdShMemProvider, + forks: usize, + monitor: M, +) -> Result<(), Error> +where + M: Monitor + Clone + Debug, +{ + fuzz_with!(options, harness, do_fuzz, |mut run_client| { + let cores = Cores::from((0..forks).collect::>()); + let broker_port = TcpListener::bind("127.0.0.1:0")? 
+ .local_addr() + .unwrap() + .port(); + + match Launcher::builder() + .shmem_provider(shmem_provider) + .configuration(EventConfig::from_name(options.fuzzer_name())) + .monitor(monitor) + .run_client(&mut run_client) + .cores(&cores) + .broker_port(broker_port) + // TODO .remote_broker_addr(opt.remote_broker_addr) + .stdout_file(Some("/dev/null")) + .build() + .launch() + { + Ok(()) => (), + Err(Error::ShuttingDown) => println!("Fuzzing stopped by user. Good bye."), + res @ Err(_) => return res, + } + Ok(()) + }) +} + +pub fn fuzz( + options: &LibfuzzerOptions, + harness: &extern "C" fn(*const u8, usize) -> c_int, +) -> Result<(), Error> { + if let Some(forks) = options.forks() { + let shmem_provider = StdShMemProvider::new().expect("Failed to init shared memory"); + if options.tui() { + let monitor = TuiMonitor::new(TuiUI::new(options.fuzzer_name().to_string(), true)); + fuzz_many_forking(options, harness, shmem_provider, forks, monitor) + } else if forks == 1 { + #[cfg(unix)] + let mut stderr = unsafe { + let new_fd = libc::dup(std::io::stderr().as_raw_fd()); + File::from_raw_fd(new_fd) + }; + let monitor = MultiMonitor::with_time( + move |s| { + #[cfg(unix)] + writeln!(stderr, "{s}").expect("Could not write to stderr???"); + #[cfg(not(unix))] + eprintln!("{s}"); + }, + SystemTime::now().duration_since(UNIX_EPOCH).unwrap(), + ); + fuzz_single_forking(options, harness, shmem_provider, monitor) + } else { + #[cfg(unix)] + let stderr_fd = unsafe { libc::dup(std::io::stderr().as_raw_fd()) }; + let monitor = MultiMonitor::with_time( + move |s| { + #[cfg(unix)] + { + // unfortunate requirement to meet Clone... 
thankfully, this does not + // generate effectively any overhead (no allocations, calls get merged) + let mut stderr = unsafe { File::from_raw_fd(stderr_fd) }; + writeln!(stderr, "{s}").expect("Could not write to stderr???"); + let _ = stderr.into_raw_fd(); // discard the file without closing + } + #[cfg(not(unix))] + eprintln!("{s}"); + }, + SystemTime::now().duration_since(UNIX_EPOCH).unwrap(), + ); + fuzz_many_forking(options, harness, shmem_provider, forks, monitor) + } + } else if options.tui() { + // if the user specifies TUI, we assume they want to fork; it would not be possible to use + // TUI safely otherwise + let shmem_provider = StdShMemProvider::new().expect("Failed to init shared memory"); + let monitor = TuiMonitor::new(TuiUI::new(options.fuzzer_name().to_string(), true)); + fuzz_many_forking(options, harness, shmem_provider, 1, monitor) + } else { + fuzz_with!(options, harness, do_fuzz, |fuzz_single| { + let mgr = SimpleEventManager::new(SimpleMonitor::new(|s| eprintln!("{s}"))); + crate::start_fuzzing_single(fuzz_single, None, mgr) + }) + } +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/harness_wrap.cpp b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/harness_wrap.cpp new file mode 100644 index 0000000000..21e3b62540 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/harness_wrap.cpp @@ -0,0 +1,10 @@ +#include "harness_wrap.h" + +extern "C" int libafl_libfuzzer_test_one_input( + int (*harness)(const uint8_t *, size_t), const uint8_t *data, size_t len) { + try { + return harness(data, len); + } catch (...) { + return -2; // custom code for "we died!" 
+ } +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/harness_wrap.h b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/harness_wrap.h new file mode 100644 index 0000000000..a3a4f2cbfa --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/harness_wrap.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int libafl_libfuzzer_test_one_input(int (*harness)(const uint8_t *, size_t), + const uint8_t *data, size_t len); + +#ifdef __cplusplus +} +#endif diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/lib.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/lib.rs new file mode 100644 index 0000000000..4e22cacee3 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/lib.rs @@ -0,0 +1,599 @@ +//! The `LibAFL` `LibFuzzer` runtime, exposing the same functions as the original [`LibFuzzer`](https://llvm.org/docs/LibFuzzer.html). + +#![allow(incomplete_features)] +// For `type_eq` +#![cfg_attr(unstable_feature, feature(specialization))] +// For `type_id` and owned things +#![cfg_attr(unstable_feature, feature(intrinsics))] +// For `std::simd` +#![cfg_attr(unstable_feature, feature(portable_simd))] +#![warn(clippy::cargo)] +#![allow(ambiguous_glob_reexports)] +#![deny(clippy::cargo_common_metadata)] +#![deny(rustdoc::broken_intra_doc_links)] +#![deny(clippy::all)] +#![deny(clippy::pedantic)] +#![allow( + clippy::unreadable_literal, + clippy::type_repetition_in_bounds, + clippy::missing_errors_doc, + clippy::cast_possible_truncation, + clippy::used_underscore_binding, + clippy::ptr_as_ptr, + clippy::missing_panics_doc, + clippy::missing_docs_in_private_items, + clippy::module_name_repetitions, + clippy::ptr_cast_constness, + clippy::unsafe_derive_deserialize +)] +#![cfg_attr(not(test), warn( +missing_debug_implementations, +missing_docs, +//trivial_casts, +trivial_numeric_casts, +unused_extern_crates, +unused_import_braces, +unused_qualifications, +//unused_results +))] +#![cfg_attr(test, 
deny( +missing_debug_implementations, +missing_docs, +//trivial_casts, +trivial_numeric_casts, +unused_extern_crates, +unused_import_braces, +unused_qualifications, +unused_must_use, +//unused_results +))] +#![cfg_attr( + test, + deny( + bad_style, + dead_code, + improper_ctypes, + non_shorthand_field_patterns, + no_mangle_generic_items, + overflowing_literals, + path_statements, + patterns_in_fns_without_body, + private_in_public, + unconditional_recursion, + unused, + unused_allocation, + unused_comparisons, + unused_parens, + while_true + ) +)] +// Till they fix this buggy lint in clippy +#![allow(clippy::borrow_as_ptr)] +#![allow(clippy::borrow_deref_ref)] + +use core::ffi::{c_char, c_int, CStr}; + +use libafl::{ + inputs::{BytesInput, HasTargetBytes, Input}, + Error, +}; +use libafl_bolts::AsSlice; + +use crate::options::{LibfuzzerMode, LibfuzzerOptions}; + +#[cfg(not(feature = "serdeany_autoreg"))] +compile_error!("serdeany_autoreg feature must be enabled."); + +mod feedbacks; +mod fuzz; +mod merge; +mod misc; +mod observers; +mod options; +mod report; +mod schedulers; +mod tmin; + +mod harness_wrap { + #![allow(non_snake_case)] + #![allow(non_camel_case_types)] + #![allow(non_upper_case_globals)] + #![allow(unused)] + #![allow(improper_ctypes)] + #![allow(clippy::unreadable_literal)] + #![allow(missing_docs)] + include!(concat!(env!("OUT_DIR"), "/harness_wrap.rs")); +} + +pub(crate) use harness_wrap::libafl_libfuzzer_test_one_input; +#[cfg(feature = "mimalloc")] +use mimalloc::MiMalloc; +#[global_allocator] +#[cfg(feature = "mimalloc")] +static GLOBAL: MiMalloc = MiMalloc; + +static mut BACKTRACE: Option = None; + +#[allow(clippy::struct_excessive_bools)] +struct CustomMutationStatus { + std_mutational: bool, + std_no_mutate: bool, + std_no_crossover: bool, + custom_mutation: bool, + custom_crossover: bool, +} + +impl CustomMutationStatus { + fn new() -> Self { + let custom_mutation = libafl_targets::libfuzzer::has_custom_mutator(); + let custom_crossover = 
libafl_targets::libfuzzer::has_custom_crossover(); + + // we use all libafl mutations + let std_mutational = !(custom_mutation || custom_crossover); + // we use libafl crossover, but not libafl mutations + let std_no_mutate = !std_mutational && custom_mutation && !custom_crossover; + // we use libafl mutations, but not libafl crossover + let std_no_crossover = !std_mutational && !custom_mutation && custom_crossover; + + Self { + std_mutational, + std_no_mutate, + std_no_crossover, + custom_mutation, + custom_crossover, + } + } +} + +macro_rules! fuzz_with { + ($options:ident, $harness:ident, $operation:expr, $and_then:expr, $edge_maker:expr) => {{ + use libafl_bolts::{ + current_nanos, + rands::StdRand, + tuples::{Merge, tuple_list}, + AsSlice, + }; + use libafl::{ + corpus::{CachedOnDiskCorpus, Corpus, OnDiskCorpus}, + executors::{ExitKind, InProcessExecutor, TimeoutExecutor}, + feedback_and_fast, feedback_not, feedback_or, feedback_or_fast, + feedbacks::{ConstFeedback, CrashFeedback, MaxMapFeedback, NewHashFeedback, TimeFeedback, TimeoutFeedback}, + generators::RandBytesGenerator, + inputs::{BytesInput, HasTargetBytes}, + mutators::{ + GrimoireExtensionMutator, GrimoireRecursiveReplacementMutator, GrimoireRandomDeleteMutator, + GrimoireStringReplacementMutator, havoc_crossover, havoc_mutations, havoc_mutations_no_crossover, + I2SRandReplace, StdScheduledMutator, Tokens, tokens_mutations + }, + observers::{stacktrace::BacktraceObserver, TimeObserver}, + schedulers::{ + IndexesLenTimeMinimizerScheduler, powersched::PowerSchedule, PowerQueueScheduler, + }, + stages::{ + CalibrationStage, GeneralizationStage, IfStage, StdMutationalStage, + StdPowerMutationalStage, TracingStage, + }, + state::{HasCorpus, StdState}, + StdFuzzer, + }; + use libafl_targets::{CmpLogObserver, LLVMCustomMutator, OomFeedback, OomObserver}; + use rand::{thread_rng, RngCore}; + use std::{env::temp_dir, fs::create_dir, path::PathBuf}; + + use crate::{BACKTRACE, CustomMutationStatus}; + use 
crate::feedbacks::{LibfuzzerCrashCauseFeedback, LibfuzzerKeepFeedback, ShrinkMapFeedback}; + use crate::misc::should_use_grimoire; + use crate::observers::{MappedEdgeMapObserver, SizeValueObserver}; + + let edge_maker = &$edge_maker; + + let closure = |mut state: Option<_>, mut mgr, _cpu_id| { + let mutator_status = CustomMutationStatus::new(); + let grimoire_metadata = should_use_grimoire(&mut state, &$options, &mutator_status)?; + let grimoire = grimoire_metadata.should(); + + let edges_observer = edge_maker(); + let size_edges_observer = MappedEdgeMapObserver::new(edge_maker(), SizeValueObserver::default()); + + let keep_observer = LibfuzzerKeepFeedback::new(); + let keep = keep_observer.keep(); + + // Create an observation channel to keep track of the execution time + let time_observer = TimeObserver::new("time"); + + // Create an OOM observer to monitor if an OOM has occurred + let oom_observer = OomObserver::new($options.rss_limit(), $options.malloc_limit()); + + // Create the Cmp observer + let cmplog_observer = CmpLogObserver::new("cmplog", true); + + // Create a stacktrace observer + let backtrace_observer = BacktraceObserver::new( + "BacktraceObserver", + unsafe { &mut BACKTRACE }, + libafl::observers::HarnessType::InProcess, + ); + + // New maximization map feedback linked to the edges observer + let map_feedback = MaxMapFeedback::tracking(&edges_observer, true, true); + let shrinking_map_feedback = ShrinkMapFeedback::tracking(&size_edges_observer, false, false); + + // Set up a generalization stage for grimoire + let generalization = GeneralizationStage::new(&edges_observer); + let generalization = IfStage::new(|_, _, _, _, _| Ok(grimoire.into()), (generalization, ())); + + let calibration = CalibrationStage::new(&map_feedback); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let mut feedback = feedback_and_fast!( + feedback_not!( + feedback_or_fast!( + OomFeedback, + CrashFeedback::new(), + 
TimeoutFeedback::new() + ) + ), + keep_observer, + feedback_or!( + map_feedback, + feedback_and_fast!(ConstFeedback::new($options.shrink()), shrinking_map_feedback), + // Time feedback, this one does not need a feedback state + TimeFeedback::with_observer(&time_observer) + ) + ); + + // A feedback to choose if an input is a solution or not + let mut objective = feedback_or_fast!( + LibfuzzerCrashCauseFeedback::new($options.artifact_prefix().cloned()), + OomFeedback, + feedback_and_fast!( + CrashFeedback::new(), + feedback_or_fast!(ConstFeedback::new(!$options.dedup()), NewHashFeedback::new(&backtrace_observer)) + ), + TimeoutFeedback::new() + ); + + let corpus_dir = if let Some(main) = $options.dirs().first() { + main.clone() + } else { + let mut rng = thread_rng(); + let mut dir = PathBuf::new(); + let mut last = Ok(()); + for _ in 0..8 { + dir = temp_dir().join(format!("libafl-corpus-{}", rng.next_u64())); + last = create_dir(&dir); + if last.is_ok() { + break; + } + } + last?; + dir + }; + + let crash_corpus = if let Some(prefix) = $options.artifact_prefix() { + OnDiskCorpus::with_meta_format_and_prefix(prefix.dir(), None, prefix.filename_prefix().clone(), false) + .unwrap() + } else { + OnDiskCorpus::with_meta_format_and_prefix(&std::env::current_dir().unwrap(), None, None, false) + .unwrap() + }; + + // If not restarting, create a State from scratch + let mut state = state.unwrap_or_else(|| { + StdState::new( + // RNG + StdRand::with_seed(current_nanos()), + // Corpus that will be evolved, we keep it in memory for performance + CachedOnDiskCorpus::with_meta_format_and_prefix(corpus_dir.clone(), 4096, None, None, true).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + crash_corpus, + // A reference to the feedbacks, to create their feedback state + &mut feedback, + // A reference to the objectives, to create their objective state + &mut objective, + ) + .expect("Failed 
to create state") + }); + state.metadata_map_mut().insert_boxed(grimoire_metadata); + + // Attempt to use tokens from libfuzzer dicts + if !state.has_metadata::() { + let mut toks = if let Some(tokens) = $options.dict() { + tokens.clone() + } else { + Tokens::default() + }; + #[cfg(any(target_os = "linux", target_vendor = "apple"))] + { + toks += libafl_targets::autotokens()?; + } + + if !toks.is_empty() { + state.add_metadata(toks); + } + } + + // Setup a randomic Input2State stage, conditionally within a custom mutator + let i2s = + StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new()))); + let i2s = IfStage::new(|_, _, _, _, _| Ok((!mutator_status.custom_mutation).into()), (i2s, ())); + let cm_i2s = StdMutationalStage::new(unsafe { + LLVMCustomMutator::mutate_unchecked(StdScheduledMutator::new(tuple_list!( + I2SRandReplace::new() + ))) + }); + let cm_i2s = IfStage::new(|_, _, _, _, _| Ok(mutator_status.custom_mutation.into()), (cm_i2s, ())); + + // TODO configure with mutation stacking options from libfuzzer + let std_mutator = StdScheduledMutator::new(havoc_mutations().merge(tokens_mutations())); + + let std_power = StdPowerMutationalStage::new(std_mutator); + let std_power = IfStage::new(|_, _, _, _, _| Ok(mutator_status.std_mutational.into()), (std_power, ())); + + // for custom mutator and crossover, each have access to the LLVMFuzzerMutate -- but it appears + // that this method doesn't normally offer stacked mutations where one may expect them + // we offer stacked mutations since this appears to be expected; see: + // https://github.com/google/fuzzing/blob/bb05211c12328cb16327bb0d58c0c67a9a44576f/docs/structure-aware-fuzzing.md#example-compression + // additionally, we perform mutation and crossover in two separate stages due to possible + // errors introduced by incorrectly handling custom mutations; see explanation below + + // a custom mutator is defined + // note: in libfuzzer, crossover is enabled by default, but this 
appears to be unintended + // and erroneous if custom mutators are defined as it inserts bytes from other test cases + // without performing the custom mutator's preprocessing beforehand + // we opt not to use crossover in the LLVMFuzzerMutate and instead have a second crossover pass, + // though it is likely an error for fuzzers to provide custom mutators but not custom crossovers + let custom_mutator = unsafe { + LLVMCustomMutator::mutate_unchecked(StdScheduledMutator::new(havoc_mutations_no_crossover().merge(tokens_mutations()))) + }; + let std_mutator_no_mutate = StdScheduledMutator::with_max_stack_pow(havoc_crossover(), 3); + + let cm_power = StdPowerMutationalStage::new(custom_mutator); + let cm_power = IfStage::new(|_, _, _, _, _| Ok(mutator_status.custom_mutation.into()), (cm_power, ())); + let cm_std_power = StdMutationalStage::new(std_mutator_no_mutate); + let cm_std_power = + IfStage::new(|_, _, _, _, _| Ok(mutator_status.std_no_mutate.into()), (cm_std_power, ())); + + // a custom crossover is defined + // while the scenario that a custom crossover is defined without a custom mutator is unlikely + // we handle it here explicitly anyways + let custom_crossover = unsafe { + LLVMCustomMutator::crossover_unchecked(StdScheduledMutator::with_max_stack_pow( + havoc_mutations_no_crossover().merge(tokens_mutations()), + 3, + )) + }; + let std_mutator_no_crossover = StdScheduledMutator::new(havoc_mutations_no_crossover().merge(tokens_mutations())); + + let cc_power = StdMutationalStage::new(custom_crossover); + let cc_power = IfStage::new(|_, _, _, _, _| Ok(mutator_status.custom_crossover.into()), (cc_power, ())); + let cc_std_power = StdPowerMutationalStage::new(std_mutator_no_crossover); + let cc_std_power = + IfStage::new(|_, _, _, _, _| Ok(mutator_status.std_no_crossover.into()), (cc_std_power, ())); + + let grimoire_mutator = StdScheduledMutator::with_max_stack_pow( + tuple_list!( + GrimoireExtensionMutator::new(), + 
GrimoireRecursiveReplacementMutator::new(), + GrimoireStringReplacementMutator::new(), + // give more probability to avoid large inputs + GrimoireRandomDeleteMutator::new(), + GrimoireRandomDeleteMutator::new(), + ), + 3, + ); + let grimoire = IfStage::new(|_, _, _, _, _| Ok(grimoire.into()), (StdMutationalStage::transforming(grimoire_mutator), ())); + + // A minimization+queue policy to get testcases from the corpus + let scheduler = IndexesLenTimeMinimizerScheduler::new(PowerQueueScheduler::new(&mut state, &edges_observer, PowerSchedule::FAST)); + + // A fuzzer with feedbacks and a corpus scheduler + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); + + // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + + let result = unsafe { crate::libafl_libfuzzer_test_one_input(Some(*$harness), buf.as_ptr(), buf.len()) }; + match result { + -2 => ExitKind::Crash, + _ => { + *keep.borrow_mut() = result == 0; + ExitKind::Ok + } + } + }; + + let mut tracing_harness = harness; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new( + &mut harness, + tuple_list!(edges_observer, size_edges_observer, time_observer, backtrace_observer, oom_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?, + $options.timeout(), + ); + + // In case the corpus is empty (on first run) or crashed while loading, reset + if state.must_load_initial_inputs() { + if !$options.dirs().is_empty() { + // Load from disk + state + .load_initial_inputs_forced(&mut fuzzer, &mut executor, &mut mgr, $options.dirs()) + .unwrap_or_else(|e| { + panic!("Failed to load initial corpus at {:?}: {}", $options.dirs(), e) + }); + println!("We imported {} inputs from disk.", state.corpus().count()); + } + if state.corpus().count() < 1 { + // 
Generator of bytearrays of max size 64 + let mut generator = RandBytesGenerator::from(RandBytesGenerator::new(64)); + + // Generate 1024 initial inputs + state + .generate_initial_inputs( + &mut fuzzer, + &mut executor, + &mut generator, + &mut mgr, + 1 << 10, + ) + .expect("Failed to generate the initial corpus"); + println!( + "We imported {} inputs from the generator.", + state.corpus().count() + ); + } + } + + + // Set up a tracing stage in which we log comparisons + let tracing = IfStage::new(|_, _, _, _, _| Ok(!$options.skip_tracing()), (TracingStage::new(InProcessExecutor::new( + &mut tracing_harness, + tuple_list!(cmplog_observer), + &mut fuzzer, + &mut state, + &mut mgr, + )?), ())); + + // The order of the stages matters! + let mut stages = tuple_list!( + calibration, + generalization, + tracing, + i2s, + cm_i2s, + std_power, + cm_power, + cm_std_power, + cc_std_power, + cc_power, + grimoire, + ); + + #[allow(clippy::unnecessary_mut_passed)] // the functions may not require these many `mut`s + $operation(&$options, &mut fuzzer, &mut stages, &mut executor, &mut state, &mut mgr) + }; + + #[allow(clippy::redundant_closure_call)] + $and_then(closure) + }}; + + ($options:ident, $harness:ident, $operation:expr, $and_then:expr) => {{ + use libafl::observers::{ + HitcountsIterableMapObserver, HitcountsMapObserver, MultiMapObserver, StdMapObserver, + }; + use libafl_targets::{COUNTERS_MAPS, extra_counters}; + + // Create an observation channel using the coverage map + if unsafe { COUNTERS_MAPS.len() } == 1 { + fuzz_with!($options, $harness, $operation, $and_then, || { + let edges = unsafe { extra_counters() }; + let edges_observer = + HitcountsMapObserver::new(StdMapObserver::from_mut_slice("edges", edges.into_iter().next().unwrap())); + edges_observer + }) + } else if unsafe { COUNTERS_MAPS.len() } > 1 { + fuzz_with!($options, $harness, $operation, $and_then, || { + let edges = unsafe { extra_counters() }; + let edges_observer = + 
HitcountsIterableMapObserver::new(MultiMapObserver::new("edges", edges)); + edges_observer + }) + } else { + panic!("No maps available; cannot fuzz!") + } + }}; +} + +pub(crate) use fuzz_with; + +/// Starts to fuzz on a single node +pub fn start_fuzzing_single( + mut fuzz_single: F, + initial_state: Option, + mgr: EM, +) -> Result<(), Error> +where + F: FnMut(Option, EM, usize) -> Result<(), Error>, +{ + fuzz_single(initial_state, mgr, 0) +} + +extern "C" { + // redeclaration against libafl_targets because the pointers in our case may be mutable + fn libafl_targets_libfuzzer_init(argc: *mut c_int, argv: *mut *mut *const c_char) -> i32; +} + +/// A method to start the fuzzer at a later point in time from a library. +/// To quote the `libfuzzer` docs: +/// > when it’s ready to start fuzzing, it can call `LLVMFuzzerRunDriver`, passing in the program arguments and a callback. This callback is invoked just like `LLVMFuzzerTestOneInput`, and has the same signature. +/// +/// # Safety +/// Will dereference all parameters. +/// This will then call the (potentially unsafe) harness. +/// The fuzzer itself should catch any side effects and, hence be reasonably safe, if the `harness_fn` parameter is correct. 
+#[allow(non_snake_case, clippy::similar_names, clippy::missing_safety_doc)] +#[no_mangle] +pub unsafe extern "C" fn LLVMFuzzerRunDriver( + argc: *mut c_int, + argv: *mut *mut *const c_char, + harness_fn: Option c_int>, +) -> c_int { + let harness = harness_fn + .as_ref() + .expect("Illegal harness provided to libafl."); + + // it appears that no one, not even libfuzzer, uses this return value + // https://github.com/llvm/llvm-project/blob/llvmorg-15.0.7/compiler-rt/lib/fuzzer/FuzzerDriver.cpp#L648 + libafl_targets_libfuzzer_init(argc, argv); + + let argc = unsafe { *argc } as isize; + let argv = unsafe { *argv }; + + let options = LibfuzzerOptions::new( + (0..argc) + .map(|i| unsafe { *argv.offset(i) }) + .map(|cstr| unsafe { CStr::from_ptr(cstr) }) + .map(|cstr| cstr.to_str().unwrap()), + ) + .unwrap(); + + if !options.unknown().is_empty() { + println!("Unrecognised options: {:?}", options.unknown()); + } + + if *options.mode() != LibfuzzerMode::Tmin + && !options.dirs().is_empty() + && options.dirs().iter().all(|maybe_dir| maybe_dir.is_file()) + { + // we've been requested to just run some inputs. Do so. 
+ for input in options.dirs() { + let input = BytesInput::from_file(input).unwrap_or_else(|_| { + panic!("Couldn't load input {}", input.to_string_lossy().as_ref()) + }); + libafl_targets::libfuzzer::libfuzzer_test_one_input(input.target_bytes().as_slice()); + } + return 0; + } + let res = match options.mode() { + LibfuzzerMode::Fuzz => fuzz::fuzz(&options, harness), + LibfuzzerMode::Merge => merge::merge(&options, harness), + LibfuzzerMode::Tmin => tmin::minimize_crash(&options, *harness), + LibfuzzerMode::Report => report::report(&options, harness), + }; + match res { + Ok(()) | Err(Error::ShuttingDown) => 0, + Err(err) => { + eprintln!("Encountered error while performing libfuzzer shimming: {err}"); + 1 + } + } +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/merge.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/merge.rs new file mode 100644 index 0000000000..92b9854f13 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/merge.rs @@ -0,0 +1,278 @@ +use std::{ + env::temp_dir, + ffi::c_int, + fs::{rename, File}, + io::Write, + os::fd::{AsRawFd, FromRawFd}, + time::{SystemTime, UNIX_EPOCH}, +}; + +use libafl::{ + corpus::{Corpus, OnDiskCorpus}, + events::{EventRestarter, SimpleRestartingEventManager}, + executors::{ExitKind, InProcessExecutor, TimeoutExecutor}, + feedback_and_fast, feedback_or_fast, + feedbacks::{CrashFeedback, MinMapFeedback, TimeoutFeedback}, + inputs::{BytesInput, HasTargetBytes}, + monitors::MultiMonitor, + observers::{MultiMapObserver, TimeObserver}, + schedulers::RemovableScheduler, + state::{HasCorpus, HasRand, StdState}, + Error, HasScheduler, StdFuzzer, +}; +use libafl_bolts::{ + rands::{Rand, RandomSeed, StdRand}, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::tuple_list, + AsSlice, +}; +use libafl_targets::{OomFeedback, OomObserver, COUNTERS_MAPS}; + +use crate::{ + feedbacks::{LibfuzzerCrashCauseFeedback, LibfuzzerKeepFeedback}, + observers::{MappedEdgeMapObserver, SizeTimeValueObserver}, + 
options::LibfuzzerOptions, + schedulers::MergeScheduler, +}; + +#[allow(clippy::too_many_lines)] +pub fn merge( + options: &LibfuzzerOptions, + harness: &extern "C" fn(*const u8, usize) -> c_int, +) -> Result<(), Error> { + if options.dirs().is_empty() { + return Err(Error::illegal_argument("Missing corpora to minimize; you should provide one directory to minimize into and one-to-many from which the inputs are loaded.")); + } + + let crash_corpus = if let Some(prefix) = options.artifact_prefix() { + OnDiskCorpus::with_meta_format_and_prefix( + prefix.dir(), + None, + prefix.filename_prefix().clone(), + true, + ) + .unwrap() + } else { + OnDiskCorpus::with_meta_format_and_prefix( + &std::env::current_dir().unwrap(), + None, + None, + true, + ) + .unwrap() + }; + + let keep_observer = LibfuzzerKeepFeedback::new(); + let keep = keep_observer.keep(); + + let mut shmem_provider = StdShMemProvider::new().unwrap(); + + #[cfg(unix)] + let mut stderr = unsafe { + let new_fd = libc::dup(std::io::stderr().as_raw_fd()); + File::from_raw_fd(new_fd) + }; + let monitor = MultiMonitor::with_time( + move |s| { + #[cfg(unix)] + writeln!(stderr, "{s}").expect("Could not write to stderr???"); + #[cfg(not(unix))] + eprintln!("{s}"); + }, + SystemTime::now().duration_since(UNIX_EPOCH).unwrap(), + ); + + let (state, mut mgr): ( + Option>, + SimpleRestartingEventManager<_, StdState<_, _, _, _>, _>, + ) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider) { + // The restarting state will spawn the same process again as a child, then restart it each time it crashes. 
+ Ok(res) => res, + Err(err) => match err { + Error::ShuttingDown => { + return Ok(()); + } + _ => { + panic!("Failed to setup the restarter: {err}"); + } + }, + }; + #[cfg(unix)] + { + if options.close_fd_mask() != 0 { + let file_null = File::open("/dev/null")?; + unsafe { + if options.close_fd_mask() & 1 != 0 { + libc::dup2(file_null.as_raw_fd(), 1); + } + if options.close_fd_mask() & 2 != 0 { + libc::dup2(file_null.as_raw_fd(), 2); + } + } + } + } + + let edges = unsafe { core::mem::take(&mut COUNTERS_MAPS) }; + let edges_observer = MultiMapObserver::new("edges", edges); + + let time = TimeObserver::new("time"); + let edges_observer = + MappedEdgeMapObserver::new(edges_observer, SizeTimeValueObserver::new(time)); + + let map_feedback = MinMapFeedback::tracking(&edges_observer, false, true); + + // Create an OOM observer to monitor if an OOM has occurred + let oom_observer = OomObserver::new(options.rss_limit(), options.malloc_limit()); + + // Feedback to rate the interestingness of an input + // This one is composed by two Feedbacks in OR + let mut feedback = feedback_and_fast!(keep_observer, map_feedback); + + // A feedback to choose if an input is a solution or not + let mut objective = feedback_or_fast!( + LibfuzzerCrashCauseFeedback::new(options.artifact_prefix().cloned()), + OomFeedback, + CrashFeedback::new(), + TimeoutFeedback::new() + ); + + let observers = tuple_list!(edges_observer, oom_observer); + + // scheduler doesn't really matter here + let scheduler = MergeScheduler::new(); + + let mut state = state.map_or_else(|| { + let mut rand = StdRand::new(); + + let corpus_dir = if options.dirs().first().unwrap().exists() + && options + .dirs() + .first() + .unwrap() + .read_dir()? + .any(|entry| entry.map_or(true, |e| !(e.file_name() == "." || e.file_name() == ".."))) + { + let temp = temp_dir().join(format!("libafl-merge-{}{}", rand.next(), rand.next())); + eprintln!("Warning: creating an intermediary directory for minimisation at {}. 
We will move your existing corpus dir to.", temp.to_str().unwrap()); + temp + } else { + options.dirs().first().cloned().unwrap() + }; + + StdState::new( + // RNG + StdRand::new(), + // Corpus that will be evolved, stored on disk in `corpus_dir` + OnDiskCorpus::with_meta_format_and_prefix(&corpus_dir, None, None, true).unwrap(), + // Corpus in which we store solutions (crashes in this example), + // on disk so the user can get them after stopping the fuzzer + crash_corpus, + // A reference to the feedbacks, to create their feedback state + &mut feedback, + // A reference to the objectives, to create their objective state + &mut objective, + ) + }, Ok)?; + + let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective); // The wrapped harness function, calling out to the LLVM-style harness + let mut harness = |input: &BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + + let result = unsafe { + crate::libafl_libfuzzer_test_one_input(Some(*harness), buf.as_ptr(), buf.len()) + }; + if result == -2 { + ExitKind::Crash + } else { + *keep.borrow_mut() = result == 0; + ExitKind::Ok + } + }; + + // Create the executor for an in-process function with one observer for edge coverage and one for the execution time + let mut executor = TimeoutExecutor::new( + InProcessExecutor::new(&mut harness, observers, &mut fuzzer, &mut state, &mut mgr)?, + options.timeout(), + ); + + // In case the corpus is empty (on first run) or crashed while loading, reset + if state.must_load_initial_inputs() && !options.dirs().is_empty() { + let loaded_dirs = options + .dirs() + .iter() + .cloned() + .filter(|dir| state.corpus().dir_path() != dir) + .collect::>(); + // Load from disk + state + .load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &loaded_dirs) + .unwrap_or_else(|e| { + panic!( + "Failed to load initial corpus at {:?}: {}", + options.dirs(), + e + ) + }); + } + + for idx in fuzzer.scheduler().removable() { + let testcase = 
state.corpus_mut().remove(idx)?; + fuzzer + .scheduler_mut() + .on_remove(&mut state, idx, &Some(testcase))?; + } + + for idx in fuzzer.scheduler().current().clone() { + let mut testcase = state.corpus_mut().get(idx)?.borrow_mut(); + let file_path = testcase + .file_path_mut() + .as_mut() + .expect("No file backing for corpus entry"); + if let Some((base, _)) = file_path + .file_name() + .unwrap() + .to_str() + .unwrap() + .rsplit_once('-') + { + let mut new_file_path = file_path.clone(); + new_file_path.pop(); + new_file_path.push(base); + if new_file_path.exists() { + drop(testcase); + let testcase = state.corpus_mut().remove(idx)?; + fuzzer + .scheduler_mut() + .on_remove(&mut state, idx, &Some(testcase))?; + } else { + rename(&file_path, &new_file_path)?; + *file_path = new_file_path; + } + } + } + + println!( + "Minimization complete; reduced to {} inputs!", + state.corpus().count() + ); + + let corpus_dir = state.corpus().dir_path().clone(); + if corpus_dir != options.dirs()[0] { + let temp = temp_dir().join(format!( + "libafl-merge-orig-{}{}", + state.rand_mut().next(), + state.rand_mut().next() + )); + eprintln!( + "Moving original corpus directory {} to {} and replacing it with minimisation result ({}).", + options.dirs()[0].to_str().unwrap(), + temp.to_str().unwrap(), + corpus_dir.to_str().unwrap() + ); + rename(&options.dirs()[0], temp)?; + rename(corpus_dir, &options.dirs()[0])?; + } + + mgr.send_exiting() +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/misc.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/misc.rs new file mode 100644 index 0000000000..7b496a8283 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/misc.rs @@ -0,0 +1,88 @@ +use std::{ + collections::{HashSet, VecDeque}, + path::PathBuf, +}; + +use libafl::{state::HasMetadata, Error}; +use libafl_bolts::impl_serdeany; +use serde::{Deserialize, Serialize}; +use utf8_chars::BufReadCharsExt; + +use crate::{options::LibfuzzerOptions, CustomMutationStatus}; + 
+#[derive(Deserialize, Serialize, Debug, Default)] +pub(crate) struct ShouldUseGrimoireMetadata { + should: bool, + non_utf8: usize, + utf8: usize, + checked: HashSet, +} + +impl_serdeany!(ShouldUseGrimoireMetadata); + +impl ShouldUseGrimoireMetadata { + pub fn should(&self) -> bool { + self.should + } +} + +pub(crate) fn should_use_grimoire( + maybe_state: &mut Option, + options: &LibfuzzerOptions, + mutator_status: &CustomMutationStatus, +) -> Result, Error> +where + S: HasMetadata, +{ + let mut metadata: Box = maybe_state + .as_mut() + .and_then(|state| state.metadata_map_mut().remove()) + .unwrap_or_default(); + let grimoire = if let Some(grimoire) = options.grimoire() { + if grimoire && !mutator_status.std_mutational { + eprintln!("WARNING: cowardly refusing to use grimoire after detecting the presence of a custom mutator"); + } + metadata.should = grimoire && mutator_status.std_mutational; + metadata + } else if mutator_status.std_mutational { + if options.dirs().is_empty() { + eprintln!("WARNING: cowardly refusing to use grimoire since we cannot determine if the input is primarily text; set -grimoire=1 or provide a corpus directory."); + metadata + } else { + let mut input_queue = VecDeque::new(); + input_queue.extend(options.dirs().iter().cloned()); + while let Some(entry) = input_queue.pop_front() { + if entry.is_dir() { + if let Ok(entries) = std::fs::read_dir(entry) { + for entry in entries { + let entry = entry?; + input_queue.push_back(entry.path()); + } + } + } else if entry.is_file() + && entry + .extension() + .map_or(true, |ext| ext != "metadata" && ext != "lafl_lock") + && !metadata.checked.contains(&entry) + { + let mut reader = std::io::BufReader::new(std::fs::File::open(&entry)?); + if reader.chars().all(|maybe_c| maybe_c.is_ok()) { + metadata.utf8 += 1; + } else { + metadata.non_utf8 += 1; + } + metadata.checked.insert(entry); + } + } + metadata.should = metadata.utf8 > metadata.non_utf8; // greater-than so zero testcases doesn't enable + if 
metadata.should { + eprintln!("INFO: inferred grimoire mutator (found {}/{} UTF-8 inputs); if this is undesired, set -grimoire=0", metadata.utf8, metadata.utf8 + metadata.non_utf8); + } + metadata + } + } else { + metadata + }; + + Ok(grimoire) +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/observers.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/observers.rs new file mode 100644 index 0000000000..22ada45c5a --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/observers.rs @@ -0,0 +1,366 @@ +use std::{ + fmt::Debug, + hash::{Hash, Hasher}, +}; + +use ahash::AHasher; +use libafl::{ + executors::ExitKind, + inputs::UsesInput, + observers::{MapObserver, Observer, TimeObserver}, + state::UsesState, + Error, +}; +use libafl_bolts::{AsIter, HasLen, Named}; +use num_traits::Bounded; +use serde::{Deserialize, Serialize}; + +static INITIAL_SIZE: usize = usize::MAX; +static INITIAL_TIME: u64 = u64::MAX; + +pub trait ValueObserver: for<'de> Deserialize<'de> + Serialize + Debug + Named { + type ValueType: Bounded + + Default + + Copy + + Serialize + + for<'de> Deserialize<'de> + + PartialEq + + Hash + + Debug + + 'static; + + fn value(&self) -> &Self::ValueType; + + fn default_value(&self) -> &Self::ValueType; +} + +#[derive(Deserialize, Serialize, Debug)] +pub struct MappedEdgeMapObserver { + inner: M, + name: String, + value_observer: O, +} + +impl MappedEdgeMapObserver +where + M: MapObserver, + O: ValueObserver, +{ + pub fn new(obs: M, value_obs: O) -> Self { + Self { + name: format!("{}_{}", value_obs.name(), obs.name()), + inner: obs, + value_observer: value_obs, + } + } +} + +impl HasLen for MappedEdgeMapObserver +where + M: HasLen, +{ + fn len(&self) -> usize { + self.inner.len() + } +} + +impl Named for MappedEdgeMapObserver { + fn name(&self) -> &str { + &self.name + } +} + +impl MapObserver for MappedEdgeMapObserver +where + M: MapObserver + for<'it> AsIter<'it, Item = M::Entry>, + O: ValueObserver, +{ + type Entry = O::ValueType; + + 
fn get(&self, idx: usize) -> &Self::Entry { + let initial = self.inner.initial(); + if *self.inner.get(idx) == initial { + self.value_observer.default_value() + } else { + self.value_observer.value() + } + } + + fn get_mut(&mut self, _idx: usize) -> &mut Self::Entry { + unimplemented!("Impossible to implement for a proxy map.") + } + + fn usable_count(&self) -> usize { + self.inner.usable_count() + } + + fn count_bytes(&self) -> u64 { + self.inner.count_bytes() + } + + fn hash(&self) -> u64 { + let mut hasher = AHasher::default(); + let initial = self.inner.initial(); + for e in self.inner.as_iter() { + if *e == initial { + self.value_observer.default_value().hash(&mut hasher); + } else { + self.value_observer.value().hash(&mut hasher); + } + } + hasher.finish() + } + + fn initial(&self) -> Self::Entry { + *self.value_observer.default_value() + } + + fn reset_map(&mut self) -> Result<(), Error> { + self.inner.reset_map() + } + + fn to_vec(&self) -> Vec { + let initial = self.inner.initial(); + let default = *self.value_observer.default_value(); + let value = *self.value_observer.value(); + self.inner + .as_iter() + .map(|&e| if e == initial { default } else { value }) + .collect() + } + + fn how_many_set(&self, indexes: &[usize]) -> usize { + self.inner.how_many_set(indexes) + } +} + +impl UsesState for MappedEdgeMapObserver +where + M: UsesState, +{ + type State = M::State; +} + +impl Observer for MappedEdgeMapObserver +where + M: Observer + Debug, + O: Observer + Debug, + S: UsesInput, +{ + fn pre_exec(&mut self, state: &mut S, input: &S::Input) -> Result<(), Error> { + self.inner.pre_exec(state, input)?; + self.value_observer.pre_exec(state, input) + } + + fn post_exec( + &mut self, + state: &mut S, + input: &S::Input, + exit_kind: &ExitKind, + ) -> Result<(), Error> { + self.inner.post_exec(state, input, exit_kind)?; + self.value_observer.post_exec(state, input, exit_kind) + } +} + +pub struct MappedEdgeMapIter<'it, I, O, T> { + inner: I, + initial: T, + 
value_obs: &'it O, +} + +impl<'it, I, O, T> MappedEdgeMapIter<'it, I, O, T> { + fn new(iter: I, initial: T, value_obs: &'it O) -> Self { + Self { + inner: iter, + initial, + value_obs, + } + } +} + +impl<'it, I, O, T> Iterator for MappedEdgeMapIter<'it, I, O, T> +where + I: Iterator, + T: PartialEq + 'it, + O: ValueObserver, +{ + type Item = &'it O::ValueType; + + fn next(&mut self) -> Option { + self.inner.next().map(|e| { + (*e == self.initial) + .then(|| self.value_obs.default_value()) + .unwrap_or_else(|| self.value_obs.value()) + }) + } +} + +impl<'it, M, O> AsIter<'it> for MappedEdgeMapObserver +where + M: MapObserver + for<'a> AsIter<'a, Item = M::Entry>, + O: ValueObserver + 'it, +{ + type Item = O::ValueType; + type IntoIter = MappedEdgeMapIter<'it, >::IntoIter, O, M::Entry>; + + fn as_iter(&'it self) -> Self::IntoIter { + let iter = self.inner.as_iter(); + let initial = self.inner.initial(); + MappedEdgeMapIter::new(iter, initial, &self.value_observer) + } +} + +#[derive(Copy, Clone, Serialize, Deserialize, Debug, Default)] +pub struct SizeValueObserver { + size: usize, +} + +impl ValueObserver for SizeValueObserver { + type ValueType = usize; + + fn value(&self) -> &Self::ValueType { + &self.size + } + + fn default_value(&self) -> &Self::ValueType { + &INITIAL_SIZE + } +} + +impl Named for SizeValueObserver { + fn name(&self) -> &str { + "size" + } +} + +impl Observer for SizeValueObserver +where + S: UsesInput, + S::Input: HasLen, +{ + fn pre_exec(&mut self, _state: &mut S, input: &S::Input) -> Result<(), Error> { + self.size = input.len(); + Ok(()) + } +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct TimeValueObserver { + time: u64, + time_obs: TimeObserver, +} + +impl TimeValueObserver { + pub fn new(time_obs: TimeObserver) -> Self { + Self { + time: INITIAL_TIME, + time_obs, + } + } +} + +impl ValueObserver for TimeValueObserver { + type ValueType = u64; + + fn value(&self) -> &Self::ValueType { + &self.time + } + + fn 
default_value(&self) -> &Self::ValueType { + &INITIAL_TIME + } +} + +impl Named for TimeValueObserver { + fn name(&self) -> &str { + self.time_obs.name() + } +} + +impl Observer for TimeValueObserver +where + S: UsesInput, +{ + fn pre_exec(&mut self, state: &mut S, input: &S::Input) -> Result<(), Error> { + self.time_obs.pre_exec(state, input) + } + + fn post_exec( + &mut self, + state: &mut S, + input: &S::Input, + exit_kind: &ExitKind, + ) -> Result<(), Error> { + self.time_obs.post_exec(state, input, exit_kind)?; + self.time = self + .time_obs + .last_runtime() + .as_ref() + .map_or(INITIAL_TIME, |duration| { + u64::try_from(duration.as_micros()).unwrap_or(INITIAL_TIME) + }); + Ok(()) + } +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct SizeTimeValueObserver { + value: u64, + size_obs: SizeValueObserver, + time_obs: TimeValueObserver, +} + +impl SizeTimeValueObserver { + pub fn new(time_obs: TimeObserver) -> Self { + Self { + value: INITIAL_TIME, + size_obs: SizeValueObserver::default(), + time_obs: TimeValueObserver::new(time_obs), + } + } +} + +impl ValueObserver for SizeTimeValueObserver { + type ValueType = u64; + + fn value(&self) -> &Self::ValueType { + &self.value + } + + fn default_value(&self) -> &Self::ValueType { + &INITIAL_TIME + } +} + +impl Named for SizeTimeValueObserver { + fn name(&self) -> &str { + "size_time" + } +} + +impl Observer for SizeTimeValueObserver +where + S: UsesInput, + S::Input: HasLen, +{ + fn pre_exec(&mut self, state: &mut S, input: &S::Input) -> Result<(), Error> { + self.size_obs.pre_exec(state, input)?; + self.time_obs.pre_exec(state, input) + } + + fn post_exec( + &mut self, + state: &mut S, + input: &S::Input, + exit_kind: &ExitKind, + ) -> Result<(), Error> { + self.time_obs.post_exec(state, input, exit_kind)?; + self.size_obs.post_exec(state, input, exit_kind)?; + self.value = self + .time_obs + .value() + .saturating_mul(*self.size_obs.value() as u64); + Ok(()) + } +} diff --git 
a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/options.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/options.rs new file mode 100644 index 0000000000..e9fdab7521 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/options.rs @@ -0,0 +1,368 @@ +use core::fmt::{Display, Formatter}; +use std::{path::PathBuf, time::Duration}; + +use libafl::mutators::Tokens; + +use crate::options::RawOption::{Directory, Flag}; + +enum RawOption<'a> { + Directory(&'a str), + Flag { name: &'a str, value: &'a str }, +} + +fn parse_option(arg: &str) -> Option { + if arg.starts_with("--") { + None + } else if arg.starts_with('-') { + if let Some((name, value)) = arg.split_at(1).1.split_once('=') { + Some(Flag { name, value }) + } else { + eprintln!("warning: flag {arg} provided without a value; did you mean `{arg}=1'?"); + None + } + } else { + Some(Directory(arg)) + } +} + +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum LibfuzzerMode { + Fuzz, + Merge, + Tmin, + Report, +} + +#[derive(Debug)] +pub enum OptionsParseError<'a> { + MultipleModesSelected, + OptionValueParseFailed(&'a str, &'a str), +} + +impl<'a> Display for OptionsParseError<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + match self { + OptionsParseError::MultipleModesSelected => { + f.write_str("multiple modes selected in options") + } + OptionsParseError::OptionValueParseFailed(name, value) => { + f.write_fmt(format_args!("couldn't parse value `{value}' for {name}")) + } + } + } +} + +#[derive(Debug, Clone)] +pub struct ArtifactPrefix { + dir: PathBuf, + filename_prefix: Option, +} + +impl ArtifactPrefix { + fn new(path: &str) -> ArtifactPrefix { + let mut dir = PathBuf::from(path); + if path.ends_with(std::path::MAIN_SEPARATOR) { + Self { + dir, + filename_prefix: None, + } + } else { + let filename_prefix = dir.file_name().map(|s| { + s.to_os_string() + .into_string() + .expect("Provided artifact prefix is not usable") + }); + dir.pop(); + Self { + dir, + filename_prefix, + } + 
} + } + + pub fn dir(&self) -> &PathBuf { + &self.dir + } + + pub fn filename_prefix(&self) -> &Option { + &self.filename_prefix + } +} + +#[derive(Debug, Clone)] +#[allow(clippy::struct_excessive_bools)] +pub struct LibfuzzerOptions { + fuzzer_name: String, + mode: LibfuzzerMode, + artifact_prefix: Option, + timeout: Duration, + grimoire: Option, + forks: Option, + dict: Option, + dirs: Vec, + ignore_crashes: bool, + ignore_timeouts: bool, + ignore_ooms: bool, + rss_limit: usize, + malloc_limit: usize, + dedup: bool, + shrink: bool, + skip_tracing: bool, + tui: bool, + runs: usize, + close_fd_mask: u8, + unknown: Vec, +} + +impl LibfuzzerOptions { + pub fn new<'a>(mut args: impl Iterator) -> Result> { + let name = args.next().unwrap(); + let name = if let Some(executable) = std::env::current_exe().ok().and_then(|path| { + path.file_name() + .and_then(std::ffi::OsStr::to_str) + .map(std::string::ToString::to_string) + }) { + executable + } else { + name.to_string() + }; + args.try_fold(LibfuzzerOptionsBuilder::default(), |builder, arg| { + builder.consume(arg) + }) + .map(|builder| builder.build(name)) + } + + pub fn fuzzer_name(&self) -> &str { + &self.fuzzer_name + } + + pub fn mode(&self) -> &LibfuzzerMode { + &self.mode + } + + pub fn artifact_prefix(&self) -> Option<&ArtifactPrefix> { + self.artifact_prefix.as_ref() + } + + pub fn timeout(&self) -> Duration { + self.timeout + } + + pub fn grimoire(&self) -> Option { + self.grimoire + } + + pub fn forks(&self) -> Option { + self.forks + } + + pub fn dict(&self) -> Option<&Tokens> { + self.dict.as_ref() + } + + pub fn dirs(&self) -> &[PathBuf] { + &self.dirs + } + + pub fn ignore_crashes(&self) -> bool { + self.ignore_crashes + } + + pub fn ignore_timeouts(&self) -> bool { + self.ignore_timeouts + } + + pub fn ignore_ooms(&self) -> bool { + self.ignore_ooms + } + + pub fn rss_limit(&self) -> usize { + self.rss_limit + } + + pub fn malloc_limit(&self) -> usize { + self.malloc_limit + } + + pub fn dedup(&self) -> 
bool { + self.dedup + } + + pub fn shrink(&self) -> bool { + self.shrink + } + + pub fn skip_tracing(&self) -> bool { + self.skip_tracing + } + + pub fn tui(&self) -> bool { + self.tui + } + + pub fn runs(&self) -> usize { + self.runs + } + + pub fn close_fd_mask(&self) -> u8 { + self.close_fd_mask + } + + pub fn unknown(&self) -> &[String] { + &self.unknown + } +} + +#[derive(Debug, Default)] +#[allow(clippy::struct_excessive_bools)] +struct LibfuzzerOptionsBuilder<'a> { + mode: Option, + artifact_prefix: Option<&'a str>, + timeout: Option, + grimoire: Option, + forks: Option, + dict: Option<&'a str>, + dirs: Vec<&'a str>, + ignore_crashes: bool, + ignore_timeouts: bool, + ignore_ooms: bool, + rss_limit: Option, + malloc_limit: Option, + ignore_remaining: bool, + dedup: bool, + shrink: bool, + skip_tracing: bool, + tui: bool, + runs: usize, + close_fd_mask: u8, + unknown: Vec<&'a str>, +} + +macro_rules! parse_or_bail { + ($name:expr, $parsed:expr, $ty:ty) => {{ + if let Ok(val) = $parsed.parse::<$ty>() { + val + } else { + return Err(OptionsParseError::OptionValueParseFailed($name, $parsed)); + } + }}; +} + +impl<'a> LibfuzzerOptionsBuilder<'a> { + fn consume(mut self, arg: &'a str) -> Result> { + if !self.ignore_remaining { + if let Some(option) = parse_option(arg) { + match option { + Directory(dir) => { + self.dirs.push(dir); + } + Flag { name, value } => match name { + "merge" => { + if parse_or_bail!(name, value, u64) > 0 + && *self.mode.get_or_insert(LibfuzzerMode::Merge) + != LibfuzzerMode::Merge + { + return Err(OptionsParseError::MultipleModesSelected); + } + } + "minimize_crash" => { + if parse_or_bail!(name, value, u64) > 0 + && *self.mode.get_or_insert(LibfuzzerMode::Tmin) + != LibfuzzerMode::Tmin + { + return Err(OptionsParseError::MultipleModesSelected); + } + } + "report" => { + if parse_or_bail!(name, value, u64) > 0 + && *self.mode.get_or_insert(LibfuzzerMode::Report) + != LibfuzzerMode::Report + { + return 
Err(OptionsParseError::MultipleModesSelected); + } + } + "grimoire" => self.grimoire = Some(parse_or_bail!(name, value, u64) > 0), + "artifact_prefix" => { + self.artifact_prefix = Some(value); + } + "timeout" => { + self.timeout = + Some(value.parse().map(Duration::from_secs_f64).map_err(|_| { + OptionsParseError::OptionValueParseFailed(name, value) + })?); + } + "dict" => self.dict = Some(value), + "fork" | "jobs" => { + self.forks = Some(parse_or_bail!(name, value, usize)); + } + "ignore_crashes" => { + self.ignore_crashes = parse_or_bail!(name, value, u64) > 0; + } + "ignore_timeouts" => { + self.ignore_timeouts = parse_or_bail!(name, value, u64) > 0; + } + "ignore_ooms" => self.ignore_ooms = parse_or_bail!(name, value, u64) > 0, + "rss_limit_mb" => { + self.rss_limit = Some(parse_or_bail!(name, value, usize) << 20); + } + "malloc_limit_mb" => { + self.malloc_limit = Some(parse_or_bail!(name, value, usize) << 20); + } + "ignore_remaining_args" => { + self.ignore_remaining = parse_or_bail!(name, value, u64) > 0; + } + "dedup" => self.dedup = parse_or_bail!(name, value, u64) > 0, + "shrink" => self.shrink = parse_or_bail!(name, value, u64) > 0, + "skip_tracing" => self.skip_tracing = parse_or_bail!(name, value, u64) > 0, + "tui" => self.tui = parse_or_bail!(name, value, u64) > 0, + "runs" => self.runs = parse_or_bail!(name, value, usize), + "close_fd_mask" => self.close_fd_mask = parse_or_bail!(name, value, u8), + _ => { + self.unknown.push(arg); + } + }, + } + } else { + self.unknown.push(arg); + } + } + Ok(self) + } + + fn build(self, fuzzer_name: String) -> LibfuzzerOptions { + LibfuzzerOptions { + fuzzer_name, + mode: self.mode.unwrap_or(LibfuzzerMode::Fuzz), + artifact_prefix: self.artifact_prefix.map(ArtifactPrefix::new), + timeout: self.timeout.unwrap_or(Duration::from_secs(1200)), + grimoire: self.grimoire, + forks: self.forks, + dict: self.dict.map(|path| { + Tokens::from_file(path).expect("Couldn't load tokens from specified dictionary") + }), + dirs: 
self.dirs.into_iter().map(PathBuf::from).collect(), + ignore_crashes: self.ignore_crashes, + ignore_timeouts: self.ignore_timeouts, + ignore_ooms: self.ignore_ooms, + rss_limit: match self.rss_limit.unwrap_or(2 << 30) { + 0 => usize::MAX, + value => value, + }, + malloc_limit: match self.malloc_limit.or(self.rss_limit).unwrap_or(2 << 30) { + 0 => usize::MAX, + value => value, + }, + dedup: self.dedup, + shrink: self.shrink, + skip_tracing: self.skip_tracing, + tui: self.tui, + runs: self.runs, + close_fd_mask: self.close_fd_mask, + unknown: self + .unknown + .into_iter() + .map(std::string::ToString::to_string) + .collect(), + } + } +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/report.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/report.rs new file mode 100644 index 0000000000..4db23cd539 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/report.rs @@ -0,0 +1,61 @@ +use std::ffi::c_int; + +use libafl::{ + events::{ProgressReporter, SimpleEventManager}, + executors::HasObservers, + feedbacks::{MapFeedbackMetadata, MAPFEEDBACK_PREFIX}, + inputs::UsesInput, + monitors::SimpleMonitor, + stages::StagesTuple, + state::{ + HasClientPerfMonitor, HasExecutions, HasLastReportTime, HasMetadata, HasNamedMetadata, + }, + Error, Fuzzer, +}; + +use crate::{fuzz_with, options::LibfuzzerOptions}; + +#[allow(clippy::unnecessary_wraps, clippy::cast_precision_loss)] +fn do_report( + _options: &LibfuzzerOptions, + _fuzzer: &mut F, + _stages: &mut ST, + _executor: &mut E, + state: &S, + _mgr: &mut EM, +) -> Result<(), Error> +where + F: Fuzzer, + S: HasClientPerfMonitor + + HasMetadata + + HasNamedMetadata + + HasExecutions + + UsesInput + + HasLastReportTime, + E: HasObservers, + EM: ProgressReporter, + ST: StagesTuple, +{ + let meta = state + .named_metadata::>(&(MAPFEEDBACK_PREFIX.to_string() + "edges")) + .unwrap(); + let observed = meta.history_map.iter().filter(|&&e| e != 0).count(); + let total = meta.history_map.len(); + + println!( + 
"Observed {observed}/{total} edges ({}%)", + observed as f64 / total as f64 + ); + + Ok(()) +} + +pub fn report( + options: &LibfuzzerOptions, + harness: &extern "C" fn(*const u8, usize) -> c_int, +) -> Result<(), Error> { + fuzz_with!(options, harness, do_report, |reporter| { + let mgr = SimpleEventManager::new(SimpleMonitor::new(|s| eprintln!("{s}"))); + crate::start_fuzzing_single(reporter, None, mgr) + }) +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/schedulers.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/schedulers.rs new file mode 100644 index 0000000000..bcd89dc8ca --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/schedulers.rs @@ -0,0 +1,82 @@ +use std::{ + collections::{BTreeSet, HashMap}, + marker::PhantomData, +}; + +use libafl::{ + corpus::{Corpus, CorpusId, Testcase}, + feedbacks::MapNoveltiesMetadata, + inputs::UsesInput, + schedulers::{RemovableScheduler, Scheduler}, + state::{HasCorpus, HasMetadata, UsesState}, + Error, +}; + +#[derive(Clone, Debug)] +pub struct MergeScheduler { + mapping: HashMap, + all: BTreeSet, + phantom: PhantomData, +} + +impl UsesState for MergeScheduler +where + S: UsesInput, +{ + type State = S; +} + +impl RemovableScheduler for MergeScheduler +where + S: UsesInput + HasCorpus, +{ + fn on_remove( + &mut self, + _state: &mut Self::State, + idx: CorpusId, + _testcase: &Option::Input>>, + ) -> Result<(), Error> { + self.all.remove(&idx); + Ok(()) + } +} + +impl Scheduler for MergeScheduler +where + S: UsesInput + HasCorpus, +{ + fn on_add(&mut self, state: &mut Self::State, idx: CorpusId) -> Result<(), Error> { + self.all.insert(idx); + let testcase = state.corpus().get(idx)?.borrow(); + let meta = testcase.metadata::()?; + for cov_idx in &meta.list { + self.mapping.insert(*cov_idx, idx); + } + Ok(()) + } + + fn next(&mut self, _state: &mut Self::State) -> Result { + unimplemented!("Not suitable for actual scheduling."); + } +} + +impl MergeScheduler { + pub fn new() -> Self { + Self { + 
mapping: HashMap::default(), + all: BTreeSet::default(), + phantom: PhantomData, + } + } + + pub fn removable(&self) -> BTreeSet { + self.all + .difference(&self.mapping.values().copied().collect()) + .copied() + .collect() + } + + pub fn current(&self) -> &BTreeSet { + &self.all + } +} diff --git a/libafl_libfuzzer/libafl_libfuzzer_runtime/src/tmin.rs b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/tmin.rs new file mode 100644 index 0000000000..7ad5ab5546 --- /dev/null +++ b/libafl_libfuzzer/libafl_libfuzzer_runtime/src/tmin.rs @@ -0,0 +1,186 @@ +use std::{ + ffi::c_int, + fs::{read, write}, + path::PathBuf, +}; + +use libafl::{ + corpus::{Corpus, HasTestcase, InMemoryCorpus, Testcase}, + events::SimpleEventManager, + executors::{inprocess::TimeoutInProcessForkExecutor, ExitKind}, + feedbacks::{CrashFeedbackFactory, TimeoutFeedbackFactory}, + inputs::{BytesInput, HasBytesVec, HasTargetBytes}, + mutators::{havoc_mutations_no_crossover, Mutator, StdScheduledMutator}, + schedulers::QueueScheduler, + stages::StdTMinMutationalStage, + state::{HasCorpus, StdState}, + Error, Fuzzer, StdFuzzer, +}; +use libafl_bolts::{ + rands::{RandomSeed, RomuDuoJrRand, StdRand}, + shmem::{ShMemProvider, StdShMemProvider}, + tuples::tuple_list, + AsSlice, HasLen, +}; +use libafl_targets::LLVMCustomMutator; + +use crate::{options::LibfuzzerOptions, CustomMutationStatus}; + +type TMinState = + StdState, RomuDuoJrRand, InMemoryCorpus>; + +fn minimize_crash_with_mutator>( + options: &LibfuzzerOptions, + harness: extern "C" fn(*const u8, usize) -> c_int, + mutator: M, + mut state: TMinState, +) -> Result<(), Error> { + let mut mgr = SimpleEventManager::printing(); + + assert_eq!( + options.dirs().len(), + 1, + "Must provide exactly one input to minimise" + ); + assert!(options.dirs()[0].exists(), "Input specified does not exist"); + assert!(options.dirs()[0].is_file(), "Input specified is not a file"); + + let input = BytesInput::new(read(&options.dirs()[0])?); + + let mut harness = |input: 
&BytesInput| { + let target = input.target_bytes(); + let buf = target.as_slice(); + + let result = unsafe { + crate::libafl_libfuzzer_test_one_input(Some(harness), buf.as_ptr(), buf.len()) + }; + match result { + -2 => ExitKind::Crash, + _ => ExitKind::Ok, + } + }; + + let mut fuzzer = StdFuzzer::new(QueueScheduler::new(), (), ()); + + let shmem_provider = StdShMemProvider::new()?; + let mut executor = TimeoutInProcessForkExecutor::new( + &mut harness, + (), + &mut fuzzer, + &mut state, + &mut mgr, + options.timeout(), + shmem_provider, + )?; + + let exit_kind = fuzzer.execute_input(&mut state, &mut executor, &mut mgr, &input)?; + + let size = input.len(); + let id = state.corpus_mut().add(Testcase::new(input))?; + + match exit_kind { + ExitKind::Crash => { + let factory = CrashFeedbackFactory::default(); + let tmin = StdTMinMutationalStage::new( + mutator, + factory, + if options.runs() == 0 { + 128 + } else { + options.runs() + }, + ); + let mut stages = tuple_list!(tmin); + fuzzer.fuzz_one(&mut stages, &mut executor, &mut state, &mut mgr)?; + } + ExitKind::Timeout => { + let factory = TimeoutFeedbackFactory::default(); + let tmin = StdTMinMutationalStage::new( + mutator, + factory, + if options.runs() == 0 { + 128 + } else { + options.runs() + }, + ); + let mut stages = tuple_list!(tmin); + fuzzer.fuzz_one(&mut stages, &mut executor, &mut state, &mut mgr)?; + } + kind => unimplemented!("Unsupported exit kind for test minification: {:?}", kind), + } + + let mut testcase = state.testcase_mut(id)?; + let input = testcase.load_input(state.corpus())?.bytes().to_vec(); + drop(testcase); + if input.len() >= size { + eprintln!( + "Unable to reduce {}", + options.dirs()[0].as_path().as_os_str().to_str().unwrap() + ); + } else { + let (mut dest, filename_prefix) = options.artifact_prefix().map_or_else( + || (PathBuf::default(), ""), + |artifact_prefix| { + ( + artifact_prefix.dir().clone(), + artifact_prefix + .filename_prefix() + .as_ref() + .map_or("", String::as_str), 
+ ) + }, + ); + dest.push(format!( + "{}minimized-from-{}", + filename_prefix, + options.dirs()[0].file_name().unwrap().to_str().unwrap() + )); + write(&dest, input)?; + println!( + "Wrote minimised input to {}", + dest.file_name().unwrap().to_str().unwrap() + ); + } + + Ok(()) +} + +pub fn minimize_crash( + options: &LibfuzzerOptions, + harness: extern "C" fn(*const u8, usize) -> c_int, +) -> Result<(), Error> { + println!( + "Attempting to minimise a crash: {}", + options + .dirs() + .iter() + .map(|p| p + .to_str() + .expect("Couldn't render the filename as a string!")) + .collect::>() + .join(", ") + ); + let mutator_status = CustomMutationStatus::new(); + + let state = StdState::new( + StdRand::new(), + InMemoryCorpus::::new(), + InMemoryCorpus::new(), + &mut (), + &mut (), + )?; + + // TODO configure with mutation stacking options from libfuzzer + if mutator_status.custom_mutation { + let custom_mutator = unsafe { + LLVMCustomMutator::mutate_unchecked(StdScheduledMutator::new( + havoc_mutations_no_crossover(), + )) + }; + minimize_crash_with_mutator(options, harness, custom_mutator, state) + } else { + let std_mutator = StdScheduledMutator::new(havoc_mutations_no_crossover()); + minimize_crash_with_mutator(options, harness, std_mutator, state) + } +} diff --git a/libafl_libfuzzer/src/lib.rs b/libafl_libfuzzer/src/lib.rs new file mode 100644 index 0000000000..d12c8991dd --- /dev/null +++ b/libafl_libfuzzer/src/lib.rs @@ -0,0 +1,21 @@ +//! `libafl_libfuzzer` offers a "permanent" replacement for the now-deprecated libfuzzer +//! +//! This crate only offers sufficient functionality to replace libfuzzer for cargo-fuzz in its +//! current state, but may be expanded to handle other flags in the future. +//! +//! This crate links to a (separately built) internal crate which affords the actual functionality. +//! The internal crate must be built separately to ensure flags from dependent crates are not leaked +//! 
to the runtime (e.g., to prevent coverage being collected on the runtime). + +use std::ffi::{c_char, c_int}; + +pub use libfuzzer_sys::*; + +extern "C" { + /// `LLVMFuzzerRunDriver` allows for harnesses which specify their own main. See: https://llvm.org/docs/LibFuzzer.html#using-libfuzzer-as-a-library + pub fn LLVMFuzzerRunDriver( + argc: *mut c_int, + argv: *mut *mut *const c_char, + harness_fn: Option c_int>, + ) -> c_int; +} diff --git a/libafl_targets/Cargo.toml b/libafl_targets/Cargo.toml index 3767e23d65..9ac61ec5e7 100644 --- a/libafl_targets/Cargo.toml +++ b/libafl_targets/Cargo.toml @@ -18,8 +18,10 @@ all-features = true [features] default = ["std", "sanitizers_flags"] std = ["libafl/std"] -libfuzzer = [] +libfuzzer = ["std", "sanitizer_interfaces"] libfuzzer_no_link_main = ["libfuzzer"] +libfuzzer_define_run_driver = ["libfuzzer"] +libfuzzer_oom = ["libfuzzer"] sanitizers_flags = [] pointer_maps = [] sancov_pcguard_edges = [] @@ -28,15 +30,18 @@ sancov_value_profile = [] sancov_8bit = [] sancov_cmplog = [] sancov_pcguard = ["sancov_pcguard_hitcounts"] +sanitizer_interfaces = [] clippy = [] # Ignore compiler warnings during clippy observers = ["intervaltree", "ahash"] [build-dependencies] +bindgen = "0.64.0" cc = { version = "1.0", features = ["parallel"] } [dependencies] libafl = { path = "../libafl", version = "0.10.1", default-features = false, features = [] } libafl_bolts = { path = "../libafl_bolts", version = "0.10.1", default-features = false, features = [] } +libc = "0.2" log = "0.4.20" rangemap = "1.3" diff --git a/libafl_targets/build.rs b/libafl_targets/build.rs index b7583ad140..0cbce8e3bf 100644 --- a/libafl_targets/build.rs +++ b/libafl_targets/build.rs @@ -52,9 +52,6 @@ fn main() { println!("cargo:rerun-if-env-changed=LIBAFL_CMPLOG_MAP_H"); println!("cargo:rerun-if-env-changed=LIBAFL_ACCOUNTING_MAP_SIZE"); - //std::env::set_var("CC", "clang"); - //std::env::set_var("CXX", "clang++"); - #[cfg(any(feature = "sancov_value_profile", feature = 
"sancov_cmplog"))] { println!("cargo:rerun-if-changed=src/sancov_cmp.c"); @@ -89,6 +86,8 @@ fn main() { #[cfg(feature = "libfuzzer_no_link_main")] libfuzzer.define("FUZZER_NO_LINK_MAIN", "1"); + #[cfg(feature = "libfuzzer_define_run_driver")] + libfuzzer.define("FUZZER_DEFINE_RUN_DRIVER", "1"); libfuzzer.compile("libfuzzer"); } @@ -96,6 +95,23 @@ fn main() { println!("cargo:rerun-if-changed=src/common.h"); println!("cargo:rerun-if-changed=src/common.c"); + #[cfg(feature = "sanitizer_interfaces")] + { + println!("cargo:rerun-if-changed=src/sanitizer_interfaces.h"); + + let build = bindgen::builder() + .header("src/sanitizer_interfaces.h") + .use_core() + .generate_comments(true) + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .expect("Couldn't generate the sanitizer headers!"); + + build + .write_to_file(Path::new(&out_dir).join("sanitizer_interfaces.rs")) + .expect("Couldn't write the sanitizer headers!"); + } + let mut common = cc::Build::new(); #[cfg(feature = "sanitizers_flags")] diff --git a/libafl_targets/src/cmplog.c b/libafl_targets/src/cmplog.c index eba3ad703b..893f7cc51a 100644 --- a/libafl_targets/src/cmplog.c +++ b/libafl_targets/src/cmplog.c @@ -6,72 +6,68 @@ #if defined(_WIN32) -#include + #include void *__libafl_asan_region_is_poisoned(void *beg, size_t size) { - (void)beg; (void)size; return NULL; - } -#pragma comment(linker, "/alternatename:__asan_region_is_poisoned=__libafl_asan_region_is_poisoned") + #pragma comment( \ + linker, \ + "/alternatename:__asan_region_is_poisoned=__libafl_asan_region_is_poisoned") #elif defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) -#include -#include -#include + #include + #include + #include static int dummy_fd[2] = {2, 2}; static int dymmy_initialized = 0; -__attribute__((weak)) void *__asan_region_is_poisoned(void *beg, size_t size) { - +__attribute__((weak)) void *__asan_region_is_poisoned(const void *beg, + size_t size) { (void)beg; (void)size; return NULL; - } #endif 
-CmpLogMap* libafl_cmplog_map_ptr = &libafl_cmplog_map; +CmpLogMap *libafl_cmplog_map_ptr = &libafl_cmplog_map; -void __libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, uint64_t arg1, uint64_t arg2) { +void __libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, + uint64_t arg1, uint64_t arg2) { + STATIC_ASSERT(sizeof(libafl_cmplog_map_ptr->vals.operands) == + sizeof(libafl_cmplog_map_ptr->vals.routines)); - STATIC_ASSERT(sizeof(libafl_cmplog_map_ptr->vals.operands) == sizeof(libafl_cmplog_map_ptr->vals.routines)); - __libafl_targets_cmplog(k, shape, arg1, arg2); - } // POSIX shenanigan to see if an area is mapped. // If it is mapped as X-only, we have a problem, so maybe we should add a check // to avoid to call it on .text addresses -static long area_is_valid(void *ptr, size_t len) { - - if (!ptr || __asan_region_is_poisoned(ptr, len)) {return 0;} +static long area_is_valid(const void *ptr, size_t len) { + if (!ptr || __asan_region_is_poisoned(ptr, len)) { return 0; } long valid_len; #if defined(_WIN32) - if (IsBadReadPtr(ptr, len)) {return 0;} + if (IsBadReadPtr(ptr, len)) { return 0; } valid_len = (long)len; #elif defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) if (!dymmy_initialized) { if ((dummy_fd[1] = open("/dev/null", O_WRONLY)) < 0) { - if (pipe(dummy_fd) < 0) { - dummy_fd[1] = 1; - } + if (pipe(dummy_fd) < 0) { dummy_fd[1] = 1; } } dymmy_initialized = 1; } valid_len = syscall(SYS_write, dummy_fd[1], ptr, len); - - if (valid_len <= 0 || valid_len > (long)len) {return 0;} + + if (valid_len <= 0 || valid_len > (long)len) { return 0; } #endif // even if the write succeed this can be a false positive if we cross @@ -79,9 +75,9 @@ static long area_is_valid(void *ptr, size_t len) { char *p = (char *)ptr; #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) - long page_size = sysconf(_SC_PAGE_SIZE); + long page_size = sysconf(_SC_PAGE_SIZE); #else - long page_size = 4096; // Yolo + long page_size = 4096; // 
Yolo #endif char *page = (char *)((uintptr_t)p & ~(page_size - 1)) + page_size; @@ -94,20 +90,11 @@ static long area_is_valid(void *ptr, size_t len) { // or not, neither by SYS_write nor msync() :-( return (long)(page - p); } - } -void __libafl_targets_cmplog_routines(uintptr_t k, uint8_t *ptr1, uint8_t *ptr2) { - - if (!libafl_cmplog_enabled) { return; } - - int l1, l2; - if ((l1 = area_is_valid(ptr1, CMPLOG_RTN_LEN)) <= 0 || - (l2 = area_is_valid(ptr2, CMPLOG_RTN_LEN)) <= 0) { - return; - } - int len = MIN(l1, l2); - +void __libafl_targets_cmplog_routines_checked(uintptr_t k, const uint8_t *ptr1, + const uint8_t *ptr2, size_t len) { + libafl_cmplog_enabled = false; uint32_t hits; if (libafl_cmplog_map_ptr->headers[k].kind != CMPLOG_KIND_RTN) { @@ -125,23 +112,46 @@ void __libafl_targets_cmplog_routines(uintptr_t k, uint8_t *ptr1, uint8_t *ptr2) hits &= CMPLOG_MAP_RTN_H - 1; MEMCPY(libafl_cmplog_map_ptr->vals.routines[k][hits].v0, ptr1, len); MEMCPY(libafl_cmplog_map_ptr->vals.routines[k][hits].v1, ptr2, len); + libafl_cmplog_enabled = true; +} + +void __libafl_targets_cmplog_routines(uintptr_t k, const uint8_t *ptr1, + const uint8_t *ptr2) { + if (!libafl_cmplog_enabled) { return; } + + int l1, l2; + if ((l1 = area_is_valid(ptr1, CMPLOG_RTN_LEN)) <= 0 || + (l2 = area_is_valid(ptr2, CMPLOG_RTN_LEN)) <= 0) { + return; + } + int len = MIN(l1, l2); + __libafl_targets_cmplog_routines_checked(k, ptr1, ptr2, len); } -void __cmplog_rtn_hook(uint8_t *ptr1, uint8_t *ptr2) { +void __libafl_targets_cmplog_routines_len(uintptr_t k, const uint8_t *ptr1, + const uint8_t *ptr2, size_t len) { + if (!libafl_cmplog_enabled) { return; } + + if (area_is_valid(ptr1, CMPLOG_RTN_LEN) <= 0 || + area_is_valid(ptr2, CMPLOG_RTN_LEN) <= 0) { + return; + } + + __libafl_targets_cmplog_routines_checked(k, ptr1, ptr2, len); +} +static inline void __cmplog_rtn_hook(const uint8_t *ptr1, const uint8_t *ptr2) { uintptr_t k = RETADDR; k = (k >> 4) ^ (k << 8); k &= CMPLOG_MAP_W - 1; 
__libafl_targets_cmplog_routines(k, ptr1, ptr2); - } // gcc libstdc++ // _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEPKc -static uint8_t *get_gcc_stdstring(uint8_t *string) { - +static const uint8_t *get_gcc_stdstring(const uint8_t *string) { uint32_t *len = (uint32_t *)(string + 8); if (*len < 16) { // in structure @@ -150,13 +160,11 @@ static uint8_t *get_gcc_stdstring(uint8_t *string) { uint8_t **ptr = (uint8_t **)string; return (*ptr); } - } // llvm libc++ _ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocator // IcEEE7compareEmmPKcm -static uint8_t *get_llvm_stdstring(uint8_t *string) { - +static const uint8_t *get_llvm_stdstring(const uint8_t *string) { // length is in: if ((string[0] & 1) == 0) {uint8_t len = (string[0] >> 1);} // or: if (string[0] & 1) {uint32_t *len = (uint32_t *) (string + 8);} @@ -166,52 +174,44 @@ static uint8_t *get_llvm_stdstring(uint8_t *string) { } else { // in structure return (string + 1); } - } -void __cmplog_rtn_gcc_stdstring_cstring(uint8_t *stdstring, uint8_t *cstring) { - - if (!libafl_cmplog_enabled) {return;} - if (area_is_valid(stdstring, 32) <= 0) { - return; - } +void __cmplog_rtn_gcc_stdstring_cstring(const uint8_t *stdstring, + const uint8_t *cstring) { + if (!libafl_cmplog_enabled) { return; } + if (area_is_valid(stdstring, 32) <= 0) { return; } __cmplog_rtn_hook(get_gcc_stdstring(stdstring), cstring); - } -void __cmplog_rtn_gcc_stdstring_stdstring(uint8_t *stdstring1, uint8_t *stdstring2) { - - if (!libafl_cmplog_enabled) {return;} - if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0) { +void __cmplog_rtn_gcc_stdstring_stdstring(const uint8_t *stdstring1, + const uint8_t *stdstring2) { + if (!libafl_cmplog_enabled) { return; } + if (area_is_valid(stdstring1, 32) <= 0 || + area_is_valid(stdstring2, 32) <= 0) { return; - } + } __cmplog_rtn_hook(get_gcc_stdstring(stdstring1), get_gcc_stdstring(stdstring2)); - } -void __cmplog_rtn_llvm_stdstring_cstring(uint8_t 
*stdstring, uint8_t *cstring) { - - if (!libafl_cmplog_enabled) {return;} - if (area_is_valid(stdstring, 32) <= 0){ - return; - } +void __cmplog_rtn_llvm_stdstring_cstring(const uint8_t *stdstring, + const uint8_t *cstring) { + if (!libafl_cmplog_enabled) { return; } + if (area_is_valid(stdstring, 32) <= 0) { return; } __cmplog_rtn_hook(get_llvm_stdstring(stdstring), cstring); - } -void __cmplog_rtn_llvm_stdstring_stdstring(uint8_t *stdstring1, uint8_t *stdstring2) { - - if (!libafl_cmplog_enabled) {return;} - if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0) { +void __cmplog_rtn_llvm_stdstring_stdstring(const uint8_t *stdstring1, + const uint8_t *stdstring2) { + if (!libafl_cmplog_enabled) { return; } + if (area_is_valid(stdstring1, 32) <= 0 || + area_is_valid(stdstring2, 32) <= 0) { return; } __cmplog_rtn_hook(get_llvm_stdstring(stdstring1), get_llvm_stdstring(stdstring2)); - } - diff --git a/libafl_targets/src/cmplog.h b/libafl_targets/src/cmplog.h index cfc7935bc7..d8cbcb1b88 100644 --- a/libafl_targets/src/cmplog.h +++ b/libafl_targets/src/cmplog.h @@ -2,6 +2,7 @@ #define __LIBAFL_TARGETS_CMPLOG__ #include "common.h" +#include #ifndef CMPLOG_MAP_W #define CMPLOG_MAP_W 65536 @@ -50,12 +51,16 @@ extern uint8_t libafl_cmplog_enabled; void __libafl_targets_cmplog_instructions(uintptr_t k, uint8_t shape, uint64_t arg1, uint64_t arg2); -void __libafl_targets_cmplog_routines(uintptr_t k, uint8_t *ptr1, - uint8_t *ptr2); +void __libafl_targets_cmplog_routines(uintptr_t k, const uint8_t *ptr1, + const uint8_t *ptr2); + +void __libafl_targets_cmplog_routines_len(uintptr_t k, const uint8_t *ptr1, + const uint8_t *ptr2, size_t len); static inline void __libafl_targets_cmplog(uintptr_t k, uint8_t shape, uint64_t arg1, uint64_t arg2) { if (!libafl_cmplog_enabled) { return; } + libafl_cmplog_enabled = false; uint16_t hits; if (libafl_cmplog_map_ptr->headers[k].kind != CMPLOG_KIND_INS) { @@ -73,6 +78,7 @@ static inline void 
__libafl_targets_cmplog(uintptr_t k, uint8_t shape, hits &= CMPLOG_MAP_H - 1; libafl_cmplog_map_ptr->vals.operands[k][hits].v0 = arg1; libafl_cmplog_map_ptr->vals.operands[k][hits].v1 = arg2; + libafl_cmplog_enabled = true; } #endif diff --git a/libafl_targets/src/lib.rs b/libafl_targets/src/lib.rs index 5c619f0f94..f90507256c 100644 --- a/libafl_targets/src/lib.rs +++ b/libafl_targets/src/lib.rs @@ -79,6 +79,20 @@ pub mod sancov_cmp; #[cfg(any(feature = "sancov_cmplog", feature = "sancov_value_profile"))] pub use sancov_cmp::*; +/// Module containing bindings to the various sanitizer interface headers +#[cfg(feature = "sanitizer_interfaces")] +pub mod sanitizer_ifaces { + #![allow(non_snake_case)] + #![allow(non_camel_case_types)] + #![allow(non_upper_case_globals)] + #![allow(unused)] + #![allow(improper_ctypes)] + #![allow(clippy::unreadable_literal)] + #![allow(missing_docs)] + #![allow(missing_debug_implementations)] + include!(concat!(env!("OUT_DIR"), "/sanitizer_interfaces.rs")); +} + #[cfg(feature = "libfuzzer")] pub mod libfuzzer; #[cfg(feature = "libfuzzer")] diff --git a/libafl_targets/src/libfuzzer.c b/libafl_targets/src/libfuzzer.c index 42c7a8cd4f..72a18ae965 100644 --- a/libafl_targets/src/libfuzzer.c +++ b/libafl_targets/src/libfuzzer.c @@ -1,41 +1,65 @@ #include "common.h" #include +#ifdef __APPLE__ + #include +#else + #include +#endif #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" EXT_FUNC(LLVMFuzzerInitialize, int, (int *argc, char ***argv), false); EXT_FUNC(LLVMFuzzerCustomMutator, size_t, - (uint8_t *Data, size_t Size, size_t MaxSize, unsigned int Seed), + (uint8_t * Data, size_t Size, size_t MaxSize, unsigned int Seed), false); EXT_FUNC(LLVMFuzzerCustomCrossOver, size_t, - (const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize, unsigned int Seed), + (const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize, 
unsigned int Seed), false); -EXT_FUNC_IMPL(LLVMFuzzerTestOneInput, int, (uint8_t *Data, size_t Size), false) { +EXT_FUNC_IMPL(LLVMFuzzerTestOneInput, int, (const uint8_t *Data, size_t Size), + false) { return 0; } -EXT_FUNC_IMPL(libafl_main, void, (void), false) { -} +EXT_FUNC(libafl_main, void, (void), false); +#ifdef FUZZER_DEFINE_RUN_DRIVER +extern int LLVMFuzzerRunDriver(int *argc, char ***argv, + int (*UserCb)(const uint8_t *Data, size_t Size)); +#endif #ifndef FUZZER_NO_LINK_MAIN -EXT_FUNC_IMPL(main, int, (int argc, char** argv), false) { - libafl_main(); +EXT_FUNC_IMPL(main, int, (int argc, char **argv), false) { + if (CHECK_WEAK_FN(libafl_main)) { + libafl_main(); + return 0; + } +#ifdef FUZZER_DEFINE_RUN_DRIVER + return LLVMFuzzerRunDriver(&argc, &argv, &LLVMFuzzerTestOneInput); +#else return 0; +#endif } -#if defined(_WIN32) -// If we do not add the main, the MSVC linker fails with: + #if defined(_WIN32) +// If we do not add the main, the MSVC linker fails with: // LINK : fatal error LNK1561: entry point must be defined -int main(int argc, char** argv) { - libafl_main(); +int main(int argc, char **argv) { + if (CHECK_WEAK_FN(libafl_main)) { + libafl_main(); + return 0; + } + return LLVMFuzzerRunDriver(&argc, &argv, &LLVMFuzzerTestOneInput); } -#endif + #endif #endif #pragma GCC diagnostic pop +// take a page out of libfuzzer's book: static define __sancov_lowest_stack +// since we don't support it yet +// TODO support it +MAYBE_THREAD_LOCAL uintptr_t __sancov_lowest_stack; + EXPORT_FN int libafl_targets_has_libfuzzer_init() { return CHECK_WEAK_FN(LLVMFuzzerInitialize); } @@ -44,6 +68,42 @@ EXPORT_FN int libafl_targets_libfuzzer_init(int *argc, char ***argv) { if (libafl_targets_has_libfuzzer_init()) { return LLVMFuzzerInitialize(argc, argv); } else { - return 0; + return 0; } } + +EXPORT_FN int libafl_targets_has_libfuzzer_custom_mutator() { + return CHECK_WEAK_FN(LLVMFuzzerCustomMutator); +} + +// trust the user to check this appropriately :) +EXPORT_FN 
size_t libafl_targets_libfuzzer_custom_mutator(uint8_t *Data, + size_t Size, + size_t MaxSize, + unsigned int Seed) { + return LLVMFuzzerCustomMutator(Data, Size, MaxSize, Seed); +} + +EXPORT_FN int libafl_targets_has_libfuzzer_custom_crossover() { + return CHECK_WEAK_FN(LLVMFuzzerCustomCrossOver); +} + +// trust the user to check this appropriately :) +EXPORT_FN size_t libafl_targets_libfuzzer_custom_crossover( + const uint8_t *Data1, size_t Size1, const uint8_t *Data2, size_t Size2, + uint8_t *Out, size_t MaxOutSize, unsigned int Seed) { + return LLVMFuzzerCustomCrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize, + Seed); +} + +EXPORT_FN size_t libafl_check_malloc_size(void *ptr) { +#if defined(__APPLE__) + return malloc_size(ptr); +#elif defined(__GNUC__) + return malloc_usable_size(ptr); +#elif defined(_WIN32) + return _msize(ptr); +#else + return 0; +#endif +} \ No newline at end of file diff --git a/libafl_targets/src/libfuzzer.rs b/libafl_targets/src/libfuzzer/mod.rs similarity index 88% rename from libafl_targets/src/libfuzzer.rs rename to libafl_targets/src/libfuzzer/mod.rs index db86929043..234e59f42c 100644 --- a/libafl_targets/src/libfuzzer.rs +++ b/libafl_targets/src/libfuzzer/mod.rs @@ -4,11 +4,17 @@ use alloc::{string::String, vec::Vec}; +mod mutators; +pub use mutators::*; + +mod observers; +pub use observers::*; + extern "C" { - /// int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) + // int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) fn LLVMFuzzerTestOneInput(data: *const u8, size: usize) -> i32; - // libafl_targets_libfuzzer_init calls LLVMFUzzerInitialize() + // libafl_targets_libfuzzer_init calls LLVMFuzzerInitialize() fn libafl_targets_libfuzzer_init(argc: *const i32, argv: *const *const *const u8) -> i32; } diff --git a/libafl_targets/src/libfuzzer/mutators.rs b/libafl_targets/src/libfuzzer/mutators.rs new file mode 100644 index 0000000000..ed2f81d772 --- /dev/null +++ b/libafl_targets/src/libfuzzer/mutators.rs @@ 
-0,0 +1,457 @@ +use alloc::rc::{Rc, Weak}; +use std::{ + cell::RefCell, + marker::PhantomData, + ops::Deref, + prelude::rust_2015::{Box, Vec}, +}; + +use libafl::{ + corpus::Corpus, + inputs::{BytesInput, HasBytesVec, UsesInput}, + mutators::{ + ComposedByMutations, MutationId, MutationResult, Mutator, MutatorsTuple, ScheduledMutator, + }, + random_corpus_id, + state::{HasCorpus, HasMaxSize, HasRand}, + Error, +}; +use libafl_bolts::{rands::Rand, AsSlice, Named}; + +extern "C" { + fn libafl_targets_has_libfuzzer_custom_mutator() -> bool; + fn libafl_targets_libfuzzer_custom_mutator( + data: *mut u8, + size: usize, + max_size: usize, + seed: u32, + ) -> usize; + + fn libafl_targets_has_libfuzzer_custom_crossover() -> bool; + fn libafl_targets_libfuzzer_custom_crossover( + data1: *const u8, + size1: usize, + data2: *const u8, + size2: usize, + out: *mut u8, + max_out_size: usize, + seed: u32, + ) -> usize; +} + +/// Detect the presence of a user-defined custom mutator +#[must_use] +pub fn has_custom_mutator() -> bool { + unsafe { libafl_targets_has_libfuzzer_custom_mutator() } +} + +/// Detect the presence of a user-defined custom crossover +#[must_use] +pub fn has_custom_crossover() -> bool { + unsafe { libafl_targets_has_libfuzzer_custom_crossover() } +} + +/// Erased mutator for dynamic mutator access by the custom mutator/crossover +trait ErasedLLVMFuzzerMutator { + /// Perform mutation on the desired buffer + fn mutate(&self, data: *mut u8, size: usize, max_size: usize) -> usize; +} + +thread_local! 
{ + /// The globally accessible mutator reference, if available + static MUTATOR: RefCell>> = RefCell::new(None); +} + +/// Mutator which is available for user-defined mutator/crossover +/// See: [Structure-Aware Fuzzing with libFuzzer](https://github.com/google/fuzzing/blob/master/docs/structure-aware-fuzzing.md) +#[allow(non_snake_case)] +#[no_mangle] +pub extern "C" fn LLVMFuzzerMutate(data: *mut u8, size: usize, max_size: usize) -> usize { + MUTATOR.with(|mutator| { + if let Ok(mut mutator) = mutator.try_borrow_mut() { + if let Some(mutator) = &mut *mutator { + return mutator.mutate(data, size, max_size); + } + } + unreachable!("Couldn't get mutator!"); + }) +} + +/// A proxy which wraps a targeted mutator. This is used to provide dynamic access to a global +/// mutator without knowing the concrete type, which is necessary for custom mutators. +struct MutatorProxy<'a, M, MT, S> { + /// Pointer to the state of the fuzzer + state: Rc>, // refcell to prevent double-mutability over the pointer + /// A weak reference to the mutator to provide to the custom mutator + mutator: Weak>, + /// The result of mutation, to be propagated to the mutational stage + result: Rc>>, + /// Stage index, which is used by libafl mutator implementations + stage_idx: i32, + phantom: PhantomData<(&'a mut (), MT)>, +} + +impl<'a, M, MT, S> MutatorProxy<'a, M, MT, S> { + /// Create a new mutator proxy for the given state and mutator + fn new( + state: &'a mut S, + mutator: &Rc>, + result: &Rc>>, + stage_idx: i32, + ) -> Self { + Self { + state: Rc::new(RefCell::new(state)), + mutator: Rc::downgrade(mutator), + result: result.clone(), + stage_idx, + phantom: PhantomData, + } + } + + /// Create a weak version of the proxy, which will become unusable when the custom mutator + /// is no longer permitted to be executed. 
+ fn weak( + &self, + ) -> WeakMutatorProxy FnMut(&'b mut S)) -> bool, M, MT, S> { + let state = Rc::downgrade(&self.state); + WeakMutatorProxy { + accessor: move |f: &mut dyn for<'b> FnMut(&'b mut S)| { + if let Some(state) = state.upgrade() { + if let Ok(state) = state.try_borrow_mut() { + let state_ref = unsafe { state.as_mut().unwrap_unchecked() }; + f(state_ref); + return true; + } + } + false + }, + mutator: self.mutator.clone(), + stage_idx: self.stage_idx, + result: self.result.clone(), + phantom: PhantomData, + } + } +} + +/// A weak proxy to the mutators. In order to preserve Rust memory model semantics, we must ensure +/// that once a libafl mutator exits scope (e.g., once the mutational stage is over) that the +/// mutator is no longer accessible by the custom mutator. +#[derive(Clone)] +struct WeakMutatorProxy { + /// Function which will perform the access to the state. + accessor: F, + /// A weak reference to the mutator + mutator: Weak>, + /// The stage index to provide to the mutator, when executed. 
+ stage_idx: i32, + /// The result of mutation, to be propagated to the mutational stage + result: Rc>>, + phantom: PhantomData<(MT, S)>, +} + +impl ErasedLLVMFuzzerMutator for WeakMutatorProxy +where + F: Fn(&mut dyn for<'b> FnMut(&'b mut S)) -> bool, + M: ScheduledMutator, + MT: MutatorsTuple, + S: HasMaxSize + UsesInput, +{ + fn mutate(&self, data: *mut u8, size: usize, max_size: usize) -> usize { + let mut new_size = 0; // if access fails, the new len is zero + (self.accessor)(&mut |state| { + if let Some(mutator) = self.mutator.upgrade() { + if let Ok(mut mutator) = mutator.try_borrow_mut() { + let mut intermediary = + BytesInput::from(unsafe { core::slice::from_raw_parts(data, size) }); + let old = state.max_size(); + state.set_max_size(max_size); + let res = mutator.scheduled_mutate(state, &mut intermediary, self.stage_idx); + state.set_max_size(old); + let succeeded = res.is_ok(); + + let mut result = self.result.deref().borrow_mut(); + *result = res; + drop(result); + + if succeeded { + let target = intermediary.bytes(); + if target.as_slice().len() > max_size { + self.result + .replace(Err(Error::illegal_state("Mutation result was too long!"))) + .ok(); + } else { + let actual = unsafe { core::slice::from_raw_parts_mut(data, max_size) }; + actual[..target.as_slice().len()].copy_from_slice(target.as_slice()); + new_size = target.as_slice().len(); + } + }; + return; + } + } + self.result + .replace(Err(Error::illegal_state( + "Couldn't borrow mutator while mutating!", + ))) + .ok(); + }); + new_size + } +} + +/// A mutator which invokes a libFuzzer-like custom mutator or crossover. The `CROSSOVER` constant +/// controls whether this mutator invokes `LLVMFuzzerCustomMutator` or `LLVMFuzzerCustomCrossOver`. +/// You should avoid using crossover-like mutators with custom mutators as this may lead to the +/// injection of some input portions to another in ways which violate structure. 
+#[derive(Debug)] +pub struct LLVMCustomMutator { + mutator: Rc>, + phantom: PhantomData, +} + +impl LLVMCustomMutator { + /// Create the mutator which will invoke the custom mutator, emitting an error if the custom mutator is not present + /// + /// # Safety + /// Will create the specified libfuzzer custom mutator `mutate` fn. + /// Only safe if the custom mutator implementation is correct. + pub unsafe fn mutate(mutator: SM) -> Result { + if libafl_targets_has_libfuzzer_custom_mutator() { + Ok(Self::mutate_unchecked(mutator)) + } else { + Err(Error::illegal_state( + "Cowardly refusing to create a LLVMFuzzerMutator if a custom mutator is not defined.", + )) + } + } + + /// Create the mutator which will invoke the custom mutator without checking if it exists first + /// + /// # Safety + /// Will create the specified libfuzzer custom mutator and not check if it exists. + /// Only safe if the custom mutator implementation is correct and exists. + pub unsafe fn mutate_unchecked(mutator: SM) -> Self { + LLVMCustomMutator { + mutator: Rc::new(RefCell::new(mutator)), + phantom: PhantomData, + } + } +} + +impl LLVMCustomMutator { + /// Create the mutator which will invoke the custom crossover, emitting an error if the custom crossover is not present + /// + /// # Safety + /// Will create the specified libfuzzer custom crossover mutator. + /// Only safe if the custom mutator crossover implementation is correct. + pub unsafe fn crossover(mutator: SM) -> Result { + if libafl_targets_has_libfuzzer_custom_crossover() { + Ok(Self::crossover_unchecked(mutator)) + } else { + Err(Error::illegal_state( + "Cowardly refusing to create a LLVMFuzzerMutator if a custom crossover is not defined.", + )) + } + } + + /// Create the mutator which will invoke the custom crossover without checking if it exists first + /// + /// # Safety + /// Will create the specified libfuzzer custom mutator crossover and not check if it exists. 
+ /// Only safe if the custom mutator crossover implementation is correct and exists. + pub unsafe fn crossover_unchecked(mutator: SM) -> Self { + LLVMCustomMutator { + mutator: Rc::new(RefCell::new(mutator)), + phantom: PhantomData, + } + } +} + +impl ComposedByMutations + for LLVMCustomMutator +where + MT: MutatorsTuple, + S: UsesInput + HasRand + HasMaxSize, + SM: ScheduledMutator, +{ + fn mutations(&self) -> &MT { + unimplemented!("It is unsafe to provide reference-based access to the mutators as they are behind a RefCell.") + } + + fn mutations_mut(&mut self) -> &mut MT { + unimplemented!("It is unsafe to provide reference-based access to the mutators as they are behind a RefCell.") + } +} + +impl Named for LLVMCustomMutator { + fn name(&self) -> &str { + "LLVMCustomMutator" + } +} + +impl Mutator for LLVMCustomMutator +where + MT: MutatorsTuple + 'static, + S: UsesInput + HasRand + HasMaxSize + 'static, + SM: ScheduledMutator + 'static, +{ + #[inline] + fn mutate( + &mut self, + state: &mut S, + input: &mut S::Input, + stage_idx: i32, + ) -> Result { + self.scheduled_mutate(state, input, stage_idx) + } +} + +impl ScheduledMutator for LLVMCustomMutator +where + SM: ScheduledMutator + 'static, + MT: MutatorsTuple + 'static, + S: UsesInput + HasRand + HasMaxSize + 'static, +{ + fn iterations(&self, state: &mut S, input: &S::Input) -> u64 { + let mutator = self.mutator.deref().borrow(); + mutator.iterations(state, input) + } + + fn schedule(&self, state: &mut S, input: &S::Input) -> MutationId { + let mutator = self.mutator.deref().borrow(); + mutator.schedule(state, input) + } + + fn scheduled_mutate( + &mut self, + state: &mut S, + input: &mut S::Input, + stage_idx: i32, + ) -> Result { + let seed = state.rand_mut().next(); + let target = input.bytes(); + let mut bytes = Vec::with_capacity(state.max_size()); + bytes.extend_from_slice(target.as_slice()); + bytes.resize(state.max_size(), 0); + + // we assume that the fuzzer did not use this mutator, but instead 
utilised their own + let result = Rc::new(RefCell::new(Ok(MutationResult::Mutated))); + let proxy = MutatorProxy::new(state, &self.mutator, &result, stage_idx); + let old = MUTATOR.with(|mutator| { + let mut mutator = mutator.borrow_mut(); + mutator.replace(Box::new(proxy.weak())) + }); + let new_size = unsafe { + libafl_targets_libfuzzer_custom_mutator( + bytes.as_mut_ptr(), + target.as_slice().len(), + bytes.len(), + seed as u32, + ) + }; + drop(proxy); + MUTATOR.with(|mutator| { + let mut mutator = mutator.borrow_mut(); + *mutator = old; + }); + if result.deref().borrow().is_err() { + return result.replace(Ok(MutationResult::Skipped)); + } + bytes.truncate(new_size); + core::mem::swap(input.bytes_mut(), &mut bytes); + Ok(MutationResult::Mutated) + } +} + +impl Named for LLVMCustomMutator { + fn name(&self) -> &str { + "LLVMCustomCrossover" + } +} + +impl Mutator for LLVMCustomMutator +where + MT: MutatorsTuple + 'static, + S: UsesInput + HasRand + HasMaxSize + HasCorpus + 'static, + SM: ScheduledMutator + 'static, +{ + #[inline] + fn mutate( + &mut self, + state: &mut S, + input: &mut S::Input, + stage_idx: i32, + ) -> Result { + self.scheduled_mutate(state, input, stage_idx) + } +} + +impl ScheduledMutator for LLVMCustomMutator +where + SM: ScheduledMutator + 'static, + MT: MutatorsTuple + 'static, + S: UsesInput + HasRand + HasMaxSize + HasCorpus + 'static, +{ + fn iterations(&self, state: &mut S, input: &S::Input) -> u64 { + let mutator = self.mutator.deref().borrow(); + mutator.iterations(state, input) + } + + fn schedule(&self, state: &mut S, input: &S::Input) -> MutationId { + let mutator = self.mutator.deref().borrow(); + mutator.schedule(state, input) + } + + fn scheduled_mutate( + &mut self, + state: &mut S, + input: &mut S::Input, + stage_idx: i32, + ) -> Result { + // We don't want to use the testcase we're already using for splicing + let idx = random_corpus_id!(state.corpus(), state.rand_mut()); + if let Some(cur) = state.corpus().current() { + if 
idx == *cur { + return Ok(MutationResult::Skipped); + } + } + + let mut other_testcase = state.corpus().get(idx)?.borrow_mut(); + let other = other_testcase.load_input(state.corpus())?; + let data2 = Vec::from(other.bytes()); + drop(other_testcase); + + let seed = state.rand_mut().next(); + let mut out = vec![0u8; state.max_size()]; + let data1 = input.bytes(); + + // we assume that the fuzzer did not use this mutator, but instead utilised their own + let result = Rc::new(RefCell::new(Ok(MutationResult::Mutated))); + let proxy = MutatorProxy::new(state, &self.mutator, &result, stage_idx); + let old = MUTATOR.with(|mutator| { + let mut mutator = mutator.borrow_mut(); + mutator.replace(Box::new(proxy.weak())) + }); + let new_size = unsafe { + libafl_targets_libfuzzer_custom_crossover( + data1.as_ptr(), + data1.len(), + data2.as_ptr(), + data2.len(), + out.as_mut_ptr(), + out.len(), + seed as u32, + ) + }; + drop(proxy); + MUTATOR.with(|mutator| { + let mut mutator = mutator.borrow_mut(); + *mutator = old; + }); + if result.deref().borrow().is_err() { + return result.replace(Ok(MutationResult::Skipped)); + } + out.truncate(new_size); + core::mem::swap(input.bytes_mut(), &mut out); + Ok(MutationResult::Mutated) + } +} diff --git a/libafl_targets/src/libfuzzer/observers/mod.rs b/libafl_targets/src/libfuzzer/observers/mod.rs new file mode 100644 index 0000000000..b6f32e6839 --- /dev/null +++ b/libafl_targets/src/libfuzzer/observers/mod.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "libfuzzer_oom")] +mod oom; +#[cfg(feature = "libfuzzer_oom")] +pub use oom::*; diff --git a/libafl_targets/src/libfuzzer/observers/oom.rs b/libafl_targets/src/libfuzzer/observers/oom.rs new file mode 100644 index 0000000000..6f8af739fd --- /dev/null +++ b/libafl_targets/src/libfuzzer/observers/oom.rs @@ -0,0 +1,168 @@ +use core::{ffi::c_void, fmt::Debug}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + +use libafl::{ + events::EventFirer, + executors::ExitKind, + feedbacks::Feedback, + 
inputs::UsesInput, + observers::{Observer, ObserversTuple}, + state::HasClientPerfMonitor, + Error, +}; +use libafl_bolts::Named; +use libc::SIGABRT; +use serde::{Deserialize, Serialize}; + +extern "C" { + fn libafl_check_malloc_size(ptr: *const c_void) -> usize; +} + +static RUNNING: AtomicBool = AtomicBool::new(false); +static OOMED: AtomicBool = AtomicBool::new(false); +static RSS_MAX: AtomicUsize = AtomicUsize::new(2 << 30); +// 2GB, which is the default +static MALLOC_MAX: AtomicUsize = AtomicUsize::new(2 << 30); + +static MALLOC_SIZE: AtomicUsize = AtomicUsize::new(0); + +/// malloc hook which will be invoked if address sanitizer is present. Used to detect if the target makes a malloc call +/// that will exceed the permissible size +/// +/// # Safety +/// Is only safe to call with valid freshly allocated pointers backed by allocations of `size`. +#[no_mangle] +pub unsafe extern "C" fn __sanitizer_malloc_hook(ptr: *const c_void, size: usize) { + if RUNNING.load(Ordering::Relaxed) { + let size = match unsafe { libafl_check_malloc_size(ptr) } { + 0 => size, // either the malloc size function didn't work or it's really zero-sized + real => real, + }; + + let total = MALLOC_SIZE.fetch_add(size, Ordering::Relaxed) + size; + if (size > MALLOC_MAX.load(Ordering::Relaxed) || total > RSS_MAX.load(Ordering::Relaxed)) + && !OOMED.swap(true, Ordering::Relaxed) + { + unsafe { + // we need to kill the process in a way that immediately triggers the crash handler + libc::raise(SIGABRT); + } + } + } +} + +/// free hook which will be invoked if ASAN is present. Used to deduct the size of the freed allocation from the +/// running total of allocated memory that the malloc hook accumulates +/// +/// # Safety +/// Is only safe to call with valid allocated pointers, about to be freed. 
+#[no_mangle] +pub unsafe extern "C" fn __sanitizer_free_hook(ptr: *const c_void) { + if RUNNING.load(Ordering::Relaxed) { + let size = unsafe { libafl_check_malloc_size(ptr) }; + MALLOC_SIZE + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |existing| { + Some(existing.saturating_sub(size)) + }) + .expect("must complete successfully"); + } +} + +const OOM_OBS_NAME: &str = "libfuzzer-like-oom"; + +/// Observer which detects if the target would run out of memory or otherwise violate the permissible usage of malloc +#[derive(Debug, Serialize, Deserialize)] +pub struct OomObserver { + oomed: bool, +} + +impl OomObserver { + /// Create a [`OomObserver`] with the provided `rss_max` (total heap size) and `malloc_max` (largest permissible malloc + /// allocation size) + pub fn new(rss_max: usize, malloc_max: usize) -> Self { + RSS_MAX.store(rss_max, Ordering::Relaxed); + MALLOC_MAX.store(malloc_max, Ordering::Relaxed); + Self { oomed: false } + } +} + +impl Named for OomObserver { + // strictly one name to prevent two from being registered + fn name(&self) -> &str { + OOM_OBS_NAME + } +} + +impl Observer for OomObserver +where + S: UsesInput, +{ + fn pre_exec(&mut self, _state: &mut S, _input: &S::Input) -> Result<(), Error> { + OOMED.store(false, Ordering::Relaxed); + // must reset for platforms which do not offer malloc tracking + MALLOC_SIZE.store(0, Ordering::Relaxed); + RUNNING.store(true, Ordering::Relaxed); + Ok(()) + } + + fn post_exec( + &mut self, + _state: &mut S, + _input: &S::Input, + _exit_kind: &ExitKind, + ) -> Result<(), Error> { + RUNNING.store(false, Ordering::Relaxed); + self.oomed = OOMED.load(Ordering::Relaxed); + Ok(()) + } + + fn pre_exec_child(&mut self, state: &mut S, input: &S::Input) -> Result<(), Error> { + self.pre_exec(state, input) + } + + fn post_exec_child( + &mut self, + state: &mut S, + input: &S::Input, + exit_kind: &ExitKind, + ) -> Result<(), Error> { + self.post_exec(state, input, exit_kind) + } +} + +/// Feedback for the 
similarly named [`OomObserver`] to detect if the target crashed due to an observed OOM +#[derive(Debug, Serialize, Deserialize, Copy, Clone, Default)] +pub struct OomFeedback; + +impl OomFeedback { + /// Whether the target OOM'd in the last execution + pub fn oomed() -> bool { + OOMED.load(Ordering::Relaxed) + } +} + +impl Named for OomFeedback { + fn name(&self) -> &str { + "oom" + } +} + +impl Feedback for OomFeedback +where + S: UsesInput + HasClientPerfMonitor, +{ + fn is_interesting( + &mut self, + _state: &mut S, + _manager: &mut EM, + _input: &S::Input, + _observers: &OT, + _exit_kind: &ExitKind, + ) -> Result + where + EM: EventFirer, + OT: ObserversTuple, + { + Ok(Self::oomed()) + } +} diff --git a/libafl_targets/src/sancov_8bit.rs b/libafl_targets/src/sancov_8bit.rs index 3bf623a932..74922ec2de 100644 --- a/libafl_targets/src/sancov_8bit.rs +++ b/libafl_targets/src/sancov_8bit.rs @@ -1,18 +1,51 @@ //! [`LLVM` `8-bi-counters`](https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards) runtime for `LibAFL`. use alloc::vec::Vec; -use libafl_bolts::ownedref::OwnedMutSlice; +use libafl_bolts::{ownedref::OwnedMutSlice, AsMutSlice, AsSlice}; /// A [`Vec`] of `8-bit-counters` maps for multiple modules. /// They are initialized by calling [`__sanitizer_cov_8bit_counters_init`]( pub static mut COUNTERS_MAPS: Vec> = Vec::new(); +/// Create more copies of the counters maps +/// +/// # Safety +/// You are responsible for ensuring there is no multi-mutability! +#[must_use] +pub unsafe fn extra_counters() -> Vec> { + COUNTERS_MAPS + .iter() + .map(|counters| { + OwnedMutSlice::from_raw_parts_mut( + counters.as_slice().as_ptr().cast_mut(), + counters.as_slice().len(), + ) + }) + .collect() +} + /// Initialize the sancov `8-bit-counters` - usually called by `llvm`. 
#[no_mangle] #[allow(clippy::cast_sign_loss)] #[allow(clippy::not_unsafe_ptr_arg_deref)] pub extern "C" fn __sanitizer_cov_8bit_counters_init(start: *mut u8, stop: *mut u8) { unsafe { + for existing in &mut COUNTERS_MAPS { + let range = existing.as_mut_slice().as_mut_ptr() + ..=existing + .as_mut_slice() + .as_mut_ptr() + .add(existing.as_slice().len()); + if range.contains(&start) || range.contains(&stop) { + // we have overlapping or touching ranges; merge them + let &start = range.start().min(&start); + let &stop = range.end().max(&stop); + *existing = + OwnedMutSlice::from_raw_parts_mut(start, stop.offset_from(start) as usize); + return; + } + } + // we didn't overlap; keep going COUNTERS_MAPS.push(OwnedMutSlice::from_raw_parts_mut( start, stop.offset_from(start) as usize, diff --git a/libafl_targets/src/sancov_cmp.c b/libafl_targets/src/sancov_cmp.c index a7def0a437..256447e811 100644 --- a/libafl_targets/src/sancov_cmp.c +++ b/libafl_targets/src/sancov_cmp.c @@ -1,15 +1,15 @@ #include "common.h" #ifdef SANCOV_VALUE_PROFILE -#include "value_profile.h" + #include "value_profile.h" #endif #ifdef SANCOV_CMPLOG -#include "cmplog.h" + #include "cmplog.h" + #include #endif void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) { - uintptr_t k = RETADDR; k = (k >> 4) ^ (k << 8); @@ -21,11 +21,9 @@ void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) { k &= CMPLOG_MAP_W - 1; __libafl_targets_cmplog(k, 1, (uint64_t)arg1, (uint64_t)arg2); #endif - } void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) { - uintptr_t k = RETADDR; k = (k >> 4) ^ (k << 8); @@ -37,11 +35,9 @@ void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) { k &= CMPLOG_MAP_W - 1; __libafl_targets_cmplog(k, 2, (uint64_t)arg1, (uint64_t)arg2); #endif - } void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) { - uintptr_t k = RETADDR; k = (k >> 4) ^ (k << 8); @@ -53,11 +49,9 @@ void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) { k &= CMPLOG_MAP_W - 1; 
__libafl_targets_cmplog(k, 4, (uint64_t)arg1, (uint64_t)arg2); #endif - } void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) { - uintptr_t k = RETADDR; k = (k >> 4) ^ (k << 8); @@ -69,33 +63,32 @@ void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) { k &= CMPLOG_MAP_W - 1; __libafl_targets_cmplog(k, 8, (uint64_t)arg1, (uint64_t)arg2); #endif - } void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { - uintptr_t rt = RETADDR; // if (!cases[1]) {return;} for (uint64_t i = 0; i < cases[0]; i++) { - uintptr_t k = rt + i; k = (k >> 4) ^ (k << 8); // val , cases[i + 2] #ifdef SANCOV_VALUE_PROFILE k &= CMP_MAP_SIZE - 1; switch (cases[1]) { - case 8: + case 8: __libafl_targets_value_profile1(k, (uint8_t)val, (uint8_t)cases[i + 2]); break; - case 16: - __libafl_targets_value_profile2(k, (uint16_t)val, (uint16_t)cases[i + 2]); + case 16: + __libafl_targets_value_profile2(k, (uint16_t)val, + (uint16_t)cases[i + 2]); break; - case 32: - __libafl_targets_value_profile4(k, (uint32_t)val, (uint32_t)cases[i + 2]); + case 32: + __libafl_targets_value_profile4(k, (uint32_t)val, + (uint32_t)cases[i + 2]); break; - default: + default: __libafl_targets_value_profile8(k, val, cases[i + 2]); break; } @@ -104,10 +97,7 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { k &= CMPLOG_MAP_W - 1; __libafl_targets_cmplog(k, cases[1] / 8, val, cases[i + 2]); #endif - - } - } void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) { @@ -123,5 +113,82 @@ void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) { } void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) { - __sanitizer_cov_trace_cmp8(arg1, arg2); + __sanitizer_cov_trace_cmp8(arg1, arg2); } + +#ifdef SANCOV_CMPLOG + +void __sanitizer_weak_hook_memcmp(void *called_pc, const void *s1, + const void *s2, size_t n, int result) { + if (result != 0) { + uintptr_t k = (uintptr_t)called_pc; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + 
__libafl_targets_cmplog_routines_len(k, s1, s2, MIN(n, 32)); + } +} + +void __sanitizer_weak_hook_strncmp(void *called_pc, const char *s1, + const char *s2, size_t n, int result) { + if (result != 0) { + n = MIN(n, 32); + + uintptr_t k = (uintptr_t)called_pc; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + size_t actual_len; + for (actual_len = 0; actual_len < n; actual_len++) { + if (s1[actual_len] == 0 || s2[actual_len] == 0) { break; } + } + + __libafl_targets_cmplog_routines_len(k, (const uint8_t *) s1, (const uint8_t *) s2, actual_len); + } +} + +void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1, + const char *s2, size_t n, int result) { + __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result); +} + +void __sanitizer_weak_hook_strcmp(void *called_pc, const char *s1, + const char *s2, int result) { + if (result != 0) { + uintptr_t k = (uintptr_t)called_pc; + k = (k >> 4) ^ (k << 8); + k &= CMPLOG_MAP_W - 1; + + size_t actual_len; + for (actual_len = 0; actual_len < 32; actual_len++) { + if (s1[actual_len] == 0 || s2[actual_len] == 0) { break; } + } + + __libafl_targets_cmplog_routines_len(k, (const uint8_t *) s1, (const uint8_t *) s2, actual_len); + } +} + +void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1, + const char *s2, int result) { + __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result); +} + +// strstr, strcasestr, memmem unhandled + +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" + +void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, + const uintptr_t *pcs_end) { + // unused + // TODO implement +} + +void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) { + // unused + // TODO implement +} + +#pragma GCC diagnostic pop diff --git a/libafl_targets/src/sanitizer_interfaces.h b/libafl_targets/src/sanitizer_interfaces.h new file mode 100644 index 0000000000..33747f063d --- /dev/null +++ b/libafl_targets/src/sanitizer_interfaces.h @@ -0,0 +1,21 @@ 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__linux__) + #include +#elif defined(__unix__) || !defined(__APPLE__) && defined(__MACH__) + #include + #if defined(BSD) + #include + #endif +#endif diff --git a/scripts/clippy.sh b/scripts/clippy.sh index 5c0b56eb39..5645909e06 100755 --- a/scripts/clippy.sh +++ b/scripts/clippy.sh @@ -2,6 +2,8 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" cd "$SCRIPT_DIR/.." || exit 1 +set -e + RUST_BACKTRACE=full cargo +nightly clippy --all --all-features --release --tests --examples --benches -- -Z macro-backtrace \ -D clippy::all \ -D clippy::pedantic \ @@ -15,4 +17,22 @@ RUST_BACKTRACE=full cargo +nightly clippy --all --all-features --release --tests -A clippy::missing-docs-in-private-items \ -A clippy::unseparated-literal-suffix \ -A clippy::module-name-repetitions \ - -A clippy::unreadable-literal \ + -A clippy::unreadable-literal + +if [[ "$OSTYPE" == "linux-gnu"* ]]; then + cd libafl_libfuzzer/libafl_libfuzzer_runtime + RUST_BACKTRACE=full cargo +nightly clippy --all --all-features --release --tests --examples --benches -- -Z macro-backtrace \ + -D clippy::all \ + -D clippy::pedantic \ + -W clippy::similar_names \ + -A clippy::type_repetition_in_bounds \ + -A clippy::missing-errors-doc \ + -A clippy::cast-possible-truncation \ + -A clippy::used-underscore-binding \ + -A clippy::ptr-as-ptr \ + -A clippy::missing-panics-doc \ + -A clippy::missing-docs-in-private-items \ + -A clippy::unseparated-literal-suffix \ + -A clippy::module-name-repetitions \ + -A clippy::unreadable-literal +fi