chore(tests): heavy integration to use large dataset (#429)
* chore(tests): heavy integration to use large dataset

* fix(cfg): `lazy_static` persists across calls (bad), and is not needed (#431)

* chore(tests): heavy integration tests to use large dataset
Jon-Becker committed Jun 6, 2024
1 parent 773acc6 commit e8ef5b1
Showing 9 changed files with 259 additions and 215 deletions.
4 changes: 4 additions & 0 deletions .config/nextest.toml
@@ -1,2 +1,6 @@
 [profile.default]
 retries = { backoff = "exponential", count = 2, delay = "2s", jitter = true }
+
+[[profile.default.overrides]]
+filter = 'test(heavy_integration_test)'
+retries = 0
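
Note: nextest's `test()` filterset predicate matches any test whose name contains the given substring, so this override pins retries to zero for exactly the heavy tests introduced below. A minimal sketch of a test the filter would match (illustrative only):

// Illustrative sketch: a test matched by filter = 'test(heavy_integration_test)'.
// nextest matches against the full test name, e.g. integration_tests::heavy_integration_test.
#[test]
#[ignore] // excluded from normal runs; the nightly workflow opts in with `-- --ignored`
fn heavy_integration_test() {
    // long-running work against the large dataset; a flaky failure here is
    // expensive to repeat, hence retries = 0 for this test alone
}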
75 changes: 41 additions & 34 deletions .github/workflows/heavy-integration.yml
@@ -1,45 +1,52 @@
 name: heavy integration
 
 on:
-  schedule:
-    # Runs at 10PM utc
-    - cron: "0 22 * * *"
-  workflow_dispatch:
+    schedule:
+        # Runs at 10PM utc
+        - cron: "0 22 * * *"
+    workflow_dispatch:
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
+    group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+    cancel-in-progress: true
 
 env:
-  CARGO_TERM_COLOR: always
+    CARGO_TERM_COLOR: always
 
 jobs:
-  heavy-integration:
-    name: heavy (long-running) integration tests
-    runs-on: ubuntu-latest
-    timeout-minutes: 120
+    heavy-integration:
+        name: heavy (long-running) integration tests
+        runs-on: ubuntu-latest
+        timeout-minutes: 120
 
-    steps:
-      - uses: actions/checkout@v3
-      - uses: dtolnay/rust-toolchain@stable
-      - uses: taiki-e/install-action@nextest
-      - name: Run Tests
-        run: |
-          cargo nextest r --no-fail-fast --release --nocapture -- --ignored
+        steps:
+            - uses: actions/checkout@v3
+            - uses: dtolnay/rust-toolchain@stable
+            - uses: taiki-e/install-action@nextest
+            - name: Fetch Dataset
+              run: |
+                  # download from https://jbecker.dev/data/largest1k.tar.gz
+                  wget https://jbecker.dev/data/largest1k.tar.gz
+
+                  # extract the dataset
+                  tar -xvf largest1k.tar.gz
+            - name: Run Tests
+              run: |
+                  cargo nextest r --no-fail-fast --release --nocapture -- --ignored
 
-  # If any of the jobs fail, this will create a high-priority issue to signal so.
-  issue:
-    name: Open an issue
-    runs-on: ubuntu-latest
-    needs: heavy-integration
-    if: ${{ failure() }}
-    steps:
-      - uses: actions/checkout@v4
-      - uses: JasonEtco/create-an-issue@v2
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          WORKFLOW_URL: |
-            ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
-        with:
-          update_existing: true
-          filename: .github/INTEGRATION_FAILURE.md
+    # If any of the jobs fail, this will create a high-priority issue to signal so.
+    issue:
+        name: Open an issue
+        runs-on: ubuntu-latest
+        needs: heavy-integration
+        if: ${{ failure() }}
+        steps:
+            - uses: actions/checkout@v4
+            - uses: JasonEtco/create-an-issue@v2
+              env:
+                  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+                  WORKFLOW_URL: |
+                      ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+              with:
+                  update_existing: true
+                  filename: .github/INTEGRATION_FAILURE.md
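
To run the heavy suite locally, the Fetch Dataset step can be mirrored with a small helper. A sketch using only std, shelling out to the same wget and tar commands as the workflow (assumes both tools are on PATH and the repository root is the working directory):

// Local stand-in for the workflow's Fetch Dataset step (assumption: wget and
// tar are installed; run from the repository root).
use std::process::Command;

fn main() {
    let fetched = Command::new("wget")
        .arg("https://jbecker.dev/data/largest1k.tar.gz")
        .status()
        .expect("failed to spawn wget");
    assert!(fetched.success(), "download failed");

    let extracted = Command::new("tar")
        .args(["-xvf", "largest1k.tar.gz"])
        .status()
        .expect("failed to spawn tar");
    assert!(extracted.success(), "extraction failed");
}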
2 changes: 2 additions & 0 deletions .gitignore
@@ -34,3 +34,5 @@ false/*
 
 *.svg
 *.sh
+
+largest1k
65 changes: 7 additions & 58 deletions crates/cfg/src/core/graph.rs
@@ -1,19 +1,9 @@
-use std::{collections::HashMap, sync::Mutex};
-
 use ethers::prelude::U256;
-use eyre::{eyre, OptionExt, Result};
+use eyre::{OptionExt, Result};
 use heimdall_common::utils::strings::encode_hex_reduced;
 use heimdall_vm::ext::exec::VMTrace;
 use petgraph::{matrix_graph::NodeIndex, Graph};
 
-use lazy_static::lazy_static;
-
-lazy_static! {
-    static ref INSTRUCTION_NODE_MAP: Mutex<HashMap<u128, NodeIndex<u32>>> =
-        Mutex::new(HashMap::new());
-    static ref CONNECTING_EDGES: Mutex<Vec<String>> = Mutex::new(Vec::new());
-}
-
 /// convert a symbolic execution [`VMTrace`] into a [`Graph`] of blocks, illustrating the
 /// control-flow graph found by the symbolic execution engine.
 // TODO: should this be a trait for VMTrace to implement?
@@ -56,53 +46,12 @@ pub fn build_cfg(
         cfg_node.push_str(&format!("{}\n", &assembly));
     }
 
-    // check if the map already contains the current node
-    let mut instruction_node_map =
-        INSTRUCTION_NODE_MAP.lock().map_err(|_| eyre!("failed to lock instruction node map"))?;
-    let chunk_index = match vm_trace.operations.first() {
-        Some(operation) => operation.last_instruction.instruction,
-        None => 0,
-    };
-
-    match instruction_node_map.get(&chunk_index) {
-        Some(node_index) => {
-            // this node already exists, so we need to add an edge to it.
-            if let Some(parent_node) = parent_node {
-                // check if the edge already exists
-                let mut connecting_edges = CONNECTING_EDGES
-                    .lock()
-                    .map_err(|_| eyre!("failed to lock connecting edges"))?;
-                let edge = format!("{} -> {}", parent_node.index(), node_index.index());
-                if !connecting_edges.contains(&edge) {
-                    contract_cfg.add_edge(parent_node, *node_index, jump_taken.to_string());
-                    connecting_edges.push(edge);
-                }
-                drop(connecting_edges)
-            }
-        }
-        None => {
-            // this node does not exist, so we need to add it to the map and the graph
-            let node_index = contract_cfg.add_node(cfg_node);
-
-            if let Some(parent_node) = parent_node {
-                // check if the edge already exists
-                let mut connecting_edges = CONNECTING_EDGES
-                    .lock()
-                    .map_err(|_| eyre!("failed to lock connecting edges"))?;
-                let edge = format!("{} -> {}", parent_node.index(), node_index.index());
-                if !connecting_edges.contains(&edge) {
-                    contract_cfg.add_edge(parent_node, node_index, jump_taken.to_string());
-                    connecting_edges.push(edge);
-                }
-                drop(connecting_edges)
-            }
-
-            instruction_node_map.insert(chunk_index, node_index);
-            parent_node = Some(node_index);
-        }
-    };
-
-    drop(instruction_node_map);
+    // add the node to the graph
+    let node_index = contract_cfg.add_node(cfg_node);
+    if let Some(parent_node) = parent_node {
+        contract_cfg.update_edge(parent_node, node_index, jump_taken.to_string());
+    }
+    parent_node = Some(node_index);
 
     // recurse into the children of the VMTrace map
    for child in vm_trace.children.iter() {
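
The replacement works because petgraph's `Graph::update_edge` already deduplicates: if an edge from `parent_node` to `node_index` exists, its weight is updated in place rather than a second edge being added, which is what the removed `CONNECTING_EDGES` set was emulating (while leaking state across calls). A minimal sketch (node labels invented):

// Minimal sketch of the deduplication update_edge provides, replacing the
// removed CONNECTING_EDGES bookkeeping; node labels are made up.
use petgraph::Graph;

fn main() {
    let mut cfg: Graph<String, String> = Graph::new();
    let entry = cfg.add_node(String::from("PUSH1 0x80"));
    let body = cfg.add_node(String::from("JUMPDEST"));

    cfg.update_edge(entry, body, String::from("true"));
    cfg.update_edge(entry, body, String::from("true")); // updates in place, no duplicate

    assert_eq!(cfg.edge_count(), 1);
}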
71 changes: 70 additions & 1 deletion crates/core/tests/test_cfg.rs
@@ -1,7 +1,11 @@
 #[cfg(test)]
 mod integration_tests {
-    use heimdall_cfg::CFGArgs;
+    use std::path::PathBuf;
+
+    use heimdall_cfg::{cfg, CFGArgs, CFGArgsBuilder};
     use heimdall_common::utils::io::file::delete_path;
+    use petgraph::dot::Dot;
+    use serde_json::Value;
 
     #[tokio::test]
     async fn test_cfg_simple() {
@@ -57,4 +61,69 @@ mod integration_tests {
             assert!(output.contains(line))
         }
     }
+
+    #[tokio::test]
+    #[ignore]
+    async fn heavy_integration_test() {
+        let root_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .expect("no parent")
+            .parent()
+            .expect("no parent")
+            .to_owned();
+
+        // if the ./largest1k directory does not exist, download it from https://jbecker.dev/data/largest1k.tar.gz
+        let dataset_dir = root_dir.join("largest1k");
+        if !dataset_dir.exists() {
+            eprintln!("dataset not found in root, skipping test");
+            std::process::exit(0);
+        }
+
+        // list files in root_dir
+        let contracts = std::fs::read_dir(dataset_dir)
+            .expect("failed to read dataset directory")
+            .map(|res| {
+                // HashMap from filename (without extension) to bytecode (from serde_json::Value)
+                res.map(|e| {
+                    let path = e.path();
+                    let filename = path
+                        .file_stem()
+                        .expect("no file stem")
+                        .to_str()
+                        .expect("no file stem")
+                        .to_owned();
+
+                    // read contents as json and parse to serde_json::Value
+                    let contents_json: Value = serde_json::from_str(
+                        &std::fs::read_to_string(path).expect("failed to read file"),
+                    )
+                    .expect("failed to parse json");
+                    let bytecode = contents_json["code"].as_str().expect("no bytecode").to_owned();
+
+                    (filename, bytecode)
+                })
+            })
+            .collect::<Result<Vec<_>, std::io::Error>>()
+            .expect("failed to collect files");
+
+        for (contract_address, bytecode) in contracts {
+            println!("Generating CFG for contract {contract_address}");
+            let args = CFGArgsBuilder::new()
+                .target(bytecode)
+                .timeout(10000)
+                .output(String::from("./output/tests/cfg/integration"))
+                .build()
+                .expect("failed to build args");
+
+            let _ = cfg(args)
+                .await
+                .map_err(|e| {
+                    eprintln!("failed to generate cfg for contract {contract_address}: {e}");
+                    e
+                })
+                .expect("failed to generate cfg");
+        }
+
+        delete_path(&String::from("./output/tests/cfg/integration"));
+    }
 }
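
The test assumes each file in largest1k is named after a contract address and contains a JSON object whose "code" field holds the runtime bytecode. A self-contained sketch of that per-file parse (the bytecode fragment is invented):

// Sketch of the parse performed for each dataset file; the bytecode here is a
// made-up fragment, real files carry full runtime bytecode under "code".
use serde_json::Value;

fn main() {
    let raw = r#"{"code": "0x6080604052"}"#;
    let parsed: Value = serde_json::from_str(raw).expect("failed to parse json");
    let bytecode = parsed["code"].as_str().expect("no bytecode").to_owned();
    assert_eq!(bytecode, "0x6080604052");
}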
16 changes: 5 additions & 11 deletions crates/core/tests/test_decode.rs
@@ -1,6 +1,7 @@
 #[cfg(test)]
-mod tests {
-    use heimdall_decoder::DecodeArgs;
+mod integration_tests {
+    use heimdall_common::utils::{sync::blocking_await, threading::task_pool};
+    use heimdall_decoder::{DecodeArgs, DecodeArgsBuilder};
+    use serde_json::Value;
 
     #[tokio::test]
     async fn test_decode_transfer() {
@@ -31,17 +32,10 @@ mod tests {
         };
         let _ = heimdall_decoder::decode(args).await;
     }
-}
-
-mod integration_tests {
-    use heimdall_common::utils::{sync::blocking_await, threading::task_pool};
-    use heimdall_decoder::DecodeArgsBuilder;
-    use serde_json::Value;
 
     /// Thorough testing for decode across a large number of transactions.
     #[test]
     #[ignore]
-    fn heavy_test_decode_thorough() {
+    fn heavy_integration_test() {
         let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| {
             println!("RPC_URL not set, skipping test");
             std::process::exit(0);
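
The renamed test keeps its shape: it exits early when RPC_URL is unset, then fans decode work out over a thread pool via heimdall_common's task_pool and blocking_await helpers. Those helper signatures are not shown in this diff, so here is a rough stand-in using plain std threads (transaction hashes are placeholders):

// Rough stand-in for the test's parallel fan-out; the real code uses
// heimdall_common's task_pool/blocking_await rather than raw threads, and
// fetches real transaction hashes over RPC_URL. Placeholders throughout.
use std::thread;

fn main() {
    let txids = vec![String::from("0xplaceholder1"), String::from("0xplaceholder2")];
    let handles: Vec<_> = txids
        .into_iter()
        .map(|txid| {
            thread::spawn(move || {
                // in the real test: build DecodeArgs via DecodeArgsBuilder and
                // call heimdall_decoder::decode(args)
                println!("decoding {txid}");
            })
        })
        .collect();
    for handle in handles {
        handle.join().expect("decode worker panicked");
    }
}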
(diffs for the remaining 3 changed files not shown)
