Skip to content

Commit 3764b10

Browse files
DonggeLiufdt622DavidKorczynski
authored
LLM-based crash triaging (#375)
Experiment based on #221, #253, and #374. --------- Signed-off-by: David Korczynski <david@adalogics.com> Co-authored-by: fdt622 <malkiya622@gmail.com> Co-authored-by: DavidKorczynski <david@adalogics.com>
1 parent e8d18ba commit 3764b10

File tree

13 files changed

+502
-41
lines changed

13 files changed

+502
-41
lines changed

data_prep/introspector.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from experiment import benchmark as benchmarklib
3434
from experiment import oss_fuzz_checkout
3535

36-
T = TypeVar('T', str, list, dict) # Generic type.
36+
T = TypeVar('T', str, list, dict, int) # Generic type.
3737

3838
TIMEOUT = 45
3939
MAX_RETRY = 5
@@ -234,6 +234,15 @@ def query_introspector_function_source(project: str, func_sig: str) -> str:
234234
return _get_data(resp, 'source', '')
235235

236236

237+
def query_introspector_function_line(project: str, func_sig: str) -> list:
  """Queries FuzzIntrospector API for the source line range of |func_sig|.

  Args:
    project: Project name sent as the 'project' query parameter.
    func_sig: Function signature sent as the 'function_signature' query
      parameter.

  Returns:
    A two-element list [src_begin, src_end] read from the response. 0 is
    passed to _get_data as the default for each field.
  """
  # NOTE(review): this reuses the INTROSPECTOR_FUNCTION_SOURCE endpoint and
  # reads 'src_begin'/'src_end' from its response -- confirm the endpoint
  # actually returns those fields.
  resp = _query_introspector(INTROSPECTOR_FUNCTION_SOURCE, {
      'project': project,
      'function_signature': func_sig
  })
  return [_get_data(resp, 'src_begin', 0), _get_data(resp, 'src_end', 0)]
244+
245+
237246
def query_introspector_source_code(project: str, filepath: str, begin_line: int,
238247
end_line: int) -> str:
239248
"""Queries FuzzIntrospector API for source code of a

experiment/builder_runner.py

Lines changed: 113 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import time
2626
import traceback
2727
import uuid
28+
from collections import defaultdict, namedtuple
2829
from typing import Any, Optional
2930

3031
from google.cloud import storage
@@ -34,6 +35,7 @@
3435
from experiment.fuzz_target_error import SemanticCheckResult
3536
from experiment.workdir import WorkDirs
3637
from llm_toolkit import code_fixer
38+
from llm_toolkit.crash_triager import TriageResult
3739
from llm_toolkit.models import DefaultModel
3840

3941
# The directory in the oss-fuzz image
@@ -57,6 +59,10 @@
5759

5860
EARLY_FUZZING_ROUND_THRESHOLD = 3
5961

62+
# Result of parsing one libFuzzer run log. Field order matters: callers
# unpack the tuple positionally into the run result (covered PCs, total PCs,
# crash flag, crash info text, semantic check result).
ParseResult = namedtuple(
    'ParseResult',
    ['cov_pcs', 'total_pcs', 'crashes', 'crash_info', 'semantic_check_result'])
65+
6066

6167
@dataclasses.dataclass
6268
class BuildResult:
@@ -84,6 +90,8 @@ class RunResult:
8490
cov_pcs: int = 0
8591
total_pcs: int = 0
8692
crashes: bool = False
93+
crash_info: str = ''
94+
triage: str = TriageResult.NOT_APPLICABLE
8795
semantic_check: SemanticCheckResult = SemanticCheckResult(
8896
SemanticCheckResult.NOT_APPLICABLE)
8997

@@ -94,6 +102,11 @@ def dict(self):
94102
class BuilderRunner:
95103
"""Builder and runner."""
96104

105+
# Regex for extracting the function name.
106+
FUNC_NAME = re.compile(r'(?:^|\s|\b)([\w:]+::)*(\w+)(?:<[^>]*>)?(?=\(|$)')
107+
# Regex for extracting the line number.
108+
LINE_NUMBER = re.compile(r':(\d+):')
109+
97110
def __init__(self,
98111
benchmark: Benchmark,
99112
work_dirs: WorkDirs,
@@ -181,7 +194,7 @@ def _pre_build_check(self, target_path: str,
181194

182195
def _parse_stacks_from_libfuzzer_logs(self,
183196
lines: list[str]) -> list[list[str]]:
184-
"""Parse stack traces from libFuzzer logs."""
197+
"""Parses stack traces from libFuzzer logs."""
185198
# TODO (dongge): Use stack parsing from ClusterFuzz.
186199
# There can be more than one thread stack in a log.
187200
stacks = []
@@ -209,6 +222,46 @@ def _parse_stacks_from_libfuzzer_logs(self,
209222

210223
return stacks
211224

225+
def _parse_func_from_stacks(self, project_name: str,
226+
stacks: list[list[str]]) -> dict:
227+
"""Parses project functions from stack traces."""
228+
func_info = defaultdict(set)
229+
230+
for stack in stacks:
231+
for line in stack:
232+
# Use 3 spaces to divide each line of crash info into four parts.
233+
# Only parse the fourth part, which includes the function name,
234+
# file path, and line number.
235+
parts = line.split(' ', 3)
236+
if len(parts) < 4:
237+
continue
238+
func_and_file_path = parts[3]
239+
if project_name not in func_and_file_path:
240+
continue
241+
func_name, _, file_path = func_and_file_path.partition(' /')
242+
if func_name == 'LLVMFuzzerTestOneInput':
243+
line_match = self.LINE_NUMBER.search(file_path)
244+
if line_match:
245+
line_number = int(line_match.group(1))
246+
func_info[func_name].add(line_number)
247+
else:
248+
logging.warning('Failed to parse line number from %s in project %s',
249+
func_name, project_name)
250+
break
251+
if project_name in file_path:
252+
func_match = self.FUNC_NAME.search(func_name)
253+
line_match = self.LINE_NUMBER.search(file_path)
254+
if func_match and line_match:
255+
func_name = func_match.group(2)
256+
line_number = int(line_match.group(1))
257+
func_info[func_name].add(line_number)
258+
else:
259+
logging.warning(
260+
'Failed to parse function name from %s in project %s',
261+
func_name, project_name)
262+
263+
return func_info
264+
212265
def _parse_fuzz_cov_info_from_libfuzzer_logs(
213266
self,
214267
lines: list[str]) -> tuple[Optional[int], Optional[int], Optional[int]]:
@@ -236,11 +289,10 @@ def _stack_func_is_of_testing_project(self, stack_frame: str) -> bool:
236289
LIBFUZZER_LOG_STACK_FRAME_LLVM2 not in stack_frame and
237290
LIBFUZZER_LOG_STACK_FRAME_CPP not in stack_frame)
238291

239-
def _parse_libfuzzer_logs(
240-
self,
241-
log_handle,
242-
check_cov_increase: bool = True
243-
) -> tuple[int, int, bool, SemanticCheckResult]:
292+
def _parse_libfuzzer_logs(self,
293+
log_handle,
294+
project_name: str,
295+
check_cov_increase: bool = True) -> ParseResult:
244296
"""Parses libFuzzer logs."""
245297
lines = None
246298
try:
@@ -251,7 +303,8 @@ def _parse_libfuzzer_logs(
251303
except MemoryError as e:
252304
# Some logs from abnormal fuzz targets are too large to be parsed.
253305
logging.error('%s is too large to parse: %s', log_handle.name, e)
254-
return 0, 0, False, SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP)
306+
return ParseResult(0, 0, False, '',
307+
SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP))
255308

256309
cov_pcs, total_pcs, crashes = 0, 0, False
257310

@@ -280,46 +333,59 @@ def _parse_libfuzzer_logs(
280333
if crashes:
281334
symptom = SemanticCheckResult.extract_symptom(fuzzlog)
282335
crash_stacks = self._parse_stacks_from_libfuzzer_logs(lines)
336+
crash_func = self._parse_func_from_stacks(project_name, crash_stacks)
337+
crash_info = SemanticCheckResult.extract_crash_info(fuzzlog)
283338

284339
# FP case 1: Common fuzz target errors.
285340
# Null-deref, normally indicating inadequate parameter initialization or
286341
# wrong function usage.
287342
if symptom == 'null-deref':
288-
return cov_pcs, total_pcs, True, SemanticCheckResult(
289-
SemanticCheckResult.NULL_DEREF, symptom, crash_stacks)
343+
return ParseResult(
344+
cov_pcs, total_pcs, True, crash_info,
345+
SemanticCheckResult(SemanticCheckResult.NULL_DEREF, symptom,
346+
crash_stacks, crash_func))
290347

291348
# Signal, normally indicating assertion failure due to inadequate
292349
# parameter initialization or wrong function usage.
293350
if symptom == 'signal':
294-
return cov_pcs, total_pcs, True, SemanticCheckResult(
295-
SemanticCheckResult.SIGNAL, symptom, crash_stacks)
351+
return ParseResult(
352+
cov_pcs, total_pcs, True, crash_info,
353+
SemanticCheckResult(SemanticCheckResult.SIGNAL, symptom,
354+
crash_stacks, crash_func))
296355

297356
# Exit, normally indicating the fuzz target exited in a controlled manner,
298357
# blocking its bug discovery.
299358
if symptom.endswith('fuzz target exited'):
300-
return cov_pcs, total_pcs, True, SemanticCheckResult(
301-
SemanticCheckResult.EXIT, symptom, crash_stacks)
359+
return ParseResult(
360+
cov_pcs, total_pcs, True, crash_info,
361+
SemanticCheckResult(SemanticCheckResult.EXIT, symptom, crash_stacks,
362+
crash_func))
302363

303364
# Fuzz target modified constants.
304365
if symptom.endswith('fuzz target overwrites its const input'):
305-
return cov_pcs, total_pcs, True, SemanticCheckResult(
306-
SemanticCheckResult.OVERWRITE_CONST, symptom, crash_stacks)
366+
return ParseResult(
367+
cov_pcs, total_pcs, True, crash_info,
368+
SemanticCheckResult(SemanticCheckResult.OVERWRITE_CONST, symptom,
369+
crash_stacks, crash_func))
307370

308371
# OOM, normally indicating malloc's parameter is too large, e.g., because
309372
# of using parameter `size`.
310373
# TODO(dongge): Refine this, 1) Merge this with the other oom case found
311374
# from reproducer name; 2) Capture the actual number in (malloc(\d+)).
312375
if 'out-of-memory' in symptom or 'out of memory' in symptom:
313-
return cov_pcs, total_pcs, True, SemanticCheckResult(
314-
SemanticCheckResult.FP_OOM, symptom, crash_stacks)
376+
return ParseResult(
377+
cov_pcs, total_pcs, True, crash_info,
378+
SemanticCheckResult(SemanticCheckResult.FP_OOM, symptom,
379+
crash_stacks, crash_func))
315380

316381
# FP case 2: fuzz target crashes at init or first few rounds.
317382
if lastround is None or lastround <= EARLY_FUZZING_ROUND_THRESHOLD:
318383
# No cov line has been identified or only INITED round has been passed.
319384
# This is very likely the false positive cases.
320-
return cov_pcs, total_pcs, True, \
321-
SemanticCheckResult(SemanticCheckResult.FP_NEAR_INIT_CRASH,\
322-
symptom, crash_stacks)
385+
return ParseResult(
386+
cov_pcs, total_pcs, True, crash_info,
387+
SemanticCheckResult(SemanticCheckResult.FP_NEAR_INIT_CRASH, symptom,
388+
crash_stacks, crash_func))
323389

324390
# FP case 3: 1st func of the 1st thread stack is in fuzz target.
325391
if len(crash_stacks) > 0:
@@ -328,21 +394,28 @@ def _parse_libfuzzer_logs(
328394
for stack_frame in first_stack[:1]:
329395
if self._stack_func_is_of_testing_project(stack_frame):
330396
if 'LLVMFuzzerTestOneInput' in stack_frame:
331-
return cov_pcs, total_pcs, True, \
332-
SemanticCheckResult(SemanticCheckResult.FP_TARGET_CRASH,\
333-
symptom, crash_stacks)
397+
return ParseResult(
398+
cov_pcs, total_pcs, True, crash_info,
399+
SemanticCheckResult(SemanticCheckResult.FP_TARGET_CRASH,
400+
symptom, crash_stacks, crash_func))
334401
break
335402

336-
elif check_cov_increase and initcov == donecov and lastround is not None:
403+
return ParseResult(
404+
cov_pcs, total_pcs, True, crash_info,
405+
SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR, symptom,
406+
crash_stacks, crash_func))
407+
408+
if check_cov_increase and initcov == donecov and lastround is not None:
337409
# Another error fuzz target case: no cov increase.
338410
# A special case is initcov == donecov == None, which indicates no
339411
# interesting inputs were found. This may happen if the target rejected
340412
# all inputs we tried.
341-
return cov_pcs, total_pcs, False, SemanticCheckResult(
342-
SemanticCheckResult.NO_COV_INCREASE)
413+
return ParseResult(
414+
cov_pcs, total_pcs, False, '',
415+
SemanticCheckResult(SemanticCheckResult.NO_COV_INCREASE))
343416

344-
return cov_pcs, total_pcs, crashes, SemanticCheckResult(
345-
SemanticCheckResult.NO_SEMANTIC_ERR)
417+
return ParseResult(cov_pcs, total_pcs, crashes, '',
418+
SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR))
346419

347420
def build_and_run(self, generated_project: str, target_path: str,
348421
iteration: int,
@@ -367,13 +440,14 @@ def build_and_run_local(
367440
build_result: BuildResult,
368441
language: str) -> tuple[BuildResult, Optional[RunResult]]:
369442
"""Builds and runs the fuzz target locally for fuzzing."""
370-
443+
project_name = self.benchmark.project
371444
benchmark_target_name = os.path.basename(target_path)
372445
project_target_name = os.path.basename(self.benchmark.target_path)
373446
benchmark_log_path = self.work_dirs.build_logs_target(
374447
benchmark_target_name, iteration)
375448
build_result.succeeded = self.build_target_local(generated_project,
376449
benchmark_log_path)
450+
377451
# Copy err.log into work dir (Ignored for JVM projects)
378452
if language != 'jvm':
379453
try:
@@ -405,8 +479,10 @@ def build_and_run_local(
405479
# difference in short running. Adding the flag for JVM
406480
# projects to temporary skip the checking of coverage change.
407481
flag = not self.benchmark.language == 'jvm'
408-
run_result.cov_pcs, run_result.total_pcs, run_result.crashes, \
409-
run_result.semantic_check = self._parse_libfuzzer_logs(f, flag)
482+
run_result.cov_pcs, run_result.total_pcs, \
483+
run_result.crashes, run_result.crash_info, \
484+
run_result.semantic_check = \
485+
self._parse_libfuzzer_logs(f, project_name, flag)
410486
run_result.succeeded = not run_result.semantic_check.has_err
411487

412488
return build_result, run_result
@@ -694,6 +770,8 @@ def build_and_run_cloud(
694770
"""Builds and runs the fuzz target on cloud for fuzzing."""
695771
logging.info('Evaluating %s on cloud.', os.path.realpath(target_path))
696772

773+
project_name = self.benchmark.project
774+
697775
uid = self.experiment_name + str(uuid.uuid4())
698776
run_log_name = f'{uid}.run.log'
699777
run_log_path = f'gs://{self.experiment_bucket}/{run_log_name}'
@@ -828,8 +906,10 @@ def build_and_run_cloud(
828906
# Parse libfuzzer logs to get fuzz target runtime details.
829907
with open(self.work_dirs.run_logs_target(generated_target_name, iteration),
830908
'rb') as f:
831-
run_result.cov_pcs, run_result.total_pcs, run_result.crashes, \
832-
run_result.semantic_check = self._parse_libfuzzer_logs(f)
909+
run_result.cov_pcs, run_result.total_pcs, \
910+
run_result.crashes, run_result.crash_info, \
911+
run_result.semantic_check = \
912+
self._parse_libfuzzer_logs(f, project_name)
833913
run_result.succeeded = not run_result.semantic_check.has_err
834914

835915
return build_result, run_result

0 commit comments

Comments
 (0)