In [5]:
import torch
import pandas as pd
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, matthews_corrcoef, cohen_kappa_score,
    mean_squared_error, mean_absolute_error, roc_auc_score
)

# --- Load Model & Tokenizer ---
model_dir = "./cdl_codebert_model_final"
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# --- Load Test Data ---
test_df = pd.read_csv("cdl_test.csv")
inputs = test_df['input'].astype(str).tolist()         # <--- Change here
true_labels = test_df['label'].astype(int).tolist()

# --- Batch Prediction ---
def batch_predict_probs(inputs, model, tokenizer, batch_size=16, max_length=256):
    all_preds = []
    all_probs = []
    for i in range(0, len(inputs), batch_size):
        batch = inputs[i:i+batch_size]
        encodings = tokenizer(batch, truncation=True, padding="max_length", max_length=max_length, return_tensors="pt")
        input_ids = encodings["input_ids"].to(device)
        attention_mask = encodings["attention_mask"].to(device)
        with torch.no_grad():
            logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
            probs = torch.softmax(logits, dim=1).cpu().numpy()
            preds = np.argmax(probs, axis=1)
            all_probs.extend(probs[:, 1])
            all_preds.extend(preds)
    return np.array(all_preds), np.array(all_probs)

preds, probs = batch_predict_probs(inputs, model, tokenizer)

# --- Add predictions to DataFrame ---
test_df["pred_label"] = preds
test_df["pred_prob"] = probs

# --- Evaluation Metrics ---
try:
    tn, fp, fn, tp = confusion_matrix(true_labels, preds).ravel()
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
except Exception:
    specificity, fpr = float('nan'), float('nan')

try:
    auc = roc_auc_score(true_labels, probs)
except Exception:
    auc = float('nan')

print("\n" + "="*28 + " Evaluation Metrics " + "="*28)
print(f"{'Accuracy':<15}: {accuracy_score(true_labels, preds):.4f}")
print(f"{'MCC':<15}: {matthews_corrcoef(true_labels, preds):.4f}")
print(f"{'Kappa':<15}: {cohen_kappa_score(true_labels, preds):.4f}")
print(f"{'Precision':<15}: {precision_score(true_labels, preds, zero_division=0):.4f}")
print(f"{'Recall':<15}: {recall_score(true_labels, preds, zero_division=0):.4f}")
print(f"{'F1 Score':<15}: {f1_score(true_labels, preds, zero_division=0):.4f}")
print(f"{'Specificity':<15}: {specificity:.4f}")
print(f"{'FPR':<15}: {fpr:.4f}")
print(f"{'AUC Score':<15}: {auc:.4f}")
print(f"{'MAE':<15}: {mean_absolute_error(true_labels, preds):.4f}")
print(f"{'MSE':<15}: {mean_squared_error(true_labels, preds):.4f}")
print("="*70)

# Save with predictions if desired
test_df.to_csv("cdl_test_with_preds.csv", index=False)



Accuracy       : 0.6658
MCC            : 0.3018
Kappa          : 0.2908
Precision      : 0.6548
Recall         : 0.4617
F1 Score       : 0.5415
Specificity    : 0.8183
FPR            : 0.1817
AUC Score      : 0.7188
MAE            : 0.3342
MSE            : 0.3342


In [3]:
def identify_conflicts(event_trace, predicted_label):
    """
    event_trace: list of strings describing the symbolic trace for a function
    predicted_label: 0 (not vulnerable) or 1 (vulnerable)
    Returns: (conflict: bool, reasons: list of strings)
    """
    reasons = []
    conflict = False

    # If model predicts not vulnerable (0), look for unsafe symbolic patterns
    if predicted_label == 0:
        # Rule 1: Assign to pointer without prior check
        for i, event in enumerate(event_trace):
            if event.startswith("assign: *"):
                # Check last 5 events for any if/condition/guard/check
                guarded = any('if:' in event_trace[j] or 'condition:' in event_trace[j] or 'check' in event_trace[j]
                              for j in range(max(0, i-5), i))
                if not guarded:
                    conflict = True
                    reasons.append(f"Unsafe pointer assignment `{event}` not preceded by a safety check, but predicted not vulnerable.")

        # Rule 2: Use of <unknown>
        for event in event_trace:
            if 'use <unknown>' in event:
                conflict = True
                reasons.append(f"Use of unknown/untrusted type in `{event}` but predicted not vulnerable.")

        # Rule 3: Direct field access (often signals privilege escalation or type confusion)
        for event in event_trace:
            if "field_access" in event:
                conflict = True
                reasons.append(f"Direct field access `{event}` could be unsafe, but predicted not vulnerable.")

        # Rule 4: Return after unsafe op (may be missing error check)
        for i, event in enumerate(event_trace):
            if event.startswith("return:"):
                unsafe_ops = any("assign: *" in e for e in event_trace[max(0, i-5):i])
                if unsafe_ops:
                    conflict = True
                    reasons.append(f"Returning after unsafe op before `{event}`, but predicted not vulnerable.")

        # Rule 5: Unsafe operation inside an else branch
        for i, event in enumerate(event_trace):
            if event == "else":
                possible_unsafe = any("assign: *" in e for e in event_trace[i:i+5])
                if possible_unsafe:
                    conflict = True
                    reasons.append("Possible unsafe write in else-branch, but predicted not vulnerable.")

    return conflict, reasons


In [4]:
import ast


test_df['event_trace_list'] = test_df['event_trace'].apply(ast.literal_eval)


In [5]:
conflict_results = test_df.apply(
    lambda row: identify_conflicts(row['event_trace_list'], row['pred_label']), axis=1
)
test_df['conflict'] = [c for c, _ in conflict_results]
test_df['conflict_reason'] = [r for _, r in conflict_results]


In [6]:
print(test_df[test_df['conflict'] == True][['input', 'event_trace', 'pred_label', 'conflict_reason']].head(5))


                                               input  \
0  static void s390x_cpu_get_id(Object *obj, Visi...   
1  static int ast2500_rambits(AspeedSDMCState *s)...   
2  static int vhdx_open(BlockDriverState *bs, QDi...   
3  void hmp_nbd_server_start(Monitor *mon, const ...   
4  int qemu_show_nic_models(const char *arg, cons...   

                                         event_trace  pred_label  \
0  ['declare S390CPU* cpu', 'assign: *cpu = S390_...           0   
1  ['switch: switch (s->ram_size >> 20) {\ncase 1...           0   
2  ['declare BDRVVHDXState* s', 'assign: *s = bs-...           0   
3  ['declare char* uri', 'assign: *uri = qdict_ge...           0   
4  ['declare int i', 'if: "!arg || strcmp(arg, \\...           0   

                                     conflict_reason  
0  [Unsafe pointer assignment `assign: *cpu = S39...  
1  [Use of unknown/untrusted type in `    use <un...  
2  [Unsafe pointer assignment `assign: *s = bs->o...  
3  [Unsafe pointer assignment `ass

In [7]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2807 entries, 0 to 2806
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   func              2807 non-null   object 
 1   func_cleaned      2807 non-null   object 
 2   project           2807 non-null   object 
 3   label             2807 non-null   bool   
 4   graph_path        2807 non-null   object 
 5   event_trace       2807 non-null   object 
 6   input             2807 non-null   object 
 7   pred_label        2807 non-null   int64  
 8   pred_prob         2807 non-null   float32
 9   event_trace_list  2807 non-null   object 
 10  conflict          2807 non-null   bool   
 11  conflict_reason   2807 non-null   object 
dtypes: bool(2), float32(1), int64(1), object(8)
memory usage: 213.9+ KB


In [9]:
test_df['conflict'].value_counts()

conflict
True     1733
False    1074
Name: count, dtype: int64

In [10]:
test_df['label'].value_counts()

label
False    1607
True     1200
Name: count, dtype: int64

In [16]:
mask = (test_df['conflict'] == True) & (test_df['label'] == 1)
num_true_conflicts = mask.sum()
print(f"Number of TRUE conflicts where model missed a real vulnerability: {num_true_conflicts}")


Number of TRUE conflicts where model missed a real vulnerability: 591


Another version

In [17]:
def identify_conflicts(event_trace, predicted_label):
    """
    Optimized: One-pass event_trace scan, all rules together, easier maintenance.
    """
    reasons = []
    conflict = False

    if predicted_label == 0:
        # Precompute guards for efficiency (track recent control/checks)
        recent_guards = [False] * len(event_trace)
        for i, event in enumerate(event_trace):
            if any(x in event for x in ['if:', 'condition:', 'check']):
                for j in range(max(0, i - 5), i):
                    recent_guards[j] = True  # Mark as guarded

        # For Rule 4: Keep a window of recent events for quick lookup
        recent_assign = [False] * len(event_trace)
        for i, event in enumerate(event_trace):
            if event.startswith("assign: *"):
                for j in range(i+1, min(len(event_trace), i+6)):
                    if event_trace[j].startswith("return:"):
                        recent_assign[j] = True  # Mark return after unsafe op

        # Track else blocks for Rule 5
        else_indices = [i for i, e in enumerate(event_trace) if e == "else"]

        # One-pass scan for all triggers
        for i, event in enumerate(event_trace):
            # Rule 1: Assign to pointer without prior check
            if event.startswith("assign: *") and not recent_guards[i]:
                conflict = True
                reasons.append(f"Unsafe pointer assignment `{event}` not preceded by a safety check, but predicted not vulnerable.")

            # Rule 2: Use of <unknown>
            if 'use <unknown>' in event:
                conflict = True
                reasons.append(f"Use of unknown/untrusted type in `{event}` but predicted not vulnerable.")

            # Rule 3: Direct field access
            if "field_access" in event:
                conflict = True
                reasons.append(f"Direct field access `{event}` could be unsafe, but predicted not vulnerable.")

            # Rule 4: Return after unsafe op
            if event.startswith("return:") and recent_assign[i]:
                conflict = True
                reasons.append(f"Returning after unsafe op before `{event}`, but predicted not vulnerable.")

        # Rule 5: Unsafe op in else branch
        for idx in else_indices:
            for j in range(idx, min(idx+5, len(event_trace))):
                if event_trace[j].startswith("assign: *"):
                    conflict = True
                    reasons.append("Possible unsafe write in else-branch, but predicted not vulnerable.")
                    break  # Only need to trigger once per else block

    return conflict, reasons


-------------------------------------

In [6]:
import pandas as pd
test_df = pd.read_csv('cdl_test_with_preds.csv')

In [9]:
def conflict_detection(event_trace, pred_label):
    """
    Refined version: Only flags real danger, minimizes field_access noise.
    """
    conflict = False
    reasons = []

    # Only proceed if model predicts not vulnerable (pred_label==0)
    if pred_label == 0:
        # Example: List of dangerous fields (customize as needed)
        dangerous_fields = {'password', 'user_ptr', 'kernel_addr'}
        assign_patterns = ['assign: *', 'assign: *(']  # pattern for pointer assignment

        for i, event in enumerate(event_trace):
            event_strip = event.strip()

            # Rule 1: Assign to pointer without guard
            if event_strip.startswith("assign: *"):
                guarded = any('if:' in event_trace[j] or 'condition:' in event_trace[j] or 'check' in event_trace[j]
                              for j in range(max(0, i-5), i))
                if not guarded:
                    conflict = True
                    reasons.append(f"Unsafe pointer assignment `{event}` not preceded by a safety check, but predicted not vulnerable.")

            # Rule 2: Use of <unknown>
            if 'use <unknown>' in event_strip:
                conflict = True
                reasons.append(f"Use of unknown/untrusted type in `{event}` but predicted not vulnerable.")

            # Rule 3: Context-aware field access (only for dangerous fields)
            if event_strip.startswith("field_access:"):
                # Extract the field name
                parts = event_strip.split(":")
                field_name = parts[-1].strip() if len(parts) > 1 else ""
                if field_name in dangerous_fields:
                    conflict = True
                    reasons.append(f"Direct access to sensitive field `{field_name}` without proper check.")
                # Optionally: look ahead for unsafe usage (pointer assignment)
                if i+1 < len(event_trace) and any(pat in event_trace[i+1] for pat in assign_patterns):
                    conflict = True
                    reasons.append(f"Field `{field_name}` is immediately used in unsafe pointer assignment.")

            # Rule 4: Return after unsafe op
            if event_strip.startswith("return:"):
                unsafe_ops = any("assign: *" in e for e in event_trace[max(0, i-5):i])
                if unsafe_ops:
                    conflict = True
                    reasons.append(f"Returning after unsafe op before `{event}`, but predicted not vulnerable.")

            # Rule 5: Unsafe operation inside an else branch (optional, often noisy)
            if event_strip == "else":
                possible_unsafe = any("assign: *" in e for e in event_trace[i:i+5])
                if possible_unsafe:
                    conflict = True
                    reasons.append("Possible unsafe write in else-branch, but predicted not vulnerable.")

    return conflict, reasons


In [8]:
import ast


test_df['event_trace_list'] = test_df['event_trace'].apply(ast.literal_eval)

In [10]:
conflict_results = test_df.apply(
    lambda row: conflict_detection(row['event_trace_list'], row['pred_label']), axis=1
)
test_df['conflict'] = [c for c, _ in conflict_results]
test_df['conflict_reason'] = [r for _, r in conflict_results]

In [11]:
test_df.info()
test_df['conflict'].value_counts()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2807 entries, 0 to 2806
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   func              2807 non-null   object 
 1   func_cleaned      2807 non-null   object 
 2   project           2807 non-null   object 
 3   label             2807 non-null   bool   
 4   graph_path        2807 non-null   object 
 5   event_trace       2807 non-null   object 
 6   input             2807 non-null   object 
 7   pred_label        2807 non-null   int64  
 8   pred_prob         2807 non-null   float64
 9   event_trace_list  2807 non-null   object 
 10  conflict          2807 non-null   bool   
 11  conflict_reason   2807 non-null   object 
dtypes: bool(2), float64(1), int64(1), object(8)
memory usage: 224.9+ KB


conflict
True     1613
False    1194
Name: count, dtype: int64

In [12]:
test_df['pred_label'].value_counts()

pred_label
0    1961
1     846
Name: count, dtype: int64

In [13]:
test_df['label'].value_counts()

label
False    1607
True     1200
Name: count, dtype: int64

In [45]:
mask = (test_df['conflict'] == False) & (test_df['label'] == True)
num_true_conflicts = mask.sum()
print(f"Number of TRUE conflicts where model not detected a real vulnerability: {num_true_conflicts}")

Number of TRUE conflicts where model not detected a real vulnerability: 646


In [15]:
import pandas as pd

# Let's say your results are stored in a DataFrame called df

# Select only false positives: conflict True but actual label is 0 (not vulnerable)
false_positives = test_df[(test_df['conflict'] == True) & (test_df['label'] == False)]

# Optional: Sort by reason or any column you like
false_positives = false_positives.sort_values(by='conflict_reason')

# Show or save the results
print(false_positives[['func', 'pred_label', 'label', 'conflict_reason', 'event_trace']])

# Save to CSV for manual analysis if needed
false_positives.to_csv('false_positive_conflicts.csv', index=False)


                                                   func  pred_label  label  \
971   int bdrv_is_allocated_above(BlockDriverState *...           0  False   
2796  void qemu_bh_update_timeout(int *timeout)\n\n{...           0  False   
325   static int vmdk_probe(const uint8_t *buf, int ...           0  False   
2043  static PCIBus *pci_get_bus_devfn(int *devfnp, ...           0  False   
297   tight_filter_gradient24(VncState *vs, uint8_t ...           0  False   
...                                                 ...         ...    ...   
1191  static int rate_get_samples (struct audio_pcm_...           0  False   
1036  static int vnc_tls_initialize(void)\n\n{\n\n  ...           0  False   
1670  static VirtIOSerialPort *find_port_by_name(cha...           0  False   
459   static void test_submit(void)\n\n{\n\n    Work...           0  False   
363   static void xen_config_cleanup_dir(char *dir)\...           0  False   

                                        conflict_reason  \
971 

In [32]:
import pandas as pd
data = pd.read_csv('false_positive_conflicts.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1142 entries, 0 to 1141
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   func              1142 non-null   object 
 1   func_cleaned      1142 non-null   object 
 2   project           1142 non-null   object 
 3   label             1142 non-null   bool   
 4   graph_path        1142 non-null   object 
 5   event_trace       1142 non-null   object 
 6   input             1142 non-null   object 
 7   pred_label        1142 non-null   int64  
 8   pred_prob         1142 non-null   float64
 9   event_trace_list  1142 non-null   object 
 10  conflict          1142 non-null   bool   
 11  conflict_reason   1142 non-null   object 
dtypes: bool(2), float64(1), int64(1), object(8)
memory usage: 91.6+ KB


In [33]:
data['event_trace'].iloc[0]

'[\'declare int result\', \'assign: result = 0\', \'use result as int\', \'literal: 0\', \'if: "secn > 0"\', \'    declare uint8_t* sp\', \'    assign: *sp = (const uint8_t *)src\', \'    use sp as uint8_t*\', \'    operator: <operator>.cast ((const uint8_t *)src)\', \'    use src as void*\', \'    declare uint8_t* dp\', \'    declare uint8_t* dpp\', \'    assign: *dp = 0\', \'    use dp as uint8_t*\', \'    literal: 0\', \'    assign: *dpp = 0\', \'    use dpp as uint8_t*\', \'    literal: 0\', \'    if: "s->bdrv_cur"\', \'        assign: dp = g_malloc(512)\', \'        use dp as uint8_t*\', \'        call: g_malloc (g_malloc(512))\', \'        literal: 512\', \'        if: "!dp || bdrv_read(s->bdrv_cur, s->secs_cur + (sec >> 5), dp, 1) < 0"\', \'            assign: result = 1\', \'            use result as int\', \'            literal: 1\', \'        else\', \'            else\', \'                assign: dpp = dp + ((sec & 31) << 4)\', \'                use dpp as uint8_t*\', \'    

In [35]:
data['conflict_reason'].iloc[0]

"['Direct field access `                    field_access: s->current` could be unsafe, but predicted not vulnerable.', 'Direct field access `                    field_access: s->secs_cur` could be unsafe, but predicted not vulnerable.', 'Direct field access `            field_access: s->bdrv_cur` could be unsafe, but predicted not vulnerable.', 'Direct field access `            field_access: s->secs_cur` could be unsafe, but predicted not vulnerable.']"

In [36]:
data['conflict'].iloc[0]

np.True_

In [46]:
# Find cases where:
# Model predicted 0 (not vulnerable), ground truth is 1 (vulnerable), conflict_refined is True
caught_model_misses = test_df[
    (test_df['pred_label'] == 0) &
    (test_df['label'] == 1) &
    (test_df['conflict'] == True)
]

# Count
num_caught = len(caught_model_misses)
print(f"Conflicts that caught model misses (False Negatives flagged): {num_caught}")

# Optionally, show a few examples
print(caught_model_misses[['func_cleaned', 'label', 'pred_label', 'conflict', 'conflict_reason']].head())


Conflicts that caught model misses (False Negatives flagged): 554
                                         func_cleaned  label  pred_label  \
2   static int vhdx_open(BlockDriverState *bs, QDi...   True           0   
4   int qemu_show_nic_models(const char *arg, cons...   True           0   
5   static void openpic_set_irq(void *opaque, int ...   True           0   
13  static CharDriverState *qemu_chr_open_fd(int f...   True           0   
19  static void ehci_detach(USBPort *port) {\nEHCI...   True           0   

    conflict                                    conflict_reason  
2       True  [Unsafe pointer assignment `assign: *s = bs->o...  
4       True  [Use of unknown/untrusted type in `use <unknow...  
5       True  [Unsafe pointer assignment `assign: *opp = opa...  
13      True  [Use of unknown/untrusted type in `use <unknow...  
19      True  [Unsafe pointer assignment `assign: *s = port-...  


In [47]:

model_misses = test_df[(test_df['pred_label'] == 0) & (test_df['label'] == 1)]
total_misses = len(model_misses)


not_caught = total_misses - num_caught
print(f"Model misses not caught by conflict detection: {not_caught}")


Model misses not caught by conflict detection: 92


In [48]:
recall = num_caught / total_misses if total_misses > 0 else 0
print(f"Recall of conflict detection for model's false negatives: {recall:.2%}")


Recall of conflict detection for model's false negatives: 85.76%


In [14]:
# Suppose your DataFrame is called df, with columns: 'conflict', 'ground_truth'
false_positives = test_df[(test_df['conflict'] == True) & (test_df['label'] == False)].shape[0]
total_actual_negatives = test_df[test_df['label'] == False].shape[0]

conflict_fpr = false_positives / total_actual_negatives if total_actual_negatives > 0 else 0
print(f"Conflict detection false positive rate: {conflict_fpr:.2%}")


Conflict detection false positive rate: 65.90%


----------------------------------------------------------------------------------

In [16]:
import re

#Define whitelists based on your analysis
SAFE_POINTER_ASSIGN_RHS = {'NULL', '&cpu->env', 'qemu_get_nic_opaque(nc)', 'arg', 'CPU(cpu)', 'to_qiv(v)', 'to_qov(v)', 'cfg_entry->reg'}
SAFE_UNKNOWN_VALUES = {'NULL', 'stderr', 'errno', 'false', 'true', 'cpu_env', 'env', 'TARGET_PAGE_MASK', 'PATH_MAX', 'EIO'}

def refined_conflict_detection(event_trace, pred_label):
    conflict = False
    reasons = []

    if pred_label == 0:
        for i, event in enumerate(event_trace):
            event_strip = event.strip()

            # Refined Unsafe Pointer Assignment Rule
            if event_strip.startswith("assign: *"):
                # Extract RHS value
                m = re.search(r"assign: \*.*= (.+)", event_strip)
                rhs = m.group(1).strip() if m else ""
                if rhs not in SAFE_POINTER_ASSIGN_RHS:
                    # Only flag if not whitelisted
                    guarded = any('if:' in event_trace[j] or 'check' in event_trace[j] for j in range(max(0, i-5), i))
                    if not guarded:
                        conflict = True
                        reasons.append(f"Unsafe pointer assignment `{event}` not preceded by a safety check, but predicted not vulnerable.")

            # Refined Use of Unknown/Untrusted Type Rule
            if 'use <unknown>' in event_strip:
                m = re.search(r"use <unknown> ([^ ]+)", event_strip)
                unknown_val = m.group(1).strip() if m else ""
                if unknown_val not in SAFE_UNKNOWN_VALUES:
                    conflict = True
                    reasons.append(f"Use of unknown/untrusted type in `{event}` but predicted not vulnerable.")

    return conflict, reasons


In [17]:
conflict_results = test_df.apply(
    lambda row: refined_conflict_detection(row['event_trace_list'], row['pred_label']), axis=1
)
test_df['conflict'] = [c for c, _ in conflict_results]
test_df['conflict_reason'] = [r for _, r in conflict_results]

In [18]:
test_df.info()
test_df['conflict'].value_counts()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2807 entries, 0 to 2806
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   func              2807 non-null   object 
 1   func_cleaned      2807 non-null   object 
 2   project           2807 non-null   object 
 3   label             2807 non-null   bool   
 4   graph_path        2807 non-null   object 
 5   event_trace       2807 non-null   object 
 6   input             2807 non-null   object 
 7   pred_label        2807 non-null   int64  
 8   pred_prob         2807 non-null   float64
 9   event_trace_list  2807 non-null   object 
 10  conflict          2807 non-null   bool   
 11  conflict_reason   2807 non-null   object 
dtypes: bool(2), float64(1), int64(1), object(8)
memory usage: 224.9+ KB


conflict
True     1508
False    1299
Name: count, dtype: int64

In [19]:
# Suppose your DataFrame is called df, with columns: 'conflict', 'ground_truth'
false_positives = test_df[(test_df['conflict'] == True) & (test_df['label'] == False)].shape[0]
total_actual_negatives = test_df[test_df['label'] == False].shape[0]

conflict_fpr = false_positives / total_actual_negatives if total_actual_negatives > 0 else 0
print(f"Conflict detection false positive rate: {conflict_fpr:.2%}")

Conflict detection false positive rate: 61.05%


In [20]:
# True Positives for conflict detection
tp_conflict = test_df[(test_df['label'] == True) & (test_df['conflict'] == True)].shape[0]

# Model false negatives rescued by conflict detection
fn_model_and_conflict = test_df[
    (test_df['label'] == True) &
    (test_df['pred_label'] == 0) &
    (test_df['conflict'] == True)
].shape[0]

print(f"True Positives (conflict): {tp_conflict}")
print(f"Model missed but conflict detected: {fn_model_and_conflict}")


True Positives (conflict): 527
Model missed but conflict detected: 527


-------------------------------

In [21]:
import re

# Whitelist based on prior analysis
SAFE_POINTER_ASSIGN_RHS = {
    'NULL', '&cpu->env', 'qemu_get_nic_opaque(nc)', 'arg', 'CPU(cpu)',
    'to_qiv(v)', 'to_qov(v)', 'cfg_entry->reg'
}
SAFE_UNKNOWN_VALUES = {
    'NULL', 'stderr', 'errno', 'false', 'true', 'cpu_env', 'env', 'TARGET_PAGE_MASK', 'PATH_MAX', 'EIO'
}
DANGEROUS_FUNCS = {'strcpy', 'strcat', 'sprintf', 'vsprintf', 'memcpy', 'gets', 'system'}

def refined_conflict_detection(event_trace, pred_label):
    """
    Context-aware conflict detection tuned for your event_trace style.
    Only flags truly suspicious patterns and avoids common safe idioms.
    """
    conflict = False
    reasons = []

    # Convert string-list to Python list if needed
    if isinstance(event_trace, str):
        try:
            event_trace = eval(event_trace)
        except Exception:
            pass

    if pred_label == 0:
        for i, event in enumerate(event_trace):
            event_strip = event.strip()

            # Unsafe Pointer Assignment (with whitelist)
            if event_strip.startswith("assign: *"):
                m = re.search(r"assign: \*.*= (.+)", event_strip)
                rhs = m.group(1).strip() if m else ""
                # Only flag if not whitelisted
                if rhs not in SAFE_POINTER_ASSIGN_RHS:
                    guarded = any(
                        'if:' in event_trace[j] or 'check' in event_trace[j]
                        for j in range(max(0, i-5), i)
                    )
                    if not guarded:
                        conflict = True
                        reasons.append(f"Unsafe pointer assignment `{event}` not preceded by a safety check, but predicted not vulnerable.")

            # Use of <unknown> Value (with whitelist)
            if 'use <unknown>' in event_strip:
                m = re.search(r"use <unknown> ([^ ]+)", event_strip)
                unknown_val = m.group(1).strip() if m else ""
                if unknown_val not in SAFE_UNKNOWN_VALUES:
                    conflict = True
                    reasons.append(f"Use of unknown/untrusted type in `{event}` but predicted not vulnerable.")

            # Dangerous Buffer/Function Use (CWE-119, CWE-120, CWE-242, CWE-676)
            if event_strip.startswith("call:"):
                func_call = event_strip.split(":")[1].strip().split("(")[0]
                if func_call in DANGEROUS_FUNCS:
                    guarded = any(
                        'if:' in event_trace[j] and (
                            'size' in event_trace[j] or 'length' in event_trace[j] or 'bound' in event_trace[j]
                        )
                        for j in range(max(0, i-5), i)
                    )
                    if not guarded:
                        conflict = True
                        reasons.append(f"Dangerous call `{func_call}` without prior bounds/validation check.")

            # Null Pointer Dereference
            if event_strip.startswith("dereference:") or event_strip.startswith("assign: *"):
                # Null check for assigned pointer (basic)
                m = re.search(r"assign: \*([a-zA-Z0-9_]+)", event_strip)
                var = m.group(1) if m else ""
                if var and not any(
                    'if:' in event_trace[j] and (
                        f"{var} != NULL" in event_trace[j] or f"{var} == NULL" in event_trace[j]
                    )
                    for j in range(max(0, i-5), i)
                ):
                    conflict = True
                    reasons.append(f"Possible null pointer dereference of `{var}`.")

            # Out-of-bounds Array Access
            if "assign:" in event_strip and "[" in event_strip and "]" in event_strip:
                # crude index extraction: array[idx]
                m = re.search(r"\[([^\]]+)\]", event_strip)
                index_var = m.group(1) if m else ""
                if index_var and not any(
                    'if:' in event_trace[j] and (
                        f"{index_var} <" in event_trace[j] or f"{index_var} >=" in event_trace[j]
                    )
                    for j in range(max(0, i-5), i)
                ):
                    conflict = True
                    reasons.append(f"Possible out-of-bounds array access with index `{index_var}`.")

    return conflict, reasons


In [22]:
conflict_results = test_df.apply(
    lambda row: refined_conflict_detection(row['event_trace_list'], row['pred_label']), axis=1
)
test_df['conflict'] = [c for c, _ in conflict_results]
test_df['conflict_reason'] = [r for _, r in conflict_results]

In [23]:
test_df.info()
test_df['conflict'].value_counts()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2807 entries, 0 to 2806
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   func              2807 non-null   object 
 1   func_cleaned      2807 non-null   object 
 2   project           2807 non-null   object 
 3   label             2807 non-null   bool   
 4   graph_path        2807 non-null   object 
 5   event_trace       2807 non-null   object 
 6   input             2807 non-null   object 
 7   pred_label        2807 non-null   int64  
 8   pred_prob         2807 non-null   float64
 9   event_trace_list  2807 non-null   object 
 10  conflict          2807 non-null   bool   
 11  conflict_reason   2807 non-null   object 
dtypes: bool(2), float64(1), int64(1), object(8)
memory usage: 224.9+ KB


conflict
True     1566
False    1241
Name: count, dtype: int64

In [24]:
test_df['conflict'].value_counts()
# Suppose your DataFrame is called df, with columns: 'conflict', 'ground_truth'
false_positives = test_df[(test_df['conflict'] == True) & (test_df['label'] == False)].shape[0]
total_actual_negatives = test_df[test_df['label'] == False].shape[0]

conflict_fpr = false_positives / total_actual_negatives if total_actual_negatives > 0 else 0
print(f"Conflict detection false positive rate: {conflict_fpr:.2%}")

Conflict detection false positive rate: 63.53%


In [25]:
# True Positives for conflict detection
tp_conflict = test_df[(test_df['label'] == True) & (test_df['conflict'] == True)].shape[0]

# Model false negatives rescued by conflict detection
fn_model_and_conflict = test_df[
    (test_df['label'] == True) &
    (test_df['pred_label'] == 0) &
    (test_df['conflict'] == True)
].shape[0]

print(f"True Positives (conflict): {tp_conflict}")
print(f"Model missed but conflict detected: {fn_model_and_conflict}")

True Positives (conflict): 545
Model missed but conflict detected: 545


------------------------------

In [26]:
import re

SAFE_POINTER_ASSIGN_RHS = {
    'NULL', '&cpu->env', 'qemu_get_nic_opaque(nc)', 'arg', 'CPU(cpu)',
    'to_qiv(v)', 'to_qov(v)', 'cfg_entry->reg'
}
SAFE_UNKNOWN_VALUES = {
    'NULL', 'stderr', 'errno', 'false', 'true', 'cpu_env', 'env', 'TARGET_PAGE_MASK', 'PATH_MAX', 'EIO'
}
DANGEROUS_FUNCS = {'strcpy', 'strcat', 'sprintf', 'vsprintf', 'memcpy', 'gets', 'system'}

def profile_conflict_detection(event_trace, pred_label):
    conflict = False
    reasons = []
    rule_labels = []

    if isinstance(event_trace, str):
        try:
            event_trace = eval(event_trace)
        except Exception:
            pass

    if pred_label == 0:
        for i, event in enumerate(event_trace):
            event_strip = event.strip()

            # 1. Unsafe Pointer Assignment (with whitelist)
            if event_strip.startswith("assign: *"):
                m = re.search(r"assign: \*.*= (.+)", event_strip)
                rhs = m.group(1).strip() if m else ""
                if rhs not in SAFE_POINTER_ASSIGN_RHS:
                    guarded = any(
                        'if:' in event_trace[j] or 'check' in event_trace[j]
                        for j in range(max(0, i-5), i)
                    )
                    if not guarded:
                        conflict = True
                        reasons.append(f"Unsafe pointer assignment `{event}` not preceded by a safety check, but predicted not vulnerable.")
                        rule_labels.append("unsafe_pointer_assign")

            # 2. Use of <unknown> Value (with whitelist)
            if 'use <unknown>' in event_strip:
                m = re.search(r"use <unknown> ([^ ]+)", event_strip)
                unknown_val = m.group(1).strip() if m else ""
                if unknown_val not in SAFE_UNKNOWN_VALUES:
                    conflict = True
                    reasons.append(f"Use of unknown/untrusted type in `{event}` but predicted not vulnerable.")
                    rule_labels.append("use_unknown_value")

            # 3. Dangerous Buffer/Function Use
            if event_strip.startswith("call:"):
                func_call = event_strip.split(":")[1].strip().split("(")[0]
                if func_call in DANGEROUS_FUNCS:
                    guarded = any(
                        'if:' in event_trace[j] and (
                            'size' in event_trace[j] or 'length' in event_trace[j] or 'bound' in event_trace[j]
                        )
                        for j in range(max(0, i-5), i)
                    )
                    if not guarded:
                        conflict = True
                        reasons.append(f"Dangerous call `{func_call}` without prior bounds/validation check.")
                        rule_labels.append("dangerous_func_call")

            # 4. Null Pointer Dereference
            if event_strip.startswith("dereference:") or event_strip.startswith("assign: *"):
                m = re.search(r"assign: \*([a-zA-Z0-9_]+)", event_strip)
                var = m.group(1) if m else ""
                if var and not any(
                    'if:' in event_trace[j] and (
                        f"{var} != NULL" in event_trace[j] or f"{var} == NULL" in event_trace[j]
                    )
                    for j in range(max(0, i-5), i)
                ):
                    conflict = True
                    reasons.append(f"Possible null pointer dereference of `{var}`.")
                    rule_labels.append("null_pointer_deref")

            # 5. Out-of-bounds Array Access
            if "assign:" in event_strip and "[" in event_strip and "]" in event_strip:
                m = re.search(r"\[([^\]]+)\]", event_strip)
                index_var = m.group(1) if m else ""
                if index_var and not any(
                    'if:' in event_trace[j] and (
                        f"{index_var} <" in event_trace[j] or f"{index_var} >=" in event_trace[j]
                    )
                    for j in range(max(0, i-5), i)
                ):
                    conflict = True
                    reasons.append(f"Possible out-of-bounds array access with index `{index_var}`.")
                    rule_labels.append("oob_array_access")

    return conflict, reasons, list(set(rule_labels))  # dedupe rule labels



In [27]:
import pandas as pd
from collections import Counter

# Assume df is your DataFrame
rule_fp_counter = Counter()

for idx, row in test_df.iterrows():
    if not row['label']:  # Only look at false positives
        conflict, reasons, rule_labels = profile_conflict_detection(row['event_trace'], row['pred_label'])
        if conflict:
            for rule in rule_labels:
                rule_fp_counter[rule] += 1

print("False positives per rule:", rule_fp_counter)


False positives per rule: Counter({'use_unknown_value': 827, 'null_pointer_deref': 582, 'unsafe_pointer_assign': 514, 'oob_array_access': 283})


In [28]:
import pandas as pd

# Assume df is your DataFrame and you already have 'label', 'event_trace', 'conflict_reason' columns
# Let's add a 'rule_labels' column using your profiling function

def get_rule_labels(event_trace, pred_label):
    # Use your profile_conflict_detection function above
    _, _, rule_labels = profile_conflict_detection(event_trace, pred_label)
    return rule_labels

# Apply to get rule labels for each row
test_df['rule_labels'] = test_df.apply(lambda row: get_rule_labels(row['event_trace'], row['pred_label']), axis=1)

# Filter false positives where 'use_unknown_value' rule fired
mask = (test_df['label'] == False) & (test_df['conflict'] == True) & (test_df['rule_labels'].apply(lambda x: 'use_unknown_value' in x))

# Print a sample (e.g., 20)
sample_fp = test_df[mask].sample(20, random_state=42) if mask.sum() > 20 else test_df[mask]

for idx, row in sample_fp.iterrows():
    print(f"\nRow: {idx}")
    print(f"Event Trace: {row['event_trace']}")
    print(f"Conflict Reason: {row['conflict_reason']}")
    print('-' * 80)



Row: 614
Event Trace: ['declare X86CPU* cpu', 'assign: *cpu = X86_CPU(obj)', 'use cpu as X86CPU*', 'call: X86_CPU (X86_CPU(obj))', 'use obj as Object*', 'declare CPUX86State* env', 'assign: *env = &cpu->env', 'use env as CPUX86State*', 'operator: <operator>.addressOf (&cpu->env)', 'field_access: cpu->env', 'use cpu as X86CPU*', 'field: env', 'declare KVMState* s', 'assign: *s = kvm_state', 'use s as KVMState*', 'use <unknown> kvm_state as ANY', 'assign: cpu->host_features = true', 'field_access: cpu->host_features', 'use cpu as X86CPU*', 'field: host_features', 'use <unknown> true as ANY', 'if: "kvm_enabled()"', '    assign: env->cpuid_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX)', '    field_access: env->cpuid_level', '    use env as CPUX86State*', '    field: cpuid_level', '    call: kvm_arch_get_supported_cpuid (kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX))', '    use s as KVMState*', '    literal: 0x0', '    literal: 0', '    use <unknown> R_EAX as ANY', '    assign: e

In [35]:
BENIGN_SYMBOLS = {
    # Extend as you analyze more
    "IO_MEM_RAM", "KVM_SET_MSRS", "ELF_MACHINE", "stderr", "PROTOCOLS",
    "AFM_free", "EINVAL", "NULL", "DISAS_TB_JUMP", "GOOD", "VIRTIO_F_VERSION_1",
    "SELF_ANNOUNCE_ROUNDS", "USB_SPEED_LOW", "USB_SPEED_FULL", "USB_SPEED_HIGH",
    "PORTSC_PED", "PLS_U0", "PORTSC_PLS", "PORTSC_PR", "PORTSC_PRC", "MAX_IDE_BUS",
    "MAX_IDE_DEVS", "MAX_FD", "main_cpu_reset", "TARGET_PAGE_SIZE", "cpu_irq_handler",
    "IF_PFLASH", "curl_read_cb", "VIRTIO_NET_S_LINK_UP", "MAC_TABLE_ENTRIES",
    "MAX_VLAN", "GIC_FDT_IRQ_FLAGS_EDGE_LO_HI", "GIC_FDT_IRQ_PPI_CPU_START",
    "GIC_FDT_IRQ_PPI_CPU_WIDTH", "GIC_FDT_IRQ_TYPE_PPI", "CURL_NUM_ACB",
    "CURL_NUM_STATES", "SPR_BOOKE_TSR", "TSR_DIS", "SPR_BOOKE_DECAR",
    "TCR_ARE", "SPR_BOOKE_TCR"
    # ... and others as you see them
}

def is_benign_unknown(event):
    # Event like 'use <unknown> SYMBOL as ANY'
    for sym in BENIGN_SYMBOLS:
        if f"<unknown> {sym}" in event:
            return True
    return False

def ref_conflict_detection(event_trace, pred_label):
    conflict = False
    reasons = []

    if pred_label == 0:
        for i, event in enumerate(event_trace):
            event_strip = event.strip()

            # 1. Only trigger on 'unknown' if not benign
            if 'use <unknown>' in event_strip and not is_benign_unknown(event_strip):
                conflict = True
                reasons.append(f"Use of untrusted or dynamic unknown type in `{event_strip}` but predicted not vulnerable.")

            # 2. Pointer assignment without prior check, but skip for whitelisted allocation/init functions
            if event_strip.startswith("assign: *"):
                # If assignment is from an alloc/init function, relax the rule
                safe_sources = ["g_new", "g_malloc", "g_malloc0", "DO_UPCAST", "cpu_init", "qdev_create", "qemu_allocate_irqs"]
                is_safe = any(src in event_trace[i] for src in safe_sources)
                guarded = any('if:' in event_trace[j] or 'condition:' in event_trace[j] or 'check' in event_trace[j]
                              for j in range(max(0, i-5), i))
                if not is_safe and not guarded:
                    conflict = True
                    reasons.append(f"Unsafe pointer assignment `{event_strip}` not preceded by a safety check.")

            # 3. OOB: Only flag for non-literals or user-controlled indices
            if 'Possible out-of-bounds array access' in event_strip:
                # You may parse the index and check if it's a literal, else fire
                pass  # implement only for dynamic/user indices

            # 4. Null deref: Only if dereferenced with no prior check
            if 'Possible null pointer dereference' in event_strip:
                # Only flag if field_access/use occurs and no guard
                pass  # keep this, but be sure to check for guarding

    return conflict, reasons


In [39]:
conflict_results_up = test_df.apply(
    lambda row: ref_conflict_detection(row['event_trace_list'], row['pred_label']), axis=1
)
test_df['conflict_up'] = [c for c, _ in conflict_results_up]
test_df['conflict_reason_up'] = [r for _, r in conflict_results_up]

In [40]:
test_df['conflict_up'].value_counts()

conflict_up
True     1561
False    1246
Name: count, dtype: int64

In [38]:
test_df['conflict'].value_counts()
# Suppose your DataFrame is called df, with columns: 'conflict', 'ground_truth'
false_positives = test_df[(test_df['conflict'] == True) & (test_df['label'] == False)].shape[0]
total_actual_negatives = test_df[test_df['label'] == False].shape[0]

conflict_fpr = false_positives / total_actual_negatives if total_actual_negatives > 0 else 0
print(f"Conflict detection false positive rate: {conflict_fpr:.2%}")

Conflict detection false positive rate: 63.85%


In [41]:
import pandas as pd
import random

# Load your false positive CSV
fp_df = pd.read_csv('false_positive_conflicts.csv')

# If you have a specific column for the conflict rule, use that. If not, extract rule names from the 'conflict_reason'
def extract_rule_names(conflict_reason):
    # This is a rough heuristic, adapt if your formatting is different
    if isinstance(conflict_reason, str):
        # Try to extract the first phrase (rule type)
        if "pointer assignment" in conflict_reason:
            return "unsafe_pointer_assign"
        elif "null pointer dereference" in conflict_reason:
            return "null_pointer_deref"
        elif "out-of-bounds array access" in conflict_reason:
            return "oob_array_access"
        elif "unknown/untrusted type" in conflict_reason:
            return "use_unknown_value"
        else:
            return "other"
    return "none"

# If you want to sample 20 FPs
sampled_fp = fp_df.sample(n=20, random_state=42) if len(fp_df) > 20 else fp_df

for idx, row in sampled_fp.iterrows():
    print(f"Row: {idx}")
    print(f"Rule Type(s): {[extract_rule_names(cr) for cr in eval(row['conflict_reason'])] if isinstance(row['conflict_reason'], str) and row['conflict_reason'].startswith('[') else extract_rule_names(row['conflict_reason'])}")
    print(f"Conflict Reason:\n{row['conflict_reason']}")
    print(f"Event Trace:\n{row['event_trace']}")
    print("-" * 80)


Row: 576
Rule Type(s): ['use_unknown_value', 'use_unknown_value']
Conflict Reason:
['Use of unknown/untrusted type in `        use <unknown> true as ANY` but predicted not vulnerable.', 'Use of unknown/untrusted type in `use <unknown> false as ANY` but predicted not vulnerable.']
Event Trace:
['declare uint64_t i', 'declare int sectors_per_bit', 'assign: sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta)', 'use sectors_per_bit as int', 'operator: <operator>.shiftLeft (1 << hbitmap_granularity(bitmap->meta))', 'literal: 1', 'call: hbitmap_granularity (hbitmap_granularity(bitmap->meta))', 'field_access: bitmap->meta', 'use bitmap as BdrvDirtyBitmap*', 'field: meta', 'for: for (i = sector;i < sector + nb_sectors;i += sectors_per_bit)', '    if: "hbitmap_get(bitmap->meta, i)"', '        return: return true;', '        use <unknown> true as ANY', 'return: return false;', 'use <unknown> false as ANY', 'declare ANY false', 'declare ANY true']
--------------------------------------------

In [42]:
import pandas as pd
import re
from sklearn.preprocessing import MultiLabelBinarizer

SAFE_MACROS = [
    "NULL", "errno", "stderr", "ELF_MACHINE", "KERNEL_LOAD_ADDR", "MO_64", "MO_32",
    "MEMORY_DEVICE_INFO_KIND_DIMM", "SOCKET_ADDRESS_LEGACY_KIND_INET", "SOCKET_ADDRESS_LEGACY_KIND_UNIX",
    "SOCKET_ADDRESS_LEGACY_KIND_FD", "SOCKET_ADDRESS_LEGACY_KIND_VSOCK", "PIIX4_CPU_HOTPLUG_IO_BASE",
    "GPE_LEN", "GPE_BASE", "INDEX_op_end", "OPC_MAX_SIZE", "ACCESS_CODE"
]
SAFE_UNKNOWN_RE = re.compile(r"use <unknown> (" + "|".join(SAFE_MACROS) + r") as ANY", re.I)

def feature_row(row):
    rule_types = row['rule_type'] if isinstance(row['rule_type'], list) else []
    conflict_reason = str(row['conflict_reason'])
    event_trace = str(row['event_trace'])
    # Basic features
    return {
        "num_rules": len(rule_types),
        "use_unknown_safe": bool(SAFE_UNKNOWN_RE.search(conflict_reason)),
        "pointer_assign_from_address_of": bool(re.search(r"assign: \*\w+ = &", event_trace)),
        "pointer_assign_from_null": bool(re.search(r"assign: \*\w+ = NULL", event_trace)),
        "has_check_nearby": any('if' in e or 'check' in e or 'assert' in e for e in event_trace.split(';')),
        "reason_len": len(conflict_reason.split()),
        "trace_len": len(event_trace.split(';')),
        # One-hot rule types handled below
        "rule_type": rule_types,
    }

# Load your conflicts into a DataFrame, e.g., fp_df
features = fp_df.apply(feature_row, axis=1, result_type='expand')

# One-hot encode rule types
mlb = MultiLabelBinarizer()
rule_types_onehot = mlb.fit_transform(features['rule_type'])
rule_types_df = pd.DataFrame(rule_types_onehot, columns=mlb.classes_)
features = pd.concat([features.drop('rule_type', axis=1), rule_types_df], axis=1)


KeyError: 'rule_type'

In [43]:
print(fp_df.columns)


Index(['func', 'func_cleaned', 'project', 'label', 'graph_path', 'event_trace',
       'input', 'pred_label', 'pred_prob', 'event_trace_list', 'conflict',
       'conflict_reason'],
      dtype='object')


In [44]:
import ast

def safe_eval(val):
    """Safely eval if string-repr of list, else return as-is."""
    if isinstance(val, list):
        return val
    try:
        return ast.literal_eval(val)
    except Exception:
        return [val]  # fallback

# Randomly sample 10 FPs (assuming conflict == True and pred_label == 0)
sample = fp_df[(fp_df['conflict'] == True) & (fp_df['pred_label'] == 0)].sample(10, random_state=42)

for idx, row in sample.iterrows():
    conflict_reason = safe_eval(row['conflict_reason'])
    event_trace = safe_eval(row['event_trace_list'])

    print(f"\nRow: {idx}")
    print("Conflict Reason(s):")
    for r in conflict_reason:
        print("  -", r)
    print("Event Trace:")
    for e in event_trace:
        print("  -", e)
    print("="*60)



Row: 576
Conflict Reason(s):
  - Use of unknown/untrusted type in `        use <unknown> true as ANY` but predicted not vulnerable.
  - Use of unknown/untrusted type in `use <unknown> false as ANY` but predicted not vulnerable.
Event Trace:
  - declare uint64_t i
  - declare int sectors_per_bit
  - assign: sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta)
  - use sectors_per_bit as int
  - operator: <operator>.shiftLeft (1 << hbitmap_granularity(bitmap->meta))
  - literal: 1
  - call: hbitmap_granularity (hbitmap_granularity(bitmap->meta))
  - field_access: bitmap->meta
  - use bitmap as BdrvDirtyBitmap*
  - field: meta
  - for: for (i = sector;i < sector + nb_sectors;i += sectors_per_bit)
  -     if: "hbitmap_get(bitmap->meta, i)"
  -         return: return true;
  -         use <unknown> true as ANY
  - return: return false;
  - use <unknown> false as ANY
  - declare ANY false
  - declare ANY true

Row: 312
Conflict Reason(s):
  - Unsafe pointer assignment `assign: *reg = cfg

In [45]:
from collections import Counter
import re

unknown_type_pattern = re.compile(r"use <unknown> ([\w\->*]+) as ANY")
unsafe_pointer_pattern = re.compile(r"Unsafe pointer assignment")

unknown_types = []
pointer_fp = 0
total_fp = 0

for idx, row in fp_df.iterrows():
    reasons = row['conflict_reason']
    # If it's a string repr, use ast.literal_eval; if list, use directly
    if isinstance(reasons, str) and reasons.startswith("["):
        import ast
        reasons = ast.literal_eval(reasons)
    elif isinstance(reasons, str):
        reasons = [reasons]
    for r in reasons:
        total_fp += 1
        # Count pointer assignment overflags
        if unsafe_pointer_pattern.search(r):
            pointer_fp += 1
        # Extract unknown/untrusted types
        m = unknown_type_pattern.search(r)
        if m:
            unknown_types.append(m.group(1))

print("Total False Positives (conflicts):", total_fp)
print("Overflagged Pointer Assignments:", pointer_fp)
print("Unknown/Untrusted Types (top 10):")
for t, count in Counter(unknown_types).most_common(10):
    print(f"  {t:30s} : {count} times")


Total False Positives (conflicts): 8674
Overflagged Pointer Assignments: 995
Unknown/Untrusted Types (top 10):
  NULL                           : 694 times
  stderr                         : 135 times
  true                           : 133 times
  false                          : 110 times
  error_abort                    : 90 times
  cpu_env                        : 83 times
  EINVAL                         : 75 times
  cpu_gpr                        : 66 times
  errno                          : 60 times
  env                            : 58 times
