# Scikit-learn Fuzzing Analysis

Explora cómo se comportan los baseline tests y el fuzzing generado para cada instancia de `scikit-learn`.

In [1]:
from pathlib import Path
import json
import pandas as pd
from IPython.display import display

# Directory whose result JSON files are loaded by default in the cells below.
# NOTE(review): absolute, machine-specific path — adjust it when running elsewhere.
RESULTS_DIR = Path("/fs/nexus-scratch/ihbas/verifier_harness/results")
# Bare trailing expression: the notebook echoes the configured path as cell output.
RESULTS_DIR

PosixPath('/fs/nexus-scratch/ihbas/verifier_harness/results')

In [2]:
def load_scikit_learn_results(results_dir: 'Path | None' = None) -> pd.DataFrame:
    """Collect every scikit-learn result JSON in a directory into one DataFrame.

    Each file matching ``scikit-learn__scikit-learn-*.json`` contributes one
    row combining top-level fields (``instance_id``, ``verdict``) with values
    read from its ``fuzzing`` section and the nested ``details`` entries
    (``baseline_tests`` / ``fuzzing_tests``).

    Args:
        results_dir: Directory to scan. When omitted, the module-level
            ``RESULTS_DIR`` is looked up at call time, so re-running the
            configuration cell takes effect without redefining this function.

    Returns:
        A DataFrame sorted by ``instance_id`` with a fresh integer index. When
        no file matches, the frame is empty but still carries every column, so
        column-based filters downstream do not raise ``KeyError``.
    """
    if results_dir is None:
        results_dir = RESULTS_DIR

    columns = [
        'instance_id',
        'result_path',
        'baseline_tests_passed',
        'baseline_returncode',
        'baseline_test_count',
        'baseline_coverage',
        'fuzzing_tests_generated',
        'fuzzing_passed',
        'fuzzing_returncode',
        'fuzzing_failures',
        'combined_coverage',
        'divergences_detected',
        'overall_verdict',
    ]

    rows = []
    for path in sorted(Path(results_dir).glob('scikit-learn__scikit-learn-*.json')):
        # Explicit UTF-8: result files may contain non-ASCII text (emoji verdicts).
        data = json.loads(path.read_text(encoding='utf-8'))
        # `or {}` also covers keys that are present but null in the JSON;
        # `.get(key, {})` would hand back None and break the next lookup.
        fuzzing = data.get('fuzzing') or {}
        details = fuzzing.get('details') or {}
        baseline_details = details.get('baseline_tests') or {}
        fuzz_details = details.get('fuzzing_tests') or {}

        rows.append({
            'instance_id': data.get('instance_id'),
            'result_path': str(path),
            'baseline_tests_passed': baseline_details.get('passed'),
            'baseline_returncode': baseline_details.get('returncode'),
            'baseline_test_count': baseline_details.get('count'),
            'baseline_coverage': fuzzing.get('baseline_coverage'),
            'fuzzing_tests_generated': fuzzing.get('tests_generated'),
            'fuzzing_passed': fuzzing.get('fuzzing_passed'),
            'fuzzing_returncode': fuzz_details.get('returncode'),
            'fuzzing_failures': len(fuzz_details.get('test_failures') or []),
            'combined_coverage': fuzzing.get('combined_coverage'),
            'divergences_detected': fuzzing.get('divergences_detected'),
            'overall_verdict': data.get('verdict'),
        })

    # Passing `columns` keeps the schema stable even when `rows` is empty.
    df = pd.DataFrame(rows, columns=columns)
    if not df.empty:
        df = df.sort_values('instance_id').reset_index(drop=True)
    return df

# Load the results once; the summary and inspection cells below reuse `results_df`.
results_df = load_scikit_learn_results()
print(f'Total instancias analizadas: {len(results_df)}')
# Bare trailing expression: the notebook renders the first rows as the cell output.
results_df.head()


Total instancias analizadas: 10


Unnamed: 0,instance_id,result_path,baseline_tests_passed,baseline_returncode,baseline_test_count,baseline_coverage,fuzzing_tests_generated,fuzzing_passed,fuzzing_returncode,fuzzing_failures,combined_coverage,divergences_detected,overall_verdict
0,scikit-learn__scikit-learn-10297,/fs/nexus-scratch/ihbas/verifier_harness/resul...,True,0,29,20.0,3,True,0,0,20.0,False,⚠️ WARNING
1,scikit-learn__scikit-learn-10844,/fs/nexus-scratch/ihbas/verifier_harness/resul...,True,0,17,66.666667,3,True,0,0,66.666667,False,✅ EXCELLENT
2,scikit-learn__scikit-learn-10908,/fs/nexus-scratch/ihbas/verifier_harness/resul...,True,0,48,66.666667,2,True,0,0,66.666667,False,✅ EXCELLENT
3,scikit-learn__scikit-learn-11310,/fs/nexus-scratch/ihbas/verifier_harness/resul...,True,0,51,64.285714,2,True,0,0,64.285714,False,✅ EXCELLENT
4,scikit-learn__scikit-learn-11578,/fs/nexus-scratch/ihbas/verifier_harness/resul...,True,0,94,100.0,2,True,0,0,100.0,False,✅ EXCELLENT


In [3]:
def summarize_results(df: pd.DataFrame) -> pd.DataFrame:
    """Condense per-instance results into a one-row table of counts.

    Args:
        df: Frame with the columns ``baseline_tests_passed``,
            ``fuzzing_passed`` and ``divergences_detected``.

    Returns:
        An empty DataFrame when ``df`` is empty; otherwise a single row with
        ``total_instances``, ``baseline_failures``, ``fuzzing_failures`` and
        ``divergences_detected``.
    """
    if df.empty:
        return pd.DataFrame()

    # Only an explicit False counts as a failure; missing values do not.
    baseline_failed = df['baseline_tests_passed'].eq(False)
    fuzzing_failed = df['fuzzing_passed'].eq(False)
    # Missing divergence flags are treated as "no divergence".
    divergence_flags = df['divergences_detected'].fillna(False)

    counts = {
        'total_instances': len(df),
        'baseline_failures': int(baseline_failed.sum()),
        'fuzzing_failures': int(fuzzing_failed.sum()),
        'divergences_detected': int(divergence_flags.sum()),
    }
    return pd.DataFrame([counts])

summary_df = summarize_results(results_df)
display(summary_df)

# An instance is flagged when either stage explicitly reports False.
fuzzing_failed_mask = results_df['fuzzing_passed'] == False
baseline_failed_mask = results_df['baseline_tests_passed'] == False
failing = results_df.loc[fuzzing_failed_mask | baseline_failed_mask]
print(f'Instancias con problemas: {len(failing)}')

# Columns that help triage a flagged instance.
failing_columns = [
    'instance_id',
    'baseline_tests_passed',
    'baseline_returncode',
    'fuzzing_passed',
    'fuzzing_returncode',
    'fuzzing_failures',
    'combined_coverage',
]
display(failing[failing_columns])


Unnamed: 0,total_instances,baseline_failures,fuzzing_failures,divergences_detected
0,10,0,2,0


Instancias con problemas: 2


Unnamed: 0,instance_id,baseline_tests_passed,baseline_returncode,fuzzing_passed,fuzzing_returncode,fuzzing_failures,combined_coverage
6,scikit-learn__scikit-learn-12682,True,0,False,-9,0,0.0
8,scikit-learn__scikit-learn-13124,True,0,False,2,0,100.0


In [4]:
def inspect_instance(instance_id: str, df: 'pd.DataFrame | None' = None, show_test_code: bool = False, max_lines: int = 200) -> dict:
    """Print a report for one instance and return its fuzzing ``details`` dict.

    The row for ``instance_id`` supplies ``result_path``; that JSON file is
    re-read and its baseline/fuzzing status, captured test failures and the
    generated test file path are printed.

    Args:
        instance_id: Value to match against the ``instance_id`` column.
        df: Frame with ``instance_id`` and ``result_path`` columns. When
            omitted, the module-level ``results_df`` is looked up at call time,
            so a reloaded frame is picked up without redefining this function.
        show_test_code: Also print the beginning of the generated test file.
        max_lines: Maximum number of test-file lines to print.

    Returns:
        The ``fuzzing.details`` mapping from the result file, or an empty dict
        when that section is missing or null.

    Raises:
        ValueError: If no row in ``df`` matches ``instance_id``.
    """
    if df is None:
        df = results_df

    row = df[df['instance_id'] == instance_id]
    if row.empty:
        raise ValueError(f'Instance {instance_id} not found')

    path = Path(row.iloc[0]['result_path'])
    # Explicit UTF-8: result files may contain non-ASCII text (emoji verdicts).
    data = json.loads(path.read_text(encoding='utf-8'))

    # Tolerate missing or null sections, mirroring how the loader reads these
    # files; a sparse result prints None instead of raising KeyError.
    fuzzing = data.get('fuzzing') or {}
    fuzz_details = fuzzing.get('details') or {}
    baseline_tests = fuzz_details.get('baseline_tests') or {}
    fuzzing_tests = fuzz_details.get('fuzzing_tests') or {}

    print(f'Instance: {instance_id}')
    print(f"Baseline passed: {baseline_tests.get('passed')} (returncode={baseline_tests.get('returncode')})")
    print(f"Fuzzing passed: {fuzzing.get('fuzzing_passed')} (returncode={fuzzing_tests.get('returncode')})")

    failures = fuzzing_tests.get('test_failures') or []
    print(f"Test failures captured: {len(failures)}")
    for failure in failures:
        print('--- Failure details ---')
        for key in ['test_name', 'exception_type', 'exception_message']:
            # Empty or absent fields are skipped to keep the report compact.
            if failure.get(key):
                print(f"{key}: {failure[key]}")

    generated_test_file = fuzzing_tests.get('generated_test_file')
    if generated_test_file:
        print(f"Generated test file: {generated_test_file}")
        if show_test_code:
            test_path = Path(generated_test_file)
            if test_path.exists():
                print('=== Test code preview ===')
                code_lines = test_path.read_text(encoding='utf-8').splitlines()
                print('\n'.join(code_lines[:max_lines]))
            else:
                print('(Test file not found on disk)')
    elif show_test_code:
        print('No generated test file recorded in results.')
    return fuzz_details

# Example: inspect one instance and preview up to 120 lines of its generated test file.
details = inspect_instance('scikit-learn__scikit-learn-12973', show_test_code=True, max_lines=120)


Instance: scikit-learn__scikit-learn-12973
Baseline passed: True (returncode=0)
Fuzzing passed: True (returncode=0)
Test failures captured: 0
Generated test file: /fs/nexus-scratch/ihbas/verifier_harness/fuzzing_results/scikit-learn__scikit-learn-12973_test_fuzzing_generated.py
=== Test code preview ===
# Auto-generated change-aware fuzzing tests for patch validation
import pytest
from hypothesis import given, strategies as st, settings
from hypothesis import assume
import sys
from pathlib import Path

# Import from patched module: sklearn.linear_model.least_angle
from sklearn.linear_model.least_angle import LassoLarsIC

@given(st.one_of(st.none(), st.integers(min_value=-100, max_value=100), st.text()), st.one_of(st.none(), st.integers(min_value=-100, max_value=100), st.text()), st.one_of(st.none(), st.integers(min_value=-100, max_value=100), st.text()))
@settings(max_examples=1000, deadline=2000)
def test_fit_boundaries(arg0, arg1, arg2):
    """Test boundary conditions for fit"""
   