In [1]:
from typing import List, Tuple

import numpy as np
from numpy import exp
import pandas as pd
from scipy.stats import spearmanr, gaussian_kde

import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

from result_analysis.helper_functions import process_csv_data, get_numeric_features

In [None]:
# Load the fuzzing results and the per-instance feature table.
# NOTE: there must be no trailing comma after these calls; a trailing comma
# would bind a 1-tuple to the name instead of the loaded table, and
# merge_data() would then fail when it accesses `.columns`.
# (process_csv_data is a project helper; presumably it returns a DataFrame,
# since merge_data() below is annotated to take one -- confirm.)
experiment_results = process_csv_data("path/to/fuzzing_output.csv")
experiment_features = process_csv_data("path/to/features.csv")

def merge_data(
    fuzzing_results: pd.DataFrame,
    satzilla_features: pd.DataFrame
) -> pd.DataFrame:
    """
    Merge fuzzer results with Satzilla features on their index.

    Both frames are indexed by 'instance_name'; if that name is still a
    regular column it is promoted to the index first. The generator metadata
    columns are removed from the fuzzing side before joining so they cannot
    clash with same-named columns on the feature side (``DataFrame.join``
    raises on overlapping column names when no suffix is given).

    Args:
        fuzzing_results (pd.DataFrame): DataFrame of fuzzer results.
        satzilla_features (pd.DataFrame): DataFrame of Satzilla features.

    Returns:
        pd.DataFrame: Inner join of the two frames, i.e. only instances that
        appear in both.
    """
    if "instance_name" in fuzzing_results.columns:
        fuzzing_results = fuzzing_results.set_index("instance_name")
    if "instance_name" in satzilla_features.columns:
        satzilla_features = satzilla_features.set_index("instance_name")

    cols_to_drop = ['base_generator', 'presumed_difficulty', 'randomness', 'generator']
    # errors="ignore": a results file that lacks some of these metadata
    # columns should still merge instead of failing with a KeyError.
    fuzzing_results = fuzzing_results.drop(columns=cols_to_drop, errors="ignore")

    return fuzzing_results.join(satzilla_features, how="inner")

# Join results with features, then show how many rows each generator and
# each counter contributes to the merged table.
merged_data = merge_data(
    fuzzing_results=experiment_results,
    satzilla_features=experiment_features,
)

for column_name in ("generator", "counter"):
    print(merged_data[column_name].value_counts())

In [None]:
def fix_NaN_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Fill in missing satisfiability verdicts from the model count.

    For rows whose 'satisfiability' is NaN while 'count_value' is a valid
    number, the verdict is derived from the count:
    - count_value > 0  -> 'SATISFIABLE'
    - count_value == 0 -> 'UNSATISFIABLE'
    Rows with an existing verdict, or without a numeric count, are untouched.
    'count_value' itself is coerced to numeric (unparseable entries -> NaN).

    Args:
        df (pd.DataFrame): DataFrame containing satisfiability and count_value columns

    Returns:
        pd.DataFrame: A patched copy; the input frame is not modified.
    """
    patched = df.copy()
    patched['count_value'] = pd.to_numeric(patched['count_value'], errors='coerce')

    counts = patched['count_value']
    # Rows eligible for repair: verdict missing, count present.
    repairable = patched['satisfiability'].isna() & counts.notna()

    verdict_rules = (
        ('SATISFIABLE', counts > 0),
        ('UNSATISFIABLE', counts == 0),
    )
    for verdict, applies in verdict_rules:
        patched.loc[repairable & applies, 'satisfiability'] = verdict

    return patched

# fix_NaN_columns returns a patched copy and leaves its argument untouched,
# so the result has to be bound back to merged_data; otherwise the repaired
# satisfiability values never reach the cells below. Re-running this cell is
# safe: already-filled verdicts are not touched again.
merged_data = fix_NaN_columns(merged_data)
merged_data.head()

In [4]:
def classify_with_cpog(merged_df: pd.DataFrame) -> List[pd.DataFrame]:
    """
    Classify fuzzing results using CPOG verification.

    Rows are assigned as follows:
    1. ``count_matches`` is True                         -> correct.
    2. ``count_matches`` is False and ``cpog_message`` is
       "NO ERROR"                                        -> incorrect.
    3. Everything else is grouped by index value (instance):
       - if the group's ``count_value`` entries disagree, the rows that
         differ from the most frequent count are added to incorrect;
       - if they all agree, the rows are bucketed by ``cpog_message``
         (one DataFrame per distinct non-NaN message).

    NOTE(review): in a disagreeing group the rows that hold the majority
    count are returned in no category, and so are consistent rows whose
    ``cpog_message`` is NaN. This matches the previous behaviour; confirm it
    is intended.

    Args:
        merged_df (pd.DataFrame): Results with 'count_matches',
            'cpog_message' and 'count_value' columns.

    Returns:
        List[pd.DataFrame]: ``[correct, incorrect, *one_frame_per_cpog_message]``.
    """
    # `== True` / `== False` rather than truthiness: a NaN entry then falls
    # into neither bucket instead of raising or being coerced.
    verified_mask = merged_df["count_matches"] == True
    correct_results = merged_df[verified_mask].copy()
    remaining_df = merged_df[~verified_mask].copy()

    refuted_mask = (remaining_df["count_matches"] == False) & (remaining_df["cpog_message"] == "NO ERROR")
    incorrect_results = remaining_df[refuted_mask].copy()
    remaining_df = remaining_df[~refuted_mask].copy()

    if remaining_df.empty:
        return [correct_results, incorrect_results]

    # Collect the pieces and concatenate once afterwards; growing a DataFrame
    # with pd.concat inside the loop is quadratic and starts from a
    # column-less empty frame.
    minority_rows = []
    consistent_groups = []
    for _, group in remaining_df.groupby(level=0):
        counts = group["count_value"]
        if len(counts.unique()) > 1:
            majority_count = counts.value_counts().index[0]
            minority_rows.append(group[counts != majority_count])
        else:
            consistent_groups.append(group)

    if minority_rows:
        incorrect_results = pd.concat([incorrect_results] + minority_rows)

    cpog_errors_list = []
    if consistent_groups:
        consistent_cpog_errors = pd.concat(consistent_groups)
        for msg in consistent_cpog_errors["cpog_message"].unique():
            if pd.notna(msg):
                same_message = consistent_cpog_errors["cpog_message"] == msg
                cpog_errors_list.append(consistent_cpog_errors[same_message].copy())

    return [correct_results, incorrect_results] + cpog_errors_list

def classify_with_majority_vote(merged_df: pd.DataFrame) -> List[pd.DataFrame]:
    """
    Classify fuzzing results using majority vote across counters.

    Rows are grouped by index value (instance). The most frequent
    ``count_value`` in each group is taken as the reference count; rows equal
    to it are correct, all others incorrect. An instance whose counts are all
    NaN has no reference count, so its rows are classified as incorrect.

    The input frame is not modified; the two helper columns
    'correct_count_majority_vote' and 'count_matches_majority' are added to
    the returned frames only.

    Args:
        merged_df (pd.DataFrame): Results with a 'count_value' column,
            indexed by instance.

    Returns:
        List[pd.DataFrame]: ``[correct_results, incorrect_results]``.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = merged_df.copy()

    majority_votes = {}
    for instance_name, group in df.groupby(level=0):
        tally = group["count_value"].value_counts()
        # value_counts() drops NaN, so the tally can be empty.
        majority_votes[instance_name] = tally.index[0] if not tally.empty else np.nan

    df["correct_count_majority_vote"] = df.index.map(majority_votes)
    df["count_matches_majority"] = df["count_value"] == df["correct_count_majority_vote"]

    agrees = df["count_matches_majority"] == True
    return [df[agrees].copy(), df[~agrees].copy()]
def classify_fuzzing_results(merged_df: pd.DataFrame) -> Tuple[List[pd.DataFrame], pd.DataFrame]:
    """
    Classify fuzzing results into categories and add classification labels.

    Classification order:
    1. timeout   - 'timed_out' is True (skipped if the column is absent)
    2. crash     - 'count_value' is NaN and the row is not a timeout
    3. the rest  - delegated to classify_with_cpog() when the columns
                   'cpog_message', 'cpog_count' and 'count_matches' all
                   exist, otherwise to classify_with_majority_vote()

    Labels are assigned per ROW. The index repeats across rows (the helpers
    group on it), so matching on index values would relabel every row that
    shares an index value with a single incorrect row. A temporary unique
    row id is threaded through the helpers instead and removed before
    returning.

    Rows that the helper returns in no category keep the label 'unknown'.

    Args:
        merged_df (pd.DataFrame): DataFrame containing fuzzing results

    Returns:
        Tuple[List[pd.DataFrame], pd.DataFrame]: A tuple containing:
        1. List of DataFrames for each category:
           - timeout_results
           - crash_results
           - correct_results
           - incorrect_results
           followed by DataFrames for each CPOG error message.
           Each carries its own label in 'result_classification'.
        2. Copy of the input DataFrame with added 'result_classification' column
    """
    row_id_col = "__row_id__"  # temporary, unique per row; dropped before returning

    df = merged_df.copy()
    df['result_classification'] = 'unknown'
    df[row_id_col] = np.arange(len(df))

    def _label_rows(rows: pd.DataFrame, label: str) -> pd.DataFrame:
        """Write `label` onto exactly the rows of `df` present in `rows`; return the labelled subset."""
        if row_id_col in rows.columns:
            df.loc[df[row_id_col].isin(rows[row_id_col]), 'result_classification'] = label
            rows = rows.drop(columns=row_id_col)
        return rows.assign(result_classification=label)

    if "timed_out" in df.columns:
        timeout_mask = df["timed_out"] == True
    else:
        timeout_mask = pd.Series(False, index=df.index)
    crash_mask = df["count_value"].isna() & ~timeout_mask
    remaining_mask = ~timeout_mask & ~crash_mask

    remaining_df = df[remaining_mask].copy()
    timeout_results = _label_rows(df[timeout_mask], 'timeout')
    crash_results = _label_rows(df[crash_mask], 'crash')

    has_cpog = all(col in remaining_df.columns for col in ["cpog_message", "cpog_count", "count_matches"])
    if has_cpog:
        classification_results = classify_with_cpog(remaining_df)
    else:
        classification_results = classify_with_majority_vote(remaining_df)

    correct_results = _label_rows(classification_results[0], 'correct')
    incorrect_results = _label_rows(classification_results[1], 'incorrect')

    # Only the CPOG path returns more than two frames (one per error message).
    cpog_error_results = []
    for error_df in classification_results[2:]:
        if error_df.empty:
            cpog_error_results.append(error_df.drop(columns=row_id_col, errors="ignore"))
            continue
        error_msg = error_df['cpog_message'].iloc[0]
        cpog_error_results.append(_label_rows(error_df, f'cpog_error_{error_msg}'))

    results = [timeout_results, crash_results, correct_results, incorrect_results] + cpog_error_results
    return results, df.drop(columns=row_id_col)

In [None]:
# Classify every row, then report the category sizes overall, per generator
# and per solver.
(results, classified_df) = classify_fuzzing_results(merged_data)
timeout_results, crash_results, correct_results, incorrect_results = results[:4]
cpog_subsets = results[4:]  # one frame per CPOG error message; empty list if none

outcome_frames = [
    ("Timeouts", timeout_results),
    ("Crashes", crash_results),
    ("Correct", correct_results),
    ("Incorrect", incorrect_results),
]

print("Overall Statistics:")
print("-" * 20)
for outcome_name, outcome_df in outcome_frames:
    print(f"{outcome_name}: {len(outcome_df)}")

# The same breakdown is printed once per grouping column.
for group_label, group_column in [("Generator", "generator"), ("Solver", "counter")]:
    print(f"\nBreakdown by {group_label}:")
    print("-" * 20)
    for group_value in sorted(merged_data[group_column].unique()):
        print(f"\n{group_label}: {group_value}")
        for outcome_name, outcome_df in outcome_frames:
            print(f"{outcome_name}: {len(outcome_df[outcome_df[group_column] == group_value])}")

# classify_fuzzing_results uses CPOG verification only when all three of these
# columns are present; an empty cpog_subsets alone does not mean that majority
# vote was used.
used_cpog = all(col in merged_data.columns for col in ("cpog_message", "cpog_count", "count_matches"))

if cpog_subsets:
    print("\nCPOG error categories:")
    print("-" * 20)
    for subset in cpog_subsets:
        msg = subset['cpog_message'].iloc[0]
        print(f"\nCPOG error '{msg}':")

        for group_label, group_column in [("Generator", "generator"), ("Solver", "counter")]:
            print(f"\nBy {group_label}:")
            for group_value in sorted(subset[group_column].unique()):
                count = len(subset[subset[group_column] == group_value])
                print(f"{group_value}: {count} instances")
elif used_cpog:
    print("\nCPOG verification reported no error categories")
else:
    print("\nUsing majority vote classification")

In [None]:
# Mean solve time per generator, ascending, drawn as a bar chart.
avg_solve_time = (
    classified_df
    .groupby('generator')['solve_time']
    .mean()
    .sort_values()
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    x=avg_solve_time.index,
    y=avg_solve_time.values,
    palette='viridis',
    width=0.4,
    ax=ax,
)
plt.xticks(rotation=45, ha='right', fontsize=16)
ax.set_xlabel('Generator', fontsize=16)
ax.set_ylabel('Average Solve Time', fontsize=18)
# Applied after xticks on purpose: this sets the final tick label size.
ax.tick_params(axis='both', which='major', labelsize=14)
fig.tight_layout()
plt.show()

In [None]:
# Mean model count per generator, ascending, drawn on a logarithmic y-axis.
avg_count_value = (
    classified_df
    .groupby('generator')['count_value']
    .mean()
    .sort_values()
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    x=avg_count_value.index,
    y=avg_count_value.values,
    palette='viridis',
    width=0.4,
    ax=ax,
)
ax.set_yscale('log')
plt.xticks(rotation=45, ha='right', fontsize=16)
ax.set_xlabel('Generator', fontsize=16)
ax.set_ylabel('Average Count Value (log scale)', fontsize=14)
# Applied after xticks on purpose: this sets the final tick label size.
ax.tick_params(axis='both', which='major', labelsize=14)
fig.tight_layout()
plt.show()

In [8]:
# Feature name -> reporting category. Used below to group per-feature results
# under a heading; a feature listed in no category is reported as 'Other' by
# compare_horn_impact.
feature_categories = {
    'Problem Size': [
        'nvars', 'nclauses', 'vars-clauses-ratio', 'nvarsOrig', 'nclausesOrig',
    ],
    'Variable-Clause Graph': [
        'VCG-VAR-mean', 'VCG-VAR-coeff-variation', 'VCG-VAR-min', 'VCG-VAR-max', 'VCG-VAR-entropy',
        'VCG-CLAUSE-mean', 'VCG-CLAUSE-coeff-variation', 'VCG-CLAUSE-min', 'VCG-CLAUSE-max', 'VCG-CLAUSE-entropy',
    ],
    'Variable Graph': [
        'VG-mean', 'VG-coeff-variation', 'VG-min', 'VG-max',
    ],
    'Clause Graph': [
        'CG-mean', 'CG-coeff-variation', 'CG-min', 'CG-max', 'CG-entropy',
        'cluster-coeff-mean', 'cluster-coeff-coeff-variation', 'cluster-coeff-min', 'cluster-coeff-max', 'cluster-coeff-entropy',
    ],
    'Balance': [
        'POSNEG-RATIO-CLAUSE-mean', 'POSNEG-RATIO-CLAUSE-coeff-variation', 'POSNEG-RATIO-CLAUSE-min', 'POSNEG-RATIO-CLAUSE-max', 'POSNEG-RATIO-CLAUSE-entropy',
        'POSNEG-RATIO-VAR-mean', 'POSNEG-RATIO-VAR-stdev', 'POSNEG-RATIO-VAR-min', 'POSNEG-RATIO-VAR-max', 'POSNEG-RATIO-VAR-entropy',
        'UNARY', 'BINARY+', 'TRINARY+',
    ],
    'Horn Formula': [
        'horn-clauses-fraction',
        'HORNY-VAR-mean', 'HORNY-VAR-coeff-variation', 'HORNY-VAR-min', 'HORNY-VAR-max', 'HORNY-VAR-entropy',
    ],
    'Other Features': [
        'Pre-featuretime', 'Basic-featuretime', 'KLB-featuretime', 'CG-featuretime',
        'solved', 'generator', 'seed', 'base_generator', 'presumed_difficulty', 'randomness',
    ],
}



In [None]:
def compare_horn_impact(df, base_gen, horn_gen, feature_categories):
    """
    Compare numeric features between a base generator and its Horn variant.

    For every numeric feature (as reported by get_numeric_features) that has
    no NaN and non-zero standard deviation in both generators, compute:
    - the difference of means (horn - base) and the relative change in percent
      (``inf`` when the base mean is 0),
    - the Spearman correlation between the feature and 'solve_time' over the
      rows of both generators combined.

    Args:
        df (pd.DataFrame): Data with 'generator' and 'solve_time' columns.
        base_gen (str): Name of the base generator.
        horn_gen (str): Name of the Horn variant generator.
        feature_categories (dict): Category name -> list of feature names.
            A feature found in no category is labelled 'Other'; if it occurs
            in several, the first category in dict order wins.

    Returns:
        pd.DataFrame: One row per analysed feature with the columns
        'feature', 'category', 'mean_difference', 'percent_change',
        'solve_time_correlation' and 'correlation_p_value'. The columns are
        present even when no feature qualifies, so callers can filter on them.
    """
    result_columns = [
        'feature', 'category', 'mean_difference', 'percent_change',
        'solve_time_correlation', 'correlation_p_value',
    ]

    df1 = df[df['generator'] == base_gen].copy()
    df2 = df[df['generator'] == horn_gen].copy()
    # Identical for every feature, so build it once outside the loop.
    combined_data = pd.concat([df1, df2])

    # Reverse lookup; setdefault keeps the first category that lists a feature.
    category_of = {}
    for cat, features in feature_categories.items():
        for name in features:
            category_of.setdefault(name, cat)

    results = []
    for feature in get_numeric_features(df):
        base_values = df1[feature]
        horn_values = df2[feature]

        if base_values.isna().any() or horn_values.isna().any():
            continue
        # A constant feature has no defined rank correlation.
        if base_values.std() == 0 or horn_values.std() == 0:
            continue

        try:
            base_mean = base_values.mean()
            mean_diff = horn_values.mean() - base_mean
            if base_mean != 0:
                mean_diff_pct = mean_diff / base_mean * 100
            else:
                mean_diff_pct = float('inf')

            corr = spearmanr(combined_data[feature], combined_data['solve_time'])
            rho, p_value = corr.correlation, corr.pvalue
        except Exception:
            # Best effort: a feature that cannot be analysed is skipped.
            # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
            # still propagate.
            continue

        results.append({
            'feature': feature,
            'category': category_of.get(feature, 'Other'),
            'mean_difference': mean_diff,
            'percent_change': mean_diff_pct,
            'solve_time_correlation': rho,
            'correlation_p_value': p_value
        })

    return pd.DataFrame(results, columns=result_columns)

# Run the Horn comparison for both generator families, keep the features
# whose correlation with solve time is significant (p < 0.05) and at least
# moderately strong (|rho| > 0.4) in BOTH, and print them by category.
fuzz_results = compare_horn_impact(
    classified_df,
    'FuzzSAT-hard-100',
    'FuzzSATHORN-hard-100',
    feature_categories
)

pair_results = compare_horn_impact(
    classified_df,
    'PairSAT-hard-100',
    'PairSATHORN-hard-100',
    feature_categories
)

sig_fuzz = fuzz_results[(fuzz_results['correlation_p_value'] < 0.05) &
                        (fuzz_results['solve_time_correlation'].abs() > 0.4)]
sig_pair = pair_results[(pair_results['correlation_p_value'] < 0.05) &
                        (pair_results['solve_time_correlation'].abs() > 0.4)]

common_features = set(sig_fuzz['feature']) & set(sig_pair['feature'])

print("Features with significant correlation in both FuzzSAT and PairSAT:\n")
for category, category_members in feature_categories.items():
    # sorted(): iterating a set of strings directly gives an order that can
    # change between kernel runs, which makes the printed report
    # non-reproducible.
    cat_features = sorted(f for f in common_features if f in category_members)
    if cat_features:
        print(f"\n{category}:")
        print("=" * 80)
        for feature in cat_features:
            fuzz_corr = sig_fuzz[sig_fuzz['feature'] == feature]['solve_time_correlation'].iloc[0]
            pair_corr = sig_pair[sig_pair['feature'] == feature]['solve_time_correlation'].iloc[0]
            print(f"{feature:40} FuzzSAT: {fuzz_corr:6.3f}  PairSAT: {pair_corr:6.3f}")

In [10]:
def get_behavior_probabilities(solver_data: pd.DataFrame) -> dict:
    """
    Calculate the empirical distribution of behaviors in a set of results.

    Args:
        solver_data: DataFrame with result_classification column

    Returns:
        dict: Fraction of rows labelled 'correct', 'incorrect', 'timeout'
        and 'crash'. Rows with any other label count towards the total only,
        so the four values need not sum to 1. For an empty frame all four
        fractions are 0.0.
    """
    behaviors = ('correct', 'incorrect', 'timeout', 'crash')

    total = len(solver_data)
    if total == 0:
        # No observations: avoid the 0/0 division and report zero mass.
        return {behavior: 0.0 for behavior in behaviors}

    behavior_counts = solver_data['result_classification'].value_counts()
    return {behavior: behavior_counts.get(behavior, 0) / total for behavior in behaviors}

def compute_similarity(behavior1, behavior2, count1, count2, time1, time2,
                     gen1_solver_data, gen2_solver_data):
    """
    Compute similarity between two instances based on behavior, count and time.

    Rules, in order:
    - different behaviors                -> 0.0
    - both "timeout"                     -> P1(correct)*P2(correct) +
                                            P1(incorrect)*P2(incorrect), using
                                            the behavior distributions of the
                                            two solver data frames
    - both "crash"                       -> exp(-0.1 * |time1 - time2|)
    - otherwise                          -> 0.3 * exp(-0.1 * |count1 - count2|)
                                          + 0.7 * exp(-0.1 * |time1 - time2|)
    A required count or time that is missing yields 0.0. "Missing" covers
    both None and NaN: values read from pandas columns are NaN, never None,
    and a NaN would otherwise propagate into every aggregate of the scores.

    Args:
        behavior1, behavior2: Classification results
        count1, count2: Count values
        time1, time2: Solve times
        gen1_solver_data, gen2_solver_data: DataFrames with solver data
            (only read in the timeout case)

    Returns:
        float: Similarity score between 0 and 1
    """
    if behavior1 != behavior2:
        return 0.0

    # From here on both behaviors are equal, so testing one is enough.
    if behavior1 == "timeout":
        gen1_probs = get_behavior_probabilities(gen1_solver_data)
        gen2_probs = get_behavior_probabilities(gen2_solver_data)

        return (gen1_probs['correct'] * gen2_probs['correct'] +
                gen1_probs['incorrect'] * gen2_probs['incorrect'])

    time_missing = pd.isna(time1) or pd.isna(time2)

    if behavior1 == "crash":
        if time_missing:
            return 0.0
        return exp(-0.1 * abs(time1 - time2))

    if time_missing or pd.isna(count1) or pd.isna(count2):
        return 0.0

    return (0.3 * exp(-0.1 * abs(count1 - count2)) +
            0.7 * exp(-0.1 * abs(time1 - time2)))

def calculate_behavioral_similarity(classified_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Calculate behavioral similarity between generators.

    For every unordered pair of distinct generators and every solver, each
    instance of the first generator is compared with each instance of the
    second via compute_similarity(). The scores of a pair (across all
    solvers) are summarised, and the matrix entry is
    ``0.7 * mean + 0.3 * (1 - (max - min))``. The diagonal is 1.0; a pair
    without any comparable instances keeps 0.0 and gets no statistics rows.

    Args:
        classified_df: DataFrame with generator, counter, result_classification,
                      count_value and solve_time columns

    Returns:
        similarity_matrix: DataFrame with pairwise generator similarities
        stats_df: DataFrame with detailed similarity statistics; every pair
                  appears twice, once in each direction
    """
    generators = sorted(classified_df['generator'].unique())
    solvers = sorted(classified_df['counter'].unique())

    size = len(generators)
    similarity_matrix = pd.DataFrame(np.zeros((size, size)),
                                     index=generators, columns=generators)
    similarity_stats = []

    def rows_for(generator, solver):
        """Rows produced by `solver` on instances of `generator`."""
        selector = (
            (classified_df['generator'] == generator) &
            (classified_df['counter'] == solver)
        )
        return classified_df[selector]

    def by_instance(frame, column):
        """Index value -> value of `column` (last row wins on duplicates)."""
        return dict(zip(frame.index, frame[column]))

    for position, gen1 in enumerate(generators):
        similarity_matrix.loc[gen1, gen1] = 1.0

        # Only pairs after gen1: the matrix is filled symmetrically below.
        for gen2 in generators[position + 1:]:
            pair_scores = []

            for solver in solvers:
                left = rows_for(gen1, solver)
                right = rows_for(gen2, solver)

                left_behavior = by_instance(left, 'result_classification')
                right_behavior = by_instance(right, 'result_classification')
                left_count = by_instance(left, 'count_value')
                right_count = by_instance(right, 'count_value')
                left_time = by_instance(left, 'solve_time')
                right_time = by_instance(right, 'solve_time')

                right_instances = right.index.unique()
                for inst1 in left.index.unique():
                    for inst2 in right_instances:
                        pair_scores.append(compute_similarity(
                            left_behavior[inst1], right_behavior[inst2],
                            left_count[inst1], right_count[inst2],
                            left_time[inst1], right_time[inst2],
                            left, right
                        ))

            if not pair_scores:
                continue

            stats = {
                'generator1': gen1,
                'generator2': gen2,
                'mean_similarity': np.mean(pair_scores),
                'median_similarity': np.median(pair_scores),
                'min_similarity': np.min(pair_scores),
                'max_similarity': np.max(pair_scores),
                'std_similarity': np.std(pair_scores),
                'q25_similarity': np.percentile(pair_scores, 25),
                'q75_similarity': np.percentile(pair_scores, 75),
                'count': len(pair_scores)
            }

            # A narrow score range counts as consistent behaviour.
            spread = stats['max_similarity'] - stats['min_similarity']
            consistency = 1 - spread
            final_score = 0.7 * stats['mean_similarity'] + 0.3 * consistency

            similarity_matrix.loc[gen1, gen2] = final_score
            similarity_matrix.loc[gen2, gen1] = final_score

            # Record the pair in both directions.
            mirrored = stats.copy()
            mirrored['generator1'], mirrored['generator2'] = gen2, gen1
            similarity_stats.extend([stats, mirrored])

    return similarity_matrix, pd.DataFrame(similarity_stats)

# Pairwise generator similarity (matrix) plus per-pair summary statistics.
# The nested per-instance comparison is done in pure Python, so this cell can
# be slow on large result sets.
similarity_matrix, similarity_stats = calculate_behavioral_similarity(classified_df)

In [None]:
def get_text_color(val, vmin=0, vmax=1):
    """
    Pick an annotation color that contrasts with the cell background.

    Values below the midpoint of [vmin, vmax] get white text (the code treats
    them as dark backgrounds); everything else gets black text.
    """
    midpoint = (vmin + vmax) / 2
    if val < midpoint:
        return 'white'
    return 'black'

def sort_key(x):
    """
    Sort key for generator names of the form '<base>-<difficulty>-<number>'.

    Orders by base name, then difficulty ('easy' before 'hard'), then the
    numeric third component. Any further '-'-separated parts are ignored.
    Raises KeyError for a difficulty other than 'easy' or 'hard'.
    """
    parts = x.split('-')
    base, diff = parts[0], parts[1]
    rand = int(parts[2])
    difficulty_rank = {'easy': 0, 'hard': 1}
    return (base, difficulty_rank[diff], rand)

# Reorder rows and columns by (base name, difficulty, number) so related
# generators sit next to each other in the heatmap.
sorted_generators = sorted(similarity_matrix.index, key=sort_key)
similarity_matrix = similarity_matrix.loc[sorted_generators, sorted_generators]

plt.figure(figsize=(20, 20))
# NOTE(review): rcParams.update changes matplotlib's global defaults, so these
# font sizes also apply to figures created by cells that run afterwards.
plt.rcParams.update({
    'font.size': 24,
    'axes.titlesize': 30,
    'axes.labelsize': 28
})

# Axes placed manually as [left, bottom, width, height] in figure fractions.
ax = plt.axes([0.1, 0.1, 0.8, 0.8])
# Non-zero strictly above the diagonal: those cells are hidden, so only the
# lower triangle and the diagonal are drawn.
mask = np.triu(np.ones_like(similarity_matrix), k=1)

sns.heatmap(
    similarity_matrix,
    annot=False,
    cmap='viridis',
    vmin=0,
    vmax=1,
    center=0.5,
    mask=mask,
    square=True,
    cbar=False,
    ax=ax
)

# Annotate the visible cells by hand (annot=False above) so the text color
# can be chosen per cell.
for i in range(len(similarity_matrix)):
    for j in range(len(similarity_matrix)):
        if not mask[i, j]:
            val = similarity_matrix.iloc[i, j]
            text_color = get_text_color(val)
            # +0.5 puts the text at the center of cell (row i, column j).
            ax.text(j + 0.5, i + 0.5, f'{val:.3f}',
                   ha='center', va='center',
                   size=22, color=text_color)

ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', va='top', fontsize=20)
ax.set_yticklabels(ax.get_yticklabels(), fontsize=20)

# Re-applies the size already requested in plt.figure(figsize=(20, 20)) above.
plt.gcf().set_size_inches(20, 20)
plt.tight_layout()
plt.show()

In [None]:
def analyze_instance_similarity_features(classified_df: pd.DataFrame, similarity_matrix: pd.DataFrame, 
                                       similarity_stats: pd.DataFrame):
    """
    Find features whose similarity between generators tracks behavioral similarity.

    For every generator pair in `similarity_stats`, a per-feature similarity
    is computed from the generators' feature means and standard deviations:
    ``exp(-0.1*|mean1 - mean2|) * exp(-0.1*|std1 - std2|)`` (0.0 where a mean
    or std is NaN). Each feature's similarity across pairs is then
    Spearman-correlated with the pairs' 'mean_similarity'. Features with
    p < 0.05 and |rho| >= 0.2 are plotted as a horizontal bar chart and
    returned.

    Each unordered generator pair is used once. `similarity_stats` may list a
    pair in both directions with identical statistics; counting both would
    double the sample size of the correlation and understate the p-values.

    Args:
        classified_df: DataFrame with a 'generator' column and the numeric
            feature columns reported by get_numeric_features.
        similarity_matrix: Not used by this function; kept so existing calls
            continue to work.
        similarity_stats: DataFrame with 'generator1', 'generator2' and
            'mean_similarity' columns.

    Returns:
        pd.DataFrame: Significant features with the columns 'feature',
        'correlation', 'p_value' and 'abs_correlation', sorted by
        'abs_correlation' ascending. Empty if nothing qualifies.
    """
    df = classified_df.copy()
    feature_cols = get_numeric_features(df)
    feature_similarities = []
    behavioral_similarities = []
    seen_pairs = set()

    for _, row in similarity_stats.iterrows():
        # Skip the mirrored (B, A) duplicate of an already processed (A, B).
        pair = frozenset((row['generator1'], row['generator2']))
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)

        gen1_data = df[df['generator'] == row['generator1']][feature_cols]
        gen2_data = df[df['generator'] == row['generator2']][feature_cols]

        gen1_means = gen1_data.mean(skipna=True)
        gen2_means = gen2_data.mean(skipna=True)
        gen1_stds = gen1_data.std(skipna=True)
        gen2_stds = gen2_data.std(skipna=True)

        valid_features = ~(gen1_means.isna() | gen2_means.isna() | gen1_stds.isna() | gen2_stds.isna())
        feature_sim = pd.Series(0.0, index=feature_cols)
        feature_sim[valid_features] = exp(-0.1 * abs(gen1_means[valid_features] - gen2_means[valid_features])) * \
                                    exp(-0.1 * abs(gen1_stds[valid_features] - gen2_stds[valid_features]))

        feature_similarities.append(feature_sim)
        behavioral_similarities.append(row['mean_similarity'])

    correlations = []
    # With no pairs there is nothing to correlate (and the frame built from an
    # empty list would have no feature columns to index).
    if feature_similarities:
        feature_similarities_df = pd.DataFrame(feature_similarities)

        for feature in feature_cols:
            if feature_similarities_df[feature].isna().any():
                continue
            correlation = spearmanr(feature_similarities_df[feature], behavioral_similarities, nan_policy='omit')
            correlations.append({
                'feature': feature,
                'correlation': correlation.correlation,
                'p_value': correlation.pvalue
            })

    # Explicit columns and float dtypes keep the filtering and plotting below
    # working when `correlations` is empty.
    corr_df = pd.DataFrame(correlations, columns=['feature', 'correlation', 'p_value'])
    corr_df = corr_df.astype({'correlation': float, 'p_value': float})
    corr_df['abs_correlation'] = abs(corr_df['correlation'])

    significant = corr_df[
        (corr_df['p_value'] < 0.05) & 
        (corr_df['abs_correlation'] >= 0.2)
    ].sort_values('abs_correlation', ascending=True)

    plt.figure(figsize=(12, 12))
    plt.barh(range(len(significant)), significant['correlation'], 
            color=plt.cm.viridis(0.5))
    plt.yticks(range(len(significant)), significant['feature'], fontsize=18)
    plt.xticks(fontsize=16)
    plt.xlabel('Spearman Correlation with Similarity', fontsize=18)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    return significant

# Draws the bar chart as a side effect; the returned frame holds only the
# features that passed the significance filter inside the function.
raw_features_df = analyze_instance_similarity_features(classified_df, similarity_matrix, similarity_stats)