In [None]:
import re
from collections import Counter
import numpy as np

# One "key,value" pair. The value is terminated either by a comma or by the end
# of the line, so the final pair is captured even without a trailing comma.
_RESULT_PAIR_RE = re.compile(r'([^,]+),([^,]+)(?:,|$)')


def parse_result_line(line):
    """Parse one comma-separated result line into a ``{key: value}`` dict.

    The line is read as consecutive ``key,value`` pairs
    (``key1,value1,key2,value2,...``), with or without a trailing comma after
    the last value.

    Args:
        line: A single line of text from a results file.

    Returns:
        A dict mapping each key (surrounding whitespace removed) to its value.
        Values that ``float()`` accepts are stored as floats; anything else is
        stored as the whitespace-stripped string. When a key occurs more than
        once, the last occurrence wins. A line with no pairs yields ``{}``.
    """
    result = {}
    for raw_key, raw_value in _RESULT_PAIR_RE.findall(line):
        # Strip so that "Z_0, 1.5, Z_1, 2.0" yields the key "Z_1" rather than
        # " Z_1", and so a value at the end of the line loses its newline.
        key = raw_key.strip()
        value = raw_value.strip()
        try:
            result[key] = float(value)
        except ValueError:
            # Not numeric: keep the raw text.
            result[key] = value
    return result

def compute_theoretical_threshold(num_keys=4, tau=0.5):
    """Return the secondary threshold ``sqrt(2 * ln(num_keys - 1)) + tau``.

    Args:
        num_keys: Number of keys in use.
        tau: Additive offset applied on top of the square-root term.

    Returns:
        The threshold as a NumPy float, or ``None`` when ``num_keys <= 1``
        (the formula is only evaluated for two or more keys).
    """
    single_key = num_keys <= 1
    if single_key:
        return None
    log_term = np.log(num_keys - 1)
    return np.sqrt(2 * log_term) + tau

# Result-dict labels for the three primary operating points, in the same order
# as the entries of ``primary_thresholds``.
_FPR_LABELS = ('FPR@1e-2', 'FPR@1e-3', 'FPR@3e-5')
# Per-line fields carrying the "spdtk" value for each operating point (same order).
_SPDTK_FIELDS = ('spdtk@1e2', 'spdtk@1e3', 'spdtk@3e5')
_ALGORITHMS = ('algorithm1', 'algorithm2', 'algorithm3')


def _numeric_or_none(value):
    """Return ``value`` when it is a real number, otherwise ``None``."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return value
    return None


def analyze_spoofing_success(file_path, num_keys=4, primary_thresholds=(2.326, 3.09, 4.00),
                            secondary_threshold=None, reject=False, ethics_threshold=6.5):
    """Analyze spoofing success rates using detection methods based on num_keys.

    Every non-blank line of ``file_path`` is parsed with ``parse_result_line``
    and counted as one sample. The fields read from a parsed line are
    ``GPT_Ethics``, ``Z`` / ``Z_0`` (single-key mode), ``Z_0 .. Z_{num_keys-1}``
    (multi-key mode) and ``spdtk@1e2`` / ``spdtk@1e3`` / ``spdtk@3e5``.

    Detection rules, each evaluated at the three primary thresholds:

    * Algorithm 1: the highest z-score is >= the primary threshold.
    * Algorithm 2 (multi-key only): the line's spdtk value for that operating
      point equals 1. The value is taken from the file, not recomputed from
      ``primary_thresholds``.
    * Algorithm 3 (multi-key only, needs ``secondary_threshold`` and at least
      two z-scores): Algorithm 1 holds AND the second-highest z-score is below
      ``secondary_threshold``. Since the scores are sorted, this means every
      other key is below the secondary threshold.

    When ``reject`` is true, a sample whose ethics grade is below
    ``ethics_threshold`` is counted as rejected and never counted as detected.

    Non-numeric ``GPT_Ethics`` / ``Z*`` values (which the parser leaves as
    strings) are treated as missing rather than raising ``TypeError``.

    Args:
        file_path: Path of the results file to read.
        num_keys: Number of keys; ``1`` selects the single-key path.
        primary_thresholds: Exactly three z-score thresholds, reported under
            ``FPR@1e-2``, ``FPR@1e-3`` and ``FPR@3e-5`` respectively.
        secondary_threshold: Threshold for Algorithm 3, or ``None`` to leave
            Algorithm 3 counts at zero.
        reject: Enable ethics-based rejection.
        ethics_threshold: Grades strictly below this value are rejected; it is
            also used for the ``below_threshold`` ethics statistic.

    Returns:
        A dict with ``success_rates`` and ``raw_counts`` (per algorithm, per
        FPR label), ``thresholds``, and, when any numeric ethics grade was
        seen, ``ethics`` (average / min / max / fraction below threshold).
        Rates are divided by the number of non-blank lines, including lines
        that had no usable z-score. Algorithm 2/3 entries and the secondary
        threshold are present only when ``num_keys > 1``.

    Raises:
        ValueError: If ``primary_thresholds`` does not contain exactly three
            values.
        OSError: If ``file_path`` cannot be opened.
    """
    print(f"Analyzing spoofing success rates from {file_path}...")
    print(f"Rejection based on ethics: {reject} (threshold: {ethics_threshold})")

    # Primary thresholds correspond to FPR@1e-2, FPR@1e-3, and FPR@3e-5
    tau_1e2, tau_1e3, tau_3e5 = primary_thresholds
    taus = (tau_1e2, tau_1e3, tau_3e5)

    total_samples = 0
    rejected_samples = 0
    # detected[algorithm][i] counts detections at the i-th primary threshold.
    detected = {algorithm: [0, 0, 0] for algorithm in _ALGORITHMS}

    # For reporting
    spdtk_values_1e2 = []
    ethics_scores = []

    with open(file_path, 'r') as f:
        for line in f:
            if not line.strip():
                continue

            data = parse_result_line(line)
            total_samples += 1

            ethics_grade = _numeric_or_none(data.get('GPT_Ethics'))
            if ethics_grade is not None:
                ethics_scores.append(ethics_grade)

            # A sample can only be rejected when rejection is enabled and it
            # actually carries a numeric ethics grade.
            is_rejected = bool(
                reject and ethics_grade is not None and ethics_grade < ethics_threshold
            )
            if is_rejected:
                rejected_samples += 1

            # Single-key case: one z-score, stored under 'Z' or else 'Z_0'.
            if num_keys == 1:
                z_score = _numeric_or_none(data.get('Z', data.get('Z_0')))
                if z_score is not None and not is_rejected:
                    for idx, tau in enumerate(taus):
                        if z_score >= tau:
                            detected['algorithm1'][idx] += 1
                continue

            # Multi-key case: collect the available z-scores, highest first.
            candidates = (_numeric_or_none(data.get(f'Z_{i}')) for i in range(num_keys))
            z_scores = sorted((z for z in candidates if z is not None), reverse=True)

            # Without any z-score the sample cannot be detected (it still
            # counts toward total_samples).
            if not z_scores:
                continue

            spdtk = [data.get(field, 0) for field in _SPDTK_FIELDS]
            # Recorded for rejected samples too, so the printed distribution
            # covers every sample that had z-scores.
            spdtk_values_1e2.append(spdtk[0])

            if is_rejected:
                continue

            top_z = z_scores[0]
            # Algorithm 3 needs a runner-up score and a configured threshold.
            others_below_secondary = (
                len(z_scores) > 1
                and secondary_threshold is not None
                and z_scores[1] < secondary_threshold
            )

            for idx, tau in enumerate(taus):
                top_detected = top_z >= tau
                if top_detected:
                    detected['algorithm1'][idx] += 1
                if spdtk[idx] == 1:
                    detected['algorithm2'][idx] += 1
                if top_detected and others_below_secondary:
                    detected['algorithm3'][idx] += 1

    def _rate(count):
        return count / total_samples if total_samples > 0 else 0

    # Calculate spoofing success rates (fraction classified as genuine)
    success_rates = {
        algorithm: {label: _rate(count) for label, count in zip(_FPR_LABELS, counts)}
        for algorithm, counts in detected.items()
    }
    raw_counts = {
        algorithm: dict(zip(_FPR_LABELS, counts))
        for algorithm, counts in detected.items()
    }

    results = {
        'success_rates': {
            'algorithm1': success_rates['algorithm1'],
        },
        'raw_counts': {
            'total_samples': total_samples,
            'rejected_samples': rejected_samples,
            'rejection_rate': _rate(rejected_samples),
            'algorithm1': raw_counts['algorithm1'],
        },
        'thresholds': {
            'primary': dict(zip(_FPR_LABELS, taus)),
        },
    }

    # Calculate ethics statistics if any grade was available
    if ethics_scores:
        results['ethics'] = {
            'average': sum(ethics_scores) / len(ethics_scores),
            'min': min(ethics_scores),
            'max': max(ethics_scores),
            'below_threshold': sum(1 for score in ethics_scores if score < ethics_threshold) / len(ethics_scores)
        }

    # Only add multi-key results if we have multiple keys
    if num_keys > 1:
        for algorithm in ('algorithm2', 'algorithm3'):
            results['success_rates'][algorithm] = success_rates[algorithm]
        for algorithm in ('algorithm2', 'algorithm3'):
            results['raw_counts'][algorithm] = raw_counts[algorithm]
        results['thresholds']['secondary'] = secondary_threshold

        # Print distribution for multi-key
        print("\nDistribution of key detection values (spdtk@1e-2):")
        counter = Counter(spdtk_values_1e2)
        for value, count in sorted(counter.items()):
            print(f"  {value}: {count} samples ({count/total_samples*100:.2f}%)")

    return results

def print_results(results, num_keys, reject=False, ethics_threshold=6.5):
    """Write a human-readable report of an analysis ``results`` dict to stdout.

    Args:
        results: Dict with ``raw_counts``, ``success_rates`` and ``thresholds``
            entries, plus an optional ``ethics`` entry.
        num_keys: Number of keys; the Algorithm 2/3 sections and the secondary
            threshold are printed only when this is greater than 1.
        reject: When true, the rejected-sample line and the ethics threshold
            are printed as well.
        ethics_threshold: Value shown on the ``Ethics`` threshold line.
    """
    fpr_labels = ('FPR@1e-2', 'FPR@1e-3', 'FPR@3e-5')
    multi_key = num_keys > 1

    print("\n===== SPOOFING SUCCESS RATES =====")
    counts = results['raw_counts']
    print(f"Total samples analyzed: {counts['total_samples']}")

    if reject and 'rejected_samples' in counts:
        print(f"Rejected samples (ethics < threshold): {counts['rejected_samples']} ({counts['rejection_rate']*100:.2f}%)")

    if 'ethics' in results:
        ethics = results['ethics']
        print("\nEthics Scores:")
        for caption, field in (("Average", 'average'), ("Min", 'min'), ("Max", 'max')):
            print(f"  {caption}: {ethics[field]:.2f}")
        print(f"  Below threshold: {ethics['below_threshold']*100:.2f}%")

    # One section per algorithm; the multi-key algorithms only exist in
    # `results` when more than one key was analyzed.
    sections = [('algorithm1', "Algorithm 1 (Basic Detection)")]
    if multi_key:
        sections.append(('algorithm2', "Algorithm 2 (Exact-One-Key)"))
        sections.append(('algorithm3', "Algorithm 3 (Secondary Threshold)"))

    for algorithm, title in sections:
        print(f"\n{title}:")
        for label in fpr_labels:
            print(f"  {label}: {results['success_rates'][algorithm][label]:.4f}")

    print("\nThresholds Used:")
    thresholds = results['thresholds']
    for label in fpr_labels:
        print(f"  Primary ({label}): {thresholds['primary'][label]}")

    if multi_key and 'secondary' in thresholds:
        print(f"  Secondary: {thresholds['secondary']}")

    if reject:
        print(f"  Ethics: {ethics_threshold}")

def generate_csv_output(results, output_file, num_keys, reject=False):
    """Write the success-rate table (and optional extras) to ``output_file``.

    The file starts with a header row and one row per algorithm; Algorithm 2
    and 3 rows are written only when ``num_keys > 1``. A rejection-rate line
    follows when ``reject`` is true, and an "Ethics Metrics" section follows
    when ``results`` has an ``ethics`` entry. Each of those two extras is
    preceded by a blank line. A confirmation message is printed afterwards.

    Args:
        results: Analysis dict with ``success_rates`` and ``raw_counts``
            entries, plus an optional ``ethics`` entry.
        output_file: Path of the file to create or overwrite.
        num_keys: Number of keys used in the analysis.
        reject: Whether to include the rejection-rate line.
    """
    fpr_labels = ('FPR@1e-2', 'FPR@1e-3', 'FPR@3e-5')
    table_rows = [('algorithm1', "Algorithm 1 (Basic Detection)")]
    if num_keys > 1:
        table_rows.append(('algorithm2', "Algorithm 2 (Exact-One-Key)"))
        table_rows.append(('algorithm3', "Algorithm 3 (Secondary Threshold)"))
    ethics_rows = (
        ("Average", 'average'),
        ("Min", 'min'),
        ("Max", 'max'),
        ("Below Threshold", 'below_threshold'),
    )

    with open(output_file, 'w') as out:
        out.write("Algorithm,FPR@1e-2,FPR@1e-3,FPR@3e-5\n")
        for algorithm, title in table_rows:
            rates = results['success_rates'][algorithm]
            cells = [f"{rates[label]:.4f}" for label in fpr_labels]
            out.write(",".join([title] + cells) + "\n")

        if reject and 'rejected_samples' in results['raw_counts']:
            out.write(f"\nRejection Rate,{results['raw_counts']['rejection_rate']:.4f}\n")

        if 'ethics' in results:
            out.write("\nEthics Metrics\n")
            for caption, field in ethics_rows:
                out.write(f"{caption},{results['ethics'][field]:.4f}\n")

    print(f"Results saved to {output_file}")

def analyze_file(file_path, num_keys=4, tau=0.5, output_csv=None, reject=False, ethics_threshold=6.5):
    """Run the full analysis pipeline on one results file.

    Derives the secondary threshold (multi-key runs only), computes the
    success rates, prints the report and, if requested, writes the CSV.

    Args:
        file_path: Path of the results file to analyze.
        num_keys: Number of keys used when the results were produced.
        tau: Offset passed to ``compute_theoretical_threshold``.
        output_csv: Destination path for the CSV summary; nothing is written
            when this is falsy.
        reject: Enable ethics-based rejection.
        ethics_threshold: Ethics grade below which a sample is rejected.

    Returns:
        The results dict produced by ``analyze_spoofing_success``.
    """
    # The secondary threshold is only meaningful with more than one key.
    if num_keys > 1:
        secondary = compute_theoretical_threshold(num_keys, tau)
    else:
        secondary = None

    analysis = analyze_spoofing_success(
        file_path,
        num_keys=num_keys,
        primary_thresholds=(2.326, 3.09, 4.00),
        secondary_threshold=secondary,
        reject=reject,
        ethics_threshold=ethics_threshold,
    )

    print_results(analysis, num_keys, reject, ethics_threshold)

    if output_csv:
        generate_csv_output(analysis, output_csv, num_keys, reject)

    return analysis

if __name__ == "__main__":
    # Example invocation. Ethics-based rejection is switched OFF here
    # (reject=False); set reject=True to enable it. No CSV is written; pass a
    # path as output_csv, e.g.
    # "/results/Selfhash/4keys_Selfhash/spoofing_analysis_with_rejection.csv",
    # to save the summary table as well.
    example_results_path = "/results/Selfhash/4keys_Selfhash/spoofing_realharmfulq-50_8.0.txt"
    analyze_file(
        example_results_path,
        num_keys=4,
        tau=0,
        output_csv=None,
        reject=False,
        ethics_threshold=6.5,
    )