From 831d233fda83b2ddff96b5e75b703404322e6488 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 29 Jun 2025 18:02:05 -0400
Subject: [PATCH 1/6] Add Benchmark speed test report

---
 Sources/DiarizationCLI/main.swift | 853 +++++++++++++++++++++++++++++-
 1 file changed, 840 insertions(+), 13 deletions(-)
diff --git a/Sources/DiarizationCLI/main.swift b/Sources/DiarizationCLI/main.swift
index bc78b28a9..15842ae72 100644
--- a/Sources/DiarizationCLI/main.swift
+++ b/Sources/DiarizationCLI/main.swift
@@ -43,6 +43,8 @@ struct DiarizationCLI {
                 benchmark    Run AMI SDM benchmark evaluation with real annotations
                 process      Process a single audio file
                 download     Download datasets for benchmarking
+                speedtest    Run end-to-end pipeline speed test
+                batchspeedtest Run batch speed test
                 help         Show this help message
 
             BENCHMARK OPTIONS:
@@ -55,7 +57,11 @@ struct DiarizationCLI {
                 --debug                 Enable debug mode
                 --output <file>         Output results to JSON file
                 --auto-download         Automatically download dataset if not found
-                
+                --speed-test            Enable speed testing with timing measurements
+                --speed-iterations <int> Number of speed test iterations [default: 3]
+                --speed-warmup <int>    Number of warmup runs for speed test [default: 1]
+                --detailed-timing       Show detailed per-component timing breakdown
+
             NOTE: Benchmark now uses real AMI manual annotations from Tests/ami_public_1.6.2/
                   If annotations are not found, falls back to simplified placeholder.
 
@@ -69,25 +75,72 @@ struct DiarizationCLI {
                 --dataset <name>     Dataset to download (ami-sdm, ami-ihm, all) [default: all]
                 --force             Force re-download even if files exist
 
+            SPEEDTEST OPTIONS:
+                <audio-file>         Audio file to test (.wav, .m4a, .mp3)
+                --iterations <int>   Number of test iterations [default: 5]
+                --warmup <int>       Number of warmup runs [default: 2]
+                --threshold <float>  Clustering threshold 0.0-1.0 [default: 0.7]
+                --min-duration-on <float>   Minimum speaker segment duration in seconds [default: 1.0]
+                --min-duration-off <float>  Minimum silence between speakers in seconds [default: 0.5]
+                --min-activity <float>      Minimum activity threshold in frames [default: 10.0]
+                --output <file>      Output results to JSON file
+                --debug              Enable debug mode
+                --detailed           Show detailed per-component timing
+
+            NOTE: Benchmark now uses real AMI manual annotations from Tests/ami_public_1.6.2/
+                  If annotations are not found, falls back to simplified placeholder.
+
+            BATCH SPEEDTEST OPTIONS:
+                --files <file1,file2,...>  Comma-separated list of audio files to test
+                --iterations <int>        Number of test iterations [default: 3]
+                --warmup <int>           Number of warmup runs [default: 1]
+                --threshold <float>       Clustering threshold 0.0-1.0 [default: 0.7]
+                --min-duration-on <float> Minimum speaker segment duration in seconds [default: 1.0]
+                --min-duration-off <float> Minimum silence between speakers in seconds [default: 0.5]
+                --min-activity <float>      Minimum activity threshold in frames [default: 10.0]
+                --debug                   Enable debug mode
+                --output <file>           Output results to JSON file
+                --detailed              Show detailed per-file timing
+
             EXAMPLES:
                 # Download AMI datasets
                 swift run fluidaudio download --dataset ami-sdm
-                
+
                 # Run AMI SDM benchmark with auto-download
                 swift run fluidaudio benchmark --auto-download
-                
+
                 # Run benchmark with custom threshold and save results
                 swift run fluidaudio benchmark --threshold 0.8 --output results.json
-                
+
+                # Run benchmark with speed testing enabled
+                swift run fluidaudio benchmark --speed-test --speed-iterations 5 --detailed-timing
+
+                # Run benchmark with both accuracy and speed testing
+                swift run fluidaudio benchmark --speed-test --threshold 0.7 --output comprehensive_results.json
+
                 # Process a single audio file
                 swift run fluidaudio process meeting.wav
-                
+
                 # Process file with custom settings
                 swift run fluidaudio process meeting.wav --threshold 0.6 --output output.json
+
+                # Run speed test on audio file
+                swift run fluidaudio speedtest meeting.wav
+
+                # Run speed test with custom iterations and detailed timing
+                swift run fluidaudio speedtest meeting.wav --iterations 10 --warmup 3 --detailed
+
+                # Run speed test with custom parameters and save results
+                swift run fluidaudio speedtest meeting.wav --threshold 0.8 --output speed_results.json
+
+                # Run batch speed test on multiple files
+                swift run fluidaudio batchspeedtest --files test1.wav,test2.wav --iterations 5 --warmup 2 --detailed
             """)
     }
 
     static func runBenchmark(arguments: [String]) async {
+        let benchmarkStartTime = Date()
+
         var dataset = "ami-sdm"
         var threshold: Float = 0.7
         var minDurationOn: Float = 1.0
@@ -189,6 +242,9 @@ struct DiarizationCLI {
             print("💡 Supported datasets: ami-sdm, ami-ihm")
             exit(1)
         }
+
+        let benchmarkElapsed = Date().timeIntervalSince(benchmarkStartTime)
+        print("\n⏱️ Total benchmark execution time: \(String(format: "%.1f", benchmarkElapsed)) seconds")
     }
 
     static func downloadDataset(arguments: [String]) async {
@@ -326,6 +382,560 @@ struct DiarizationCLI {
         }
     }
 
+    /// Runs the `speedtest` command: an end-to-end pipeline timing run on a single audio file.
+    ///
+    /// The audio is loaded once, diarized `--warmup` times untimed, then diarized `--iterations`
+    /// times while recording wall-clock time and real-time factor (processing time / audio length).
+    /// - Parameter arguments: `<audio-file>` followed by options; a leading `--help`/`-h` prints usage.
+    static func runSpeedTest(arguments: [String]) async {
+        guard let audioFile = arguments.first else {
+            print("❌ No audio file specified")
+            printUsage()
+            exit(1)
+        }
+
+        // Check for help flags first
+        if audioFile == "--help" || audioFile == "-h" {
+            printUsage()
+            return
+        }
+
+        var iterations = 5
+        var warmupRuns = 2
+        var threshold: Float = 0.7
+        var minDurationOn: Float = 1.0
+        var minDurationOff: Float = 0.5
+        var minActivityThreshold: Float = 10.0
+        var debugMode = false
+        var outputFile: String?
+        var detailedTiming = false
+
+        // Parse remaining arguments
+        var i = 1
+        while i < arguments.count {
+            switch arguments[i] {
+            case "--iterations":
+                if i + 1 < arguments.count {
+                    iterations = max(1, Int(arguments[i + 1]) ?? 5)  // `1...iterations` below needs >= 1
+                    i += 1
+                }
+            case "--warmup":
+                if i + 1 < arguments.count {
+                    warmupRuns = max(0, Int(arguments[i + 1]) ?? 2)  // 0 disables warmup
+                    i += 1
+                }
+            case "--threshold":
+                if i + 1 < arguments.count {
+                    threshold = Float(arguments[i + 1]) ?? 0.7
+                    i += 1
+                }
+            case "--min-duration-on":
+                if i + 1 < arguments.count {
+                    minDurationOn = Float(arguments[i + 1]) ?? 1.0
+                    i += 1
+                }
+            case "--min-duration-off":
+                if i + 1 < arguments.count {
+                    minDurationOff = Float(arguments[i + 1]) ?? 0.5
+                    i += 1
+                }
+            case "--min-activity":
+                if i + 1 < arguments.count {
+                    minActivityThreshold = Float(arguments[i + 1]) ?? 10.0
+                    i += 1
+                }
+            case "--debug":
+                debugMode = true
+            case "--output":
+                if i + 1 < arguments.count {
+                    outputFile = arguments[i + 1]
+                    i += 1
+                }
+            case "--detailed":
+                detailedTiming = true
+            default:
+                print("⚠️ Unknown option: \(arguments[i])")
+            }
+            i += 1
+        }
+
+        print("⚡ Starting End-to-End Pipeline Speed Test")
+        print("   Audio file: \(audioFile)")
+        print("   Iterations: \(iterations)")
+        print("   Warmup runs: \(warmupRuns)")
+        print("   Clustering threshold: \(threshold)")
+        print("   Min duration on: \(minDurationOn)s")
+        print("   Min duration off: \(minDurationOff)s")
+        print("   Min activity threshold: \(minActivityThreshold)")
+        print("   Debug mode: \(debugMode ? "enabled" : "disabled")")
+        print("   Detailed timing: \(detailedTiming ? "enabled" : "disabled")")
+
+        let config = DiarizerConfig(
+            clusteringThreshold: threshold,
+            minDurationOn: minDurationOn,
+            minDurationOff: minDurationOff,
+            minActivityThreshold: minActivityThreshold,
+            debugMode: debugMode
+        )
+
+        let manager = DiarizerManager(config: config)
+        do {
+            try await manager.initialize()
+            print("✅ Models initialized successfully")
+        } catch {
+            print("❌ Failed to initialize models: \(error)")
+            print("💡 Make sure you have network access for model downloads")
+            exit(1)
+        }
+
+        // Load audio file once
+        let audioSamples: [Float]
+        do {
+            audioSamples = try await loadAudioFile(path: audioFile)
+        } catch {
+            print("❌ Failed to load audio file: \(error)")
+            exit(1)
+        }
+        let duration = Float(audioSamples.count) / 16000.0
+        guard duration > 0 else {
+            print("❌ Audio file contains no samples: \(audioFile)")
+            exit(1)
+        }
+        print("✅ Loaded audio: \(audioSamples.count) samples (\(String(format: "%.1f", duration))s)")
+
+        // Run warmup iterations (stride yields nothing for 0 runs, where `1...0` would trap)
+        print("\n🔥 Running \(warmupRuns) warmup iterations...")
+        for i in stride(from: 1, through: warmupRuns, by: 1) {
+            print("   Warmup \(i)/\(warmupRuns)...")
+            do {
+                _ = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
+            } catch {
+                print("   ⚠️ Warmup \(i) failed: \(error)")
+            }
+        }
+
+        // Run actual speed test iterations
+        print("\n⚡ Running \(iterations) speed test iterations...")
+        var timingResults: [SpeedTestResult] = []
+
+        for i in 1...iterations {
+            print("   Iteration \(i)/\(iterations)...")
+            let startTime = Date()
+            do {
+                let result = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
+                let processingTime = Date().timeIntervalSince(startTime)
+                let rtf = Float(processingTime) / duration
+                let speedResult = SpeedTestResult(
+                    iteration: i,
+                    processingTimeSeconds: processingTime,
+                    realTimeFactor: rtf,
+                    speakerCount: result.speakerDatabase.count,
+                    segmentCount: result.segments.count,
+                    audioDurationSeconds: duration
+                )
+                timingResults.append(speedResult)
+                print("     ✅ RTF: \(String(format: "%.2f", rtf))x, \(result.speakerDatabase.count) speakers, \(result.segments.count) segments")
+            } catch {
+                print("     ❌ Iteration \(i) failed: \(error)")
+            }
+        }
+
+        guard !timingResults.isEmpty else {
+            print("❌ No successful iterations completed")
+            exit(1)
+        }
+
+        // Calculate statistics
+        let avgRTF = timingResults.map { $0.realTimeFactor }.reduce(0, +) / Float(timingResults.count)
+        let avgProcessingTime = timingResults.map { $0.processingTimeSeconds }.reduce(0, +) / Double(timingResults.count)
+        let minRTF = timingResults.map { $0.realTimeFactor }.min() ?? 0
+        let maxRTF = timingResults.map { $0.realTimeFactor }.max() ?? 0
+        let stdDevRTF = calculateStandardDeviation(timingResults.map { $0.realTimeFactor })
+
+        printSpeedTestResults(
+            timingResults,
+            avgRTF: avgRTF,
+            avgProcessingTime: avgProcessingTime,
+            minRTF: minRTF,
+            maxRTF: maxRTF,
+            stdDevRTF: stdDevRTF,
+            audioFile: audioFile,
+            detailed: detailedTiming
+        )
+
+        // Save results if requested
+        if let outputFile = outputFile {
+            let summary = SpeedTestSummary(
+                audioFile: audioFile,
+                iterations: iterations,
+                warmupRuns: warmupRuns,
+                averageRTF: avgRTF,
+                averageProcessingTime: avgProcessingTime,
+                minRTF: minRTF,
+                maxRTF: maxRTF,
+                stdDevRTF: stdDevRTF,
+                results: timingResults,
+                config: config
+            )
+
+            do {
+                try await saveSpeedTestResults(summary, to: outputFile)
+                print("💾 Speed test results saved to: \(outputFile)")
+            } catch {
+                print("⚠️ Failed to save results: \(error)")
+            }
+        }
+    }
+
+    /// Runs the `batchspeedtest` command: times the diarization pipeline over several audio files.
+    ///
+    /// Files come from `--files` (space- or comma-separated) or bare positional arguments; missing,
+    /// unreadable or empty files are skipped. Per-file and overall RTF statistics are then printed.
+    /// - Parameter arguments: Command-line options following the command name.
+    static func runBatchSpeedTest(arguments: [String]) async {
+        // Check for help flags first
+        if arguments.contains("--help") || arguments.contains("-h") {
+            printUsage()
+            return
+        }
+
+        var audioFiles: [String] = []
+        var iterations = 3
+        var warmupRuns = 1
+        var threshold: Float = 0.7
+        var minDurationOn: Float = 1.0
+        var minDurationOff: Float = 0.5
+        var minActivityThreshold: Float = 10.0
+        var debugMode = false
+        var outputFile: String?
+        var detailedTiming = false
+
+        // Parse arguments
+        var i = 0
+        while i < arguments.count {
+            switch arguments[i] {
+            case "--files":
+                // Collect every value up to the next option; each value may be a comma-separated list
+                i += 1
+                while i < arguments.count && !arguments[i].hasPrefix("--") {
+                    audioFiles.append(contentsOf: arguments[i].split(separator: ",").map(String.init))
+                    i += 1
+                }
+                continue
+            case "--iterations":
+                if i + 1 < arguments.count {
+                    iterations = max(1, Int(arguments[i + 1]) ?? 3)  // `1...iterations` below needs >= 1
+                    i += 1
+                }
+            case "--warmup":
+                if i + 1 < arguments.count {
+                    warmupRuns = max(0, Int(arguments[i + 1]) ?? 1)  // 0 disables warmup
+                    i += 1
+                }
+            case "--threshold":
+                if i + 1 < arguments.count {
+                    threshold = Float(arguments[i + 1]) ?? 0.7
+                    i += 1
+                }
+            case "--min-duration-on":
+                if i + 1 < arguments.count {
+                    minDurationOn = Float(arguments[i + 1]) ?? 1.0
+                    i += 1
+                }
+            case "--min-duration-off":
+                if i + 1 < arguments.count {
+                    minDurationOff = Float(arguments[i + 1]) ?? 0.5
+                    i += 1
+                }
+            case "--min-activity":
+                if i + 1 < arguments.count {
+                    minActivityThreshold = Float(arguments[i + 1]) ?? 10.0
+                    i += 1
+                }
+            case "--debug":
+                debugMode = true
+            case "--output":
+                if i + 1 < arguments.count {
+                    outputFile = arguments[i + 1]
+                    i += 1
+                }
+            case "--detailed":
+                detailedTiming = true
+            default:
+                if !arguments[i].hasPrefix("--") {
+                    audioFiles.append(contentsOf: arguments[i].split(separator: ",").map(String.init))
+                } else {
+                    print("⚠️ Unknown option: \(arguments[i])")
+                }
+            }
+            i += 1
+        }
+
+        // If no files specified, use default test files
+        if audioFiles.isEmpty {
+            print("📁 No audio files specified, using default test files...")
+            audioFiles = ["test1.wav", "test2.wav"] // Placeholder
+        }
+
+        print("⚡ Starting Batch Speed Test")
+        print("   Audio files: \(audioFiles.count)")
+        print("   Iterations per file: \(iterations)")
+        print("   Warmup runs: \(warmupRuns)")
+        print("   Clustering threshold: \(threshold)")
+        print("   Min duration on: \(minDurationOn)s")
+        print("   Min duration off: \(minDurationOff)s")
+        print("   Min activity threshold: \(minActivityThreshold)")
+
+        let config = DiarizerConfig(
+            clusteringThreshold: threshold,
+            minDurationOn: minDurationOn,
+            minDurationOff: minDurationOff,
+            minActivityThreshold: minActivityThreshold,
+            debugMode: debugMode
+        )
+
+        let manager = DiarizerManager(config: config)
+        do {
+            try await manager.initialize()
+            print("✅ Models initialized successfully")
+        } catch {
+            print("❌ Failed to initialize models: \(error)")
+            exit(1)
+        }
+
+        var allResults: [BatchSpeedTestResult] = []
+        var totalProcessingTime: Double = 0
+        var totalAudioDuration: Float = 0
+
+        for (fileIndex, audioFile) in audioFiles.enumerated() {
+            print("\n📁 Testing file \(fileIndex + 1)/\(audioFiles.count): \(audioFile)")
+
+            guard FileManager.default.fileExists(atPath: audioFile) else {
+                print("   ❌ File not found: \(audioFile)")
+                continue
+            }
+
+            let audioSamples: [Float]
+            do {
+                audioSamples = try await loadAudioFile(path: audioFile)
+            } catch {
+                print("   ❌ Failed to load audio file: \(error)")
+                continue
+            }
+            let duration = Float(audioSamples.count) / 16000.0
+            guard duration > 0 else {
+                print("   ❌ Audio file contains no samples: \(audioFile)")
+                continue
+            }
+            print("   ✅ Loaded audio: \(String(format: "%.1f", duration))s")
+            totalAudioDuration += duration
+
+            // Run warmup iterations (stride yields nothing for 0 runs, where `1...0` would trap)
+            for i in stride(from: 1, through: warmupRuns, by: 1) {
+                print("   🔥 Warmup \(i)/\(warmupRuns)...")
+                do {
+                    _ = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
+                } catch {
+                    print("   ⚠️ Warmup \(i) failed: \(error)")
+                }
+            }
+
+            // Run speed test iterations
+            var fileResults: [SpeedTestResult] = []
+
+            for i in 1...iterations {
+                print("   ⚡ Iteration \(i)/\(iterations)...")
+                let startTime = Date()
+                do {
+                    let result = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
+                    let processingTime = Date().timeIntervalSince(startTime)
+                    let rtf = Float(processingTime) / duration
+                    totalProcessingTime += processingTime
+                    let speedResult = SpeedTestResult(
+                        iteration: i,
+                        processingTimeSeconds: processingTime,
+                        realTimeFactor: rtf,
+                        speakerCount: result.speakerDatabase.count,
+                        segmentCount: result.segments.count,
+                        audioDurationSeconds: duration
+                    )
+                    fileResults.append(speedResult)
+                    print("     ✅ RTF: \(String(format: "%.2f", rtf))x")
+                } catch {
+                    print("     ❌ Iteration \(i) failed: \(error)")
+                }
+            }
+
+            guard !fileResults.isEmpty else { continue }
+            let avgRTF = fileResults.map { $0.realTimeFactor }.reduce(0, +) / Float(fileResults.count)
+            let avgProcessingTime = fileResults.map { $0.processingTimeSeconds }.reduce(0, +) / Double(fileResults.count)
+            let minRTF = fileResults.map { $0.realTimeFactor }.min() ?? 0
+            let maxRTF = fileResults.map { $0.realTimeFactor }.max() ?? 0
+            let stdDevRTF = calculateStandardDeviation(fileResults.map { $0.realTimeFactor })
+
+            let batchResult = BatchSpeedTestResult(
+                audioFile: audioFile,
+                averageRTF: avgRTF,
+                averageProcessingTime: avgProcessingTime,
+                minRTF: minRTF,
+                maxRTF: maxRTF,
+                stdDevRTF: stdDevRTF,
+                results: fileResults
+            )
+
+            allResults.append(batchResult)
+        }
+
+        guard !allResults.isEmpty else {
+            print("❌ No successful tests completed")
+            exit(1)
+        }
+
+        // Calculate overall statistics
+        let overallAvgRTF = allResults.map { $0.averageRTF }.reduce(0, +) / Float(allResults.count)
+        let overallAvgProcessingTime = allResults.map { $0.averageProcessingTime }.reduce(0, +) / Double(allResults.count)
+        let overallMinRTF = allResults.map { $0.minRTF }.min() ?? 0
+        let overallMaxRTF = allResults.map { $0.maxRTF }.max() ?? 0
+        let overallStdDevRTF = calculateStandardDeviation(allResults.map { $0.averageRTF })
+
+        printBatchSpeedTestResults(
+            allResults,
+            overallAvgRTF: overallAvgRTF,
+            overallAvgProcessingTime: overallAvgProcessingTime,
+            overallMinRTF: overallMinRTF,
+            overallMaxRTF: overallMaxRTF,
+            overallStdDevRTF: overallStdDevRTF,
+            totalProcessingTime: totalProcessingTime,
+            totalAudioDuration: totalAudioDuration,
+            detailed: detailedTiming
+        )
+
+        // Save results if requested
+        if let outputFile = outputFile {
+            let summary = BatchSpeedTestSummary(
+                audioFiles: audioFiles,
+                iterations: iterations,
+                warmupRuns: warmupRuns,
+                overallAverageRTF: overallAvgRTF,
+                overallAverageProcessingTime: overallAvgProcessingTime,
+                overallMinRTF: overallMinRTF,
+                overallMaxRTF: overallMaxRTF,
+                overallStdDevRTF: overallStdDevRTF,
+                totalProcessingTime: totalProcessingTime,
+                totalAudioDuration: totalAudioDuration,
+                results: allResults,
+                config: config
+            )
+
+            do {
+                try await saveBatchSpeedTestResults(summary, to: outputFile)
+                print("💾 Batch speed test results saved to: \(outputFile)")
+            } catch {
+                print("⚠️ Failed to save results: \(error)")
+            }
+        }
+    }
+
+    /// Prints the per-file table, overall statistics and performance rating of a batch speed test.
+    ///
+    /// Rows are listed fastest-first by average RTF. The Speakers/Segments columns are means over
+    /// each file's recorded iterations; `detailed` adds a fastest/slowest comparison at the end.
+    static func printBatchSpeedTestResults(
+        _ results: [BatchSpeedTestResult],
+        overallAvgRTF: Float,
+        overallAvgProcessingTime: Double,
+        overallMinRTF: Float,
+        overallMaxRTF: Float,
+        overallStdDevRTF: Float,
+        totalProcessingTime: Double,
+        totalAudioDuration: Float,
+        detailed: Bool
+    ) {
+        print("\n🏁 Batch Speed Test Results")
+        print(String(repeating: "=", count: 80))
+
+        // Print table header; the name column is 15 characters wide to match the padded rows
+        let rowSeparator = "├─────────────────┼────────┼────────────┼──────────┼──────────┼──────────┤"
+        print("│ File Name       │  RTF   │ Processing │ Speakers │ Segments │ Duration │")
+        print(rowSeparator)
+
+        // Print individual file results, fastest first
+        let sortedResults = results.sorted(by: { $0.averageRTF < $1.averageRTF })
+        for result in sortedResults {
+            let fileName = result.audioFile.split(separator: "/").last.map(String.init) ?? result.audioFile
+            let fileNameStr = String(fileName.prefix(15)).padding(toLength: 15, withPad: " ", startingAt: 0)
+            let rtfStr = String(format: "%.2fx", result.averageRTF).padding(toLength: 6, withPad: " ", startingAt: 0)
+            let procStr = String(format: "%.1fs", result.averageProcessingTime).padding(toLength: 10, withPad: " ", startingAt: 0)
+            // Average in floating point: integer division would drop the fraction that "%.1f" displays
+            let runCount = Float(max(result.results.count, 1))
+            let avgSpeakers = Float(result.results.reduce(0) { $0 + $1.speakerCount }) / runCount
+            let speakerStr = String(format: "%.1f", avgSpeakers).padding(toLength: 8, withPad: " ", startingAt: 0)
+            let avgSegments = Float(result.results.reduce(0) { $0 + $1.segmentCount }) / runCount
+            let segmentStr = String(format: "%.1f", avgSegments).padding(toLength: 8, withPad: " ", startingAt: 0)
+            let durationStr = String(format: "%.1fs", result.results.first?.audioDurationSeconds ?? 0).padding(toLength: 8, withPad: " ", startingAt: 0)
+            print("│ \(fileNameStr) │ \(rtfStr) │ \(procStr) │ \(speakerStr) │ \(segmentStr) │ \(durationStr) │")
+        }
+
+        // Print summary section
+        print(rowSeparator)
+
+        let avgRtfStr = String(format: "%.2fx", overallAvgRTF).padding(toLength: 6, withPad: " ", startingAt: 0)
+        let avgProcStr = String(format: "%.1fs", overallAvgProcessingTime).padding(toLength: 10, withPad: " ", startingAt: 0)
+        let totalDurationStr = String(format: "%.1fs", totalAudioDuration).padding(toLength: 8, withPad: " ", startingAt: 0)
+        print("│ OVERALL AVERAGE │ \(avgRtfStr) │ \(avgProcStr) │          │          │ \(totalDurationStr) │")
+        print("└─────────────────┴────────┴────────────┴──────────┴──────────┴──────────┘")
+
+        // Print overall statistics
+        print("\n📊 Overall Performance Statistics:")
+        print("   Files tested: \(results.count)")
+        print("   Total audio duration: \(String(format: "%.1f", totalAudioDuration))s")
+        print("   Total processing time: \(String(format: "%.1f", totalProcessingTime))s")
+        print("   Overall average RTF: \(String(format: "%.2f", overallAvgRTF))x")
+        print("   Overall min RTF: \(String(format: "%.2f", overallMinRTF))x")
+        print("   Overall max RTF: \(String(format: "%.2f", overallMaxRTF))x")
+        print("   Overall RTF Std Dev: \(String(format: "%.2f", overallStdDevRTF))x")
+
+        // Performance assessment
+        print("\n🎯 Overall Performance Assessment:")
+        if overallAvgRTF < 0.1 {
+            print("   🚀 EXCELLENT: Real-time factor < 0.1x (10x faster than real-time)")
+        } else if overallAvgRTF < 0.5 {
+            print("   ✅ VERY GOOD: Real-time factor < 0.5x (2x faster than real-time)")
+        } else if overallAvgRTF < 1.0 {
+            print("   👍 GOOD: Real-time factor < 1.0x (faster than real-time)")
+        } else if overallAvgRTF < 2.0 {
+            print("   ⚠️ MODERATE: Real-time factor < 2.0x (slower than real-time)")
+        } else {
+            print("   🐌 SLOW: Real-time factor >= 2.0x (significantly slower than real-time)")
+        }
+
+        // File-by-file analysis if detailed
+        if detailed {
+            print("\n🔍 File-by-File Analysis:")
+            print("   Fastest file: \(sortedResults.first?.audioFile.split(separator: "/").last.map(String.init) ?? "unknown") (\(String(format: "%.2f", sortedResults.first?.averageRTF ?? 0))x RTF)")
+            print("   Slowest file: \(sortedResults.last?.audioFile.split(separator: "/").last.map(String.init) ?? "unknown") (\(String(format: "%.2f", sortedResults.last?.averageRTF ?? 0))x RTF)")
+
+            let rtfRange = (sortedResults.last?.averageRTF ?? 0) - (sortedResults.first?.averageRTF ?? 0)
+            print("   RTF range: \(String(format: "%.2f", rtfRange))x")
+            if rtfRange > 0.5 {
+                print("   ⚠️ High variability between files - consider file-specific optimization")
+            } else if rtfRange > 0.2 {
+                print("   ⚠️ Moderate variability between files")
+            } else {
+                print("   ✅ Consistent performance across files")
+            }
+        }
+    }
+
+    /// Serializes `summary` as pretty-printed, key-sorted JSON (ISO 8601 dates) and writes it to `file`.
+    static func saveBatchSpeedTestResults(_ summary: BatchSpeedTestSummary, to file: String) async throws {
+        let jsonEncoder = JSONEncoder()
+        jsonEncoder.dateEncodingStrategy = .iso8601
+        jsonEncoder.outputFormatting = [.sortedKeys, .prettyPrinted]
+        let payload = try jsonEncoder.encode(summary)
+        try payload.write(to: URL(fileURLWithPath: file))
+    }
+
     // MARK: - AMI Benchmark Implementation
 
     static func runAMISDMBenchmark(
@@ -833,7 +1443,7 @@ struct DiarizationCLI {
         // Find optimal assignment using Hungarian Algorithm for globally optimal solution
         let predSpeakerArray = Array(predSpeakers).sorted()  // Consistent ordering
         let gtSpeakerArray = Array(gtSpeakers).sorted()      // Consistent ordering
-        
+
         // Build numerical overlap matrix for Hungarian algorithm
         var numericalOverlapMatrix: [[Int]] = []
         for predSpeaker in predSpeakerArray {
@@ -843,24 +1453,24 @@ struct DiarizationCLI {
             }
             numericalOverlapMatrix.append(row)
         }
-        
+
         // Convert overlap matrix to cost matrix (higher overlap = lower cost)
         let costMatrix = HungarianAlgorithm.overlapToCostMatrix(numericalOverlapMatrix)
-        
+
         // Solve optimal assignment
         let assignments = HungarianAlgorithm.minimumCostAssignment(costs: costMatrix)
-        
+
         // Create speaker mapping from Hungarian result
         var mapping: [String: String] = [:]
         var totalAssignmentCost: Float = 0
         var totalOverlap = 0
-        
+
         for (predIndex, gtIndex) in assignments.assignments.enumerated() {
             if gtIndex != -1 && predIndex < predSpeakerArray.count && gtIndex < gtSpeakerArray.count {
                 let predSpeaker = predSpeakerArray[predIndex]
                 let gtSpeaker = gtSpeakerArray[gtIndex]
                 let overlap = overlapMatrix[predSpeaker]![gtSpeaker]!
-                
+
                 if overlap > 0 {  // Only assign if there's actual overlap
                     mapping[predSpeaker] = gtSpeaker
                     totalOverlap += overlap
@@ -868,10 +1478,10 @@ struct DiarizationCLI {
                 }
             }
         }
-        
+
         totalAssignmentCost = assignments.totalCost
         print("🔍 HUNGARIAN RESULT: Total assignment cost: \(String(format: "%.1f", totalAssignmentCost)), Total overlap: \(totalOverlap) frames")
-        
+
         // Handle unassigned predicted speakers
         for predSpeaker in predSpeakerArray {
             if mapping[predSpeaker] == nil {
@@ -1317,6 +1927,135 @@ struct DiarizationCLI {
         }
         return embedding
     }
+
+    static func printSpeedTestResults(
+        _ results: [SpeedTestResult],
+        avgRTF: Float,
+        avgProcessingTime: Double,
+        minRTF: Float,
+        maxRTF: Float,
+        stdDevRTF: Float,
+        audioFile: String,
+        detailed: Bool
+    ) {
+        print("\n🏁 Speed Test Results")
+        let separator = String(repeating: "=", count: 75)
+        print("\(separator)")
+
+        // Print table header
+        print("│ Iteration │  RTF   │ Processing │ Speakers │ Segments │")
+        let headerSep = "├───────────┼────────┼────────────┼──────────┼──────────┤"
+        print("\(headerSep)")
+
+        // Print individual results
+        for result in results.sorted(by: { $0.iteration < $1.iteration }) {
+            let iterStr = String(result.iteration).padding(toLength: 9, withPad: " ", startingAt: 0)
+            let rtfStr = String(format: "%.2fx", result.realTimeFactor).padding(toLength: 6, withPad: " ", startingAt: 0)
+            let procStr = String(format: "%.1fs", result.processingTimeSeconds).padding(toLength: 10, withPad: " ", startingAt: 0)
+            let speakerStr = String(result.speakerCount).padding(toLength: 8, withPad: " ", startingAt: 0)
+            let segmentStr = String(result.segmentCount).padding(toLength: 8, withPad: " ", startingAt: 0)
+
+            print("│ \(iterStr) │ \(rtfStr) │ \(procStr) │ \(speakerStr) │ \(segmentStr) │")
+        }
+
+        // Print summary section
+        let midSep = "├───────────┼────────┼────────────┼──────────┼──────────┤"
+        print("\(midSep)")
+
+        let avgRtfStr = String(format: "%.2fx", avgRTF).padding(toLength: 6, withPad: " ", startingAt: 0)
+        let avgProcStr = String(format: "%.1fs", avgProcessingTime).padding(toLength: 10, withPad: " ", startingAt: 0)
+        let avgSpeakers = results.reduce(0) { $0 + $1.speakerCount } / results.count
+        let avgSpeakerStr = String(format: "%.1f", Float(avgSpeakers)).padding(toLength: 8, withPad: " ", startingAt: 0)
+        let avgSegments = results.reduce(0) { $0 + $1.segmentCount } / results.count
+        let avgSegmentStr = String(format: "%.1f", Float(avgSegments)).padding(toLength: 8, withPad: " ", startingAt: 0)
+
+        print("│ AVERAGE   │ \(avgRtfStr) │ \(avgProcStr) │ \(avgSpeakerStr) │ \(avgSegmentStr) │")
+        let bottomSep = "└───────────┴────────┴────────────┴──────────┴──────────┘"
+        print("\(bottomSep)")
+
+        // Print detailed statistics
+        print("\n📊 Performance Statistics:")
+        print("   Audio file: \(audioFile)")
+        print("   Audio duration: \(String(format: "%.1f", results.first?.audioDurationSeconds ?? 0))s")
+        print("   Iterations: \(results.count)")
+        print("   Average RTF: \(String(format: "%.2f", avgRTF))x")
+        print("   Min RTF: \(String(format: "%.2f", minRTF))x")
+        print("   Max RTF: \(String(format: "%.2f", maxRTF))x")
+        print("   RTF Std Dev: \(String(format: "%.2f", stdDevRTF))x")
+        print("   Average processing time: \(String(format: "%.1f", avgProcessingTime))s")
+        print("   Average speakers detected: \(String(format: "%.1f", Float(avgSpeakers)))")
+        print("   Average segments: \(String(format: "%.1f", Float(avgSegments)))")
+
+        // Performance assessment
+        print("\n🎯 Performance Assessment:")
+        if avgRTF < 0.1 {
+            print("   🚀 EXCELLENT: Real-time factor < 0.1x (10x faster than real-time)")
+        } else if avgRTF < 0.5 {
+            print("   ✅ VERY GOOD: Real-time factor < 0.5x (2x faster than real-time)")
+        } else if avgRTF < 1.0 {
+            print("   👍 GOOD: Real-time factor < 1.0x (faster than real-time)")
+        } else if avgRTF < 2.0 {
+            print("   ⚠️ MODERATE: Real-time factor < 2.0x (slower than real-time)")
+        } else {
+            print("   🐌 SLOW: Real-time factor >= 2.0x (significantly slower than real-time)")
+        }
+
+        // Research comparison
+        print("\n📝 Research Comparison:")
+        print("   Your Results:          \(String(format: "%.2f", avgRTF))x RTF")
+        print("   Pyannote (2021):       0.15x RTF (GPU)")
+        print("   EEND (2019):           0.8x RTF (CPU)")
+        print("   x-vector clustering:   1.2x RTF (CPU)")
+
+        // Detailed timing breakdown if requested
+        if detailed {
+            print("\n🔍 Detailed Timing Analysis:")
+            let sortedRTFs = results.map { $0.realTimeFactor }.sorted()
+            let medianRTF = sortedRTFs[sortedRTFs.count / 2]
+            let p95RTF = sortedRTFs[Int(Double(sortedRTFs.count) * 0.95)]
+            let p99RTF = sortedRTFs[Int(Double(sortedRTFs.count) * 0.99)]
+
+            print("   Median RTF: \(String(format: "%.2f", medianRTF))x")
+            print("   95th percentile RTF: \(String(format: "%.2f", p95RTF))x")
+            print("   99th percentile RTF: \(String(format: "%.2f", p99RTF))x")
+
+            // Consistency analysis
+            let consistency = (1.0 - stdDevRTF / avgRTF) * 100
+            print("   Consistency: \(String(format: "%.1f", consistency))%")
+
+            if consistency > 90 {
+                print("   🎯 EXCELLENT: Very consistent performance")
+            } else if consistency > 80 {
+                print("   ✅ GOOD: Consistent performance")
+            } else if consistency > 70 {
+                print("   ⚠️ MODERATE: Some performance variability")
+            } else {
+                print("   🚨 POOR: High performance variability")
+            }
+        }
+
+        // Optimization suggestions
+        print("\n💡 Optimization Suggestions:")
+        if avgRTF > 1.0 {
+            print("   • Consider reducing clustering threshold for faster processing")
+            print("   • Increase min-duration-on to reduce segment count")
+            print("   • Use GPU acceleration if available")
+            print("   • Consider batch processing for multiple files")
+        } else {
+            print("   • Performance is already excellent!")
+            print("   • Consider increasing accuracy parameters if needed")
+            print("   • Ready for production deployment")
+        }
+    }
+
+    static func saveSpeedTestResults(_ summary: SpeedTestSummary, to file: String) async throws {
+        let encoder = JSONEncoder()
+        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
+        encoder.dateEncodingStrategy = .iso8601
+
+        let data = try encoder.encode(summary)
+        try data.write(to: URL(fileURLWithPath: file))
+    }
 }
 
 // MARK: - Data Structures
@@ -1381,6 +2120,47 @@ struct BenchmarkSummary: Codable {
     }
 }
 
+struct SpeedTestResult: Codable {
+    let iteration: Int
+    let processingTimeSeconds: TimeInterval
+    let realTimeFactor: Float
+    let speakerCount: Int
+    let segmentCount: Int
+    let audioDurationSeconds: Float
+}
+
+struct SpeedTestSummary: Codable {
+    let audioFile: String
+    let iterations: Int
+    let warmupRuns: Int
+    let averageRTF: Float
+    let averageProcessingTime: Double
+    let minRTF: Float
+    let maxRTF: Float
+    let stdDevRTF: Float
+    let results: [SpeedTestResult]
+    let config: DiarizerConfig
+    let timestamp: Date
+
+    init(
+        audioFile: String, iterations: Int, warmupRuns: Int, averageRTF: Float,
+        averageProcessingTime: Double, minRTF: Float, maxRTF: Float, stdDevRTF: Float,
+        results: [SpeedTestResult], config: DiarizerConfig
+    ) {
+        self.audioFile = audioFile
+        self.iterations = iterations
+        self.warmupRuns = warmupRuns
+        self.averageRTF = averageRTF
+        self.averageProcessingTime = averageProcessingTime
+        self.minRTF = minRTF
+        self.maxRTF = maxRTF
+        self.stdDevRTF = stdDevRTF
+        self.results = results
+        self.config = config
+        self.timestamp = Date()
+    }
+}
+
 struct DiarizationMetrics {
     let der: Float
     let jer: Float
@@ -1670,3 +2450,50 @@ private class AMIMeetingsXMLDelegate: NSObject, XMLParserDelegate {
         parsingError = parseError
     }
 }
+
+struct BatchSpeedTestResult: Codable {
+    let audioFile: String
+    let averageRTF: Float
+    let averageProcessingTime: Double
+    let minRTF: Float
+    let maxRTF: Float
+    let stdDevRTF: Float
+    let results: [SpeedTestResult]
+}
+
+struct BatchSpeedTestSummary: Codable {
+    let audioFiles: [String]
+    let iterations: Int
+    let warmupRuns: Int
+    let overallAverageRTF: Float
+    let overallAverageProcessingTime: Double
+    let overallMinRTF: Float
+    let overallMaxRTF: Float
+    let overallStdDevRTF: Float
+    let totalProcessingTime: Double
+    let totalAudioDuration: Float
+    let results: [BatchSpeedTestResult]
+    let config: DiarizerConfig
+    let timestamp: Date
+
+    init(
+        audioFiles: [String], iterations: Int, warmupRuns: Int, overallAverageRTF: Float,
+        overallAverageProcessingTime: Double, overallMinRTF: Float, overallMaxRTF: Float,
+        overallStdDevRTF: Float, totalProcessingTime: Double, totalAudioDuration: Float,
+        results: [BatchSpeedTestResult], config: DiarizerConfig
+    ) {
+        self.audioFiles = audioFiles
+        self.iterations = iterations
+        self.warmupRuns = warmupRuns
+        self.overallAverageRTF = overallAverageRTF
+        self.overallAverageProcessingTime = overallAverageProcessingTime
+        self.overallMinRTF = overallMinRTF
+        self.overallMaxRTF = overallMaxRTF
+        self.overallStdDevRTF = overallStdDevRTF
+        self.totalProcessingTime = totalProcessingTime
+        self.totalAudioDuration = totalAudioDuration
+        self.results = results
+        self.config = config
+        self.timestamp = Date()
+    }
+}

From ddebd8fdaf8de787a46f6a6518086fc9c6e030ac Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 29 Jun 2025 18:49:08 -0400
Subject: [PATCH 2/6] remove redundant additional commands

---
 Sources/DiarizationCLI/main.swift | 822 ------------------------------
 1 file changed, 822 deletions(-)

diff --git a/Sources/DiarizationCLI/main.swift b/Sources/DiarizationCLI/main.swift
index 15842ae72..f8cb77826 100644
--- a/Sources/DiarizationCLI/main.swift
+++ b/Sources/DiarizationCLI/main.swift
@@ -43,8 +43,6 @@ struct DiarizationCLI {
                 benchmark    Run AMI SDM benchmark evaluation with real annotations
                 process      Process a single audio file
                 download     Download datasets for benchmarking
-                speedtest    Run end-to-end pipeline speed test
-                batchspeedtest Run batch speed test
                 help         Show this help message
 
             BENCHMARK OPTIONS:
@@ -57,10 +55,6 @@ struct DiarizationCLI {
                 --debug                 Enable debug mode
                 --output <file>         Output results to JSON file
                 --auto-download         Automatically download dataset if not found
-                --speed-test            Enable speed testing with timing measurements
-                --speed-iterations <int> Number of speed test iterations [default: 3]
-                --speed-warmup <int>    Number of warmup runs for speed test [default: 1]
-                --detailed-timing       Show detailed per-component timing breakdown
 
             NOTE: Benchmark now uses real AMI manual annotations from Tests/ami_public_1.6.2/
                   If annotations are not found, falls back to simplified placeholder.
@@ -75,33 +69,6 @@ struct DiarizationCLI {
                 --dataset <name>     Dataset to download (ami-sdm, ami-ihm, all) [default: all]
                 --force             Force re-download even if files exist
 
-            SPEEDTEST OPTIONS:
-                <audio-file>         Audio file to test (.wav, .m4a, .mp3)
-                --iterations <int>   Number of test iterations [default: 5]
-                --warmup <int>       Number of warmup runs [default: 2]
-                --threshold <float>  Clustering threshold 0.0-1.0 [default: 0.7]
-                --min-duration-on <float>   Minimum speaker segment duration in seconds [default: 1.0]
-                --min-duration-off <float>  Minimum silence between speakers in seconds [default: 0.5]
-                --min-activity <float>      Minimum activity threshold in frames [default: 10.0]
-                --output <file>      Output results to JSON file
-                --debug              Enable debug mode
-                --detailed           Show detailed per-component timing
-
-            NOTE: Benchmark now uses real AMI manual annotations from Tests/ami_public_1.6.2/
-                  If annotations are not found, falls back to simplified placeholder.
-
-            BATCH SPEEDTEST OPTIONS:
-                --files <file1,file2,...>  Comma-separated list of audio files to test
-                --iterations <int>        Number of test iterations [default: 3]
-                --warmup <int>           Number of warmup runs [default: 1]
-                --threshold <float>       Clustering threshold 0.0-1.0 [default: 0.7]
-                --min-duration-on <float> Minimum speaker segment duration in seconds [default: 1.0]
-                --min-duration-off <float> Minimum silence between speakers in seconds [default: 0.5]
-                --min-activity <float>      Minimum activity threshold in frames [default: 10.0]
-                --debug                   Enable debug mode
-                --output <file>           Output results to JSON file
-                --detailed              Show detailed per-file timing
-
             EXAMPLES:
                 # Download AMI datasets
                 swift run fluidaudio download --dataset ami-sdm
@@ -112,29 +79,11 @@ struct DiarizationCLI {
                 # Run benchmark with custom threshold and save results
                 swift run fluidaudio benchmark --threshold 0.8 --output results.json
 
-                # Run benchmark with speed testing enabled
-                swift run fluidaudio benchmark --speed-test --speed-iterations 5 --detailed-timing
-
-                # Run benchmark with both accuracy and speed testing
-                swift run fluidaudio benchmark --speed-test --threshold 0.7 --output comprehensive_results.json
-
                 # Process a single audio file
                 swift run fluidaudio process meeting.wav
 
                 # Process file with custom settings
                 swift run fluidaudio process meeting.wav --threshold 0.6 --output output.json
-
-                # Run speed test on audio file
-                swift run fluidaudio speedtest meeting.wav
-
-                # Run speed test with custom iterations and detailed timing
-                swift run fluidaudio speedtest meeting.wav --iterations 10 --warmup 3 --detailed
-
-                # Run speed test with custom parameters and save results
-                swift run fluidaudio speedtest meeting.wav --threshold 0.8 --output speed_results.json
-
-                # Run batch speed test on multiple files
-                swift run fluidaudio batchspeedtest --files test1.wav,test2.wav --iterations 5 --warmup 2 --detailed
             """)
     }
 
@@ -382,560 +331,6 @@ struct DiarizationCLI {
         }
     }
 
-    static func runSpeedTest(arguments: [String]) async {
-        guard !arguments.isEmpty else {
-            print("❌ No audio file specified")
-            printUsage()
-            exit(1)
-        }
-
-        let audioFile = arguments[0]
-
-        // Check for help flags first
-        if audioFile == "--help" || audioFile == "-h" {
-            printUsage()
-            return
-        }
-
-        var iterations = 5
-        var warmupRuns = 2
-        var threshold: Float = 0.7
-        var minDurationOn: Float = 1.0
-        var minDurationOff: Float = 0.5
-        var minActivityThreshold: Float = 10.0
-        var debugMode = false
-        var outputFile: String?
-        var detailedTiming = false
-
-        // Parse remaining arguments
-        var i = 1
-        while i < arguments.count {
-            switch arguments[i] {
-            case "--iterations":
-                if i + 1 < arguments.count {
-                    iterations = Int(arguments[i + 1]) ?? 5
-                    i += 1
-                }
-            case "--warmup":
-                if i + 1 < arguments.count {
-                    warmupRuns = Int(arguments[i + 1]) ?? 2
-                    i += 1
-                }
-            case "--threshold":
-                if i + 1 < arguments.count {
-                    threshold = Float(arguments[i + 1]) ?? 0.7
-                    i += 1
-                }
-            case "--min-duration-on":
-                if i + 1 < arguments.count {
-                    minDurationOn = Float(arguments[i + 1]) ?? 1.0
-                    i += 1
-                }
-            case "--min-duration-off":
-                if i + 1 < arguments.count {
-                    minDurationOff = Float(arguments[i + 1]) ?? 0.5
-                    i += 1
-                }
-            case "--min-activity":
-                if i + 1 < arguments.count {
-                    minActivityThreshold = Float(arguments[i + 1]) ?? 10.0
-                    i += 1
-                }
-            case "--debug":
-                debugMode = true
-            case "--output":
-                if i + 1 < arguments.count {
-                    outputFile = arguments[i + 1]
-                    i += 1
-                }
-            case "--detailed":
-                detailedTiming = true
-            default:
-                print("⚠️ Unknown option: \(arguments[i])")
-            }
-            i += 1
-        }
-
-        print("⚡ Starting End-to-End Pipeline Speed Test")
-        print("   Audio file: \(audioFile)")
-        print("   Iterations: \(iterations)")
-        print("   Warmup runs: \(warmupRuns)")
-        print("   Clustering threshold: \(threshold)")
-        print("   Min duration on: \(minDurationOn)s")
-        print("   Min duration off: \(minDurationOff)s")
-        print("   Min activity threshold: \(minActivityThreshold)")
-        print("   Debug mode: \(debugMode ? "enabled" : "disabled")")
-        print("   Detailed timing: \(detailedTiming ? "enabled" : "disabled")")
-
-        let config = DiarizerConfig(
-            clusteringThreshold: threshold,
-            minDurationOn: minDurationOn,
-            minDurationOff: minDurationOff,
-            minActivityThreshold: minActivityThreshold,
-            debugMode: debugMode
-        )
-
-        let manager = DiarizerManager(config: config)
-
-        do {
-            try await manager.initialize()
-            print("✅ Models initialized successfully")
-        } catch {
-            print("❌ Failed to initialize models: \(error)")
-            print("💡 Make sure you have network access for model downloads")
-            exit(1)
-        }
-
-        // Load audio file once
-        let audioSamples: [Float]
-        do {
-            audioSamples = try await loadAudioFile(path: audioFile)
-            let duration = Float(audioSamples.count) / 16000.0
-            print("✅ Loaded audio: \(audioSamples.count) samples (\(String(format: "%.1f", duration))s)")
-        } catch {
-            print("❌ Failed to load audio file: \(error)")
-            exit(1)
-        }
-
-        // Run warmup iterations
-        print("\n🔥 Running \(warmupRuns) warmup iterations...")
-        for i in 1...warmupRuns {
-            print("   Warmup \(i)/\(warmupRuns)...")
-            do {
-                let _ = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
-            } catch {
-                print("   ⚠️ Warmup \(i) failed: \(error)")
-            }
-        }
-
-        // Run actual speed test iterations
-        print("\n⚡ Running \(iterations) speed test iterations...")
-        var timingResults: [SpeedTestResult] = []
-        let duration = Float(audioSamples.count) / 16000.0
-
-        for i in 1...iterations {
-            print("   Iteration \(i)/\(iterations)...")
-
-            let startTime = Date()
-            do {
-                let result = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
-                let processingTime = Date().timeIntervalSince(startTime)
-                let rtf = Float(processingTime) / duration
-
-                let speedResult = SpeedTestResult(
-                    iteration: i,
-                    processingTimeSeconds: processingTime,
-                    realTimeFactor: rtf,
-                    speakerCount: result.speakerDatabase.count,
-                    segmentCount: result.segments.count,
-                    audioDurationSeconds: duration
-                )
-
-                timingResults.append(speedResult)
-                print("     ✅ RTF: \(String(format: "%.2f", rtf))x, \(result.speakerDatabase.count) speakers, \(result.segments.count) segments")
-
-            } catch {
-                print("     ❌ Iteration \(i) failed: \(error)")
-            }
-        }
-
-        guard !timingResults.isEmpty else {
-            print("❌ No successful iterations completed")
-            return
-        }
-
-        // Calculate statistics
-        let avgRTF = timingResults.map { $0.realTimeFactor }.reduce(0, +) / Float(timingResults.count)
-        let avgProcessingTime = timingResults.map { $0.processingTimeSeconds }.reduce(0, +) / Double(timingResults.count)
-        let minRTF = timingResults.map { $0.realTimeFactor }.min()!
-        let maxRTF = timingResults.map { $0.realTimeFactor }.max()!
-        let stdDevRTF = calculateStandardDeviation(timingResults.map { $0.realTimeFactor })
-
-        // Print results
-        printSpeedTestResults(
-            timingResults,
-            avgRTF: avgRTF,
-            avgProcessingTime: avgProcessingTime,
-            minRTF: minRTF,
-            maxRTF: maxRTF,
-            stdDevRTF: stdDevRTF,
-            audioFile: audioFile,
-            detailed: detailedTiming
-        )
-
-        // Save results if requested
-        if let outputFile = outputFile {
-            let summary = SpeedTestSummary(
-                audioFile: audioFile,
-                iterations: iterations,
-                warmupRuns: warmupRuns,
-                averageRTF: avgRTF,
-                averageProcessingTime: avgProcessingTime,
-                minRTF: minRTF,
-                maxRTF: maxRTF,
-                stdDevRTF: stdDevRTF,
-                results: timingResults,
-                config: config
-            )
-
-            do {
-                try await saveSpeedTestResults(summary, to: outputFile)
-                print("💾 Speed test results saved to: \(outputFile)")
-            } catch {
-                print("⚠️ Failed to save results: \(error)")
-            }
-        }
-    }
-
-    static func runBatchSpeedTest(arguments: [String]) async {
-        // Check for help flags first
-        if arguments.contains("--help") || arguments.contains("-h") {
-            printUsage()
-            return
-        }
-
-        var audioFiles: [String] = []
-        var iterations = 3
-        var warmupRuns = 1
-        var threshold: Float = 0.7
-        var minDurationOn: Float = 1.0
-        var minDurationOff: Float = 0.5
-        var minActivityThreshold: Float = 10.0
-        var debugMode = false
-        var outputFile: String?
-        var detailedTiming = false
-
-        // Parse arguments
-        var i = 0
-        while i < arguments.count {
-            switch arguments[i] {
-            case "--files":
-                // Collect all file paths until next option
-                i += 1
-                while i < arguments.count && !arguments[i].hasPrefix("--") {
-                    audioFiles.append(arguments[i])
-                    i += 1 }
-                continue
-            case "--iterations":
-                if i + 1 < arguments.count {
-                    iterations = Int(arguments[i + 1]) ?? 3
-                    i += 1
-                }
-            case "--warmup":
-                if i + 1 < arguments.count {
-                    warmupRuns = Int(arguments[i + 1]) ?? 1
-                    i += 1
-                }
-            case "--threshold":
-                if i + 1 < arguments.count {
-                    threshold = Float(arguments[i + 1]) ?? 0.7
-                    i += 1
-                }
-            case "--min-duration-on":
-                if i + 1 < arguments.count {
-                    minDurationOn = Float(arguments[i + 1]) ?? 1.0
-                    i += 1
-                }
-            case "--min-duration-off":
-                if i + 1 < arguments.count {
-                    minDurationOff = Float(arguments[i + 1]) ?? 0.5
-                    i += 1
-                }
-            case "--min-activity":
-                if i + 1 < arguments.count {
-                    minActivityThreshold = Float(arguments[i + 1]) ?? 10.0
-                    i += 1
-                }
-            case "--debug":
-                debugMode = true
-            case "--output":
-                if i + 1 < arguments.count {
-                    outputFile = arguments[i + 1]
-                    i += 1
-                }
-            case "--detailed":
-                detailedTiming = true
-            default:
-                if !arguments[i].hasPrefix("--") {
-                    audioFiles.append(arguments[i])
-                } else {
-                    print("⚠️ Unknown option: \(arguments[i])")
-                }
-            }
-            i += 1
-        }
-
-        // If no files specified, use default test files
-        if audioFiles.isEmpty {
-            print("📁 No audio files specified, using default test files...")
-            // You can add default test files here
-            audioFiles = ["test1.wav", "test2.wav"] // Placeholder
-        }
-
-        print("⚡ Starting Batch Speed Test")
-        print("   Audio files: \(audioFiles.count)")
-        print("   Iterations per file: \(iterations)")
-        print("   Warmup runs: \(warmupRuns)")
-        print("   Clustering threshold: \(threshold)")
-        print("   Min duration on: \(minDurationOn)s")
-        print("   Min duration off: \(minDurationOff)s")
-        print("   Min activity threshold: \(minActivityThreshold)")
-
-        let config = DiarizerConfig(
-            clusteringThreshold: threshold,
-            minDurationOn: minDurationOn,
-            minDurationOff: minDurationOff,
-            minActivityThreshold: minActivityThreshold,
-            debugMode: debugMode
-        )
-
-        let manager = DiarizerManager(config: config)
-
-        do {
-            try await manager.initialize()
-            print("✅ Models initialized successfully")
-        } catch {
-            print("❌ Failed to initialize models: \(error)")
-            exit(1)
-        }
-
-        var allResults: [BatchSpeedTestResult] = []
-        var totalProcessingTime: Double = 0
-        var totalAudioDuration: Float = 0
-
-        for (fileIndex, audioFile) in audioFiles.enumerated() {
-            print("\n📁 Testing file \(fileIndex + 1)/\(audioFiles.count): \(audioFile)")
-
-            guard FileManager.default.fileExists(atPath: audioFile) else {
-                print("   ❌ File not found: \(audioFile)")
-                continue
-            }
-
-            // Load audio file
-            let audioSamples: [Float]
-            do {
-                audioSamples = try await loadAudioFile(path: audioFile)
-                let duration = Float(audioSamples.count) / 16000.0
-                print("   ✅ Loaded audio: \(String(format: "%.1f", duration))s")
-                totalAudioDuration += duration
-            } catch {
-                print("   ❌ Failed to load audio file: \(error)")
-                continue
-            }
-
-            // Run warmup iterations
-            for i in 1...warmupRuns {
-                print("   🔥 Warmup \(i)/\(warmupRuns)...")
-                do {
-                    let _ = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
-                } catch {
-                    print("   ⚠️ Warmup \(i) failed: \(error)")
-                }
-            }
-
-            // Run speed test iterations
-            var fileResults: [SpeedTestResult] = []
-            let duration = Float(audioSamples.count) / 16000.0
-
-            for i in 1...iterations {
-                print("   ⚡ Iteration \(i)/\(iterations)...")
-
-                let startTime = Date()
-                do {
-                    let result = try await manager.performCompleteDiarization(audioSamples, sampleRate: 16000)
-                    let processingTime = Date().timeIntervalSince(startTime)
-                    let rtf = Float(processingTime) / duration
-                    totalProcessingTime += processingTime
-
-                    let speedResult = SpeedTestResult(
-                        iteration: i,
-                        processingTimeSeconds: processingTime,
-                        realTimeFactor: rtf,
-                        speakerCount: result.speakerDatabase.count,
-                        segmentCount: result.segments.count,
-                        audioDurationSeconds: duration
-                    )
-
-                    fileResults.append(speedResult)
-                    print("     ✅ RTF: \(String(format: "%.2f", rtf))x")
-
-                } catch {
-                    print("     ❌ Iteration \(i) failed: \(error)")
-                }
-            }
-
-            if !fileResults.isEmpty {
-                let avgRTF = fileResults.map { $0.realTimeFactor }.reduce(0, +) / Float(fileResults.count)
-                let avgProcessingTime = fileResults.map { $0.processingTimeSeconds }.reduce(0, +) / Double(fileResults.count)
-                let minRTF = fileResults.map { $0.realTimeFactor }.min()!
-                let maxRTF = fileResults.map { $0.realTimeFactor }.max()!
-                let stdDevRTF = calculateStandardDeviation(fileResults.map { $0.realTimeFactor })
-
-                let batchResult = BatchSpeedTestResult(
-                    audioFile: audioFile,
-                    averageRTF: avgRTF,
-                    averageProcessingTime: avgProcessingTime,
-                    minRTF: minRTF,
-                    maxRTF: maxRTF,
-                    stdDevRTF: stdDevRTF,
-                    results: fileResults
-                )
-
-                allResults.append(batchResult)
-            }
-        }
-
-        guard !allResults.isEmpty else {
-            print("❌ No successful tests completed")
-            return
-        }
-
-        // Calculate overall statistics
-        let overallAvgRTF = allResults.map { $0.averageRTF }.reduce(0, +) / Float(allResults.count)
-        let overallAvgProcessingTime = allResults.map { $0.averageProcessingTime }.reduce(0, +) / Double(allResults.count)
-        let overallMinRTF = allResults.map { $0.minRTF }.min()!
-        let overallMaxRTF = allResults.map { $0.maxRTF }.max()!
-        let overallStdDevRTF = calculateStandardDeviation(allResults.map { $0.averageRTF })
-
-        // Print batch results
-        printBatchSpeedTestResults(
-            allResults,
-            overallAvgRTF: overallAvgRTF,
-            overallAvgProcessingTime: overallAvgProcessingTime,
-            overallMinRTF: overallMinRTF,
-            overallMaxRTF: overallMaxRTF,
-            overallStdDevRTF: overallStdDevRTF,
-            totalProcessingTime: totalProcessingTime,
-            totalAudioDuration: totalAudioDuration,
-            detailed: detailedTiming
-        )
-
-        // Save results if requested
-        if let outputFile = outputFile {
-            let summary = BatchSpeedTestSummary(
-                audioFiles: audioFiles,
-                iterations: iterations,
-                warmupRuns: warmupRuns,
-                overallAverageRTF: overallAvgRTF,
-                overallAverageProcessingTime: overallAvgProcessingTime,
-                overallMinRTF: overallMinRTF,
-                overallMaxRTF: overallMaxRTF,
-                overallStdDevRTF: overallStdDevRTF,
-                totalProcessingTime: totalProcessingTime,
-                totalAudioDuration: totalAudioDuration,
-                results: allResults,
-                config: config
-            )
-
-            do {
-                try await saveBatchSpeedTestResults(summary, to: outputFile)
-                print("💾 Batch speed test results saved to: \(outputFile)")
-            } catch {
-                print("⚠️ Failed to save results: \(error)")
-            }
-        }
-    }
-
-    static func printBatchSpeedTestResults(
-        _ results: [BatchSpeedTestResult],
-        overallAvgRTF: Float,
-        overallAvgProcessingTime: Double,
-        overallMinRTF: Float,
-        overallMaxRTF: Float,
-        overallStdDevRTF: Float,
-        totalProcessingTime: Double,
-        totalAudioDuration: Float,
-        detailed: Bool
-    ) {
-        print("\n🏁 Batch Speed Test Results")
-        let separator = String(repeating: "=", count: 80)
-        print("\(separator)")
-
-        // Print table header
-        print("│ File Name        │  RTF   │ Processing │ Speakers │ Segments │ Duration │")
-        let headerSep = "├─────────────────┼────────┼────────────┼──────────┼──────────┼──────────┤"
-        print("\(headerSep)")
-
-        // Print individual file results
-        for result in results.sorted(by: { $0.averageRTF < $1.averageRTF }) {
-            let fileName = result.audioFile.split(separator: "/").last.map(String.init) ?? result.audioFile
-            let fileNameStr = String(fileName.prefix(15)).padding(toLength: 15, withPad: " ", startingAt: 0)
-            let rtfStr = String(format: "%.2fx", result.averageRTF).padding(toLength: 6, withPad: " ", startingAt: 0)
-            let procStr = String(format: "%.1fs", result.averageProcessingTime).padding(toLength: 10, withPad: " ", startingAt: 0)
-            let avgSpeakers = result.results.reduce(0) { $0 + $1.speakerCount } / result.results.count
-            let speakerStr = String(format: "%.1f", Float(avgSpeakers)).padding(toLength: 8, withPad: " ", startingAt: 0)
-            let avgSegments = result.results.reduce(0) { $0 + $1.segmentCount } / result.results.count
-            let segmentStr = String(format: "%.1f", Float(avgSegments)).padding(toLength: 8, withPad: " ", startingAt: 0)
-            let durationStr = String(format: "%.1fs", result.results.first?.audioDurationSeconds ?? 0).padding(toLength: 8, withPad: " ", startingAt: 0)
-
-            print("│ \(fileNameStr) │ \(rtfStr) │ \(procStr) │ \(speakerStr) │ \(segmentStr) │ \(durationStr) │")
-        }
-
-        // Print summary section
-        let midSep = "├─────────────────┼────────┼────────────┼──────────┼──────────┼──────────┤"
-        print("\(midSep)")
-
-        let avgRtfStr = String(format: "%.2fx", overallAvgRTF).padding(toLength: 6, withPad: " ", startingAt: 0)
-        let avgProcStr = String(format: "%.1fs", overallAvgProcessingTime).padding(toLength: 10, withPad: " ", startingAt: 0)
-        let totalDurationStr = String(format: "%.1fs", totalAudioDuration).padding(toLength: 8, withPad: " ", startingAt: 0)
-
-        print("│ OVERALL AVERAGE │ \(avgRtfStr) │ \(avgProcStr) │          │          │ \(totalDurationStr) │")
-        let bottomSep = "└─────────────────┴────────┴────────────┴──────────┴──────────┴──────────┘"
-        print("\(bottomSep)")
-
-        // Print overall statistics
-        print("\n📊 Overall Performance Statistics:")
-        print("   Files tested: \(results.count)")
-        print("   Total audio duration: \(String(format: "%.1f", totalAudioDuration))s")
-        print("   Total processing time: \(String(format: "%.1f", totalProcessingTime))s")
-        print("   Overall average RTF: \(String(format: "%.2f", overallAvgRTF))x")
-        print("   Overall min RTF: \(String(format: "%.2f", overallMinRTF))x")
-        print("   Overall max RTF: \(String(format: "%.2f", overallMaxRTF))x")
-        print("   Overall RTF Std Dev: \(String(format: "%.2f", overallStdDevRTF))x")
-
-        // Performance assessment
-        print("\n🎯 Overall Performance Assessment:")
-        if overallAvgRTF < 0.1 {
-            print("   🚀 EXCELLENT: Real-time factor < 0.1x (10x faster than real-time)")
-        } else if overallAvgRTF < 0.5 {
-            print("   ✅ VERY GOOD: Real-time factor < 0.5x (2x faster than real-time)")
-        } else if overallAvgRTF < 1.0 {
-            print("   👍 GOOD: Real-time factor < 1.0x (faster than real-time)")
-        } else if overallAvgRTF < 2.0 {
-            print("   ⚠️ MODERATE: Real-time factor < 2.0x (slower than real-time)")
-        } else {
-            print("   🐌 SLOW: Real-time factor >= 2.0x (significantly slower than real-time)")
-        }
-
-        // File-by-file analysis if detailed
-        if detailed {
-            print("\n🔍 File-by-File Analysis:")
-            let sortedResults = results.sorted(by: { $0.averageRTF < $1.averageRTF })
-            print("   Fastest file: \(sortedResults.first?.audioFile.split(separator: "/").last.map(String.init) ?? "unknown") (\(String(format: "%.2f", sortedResults.first?.averageRTF ?? 0))x RTF)")
-            print("   Slowest file: \(sortedResults.last?.audioFile.split(separator: "/").last.map(String.init) ?? "unknown") (\(String(format: "%.2f", sortedResults.last?.averageRTF ?? 0))x RTF)")
-
-            let rtfRange = (sortedResults.last?.averageRTF ?? 0) - (sortedResults.first?.averageRTF ?? 0)
-            print("   RTF range: \(String(format: "%.2f", rtfRange))x")
-
-            if rtfRange > 0.5 {
-                print("   ⚠️ High variability between files - consider file-specific optimization")
-            } else if rtfRange > 0.2 {
-                print("   ⚠️ Moderate variability between files")
-            } else {
-                print("   ✅ Consistent performance across files")
-            }
-        }
-    }
-
-    static func saveBatchSpeedTestResults(_ summary: BatchSpeedTestSummary, to file: String) async throws {
-        let encoder = JSONEncoder()
-        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
-        encoder.dateEncodingStrategy = .iso8601
-
-        let data = try encoder.encode(summary)
-        try data.write(to: URL(fileURLWithPath: file))
-    }
-
     // MARK: - AMI Benchmark Implementation
 
     static func runAMISDMBenchmark(
@@ -1927,135 +1322,6 @@ struct DiarizationCLI {
         }
         return embedding
     }
-
-    static func printSpeedTestResults(
-        _ results: [SpeedTestResult],
-        avgRTF: Float,
-        avgProcessingTime: Double,
-        minRTF: Float,
-        maxRTF: Float,
-        stdDevRTF: Float,
-        audioFile: String,
-        detailed: Bool
-    ) {
-        print("\n🏁 Speed Test Results")
-        let separator = String(repeating: "=", count: 75)
-        print("\(separator)")
-
-        // Print table header
-        print("│ Iteration │  RTF   │ Processing │ Speakers │ Segments │")
-        let headerSep = "├───────────┼────────┼────────────┼──────────┼──────────┤"
-        print("\(headerSep)")
-
-        // Print individual results
-        for result in results.sorted(by: { $0.iteration < $1.iteration }) {
-            let iterStr = String(result.iteration).padding(toLength: 9, withPad: " ", startingAt: 0)
-            let rtfStr = String(format: "%.2fx", result.realTimeFactor).padding(toLength: 6, withPad: " ", startingAt: 0)
-            let procStr = String(format: "%.1fs", result.processingTimeSeconds).padding(toLength: 10, withPad: " ", startingAt: 0)
-            let speakerStr = String(result.speakerCount).padding(toLength: 8, withPad: " ", startingAt: 0)
-            let segmentStr = String(result.segmentCount).padding(toLength: 8, withPad: " ", startingAt: 0)
-
-            print("│ \(iterStr) │ \(rtfStr) │ \(procStr) │ \(speakerStr) │ \(segmentStr) │")
-        }
-
-        // Print summary section
-        let midSep = "├───────────┼────────┼────────────┼──────────┼──────────┤"
-        print("\(midSep)")
-
-        let avgRtfStr = String(format: "%.2fx", avgRTF).padding(toLength: 6, withPad: " ", startingAt: 0)
-        let avgProcStr = String(format: "%.1fs", avgProcessingTime).padding(toLength: 10, withPad: " ", startingAt: 0)
-        let avgSpeakers = results.reduce(0) { $0 + $1.speakerCount } / results.count
-        let avgSpeakerStr = String(format: "%.1f", Float(avgSpeakers)).padding(toLength: 8, withPad: " ", startingAt: 0)
-        let avgSegments = results.reduce(0) { $0 + $1.segmentCount } / results.count
-        let avgSegmentStr = String(format: "%.1f", Float(avgSegments)).padding(toLength: 8, withPad: " ", startingAt: 0)
-
-        print("│ AVERAGE   │ \(avgRtfStr) │ \(avgProcStr) │ \(avgSpeakerStr) │ \(avgSegmentStr) │")
-        let bottomSep = "└───────────┴────────┴────────────┴──────────┴──────────┘"
-        print("\(bottomSep)")
-
-        // Print detailed statistics
-        print("\n📊 Performance Statistics:")
-        print("   Audio file: \(audioFile)")
-        print("   Audio duration: \(String(format: "%.1f", results.first?.audioDurationSeconds ?? 0))s")
-        print("   Iterations: \(results.count)")
-        print("   Average RTF: \(String(format: "%.2f", avgRTF))x")
-        print("   Min RTF: \(String(format: "%.2f", minRTF))x")
-        print("   Max RTF: \(String(format: "%.2f", maxRTF))x")
-        print("   RTF Std Dev: \(String(format: "%.2f", stdDevRTF))x")
-        print("   Average processing time: \(String(format: "%.1f", avgProcessingTime))s")
-        print("   Average speakers detected: \(String(format: "%.1f", Float(avgSpeakers)))")
-        print("   Average segments: \(String(format: "%.1f", Float(avgSegments)))")
-
-        // Performance assessment
-        print("\n🎯 Performance Assessment:")
-        if avgRTF < 0.1 {
-            print("   🚀 EXCELLENT: Real-time factor < 0.1x (10x faster than real-time)")
-        } else if avgRTF < 0.5 {
-            print("   ✅ VERY GOOD: Real-time factor < 0.5x (2x faster than real-time)")
-        } else if avgRTF < 1.0 {
-            print("   👍 GOOD: Real-time factor < 1.0x (faster than real-time)")
-        } else if avgRTF < 2.0 {
-            print("   ⚠️ MODERATE: Real-time factor < 2.0x (slower than real-time)")
-        } else {
-            print("   🐌 SLOW: Real-time factor >= 2.0x (significantly slower than real-time)")
-        }
-
-        // Research comparison
-        print("\n📝 Research Comparison:")
-        print("   Your Results:          \(String(format: "%.2f", avgRTF))x RTF")
-        print("   Pyannote (2021):       0.15x RTF (GPU)")
-        print("   EEND (2019):           0.8x RTF (CPU)")
-        print("   x-vector clustering:   1.2x RTF (CPU)")
-
-        // Detailed timing breakdown if requested
-        if detailed {
-            print("\n🔍 Detailed Timing Analysis:")
-            let sortedRTFs = results.map { $0.realTimeFactor }.sorted()
-            let medianRTF = sortedRTFs[sortedRTFs.count / 2]
-            let p95RTF = sortedRTFs[Int(Double(sortedRTFs.count) * 0.95)]
-            let p99RTF = sortedRTFs[Int(Double(sortedRTFs.count) * 0.99)]
-
-            print("   Median RTF: \(String(format: "%.2f", medianRTF))x")
-            print("   95th percentile RTF: \(String(format: "%.2f", p95RTF))x")
-            print("   99th percentile RTF: \(String(format: "%.2f", p99RTF))x")
-
-            // Consistency analysis
-            let consistency = (1.0 - stdDevRTF / avgRTF) * 100
-            print("   Consistency: \(String(format: "%.1f", consistency))%")
-
-            if consistency > 90 {
-                print("   🎯 EXCELLENT: Very consistent performance")
-            } else if consistency > 80 {
-                print("   ✅ GOOD: Consistent performance")
-            } else if consistency > 70 {
-                print("   ⚠️ MODERATE: Some performance variability")
-            } else {
-                print("   🚨 POOR: High performance variability")
-            }
-        }
-
-        // Optimization suggestions
-        print("\n💡 Optimization Suggestions:")
-        if avgRTF > 1.0 {
-            print("   • Consider reducing clustering threshold for faster processing")
-            print("   • Increase min-duration-on to reduce segment count")
-            print("   • Use GPU acceleration if available")
-            print("   • Consider batch processing for multiple files")
-        } else {
-            print("   • Performance is already excellent!")
-            print("   • Consider increasing accuracy parameters if needed")
-            print("   • Ready for production deployment")
-        }
-    }
-
-    static func saveSpeedTestResults(_ summary: SpeedTestSummary, to file: String) async throws {
-        let encoder = JSONEncoder()
-        encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
-        encoder.dateEncodingStrategy = .iso8601
-
-        let data = try encoder.encode(summary)
-        try data.write(to: URL(fileURLWithPath: file))
-    }
 }
 
 // MARK: - Data Structures
@@ -2120,47 +1386,6 @@ struct BenchmarkSummary: Codable {
     }
 }
 
-struct SpeedTestResult: Codable {
-    let iteration: Int
-    let processingTimeSeconds: TimeInterval
-    let realTimeFactor: Float
-    let speakerCount: Int
-    let segmentCount: Int
-    let audioDurationSeconds: Float
-}
-
-struct SpeedTestSummary: Codable {
-    let audioFile: String
-    let iterations: Int
-    let warmupRuns: Int
-    let averageRTF: Float
-    let averageProcessingTime: Double
-    let minRTF: Float
-    let maxRTF: Float
-    let stdDevRTF: Float
-    let results: [SpeedTestResult]
-    let config: DiarizerConfig
-    let timestamp: Date
-
-    init(
-        audioFile: String, iterations: Int, warmupRuns: Int, averageRTF: Float,
-        averageProcessingTime: Double, minRTF: Float, maxRTF: Float, stdDevRTF: Float,
-        results: [SpeedTestResult], config: DiarizerConfig
-    ) {
-        self.audioFile = audioFile
-        self.iterations = iterations
-        self.warmupRuns = warmupRuns
-        self.averageRTF = averageRTF
-        self.averageProcessingTime = averageProcessingTime
-        self.minRTF = minRTF
-        self.maxRTF = maxRTF
-        self.stdDevRTF = stdDevRTF
-        self.results = results
-        self.config = config
-        self.timestamp = Date()
-    }
-}
-
 struct DiarizationMetrics {
     let der: Float
     let jer: Float
@@ -2450,50 +1675,3 @@ private class AMIMeetingsXMLDelegate: NSObject, XMLParserDelegate {
         parsingError = parseError
     }
 }
-
-struct BatchSpeedTestResult: Codable {
-    let audioFile: String
-    let averageRTF: Float
-    let averageProcessingTime: Double
-    let minRTF: Float
-    let maxRTF: Float
-    let stdDevRTF: Float
-    let results: [SpeedTestResult]
-}
-
-struct BatchSpeedTestSummary: Codable {
-    let audioFiles: [String]
-    let iterations: Int
-    let warmupRuns: Int
-    let overallAverageRTF: Float
-    let overallAverageProcessingTime: Double
-    let overallMinRTF: Float
-    let overallMaxRTF: Float
-    let overallStdDevRTF: Float
-    let totalProcessingTime: Double
-    let totalAudioDuration: Float
-    let results: [BatchSpeedTestResult]
-    let config: DiarizerConfig
-    let timestamp: Date
-
-    init(
-        audioFiles: [String], iterations: Int, warmupRuns: Int, overallAverageRTF: Float,
-        overallAverageProcessingTime: Double, overallMinRTF: Float, overallMaxRTF: Float,
-        overallStdDevRTF: Float, totalProcessingTime: Double, totalAudioDuration: Float,
-        results: [BatchSpeedTestResult], config: DiarizerConfig
-    ) {
-        self.audioFiles = audioFiles
-        self.iterations = iterations
-        self.warmupRuns = warmupRuns
-        self.overallAverageRTF = overallAverageRTF
-        self.overallAverageProcessingTime = overallAverageProcessingTime
-        self.overallMinRTF = overallMinRTF
-        self.overallMaxRTF = overallMaxRTF
-        self.overallStdDevRTF = overallStdDevRTF
-        self.totalProcessingTime = totalProcessingTime
-        self.totalAudioDuration = totalAudioDuration
-        self.results = results
-        self.config = config
-        self.timestamp = Date()
-    }
-}

From 52f1115c9f560b96559e20e9b5e6531797a96a59 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 29 Jun 2025 19:03:20 -0400
Subject: [PATCH 3/6] add time to the benchmark.yml

---
 .github/workflows/benchmark.yml | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 67c5efa2b..33bf13a4b 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -33,23 +33,30 @@ jobs:
         id: benchmark
         run: |
           echo "🚀 Running single file benchmark..."
-          # Run benchmark with ES2004a file and save results to JSON
-          swift run fluidaudio benchmark --auto-download --single-file ES2004a --output benchmark_results.json
+          swift run fluidaudio benchmark --auto-download --single-file ES2004a --output benchmark_results.json | tee benchmark.log
+
+          # Extract total time from CLI output
+          if grep -q "Total benchmark execution time:" benchmark.log; then
+            BENCHMARK_TIME=$(grep "Total benchmark execution time:" benchmark.log | grep -o '[0-9.]*')
+            echo "BENCHMARK_TIME=${BENCHMARK_TIME}" >> $GITHUB_OUTPUT
+          else
+            echo "BENCHMARK_TIME=NA" >> $GITHUB_OUTPUT
+          fi
 
           # Extract key metrics from JSON output
           if [ -f benchmark_results.json ]; then
             # Parse JSON results (using basic tools available in GitHub runners)
             AVERAGE_DER=$(cat benchmark_results.json | grep -o '"averageDER":[0-9]*\.?[0-9]*' | cut -d':' -f2)
-            AVERAGE_JER=$(cat benchmark_results.json | grep -o '"averageJER":[0-9]*\.?[0-9]*' | cut -d':' -f2) 
+            AVERAGE_JER=$(cat benchmark_results.json | grep -o '"averageJER":[0-9]*\.?[0-9]*' | cut -d':' -f2)
             PROCESSED_FILES=$(cat benchmark_results.json | grep -o '"processedFiles":[0-9]*' | cut -d':' -f2)
-            
+
             # Get first result details
             RTF=$(cat benchmark_results.json | grep -o '"realTimeFactor":[0-9]*\.?[0-9]*' | head -1 | cut -d':' -f2)
             DURATION=$(cat benchmark_results.json | grep -o '"durationSeconds":[0-9]*\.?[0-9]*' | head -1 | cut -d':' -f2)
             SPEAKER_COUNT=$(cat benchmark_results.json | grep -o '"speakerCount":[0-9]*' | head -1 | cut -d':' -f2)
-            
+
             echo "DER=${AVERAGE_DER}" >> $GITHUB_OUTPUT
-            echo "JER=${AVERAGE_JER}" >> $GITHUB_OUTPUT  
+            echo "JER=${AVERAGE_JER}" >> $GITHUB_OUTPUT
             echo "RTF=${RTF}" >> $GITHUB_OUTPUT
             echo "DURATION=${DURATION}" >> $GITHUB_OUTPUT
             echo "SPEAKER_COUNT=${SPEAKER_COUNT}" >> $GITHUB_OUTPUT
@@ -76,15 +83,17 @@ jobs:
               const rtf = parseFloat('${{ steps.benchmark.outputs.RTF }}').toFixed(2);
               const duration = parseFloat('${{ steps.benchmark.outputs.DURATION }}').toFixed(1);
               const speakerCount = '${{ steps.benchmark.outputs.SPEAKER_COUNT }}';
-              
+              const benchmarkTime = '${{ steps.benchmark.outputs.BENCHMARK_TIME }}';
+
               comment += `**Test File:** ES2004a (${duration}s audio)\n\n`;
               comment += '| Metric | Value | Target | Status |\n';
               comment += '|--------|-------|--------|---------|\n';
               comment += `| **DER** (Diarization Error Rate) | ${der}% | < 30% | ${der < 30 ? '✅' : '❌'} |\n`;
               comment += `| **JER** (Jaccard Error Rate) | ${jer}% | < 25% | ${jer < 25 ? '✅' : '❌'} |\n`;
               comment += `| **RTF** (Real-Time Factor) | ${rtf}x | < 1.0x | ${rtf < 1.0 ? '✅' : '❌'} |\n`;
-              comment += `| **Speakers Detected** | ${speakerCount} | - | ℹ️ |\n\n`;
-              
+              comment += `| **Speakers Detected** | ${speakerCount} | - | ℹ️ |\n`;
+              comment += `| **Benchmark Runtime** | ${benchmarkTime}s | - | ℹ️ |\n\n`;
+
               // Performance assessment
               if (der < 20) {
                 comment += '🎉 **Excellent Performance!** - Competitive with state-of-the-art research\n';
@@ -93,12 +102,12 @@ jobs:
               } else {
                 comment += '⚠️ **Performance Below Target** - Consider parameter optimization\n';
               }
-              
+
               comment += '\n📊 **Research Comparison:**\n';
               comment += '- Powerset BCE (2023): 18.5% DER\n';
               comment += '- EEND (2019): 25.3% DER\n';
               comment += '- x-vector clustering: 28.7% DER\n';
-              
+
             } else {
               comment += '❌ **Benchmark Failed**\n\n';
               comment += 'The single file benchmark could not complete successfully. ';

From 9ed9ce353e0f9df809b44ebc4f888974f3743f4c Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 29 Jun 2025 19:59:57 -0400
Subject: [PATCH 4/6] show benchmark_results.json contents

---
 .github/workflows/benchmark.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 33bf13a4b..25159c176 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -68,6 +68,13 @@ jobs:
           fi
         timeout-minutes: 25
 
+      - name: Show benchmark_results.json
+        if: always()
+        run: |
+          echo "--- benchmark_results.json ---"
+          cat benchmark_results.json || echo "benchmark_results.json not found"
+          echo "-----------------------------"
+
       - name: Comment PR with Benchmark Results
         if: always()
         uses: actions/github-script@v7

From f3a13e6f4f22cdc420ff25547123067b792327cf Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 29 Jun 2025 20:20:21 -0400
Subject: [PATCH 5/6] github comment bot failing

---
 .github/workflows/benchmark.yml | 80 ++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 25159c176..399bb2e60 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -75,56 +75,56 @@ jobs:
           cat benchmark_results.json || echo "benchmark_results.json not found"
           echo "-----------------------------"
 
+      - name: Extract benchmark metrics with jq
+        id: extract
+        run: |
+          DER=$(jq '.averageDER' benchmark_results.json)
+          JER=$(jq '.averageJER' benchmark_results.json)
+          RTF=$(jq '.results[0].realTimeFactor' benchmark_results.json)
+          DURATION=$(jq '.results[0].durationSeconds' benchmark_results.json)
+          SPEAKER_COUNT=$(jq '.results[0].speakerCount' benchmark_results.json)
+          echo "DER=${DER}" >> $GITHUB_OUTPUT
+          echo "JER=${JER}" >> $GITHUB_OUTPUT
+          echo "RTF=${RTF}" >> $GITHUB_OUTPUT
+          echo "DURATION=${DURATION}" >> $GITHUB_OUTPUT
+          echo "SPEAKER_COUNT=${SPEAKER_COUNT}" >> $GITHUB_OUTPUT
+
       - name: Comment PR with Benchmark Results
         if: always()
         uses: actions/github-script@v7
         with:
           script: |
-            const success = '${{ steps.benchmark.outputs.SUCCESS }}' === 'true';
+            const der = parseFloat('${{ steps.extract.outputs.DER }}');
+            const jer = parseFloat('${{ steps.extract.outputs.JER }}');
+            const rtf = parseFloat('${{ steps.extract.outputs.RTF }}');
+            const duration = parseFloat('${{ steps.extract.outputs.DURATION }}').toFixed(1);
+            const speakerCount = '${{ steps.extract.outputs.SPEAKER_COUNT }}';
+            const benchmarkTime = '${{ steps.benchmark.outputs.BENCHMARK_TIME }}';
 
             let comment = '## 🎯 Single File Benchmark Results\n\n';
-
-            if (success) {
-              const der = parseFloat('${{ steps.benchmark.outputs.DER }}').toFixed(1);
-              const jer = parseFloat('${{ steps.benchmark.outputs.JER }}').toFixed(1);
-              const rtf = parseFloat('${{ steps.benchmark.outputs.RTF }}').toFixed(2);
-              const duration = parseFloat('${{ steps.benchmark.outputs.DURATION }}').toFixed(1);
-              const speakerCount = '${{ steps.benchmark.outputs.SPEAKER_COUNT }}';
-              const benchmarkTime = '${{ steps.benchmark.outputs.BENCHMARK_TIME }}';
-
-              comment += `**Test File:** ES2004a (${duration}s audio)\n\n`;
-              comment += '| Metric | Value | Target | Status |\n';
-              comment += '|--------|-------|--------|---------|\n';
-              comment += `| **DER** (Diarization Error Rate) | ${der}% | < 30% | ${der < 30 ? '✅' : '❌'} |\n`;
-              comment += `| **JER** (Jaccard Error Rate) | ${jer}% | < 25% | ${jer < 25 ? '✅' : '❌'} |\n`;
-              comment += `| **RTF** (Real-Time Factor) | ${rtf}x | < 1.0x | ${rtf < 1.0 ? '✅' : '❌'} |\n`;
-              comment += `| **Speakers Detected** | ${speakerCount} | - | ℹ️ |\n`;
-              comment += `| **Benchmark Runtime** | ${benchmarkTime}s | - | ℹ️ |\n\n`;
-
-              // Performance assessment
-              if (der < 20) {
-                comment += '🎉 **Excellent Performance!** - Competitive with state-of-the-art research\n';
-              } else if (der < 30) {
-                comment += '✅ **Good Performance** - Meeting target benchmarks\n';
-              } else {
-                comment += '⚠️ **Performance Below Target** - Consider parameter optimization\n';
-              }
-
-              comment += '\n📊 **Research Comparison:**\n';
-              comment += '- Powerset BCE (2023): 18.5% DER\n';
-              comment += '- EEND (2019): 25.3% DER\n';
-              comment += '- x-vector clustering: 28.7% DER\n';
-
+            comment += `**Test File:** ES2004a (${duration}s audio)\n\n`;
+            comment += '| Metric | Value | Target | Status |\n';
+            comment += '|--------|-------|--------|---------|\n';
+            comment += `| **DER** (Diarization Error Rate) | ${der.toFixed(1)}% | < 30% | ${der < 30 ? '✅' : '❌'} |\n`;
+            comment += `| **JER** (Jaccard Error Rate) | ${jer.toFixed(1)}% | < 25% | ${jer < 25 ? '✅' : '❌'} |\n`;
+            comment += `| **RTF** (Real-Time Factor) | ${rtf.toFixed(2)}x | < 1.0x | ${rtf < 1.0 ? '✅' : '❌'} |\n`;
+            comment += `| **Speakers Detected** | ${speakerCount} | - | ℹ️ |\n`;
+            comment += `| **Benchmark Runtime** | ${benchmarkTime}s | - | ℹ️ |\n\n`;
+
+            // Performance assessment
+            if (der < 20) {
+              comment += '🎉 **Excellent Performance!** - Competitive with state-of-the-art research\n';
+            } else if (der < 30) {
+              comment += '✅ **Good Performance** - Meeting target benchmarks\n';
             } else {
-              comment += '❌ **Benchmark Failed**\n\n';
-              comment += 'The single file benchmark could not complete successfully. ';
-              comment += 'This may be due to:\n';
-              comment += '- Network issues downloading test data\n';
-              comment += '- Model initialization problems\n';
-              comment += '- Audio processing errors\n\n';
-              comment += 'Please check the workflow logs for detailed error information.';
+              comment += '⚠️ **Performance Below Target** - Consider parameter optimization\n';
             }
 
+            comment += '\n📊 **Research Comparison:**\n';
+            comment += '- Powerset BCE (2023): 18.5% DER\n';
+            comment += '- EEND (2019): 25.3% DER\n';
+            comment += '- x-vector clustering: 28.7% DER\n';
+
             comment += '\n\n---\n*Automated benchmark using AMI corpus ES2004a test file*';
 
             github.rest.issues.createComment({

From f8722f0d29a699a80f16564b9b49c4ca24c71f73 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sun, 29 Jun 2025 20:40:53 -0400
Subject: [PATCH 6/6] fix JER

---
 Sources/DiarizationCLI/main.swift | 71 +++++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 4 deletions(-)

diff --git a/Sources/DiarizationCLI/main.swift b/Sources/DiarizationCLI/main.swift
index f8cb77826..3dc0324ea 100644
--- a/Sources/DiarizationCLI/main.swift
+++ b/Sources/DiarizationCLI/main.swift
@@ -786,11 +786,74 @@ struct DiarizationCLI {
     static func calculateJaccardErrorRate(
         predicted: [TimedSpeakerSegment], groundTruth: [TimedSpeakerSegment]
     ) -> Float {
-        let totalGTDuration = groundTruth.reduce(0) { $0 + $1.durationSeconds }
-        let totalPredDuration = predicted.reduce(0) { $0 + $1.durationSeconds }
+        // Empty inputs: both empty is a perfect match (0%); exactly one empty is a complete mismatch (100%)
+        if predicted.isEmpty && groundTruth.isEmpty {
+            return 0.0  // Perfect match - both empty
+        } else if predicted.isEmpty || groundTruth.isEmpty {
+            return 100.0  // Complete mismatch - one empty, one not
+        }
+
+        // Frame step in seconds (10 ms); intended to match the frame size used by the DER calculation
+        let frameSize: Float = 0.01
+        let totalDuration = max(
+            predicted.map { $0.endTimeSeconds }.max() ?? 0,
+            groundTruth.map { $0.endTimeSeconds }.max() ?? 0
+        )
+        let totalFrames = Int(totalDuration / frameSize)
+
+        // Get optimal speaker mapping using existing Hungarian algorithm
+        let speakerMapping = findOptimalSpeakerMapping(
+            predicted: predicted,
+            groundTruth: groundTruth,
+            totalDuration: totalDuration
+        )
+
+        var intersectionFrames = 0
+        var unionFrames = 0
+
+        // Calculate frame-by-frame Jaccard
+        for frame in 0..<totalFrames {
+            let frameTime = Float(frame) * frameSize
+
+            let gtSpeaker = findSpeakerAtTime(frameTime, in: groundTruth)
+            let predSpeaker = findSpeakerAtTime(frameTime, in: predicted)
+
+            // Map predicted speaker to ground truth speaker; a nil result is treated as silence by the switch below
+            let mappedPredSpeaker = predSpeaker.flatMap { speakerMapping[$0] }
+
+            switch (gtSpeaker, mappedPredSpeaker) {
+            case (nil, nil):
+                // Both silent - no contribution to intersection or union
+                continue
+            case (nil, _):
+                // Ground truth silent, prediction has speaker
+                unionFrames += 1
+            case (_, nil):
+                // Ground truth has speaker, prediction silent
+                unionFrames += 1
+            case let (gt?, pred?):
+                // Both have speakers
+                unionFrames += 1
+                if gt == pred {
+                    // Same speaker - contributes to intersection
+                    intersectionFrames += 1
+                }
+                // Different speakers - only contributes to union
+            }
+        }
+
+        // Calculate Jaccard Index (0 when no frame had any speaker, which yields a JER of 100%)
+        let jaccardIndex = unionFrames > 0 ? Float(intersectionFrames) / Float(unionFrames) : 0.0
+
+        // Convert to error rate as a percentage: JER = (1 - Jaccard Index) * 100
+        let jer = (1.0 - jaccardIndex) * 100.0
+
+        // Debug logging on every call; the check below is always true here because empty inputs returned early
+        if predicted.count > 0 && groundTruth.count > 0 {
+            print("🔍 JER DEBUG: Intersection: \(intersectionFrames), Union: \(unionFrames), Jaccard Index: \(String(format: "%.3f", jaccardIndex)), JER: \(String(format: "%.1f", jer))%")
+        }
 
-        let durationDiff = abs(totalGTDuration - totalPredDuration)
-        return (durationDiff / max(totalGTDuration, totalPredDuration)) * 100
+        return jer
     }
 
     static func findSpeakerAtTime(_ time: Float, in segments: [TimedSpeakerSegment]) -> String? {