In [2]:
import speech_recognition as sr
import time
from collections import Counter, deque
import threading
import re
import queue

class ComprehensiveSpeechAnalyzer:
    def __init__(self):
        # Initialize speech recognizer with optimal settings
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = 300
        self.recognizer.dynamic_energy_threshold = True
        self.recognizer.pause_threshold = 0.5
        
        # Filler word patterns with regex for contextual detection
        self.filler_patterns = [
            r'\b(um|umm|uh|uhh)\b',  # Common verbal pauses
            r'\b(er|erm|eh)\b',  # Hesitation sounds
            r'\b(ah|ahm)\b',  # More hesitation sounds
            r'\blike\b(?! (to|the|a|an|this|that|it|as))',  # "Like" when used as filler
            r'\b(you\s*know)\b',  # "You know" filler phrase
            r'\b(i\s*mean)\b(?! to)',  # "I mean" as filler
            r'\b(sort\s*of|kind\s*of)\b',  # Hedge phrases
            r'\b(basically|literally|actually)\b',  # Common unnecessary adverbs
            r'\bjust\b(?! (a|the|one|for|to|in|as))',  # "Just" when used as filler
            r'\bso\b(?= (um|uh|like|yeah|anyway))',  # "So" when followed by another filler
            r'\b(right|yeah|okay)\b(?! (now|away|then|there|here|after))'  # Confirmation fillers
        ]
        
        # Statistics
        self.filler_count = Counter()
        self.repeat_patterns = Counter()
        self.total_words = 0
        self.total_sentences = 0
        self.start_time = None
        self.running = False
        
        # For repetition detection
        self.recent_segments = deque(maxlen=10)  # Store recent text segments
        self.recent_phrases = {}  # Track recent phrases for repetition detection
        
        # For pause detection
        self.audio_queue = queue.Queue()
        self.last_speech_time = None
        self.pauses = []
        
        # For continuous speech monitoring
        self.last_text = ""
        self.latest_transcription = ""
    
    def start_detection(self):
        """Start the comprehensive speech analysis"""
        self.running = True
        self.start_time = time.time()
        self.last_speech_time = time.time()
        
        print("Starting comprehensive speech analysis...")
        print("CALIBRATING: Please remain quiet for 2 seconds")
        
        # Thread for audio capture
        self.audio_thread = threading.Thread(target=self._audio_capture_loop)
        self.audio_thread.daemon = True
        self.audio_thread.start()
        
        # Thread for speech processing
        self.processing_thread = threading.Thread(target=self._speech_processing_loop)
        self.processing_thread.daemon = True
        self.processing_thread.start()
        
        # Thread for pause detection
        self.pause_thread = threading.Thread(target=self._pause_detection_loop)
        self.pause_thread.daemon = True
        self.pause_thread.start()
    
    def stop_detection(self):
        """Stop the analysis process"""
        self.running = False
        for thread in [getattr(self, attr) for attr in ['audio_thread', 'processing_thread', 'pause_thread'] 
                      if hasattr(self, attr)]:
            if thread.is_alive():
                thread.join(timeout=1)
        self._show_final_results()
    
    def _audio_capture_loop(self):
        """Continuously capture audio in small chunks"""
        with sr.Microphone() as source:
            # Adjust for ambient noise
            self.recognizer.adjust_for_ambient_noise(source, duration=2)
            print("\nLISTENING: Speak normally - analyzing your speech patterns")
            print("Press Ctrl+C to stop and see final results\n")
            
            while self.running:
                try:
                    # Use shorter phrase_time_limit for better responsiveness
                    audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=2.5)
                    # Update last speech time
                    self.last_speech_time = time.time()
                    # Add to processing queue
                    self.audio_queue.put(audio)
                    
                except sr.WaitTimeoutError:
                    # No speech detected - might be a pause
                    pass
                except Exception as e:
                    if self.running:
                        print(f"Audio capture error: {e}")
    
    def _pause_detection_loop(self):
        """Dedicated thread for detecting long pauses"""
        pause_threshold = 1.5  # Consider pauses longer than 1.5 seconds
        while self.running:
            current_time = time.time()
            if self.last_speech_time and (current_time - self.last_speech_time) > pause_threshold:
                pause_duration = current_time - self.last_speech_time
                if pause_duration < 10:  # Ignore extremely long pauses (likely not speaking)
                    self.pauses.append(pause_duration)
                    print(f"\n[PAUSE DETECTED: {pause_duration:.1f} seconds]")
                    # Reset to avoid detecting the same pause multiple times
                    self.last_speech_time = current_time
            time.sleep(0.2)  # Check for pauses 5 times per second
    
    def _speech_processing_loop(self):
        """Process audio segments and analyze speech patterns"""
        while self.running:
            try:
                if not self.audio_queue.empty():
                    audio = self.audio_queue.get(block=False)
                    try:
                        # Try Google recognition first for better accuracy
                        text = self.recognizer.recognize_google(audio).lower()
                        self._analyze_speech(text)
                    except:
                        try:
                            # Fall back to Sphinx if Google fails
                            text = self.recognizer.recognize_sphinx(audio).lower()
                            self._analyze_speech(text)
                        except:
                            pass
            except queue.Empty:
                pass
            except Exception as e:
                if self.running:
                    print(f"Processing error: {e}")
            time.sleep(0.1)
    
    def _analyze_speech(self, text):
        """Comprehensive analysis of speech patterns"""
        if not text.strip():
            return
            
        # Update the latest transcription
        self.latest_transcription = text
        
        # Count sentences and words
        sentences = re.split(r'[.!?]+', text)
        valid_sentences = [s for s in sentences if s.strip()]
        self.total_sentences += len(valid_sentences)
        
        words = re.findall(r'\b\w+\b', text)
        self.total_words += len(words)
        
        # Store recent segments for repetition detection
        self.recent_segments.append(text)
        
        # ---- FILLER WORD DETECTION ----
        found_fillers = []
        for pattern in self.filler_patterns:
            matches = re.finditer(pattern, text)
            for match in matches:
                filler = match.group().strip()
                self.filler_count[filler] += 1
                found_fillers.append((filler, match.start()))
        
        # ---- REPETITION DETECTION ----
        repeated_phrases = self._detect_repetitions(text)
        
        # Prepare output with highlighting
        highlighted_text = text
        
        # Highlight filler words
        for filler, _ in sorted(found_fillers, key=lambda x: x[1], reverse=True):
            highlighted_text = re.sub(r'\b' + re.escape(filler) + r'\b', 
                                   f"[FILLER: {filler}]", 
                                   highlighted_text)
        
        # Highlight repetitions
        for phrase in repeated_phrases:
            if len(phrase.split()) > 1:  # Only highlight multi-word repetitions
                highlighted_text = highlighted_text.replace(phrase, f"[REPEATED: {phrase}]")
        
        # Display the results
        if found_fillers or repeated_phrases:
            print(f"\nDetected: {highlighted_text}")
            
            # Show mini report if issues found
            self._show_mini_report(found_fillers, repeated_phrases)
    
    def _detect_repetitions(self, text):
        """Detect repeated phrases and words in recent speech"""
        repeated_phrases = []
        
        # Extract n-grams (2-4 words) from the current text
        words = text.split()
        phrases = []
        
        # Get 2-word phrases
        for i in range(len(words) - 1):
            phrases.append(' '.join(words[i:i+2]))
        
        # Get 3-word phrases if enough words
        if len(words) >= 3:
            for i in range(len(words) - 2):
                phrases.append(' '.join(words[i:i+3]))
        
        # Get 4-word phrases if enough words
        if len(words) >= 4:
            for i in range(len(words) - 3):
                phrases.append(' '.join(words[i:i+4]))
        
        # Check current phrases against recent speech
        for phrase in phrases:
            if len(phrase.split()) < 2:  # Skip single words
                continue
                
            # Skip very short phrases and common phrases
            if len(phrase) < 5 or phrase in ['i think that', 'and then the', 'going to be']:
                continue
                
            # Check if this phrase appeared in recent segments
            for old_segment in list(self.recent_segments)[:-1]:  # All except current
                if phrase in old_segment:
                    # This is a repeated phrase
                    repeated_phrases.append(phrase)
                    self.repeat_patterns[phrase] += 1
                    break
        
        return repeated_phrases
    
    def _show_mini_report(self, fillers, repetitions):
        """Show real-time feedback on speech patterns"""
        issues = []
        
        # Report on fillers
        if fillers:
            filler_str = ', '.join([f[0] for f in fillers[:3]])
            if len(fillers) > 3:
                filler_str += f" (+{len(fillers)-3} more)"
            issues.append(f"Fillers: {filler_str}")
        
        # Report on repetitions
        if repetitions:
            rep_str = ', '.join([f'"{r}"' for r in repetitions[:2]])
            if len(repetitions) > 2:
                rep_str += f" (+{len(repetitions)-2} more)"
            issues.append(f"Repetitions: {rep_str}")
        
        # Show combined report
        if issues:
            print(f"  --> Issues detected: {' | '.join(issues)}")
    
    def _show_final_results(self):
        """Display comprehensive final analysis"""
        elapsed_minutes = (time.time() - self.start_time) / 60
        total_fillers = sum(self.filler_count.values())
        total_repetitions = sum(self.repeat_patterns.values())
        
        print("\n" + "="*70)
        print("               COMPREHENSIVE SPEECH PATTERN ANALYSIS")
        print("="*70)
        print(f"Session Duration: {elapsed_minutes:.2f} minutes")
        print(f"Words Spoken: {self.total_words}")
        print(f"Sentences: {self.total_sentences}")
        
        # SECTION: Filler Words
        print("\n" + "-"*70)
        print("FILLER WORD ANALYSIS")
        print("-"*70)
        print(f"Total Filler Words: {total_fillers}")
        
        if self.total_words > 0:
            print(f"Filler Word Frequency: {(total_fillers/self.total_words)*100:.2f}% of total words")
        
        if elapsed_minutes > 0:
            print(f"Filler Rate: {total_fillers/elapsed_minutes:.2f} per minute")
        
        # Show breakdown of filler words
        if total_fillers > 0:
            print("\nFiller Word Distribution:")
            print(f"{"Filler Word":<15} | {"Count":<5} | {"Percentage":<10}")
            print("-" * 40)
            for word, count in self.filler_count.most_common(10):  # Top 10
                percentage = (count / total_fillers) * 100
                print(f"{word:<15} | {count:<5} | {percentage:>8.1f}%")
        
        # SECTION: Repetitions
        print("\n" + "-"*70)
        print("REPETITION ANALYSIS")
        print("-"*70)
        print(f"Total Repeated Phrases: {total_repetitions}")
        
        if total_repetitions > 0:
            print("\nMost Common Repetitions:")
            print(f"{"Repeated Phrase":<25} | {"Count":<5}")
            print("-" * 40)
            for phrase, count in self.repeat_patterns.most_common(8):  # Top 8
                print(f"{phrase:<25} | {count:<5}")
        
        # SECTION: Pauses
        print("\n" + "-"*70)
        print("PAUSE ANALYSIS")
        print("-"*70)
        if self.pauses:
            avg_pause = sum(self.pauses) / len(self.pauses)
            print(f"Number of Long Pauses: {len(self.pauses)}")
            print(f"Average Pause Duration: {avg_pause:.2f} seconds")
            print(f"Longest Pause: {max(self.pauses):.2f} seconds")
            print(f"Pause Frequency: {len(self.pauses)/elapsed_minutes:.2f} per minute")
        else:
            print("No significant pauses detected.")
        
        # SECTION: Overall Assessment
        print("\n" + "-"*70)
        print("OVERALL SPEECH ASSESSMENT")
        print("-"*70)
        
        # Calculate overall fluency score (0-100)
        filler_score = max(0, 100 - (total_fillers/max(self.total_words, 1))*200)
        repetition_score = max(0, 100 - (total_repetitions/max(self.total_sentences, 1))*50)
        pause_score = 100
        if self.pauses:
            pause_score = max(0, 100 - (len(self.pauses)/elapsed_minutes)*10)
        
        overall_score = (filler_score * 0.4) + (repetition_score * 0.4) + (pause_score * 0.2)
        
        # Display score
        print(f"Fluency Score: {overall_score:.1f}/100")
        
        # Determine assessment level
        if overall_score >= 90:
            assessment = "Excellent"
        elif overall_score >= 75:
            assessment = "Very Good"
        elif overall_score >= 60:
            assessment = "Good"
        elif overall_score >= 40:
            assessment = "Fair"
        else:
            assessment = "Needs Improvement"
        
        print(f"Assessment: {assessment}")
        
        # Specific improvement recommendations
        print("\nIMPROVEMENT RECOMMENDATIONS:")
        recommendations = []
        
        if total_fillers > 0:
            top_fillers = self.filler_count.most_common(2)
            recommendations.append(f"• Reduce use of the filler words: {', '.join([w for w, _ in top_fillers])}")
        
        if total_repetitions > 0:
            recommendations.append(f"• Watch for repetitive phrases in your speech")
        
        if self.pauses and len(self.pauses)/elapsed_minutes > 3:
            recommendations.append(f"• Work on reducing frequency of long pauses")
        elif not self.pauses and total_fillers > 0:
            recommendations.append(f"• Try using strategic pauses instead of filler words")
        
        if not recommendations:
            recommendations.append("• Continue your excellent speaking patterns")
            recommendations.append("• Consider increasing speaking tempo for more dynamic delivery")
        
        for rec in recommendations:
            print(rec)


# Example usage
if __name__ == "__main__":
    analyzer = ComprehensiveSpeechAnalyzer()
    try:
        analyzer.start_detection()
        # Keep main thread alive
        while analyzer.running:
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("\n\nStopping analysis...")
        analyzer.stop_detection()

Starting comprehensive speech analysis...
CALIBRATING: Please remain quiet for 2 seconds

[PAUSE DETECTED: 1.6 seconds]

LISTENING: Speak normally - analyzing your speech patterns
Press Ctrl+C to stop and see final results


[PAUSE DETECTED: 1.6 seconds]

[PAUSE DETECTED: 1.6 seconds]

Detected: [FILLER: you know] [FILLER: basically] so what is happening
  --> Issues detected: Fillers: you know, basically

[PAUSE DETECTED: 1.6 seconds]

[PAUSE DETECTED: 1.6 seconds]

Detected: [FILLER: [REPEATED: you know]] [FILLER: basically] you
  --> Issues detected: Fillers: you know, basically | Repetitions: "you know", "know basically" (+1 more)

Detected: [FILLER: [REPEATED: you know]] [FILLER: basically] [REPEATED: what is]
  --> Issues detected: Fillers: you know, basically | Repetitions: "you know", "know basically" (+2 more)

[PAUSE DETECTED: 1.7 seconds]

Detected: [FILLER: basically] [FILLER: [REPEATED: you know]]
  --> Issues detected: Fillers: you know, basically | Repetitions: "basicall