## Step 1: Install Dependencies

This will install all required packages.

In [None]:
# Install the notebook's third-party dependencies; -q reduces pip's console output.
!pip install -q openai-whisper librosa soundfile language-tool-python textstat pyyaml numpy scipy

## Step 2: Import Required Libraries

In [None]:
import os
import json
import whisper
import librosa
import numpy as np
import warnings
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Suppress every Python warning for the rest of the session
# (presumably to keep the notebook output readable).
warnings.filterwarnings('ignore')
print("‚úì Libraries imported successfully!")

## Step 3: Audio Processor Class

Handles loading and preprocessing of audio files.

In [None]:
class AudioProcessor:
    """Load audio files and prepare their waveform for transcription.

    ``sample_rate`` is the rate handed to ``librosa.load`` and
    ``max_duration`` is the cap, in seconds, applied to the waveform after
    silence trimming.
    """

    def __init__(self, sample_rate: int = 16000, max_duration: int = 300):
        self.sample_rate = sample_rate
        self.max_duration = max_duration

    def load_audio(self, audio_path: str) -> Tuple[np.ndarray, int]:
        """Read ``audio_path`` as mono audio at ``self.sample_rate``.

        Returns:
            The ``(samples, sample_rate)`` pair produced by ``librosa.load``.

        Raises:
            FileNotFoundError: If ``audio_path`` does not exist.
        """
        if os.path.exists(audio_path):
            return librosa.load(audio_path, sr=self.sample_rate, mono=True)
        raise FileNotFoundError(f"Audio file not found: {audio_path}")

    def preprocess_audio(self, audio: np.ndarray) -> np.ndarray:
        """Trim silence, cap the length, and normalize the waveform."""
        # Strip leading/trailing silence using a 20 dB threshold.
        trimmed, _ = librosa.effects.trim(audio, top_db=20)

        # Keep at most max_duration seconds of samples.
        sample_cap = self.sample_rate * self.max_duration
        capped = trimmed[:sample_cap] if len(trimmed) > sample_cap else trimmed

        return librosa.util.normalize(capped)

    def process_audio_file(self, audio_path: str) -> Tuple[np.ndarray, dict]:
        """Load and preprocess one file, returning the waveform and metadata.

        The metadata dict holds the path, sample rate, raw duration and
        sample count, plus the duration remaining after preprocessing.
        """
        waveform, rate = self.load_audio(audio_path)
        sample_total = len(waveform)

        info = {
            'path': str(audio_path),
            'sample_rate': rate,
            'duration_seconds': sample_total / rate,
            'samples': sample_total,
        }

        cleaned = self.preprocess_audio(waveform)
        info['processed_duration'] = len(cleaned) / rate

        return cleaned, info

print("‚úì AudioProcessor class defined")

## Step 4: Transcriber Class

Converts speech to text using OpenAI Whisper.

In [None]:
class Transcriber:
    """Speech-to-text wrapper around a loaded Whisper model."""

    def __init__(self, model_name: str = "base", device: str = "cpu", language: str = "en"):
        """Store the settings and load the Whisper model onto ``device``."""
        self.model_name, self.language, self.device = model_name, language, device

        print(f"Loading Whisper model '{model_name}' on {device}...")
        self.model = whisper.load_model(model_name, device=device)
        print("‚úì Model loaded successfully!")

    def transcribe(self, audio: np.ndarray) -> str:
        """Return the stripped transcript of an in-memory waveform."""
        output = self.model.transcribe(audio, language=self.language, fp16=False)
        transcript = output['text']
        return transcript.strip()

    def transcribe_file(self, audio_path: str) -> Dict:
        """Preprocess ``audio_path`` and transcribe it.

        Returns:
            A dict with the transcript, the file path and name, the model
            name, the whitespace-split word count, and the duration of the
            loaded audio in seconds.
        """
        # A default AudioProcessor is created for every call.
        waveform, audio_info = AudioProcessor().process_audio_file(audio_path)
        transcript = self.transcribe(waveform)
        source = Path(audio_path)

        return {
            'text': transcript,
            'file_path': str(audio_path),
            'file_name': source.name,
            'model': self.model_name,
            'word_count': len(transcript.split()),
            'duration': audio_info['duration_seconds'],
        }

print("‚úì Transcriber class defined")

## Step 5: Grammar Scorer Class

Analyzes text for grammar quality and provides scoring.

In [None]:
class GrammarScorer:
    """Analyze text for grammar quality and provide detailed scoring.

    Four component scores, each between 0 and 100, are combined with
    configurable weights into one overall score and a letter grade:
    grammar (LanguageTool matches per word), sentence structure (average
    sentence length), vocabulary (lexical diversity) and readability
    (Flesch reading ease).

    Note: when LanguageTool is disabled or could not be started,
    ``check_grammar`` returns no errors, so the grammar component is 100.
    """

    # Weights applied to the component scores when the caller supplies none.
    DEFAULT_WEIGHTS = {
        'grammar_errors': 0.40,
        'sentence_structure': 0.20,
        'vocabulary_richness': 0.20,
        'readability': 0.20,
    }

    # (minimum score, label) pairs, checked from the highest band down;
    # anything below the last band is graded 'F (Very Poor)'.
    GRADE_BANDS = (
        (90, 'A (Excellent)'),
        (75, 'B (Good)'),
        (60, 'C (Average)'),
        (40, 'D (Poor)'),
    )

    def __init__(self, language: str = "en-US", use_language_tool: bool = True):
        """Store the settings and, if requested, start LanguageTool.

        A failure to start LanguageTool is reported as a warning and turns
        ``use_language_tool`` off instead of raising.
        """
        self.language = language
        self.use_language_tool = use_language_tool
        self.grammar_tool = None

        if use_language_tool:
            try:
                import language_tool_python
                print(f"Initializing LanguageTool for {language}...")
                self.grammar_tool = language_tool_python.LanguageTool(language)
                print("‚úì LanguageTool initialized!")
            except Exception as e:
                print(f"Warning: Could not initialize LanguageTool: {e}")
                self.use_language_tool = False

    def check_grammar(self, text: str) -> List[Dict]:
        """Return one dict per LanguageTool match found in ``text``.

        Each dict carries the match message, category, up to three suggested
        replacements, and the context snippet. An empty list is returned when
        LanguageTool is unavailable or the check fails; a failure is reported
        as a warning rather than being swallowed silently.
        """
        if not self.use_language_tool or self.grammar_tool is None:
            return []

        try:
            return [
                {
                    'message': match.message,
                    'category': match.category,
                    'suggestions': match.replacements[:3],
                    'context': match.context,
                }
                for match in self.grammar_tool.check(text)
            ]
        except Exception as exc:
            # Stay best-effort, but do not hide the failure (and do not trap
            # KeyboardInterrupt/SystemExit as a bare ``except`` would).
            print(f"Warning: Grammar check failed: {exc}")
            return []

    def analyze_vocabulary(self, text: str) -> Dict:
        """Return word count, unique-word count, lexical diversity and
        average word length for ``text`` (case-insensitive).

        The same keys are returned for empty input, with zero values.
        """
        import re
        words = re.findall(r'\b\w+\b', text.lower())

        if not words:
            return {
                'word_count': 0,
                'unique_words': 0,
                'lexical_diversity': 0.0,
                'avg_word_length': 0.0,
            }

        unique_words = set(words)

        return {
            'word_count': len(words),
            'unique_words': len(unique_words),
            'lexical_diversity': len(unique_words) / len(words),
            # float() turns the NumPy scalar into a plain Python float.
            'avg_word_length': float(np.mean([len(w) for w in words])),
        }

    def analyze_sentence_structure(self, text: str) -> Dict:
        """Return sentence count and average/max/min sentence length in words.

        Sentences are split on runs of ``.``, ``!`` and ``?``. The same keys
        are returned for empty input, with zero values.
        """
        import re
        sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]

        if not sentences:
            return {
                'sentence_count': 0,
                'avg_sentence_length': 0.0,
                'max_sentence_length': 0,
                'min_sentence_length': 0,
            }

        sentence_lengths = [len(s.split()) for s in sentences]

        return {
            'sentence_count': len(sentences),
            'avg_sentence_length': float(np.mean(sentence_lengths)),
            'max_sentence_length': max(sentence_lengths),
            'min_sentence_length': min(sentence_lengths),
        }

    def calculate_readability(self, text: str) -> Dict:
        """Return ``{'flesch_reading_ease': score}`` clamped to 0-100.

        Uses ``textstat`` when it can be imported and runs successfully;
        otherwise falls back to a simplified estimate based on the average
        sentence length.
        """
        try:
            import textstat
            flesch_score = textstat.flesch_reading_ease(text)
            return {'flesch_reading_ease': max(0, min(100, flesch_score))}
        except Exception:
            # textstat is missing or failed on this text: use the estimate.
            return self._estimate_readability(text)

    @staticmethod
    def _estimate_readability(text: str) -> Dict:
        """Approximate readability from the average words per sentence."""
        import re
        sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
        words = text.split()

        if not sentences or not words:
            return {'flesch_reading_ease': 50}

        avg_words = len(words) / len(sentences)
        # Best score at 15 words per sentence, minus 2 points per word of deviation.
        score = max(0, min(100, 100 - abs(avg_words - 15) * 2))
        return {'flesch_reading_ease': score}

    @staticmethod
    def _empty_result(text: Optional[str]) -> Dict:
        """Build the result for empty text with the same keys as a scored one.

        Callers that index ``error_count``, ``word_count`` or
        ``component_scores`` therefore keep working on silent audio.
        """
        return {
            'score': 0.0,
            'grade': 'N/A',
            'error': 'Empty text',
            'grammar_errors': [],
            'error_count': 0,
            'sentence_analysis': {
                'sentence_count': 0,
                'avg_sentence_length': 0.0,
                'max_sentence_length': 0,
                'min_sentence_length': 0,
            },
            'vocabulary_analysis': {
                'word_count': 0,
                'unique_words': 0,
                'lexical_diversity': 0.0,
                'avg_word_length': 0.0,
            },
            'readability': {'flesch_reading_ease': 0.0},
            'component_scores': {
                'grammar': 0.0,
                'structure': 0.0,
                'vocabulary': 0.0,
                'readability': 0.0,
            },
            'text': text or '',
            'word_count': 0,
        }

    def score_text(self, text: str, weights: Optional[Dict] = None) -> Dict:
        """Score ``text`` and return the overall score, grade and details.

        Args:
            text: The text to analyze.
            weights: Optional mapping with the keys ``grammar_errors``,
                ``sentence_structure``, ``vocabulary_richness`` and
                ``readability``; ``DEFAULT_WEIGHTS`` is used when omitted.

        Returns:
            A dict with ``score`` (rounded to 2 decimals), ``grade``, the
            grammar errors and their count, the three analysis dicts, the
            per-component scores, the text and its word count. Empty or
            whitespace-only text yields score 0.0, grade ``'N/A'`` and an
            additional ``error`` key.
        """
        if not text or not text.strip():
            return self._empty_result(text)

        if weights is None:
            weights = self.DEFAULT_WEIGHTS

        grammar_errors = self.check_grammar(text)
        vocab_analysis = self.analyze_vocabulary(text)
        sentence_analysis = self.analyze_sentence_structure(text)
        readability = self.calculate_readability(text)

        # Grammar: 10 points off per error per 100 words, floored at 0.
        word_count = vocab_analysis['word_count']
        error_rate = len(grammar_errors) / max(word_count, 1) * 100
        grammar_score = max(0, 100 - error_rate * 10)

        # Structure: penalize very short (<5) or very long (>30) sentences.
        avg_length = sentence_analysis['avg_sentence_length']
        if avg_length < 5:
            structure_score = 60
        elif avg_length > 30:
            structure_score = 70
        else:
            structure_score = 100

        # Vocabulary: a lexical diversity of 0.5 or more earns full marks.
        vocab_score = min(100, vocab_analysis['lexical_diversity'] * 200)

        readability_score = readability.get('flesch_reading_ease', 50)

        final_score = (
            grammar_score * weights['grammar_errors'] +
            structure_score * weights['sentence_structure'] +
            vocab_score * weights['vocabulary_richness'] +
            readability_score * weights['readability']
        )

        grade = next(
            (label for floor, label in self.GRADE_BANDS if final_score >= floor),
            'F (Very Poor)',
        )

        return {
            'score': round(final_score, 2),
            'grade': grade,
            'grammar_errors': grammar_errors,
            'error_count': len(grammar_errors),
            'sentence_analysis': sentence_analysis,
            'vocabulary_analysis': vocab_analysis,
            'readability': readability,
            'component_scores': {
                'grammar': round(grammar_score, 2),
                'structure': round(structure_score, 2),
                'vocabulary': round(vocab_score, 2),
                'readability': round(readability_score, 2),
            },
            'text': text,
            'word_count': word_count,
        }

print("‚úì GrammarScorer class defined")

## Step 6: Main Grammar Scoring Engine

Combines all components into a complete system.

In [None]:
class GrammarScoringEngine:
    """Complete Grammar Scoring Engine from Voice Samples.

    Combines a ``Transcriber`` (speech to text) with a ``GrammarScorer``
    (text to score) and prints a human-readable report for each file.
    """

    def __init__(self, whisper_model: str = "base", device: str = "cpu"):
        """Create the transcriber and the grammar scorer.

        Args:
            whisper_model: Name of the Whisper model to load.
            device: Device passed through to ``Transcriber``; defaults to
                ``"cpu"``.
        """
        print("Initializing Grammar Scoring Engine...")
        print("="*60)

        self.transcriber = Transcriber(model_name=whisper_model, device=device)
        self.grammar_scorer = GrammarScorer(language="en-US")

        print("="*60)
        print("‚úì Grammar Scoring Engine initialized!\n")

    def score_audio(self, audio_path: str) -> Dict:
        """Transcribe ``audio_path``, score the transcript and print a report.

        Returns:
            The scoring result extended with ``file_name``, ``file_path`` and
            the full ``transcription`` dict. When the transcript is empty the
            scorer's reduced result (score, grade, error) is reported instead
            of raising ``KeyError`` on the missing detail keys.
        """
        source = Path(audio_path)
        print(f"\nProcessing: {source.name}")
        print("-" * 60)

        # Transcribe
        print("1. Transcribing audio...")
        transcription_result = self.transcriber.transcribe_file(audio_path)
        text = transcription_result['text']
        print(f"   Transcribed: {text}")
        print(f"   Word count: {transcription_result['word_count']}")

        # Score grammar
        print("\n2. Analyzing grammar...")
        scoring_result = self.grammar_scorer.score_text(text)

        # Combine results
        complete_result = {
            'file_name': source.name,
            'file_path': str(audio_path),
            'transcription': transcription_result,
            **scoring_result
        }

        # The scorer may return only score/grade/error for empty text, so the
        # detail keys are read with defaults instead of being indexed.
        error_count = scoring_result.get('error_count', 0)
        word_count = scoring_result.get('word_count', transcription_result['word_count'])
        component_scores = scoring_result.get('component_scores', {})

        # Display results
        print("\n" + "="*60)
        print("RESULTS")
        print("="*60)
        print(f"Overall Score: {scoring_result['score']:.2f}/100")
        print(f"Grade: {scoring_result['grade']}")
        if 'error' in scoring_result:
            print(f"Note: {scoring_result['error']}")
        print(f"Errors Found: {error_count}")
        print(f"Word Count: {word_count}")

        if error_count > 0:
            print("\nTop Errors:")
            for i, error in enumerate(scoring_result.get('grammar_errors', [])[:3], 1):
                print(f"  {i}. {error['message']}")

        if component_scores:
            print("\nComponent Scores:")
            for component, score in component_scores.items():
                print(f"  - {component.capitalize()}: {score:.2f}/100")

        print("="*60 + "\n")

        return complete_result

print("‚úì GrammarScoringEngine class defined")

## Step 7: Check Available Datasets

**Important:** Before running this cell, add a speech dataset using the **"+ Add Data"** button on the right sidebar.

Recommended datasets:
- Search for: "speech commands"
- Or use any audio dataset with `.wav` files

In [None]:
# List the datasets attached under the Kaggle input directory, previewing
# up to five entries of each one.
input_dir = '/kaggle/input'

if os.path.exists(input_dir):
    print("Available datasets in Kaggle input:")
    print("="*60)
    for item in os.listdir(input_dir):
        dataset_path = os.path.join(input_dir, item)
        print(f"\nüìÅ {item}")

        # Show first few items in dataset
        if os.path.isdir(dataset_path):
            # List the directory once and reuse the result for both the
            # preview and the "more items" count.
            entries = os.listdir(dataset_path)
            contents = entries[:5]
            for content in contents:
                print(f"   - {content}")
            if len(entries) > 5:
                print(f"   ... and {len(entries) - 5} more items")
    print("\n" + "="*60)
else:
    print("‚ö†Ô∏è  No input data found!")
    print("Please add a dataset using the '+ Add Data' button.")

## Step 8: Initialize the Engine

Create an instance of the Grammar Scoring Engine.

In [None]:
# Build the engine once; the later cells reuse this module-level `engine`.
# Constructing it loads the Whisper model and initializes LanguageTool.
# Use 'tiny' for faster processing, 'base' for better accuracy
engine = GrammarScoringEngine(whisper_model='base')

## Step 9: Process a Single Audio File

**Update the path below** to match your dataset structure.

In [None]:
# Score a single audio file and dump the complete result as JSON.
# UPDATE THIS PATH to match your dataset!
audio_file = '/kaggle/input/google-speech-commands/cat/004ae714_nohash_0.wav'

if not os.path.exists(audio_file):
    # Nothing to score: explain how to point the cell at a real file.
    print(f"‚ùå File not found: {audio_file}")
    print(
        "\nPlease update the path to match your dataset structure.",
        "Example paths:",
        "  - /kaggle/input/your-dataset-name/audio.wav",
        "  - /kaggle/input/speech-commands/cat/audio.wav",
        sep="\n",
    )
else:
    result = engine.score_audio(audio_file)

    # Show full result as JSON
    print("\nFull Result (JSON):")
    print(json.dumps(result, indent=2))

## Step 10: Batch Processing (Optional)

Process multiple audio files at once.

In [None]:
def process_directory(directory: str, limit: int = 5):
    """Score up to ``limit`` ``.wav`` files found directly in ``directory``.

    Files are taken in sorted path order so that repeated runs with the same
    ``limit`` process the same files. Each file is scored with the
    module-level ``engine``; a failure on one file is reported and does not
    stop the batch.

    Args:
        directory: Folder to search (not recursive).
        limit: Maximum number of files to process.

    Returns:
        The list of result dicts for the files that were scored successfully.
    """
    results = []

    # Path.glob yields entries in arbitrary order, so sort before slicing to
    # make the selected subset reproducible.
    audio_files = sorted(Path(directory).glob('*.wav'))[:limit]

    if not audio_files:
        print(f"No .wav files found in {directory}")
        return results

    print(f"\nProcessing {len(audio_files)} audio files...\n")

    for i, audio_file in enumerate(audio_files, 1):
        print(f"\n{'='*60}")
        print(f"File {i}/{len(audio_files)}")
        print(f"{'='*60}")

        try:
            result = engine.score_audio(str(audio_file))
            results.append(result)
        except Exception as e:
            # Batch boundary: report the failure and continue with the next file.
            print(f"‚ùå Error processing {audio_file.name}: {e}")

    # Summary
    if results:
        print("\n" + "="*60)
        print("BATCH PROCESSING SUMMARY")
        print("="*60)

        scores = [r['score'] for r in results if 'score' in r]
        if scores:
            print(f"Files processed: {len(results)}")
            print(f"Average score: {sum(scores)/len(scores):.2f}")
            print(f"Highest score: {max(scores):.2f}")
            print(f"Lowest score: {min(scores):.2f}")
        print("="*60)

    return results

# Example usage (uncomment and update path):
# results = process_directory('/kaggle/input/speech-commands/cat', limit=3)

## Step 11: Test with Custom Text (No Audio)

You can also test the grammar scorer directly with text.

In [None]:
# Score a short, well-formed paragraph directly, without any audio input.
sample_text = (
    "\n"
    "The quick brown fox jumps over the lazy dog. \n"
    "This is a well-written sentence with proper grammar and punctuation.\n"
    "Machine learning algorithms can process vast amounts of data efficiently.\n"
)

scorer = GrammarScorer()
result = scorer.score_text(sample_text)

# Print the headline numbers of the analysis between two divider lines.
print(
    "\nGrammar Analysis of Sample Text:",
    "="*60,
    f"Score: {result['score']:.2f}/100",
    f"Grade: {result['grade']}",
    f"Errors: {result['error_count']}",
    f"Words: {result['word_count']}",
    "="*60,
    sep="\n",
)

## Next Steps

1. **Add a speech dataset** using the "+ Add Data" button
2. **Update the audio file paths** in Step 9 to match your dataset
3. **Run all cells** sequentially from top to bottom
4. **Experiment** with different Whisper models (tiny, base, small)
5. **Process multiple files** using the batch processing function in Step 10

## Tips

- Use `whisper_model='tiny'` for faster processing
- Use `whisper_model='small'` or `'base'` for better accuracy
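- Enable GPU in Settings → Accelerator for faster transcription (note that `Transcriber` defaults to `device="cpu"`, so the GPU is only used when a CUDA device is passed)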
- Single-word audio clips will have limited grammar analysis (expected behavior)

## GitHub Repository

Full source code: [Add your GitHub URL here after pushing]

---

**Built with:** OpenAI Whisper, LanguageTool, Librosa