## 1. Setup and Installation

First, let's install the required packages (uncomment if needed):

In [None]:
# !pip install -r ../requirements.txt

## 2. Import Libraries

In [None]:
import sys
import os
from pathlib import Path

# Add parent directory to path
# This has to run before the `src.*` and `main` imports below; presumably those
# modules live one level above the notebook's working directory — confirm.
# Note: re-running this cell inserts the same entry again.
sys.path.insert(0, str(Path.cwd().parent))

from src.audio_processor import AudioProcessor
from src.transcriber import Transcriber
from src.grammar_scorer import GrammarScorer
from src.utils import load_config, save_json, get_audio_files
from main import GrammarScoringEngine

import warnings
# Suppress every warning raised after this point (all categories, all modules).
warnings.filterwarnings('ignore')

print("✓ Libraries imported successfully!")

## 3. Initialize Components

Let's initialize the Grammar Scoring Engine:

In [None]:
# Initialize the complete engine
# The config path is relative, so it depends on the notebook's working directory.
# `engine` is the object the single-file and batch scoring cells call later.
engine = GrammarScoringEngine(config_path='../config.yaml')

print("\n✓ Grammar Scoring Engine ready!")

## 4. Download Sample Data from Kaggle (Optional)

If you want to use Kaggle datasets:

In [None]:
from src.kaggle_loader import setup_kaggle_credentials, show_recommended_datasets

# Show recommended datasets
show_recommended_datasets()

# Check Kaggle credentials
# NOTE(review): the helper is named "setup", so it may also create or prompt for
# credentials rather than only checking them — confirm in src/kaggle_loader.
setup_kaggle_credentials()

In [None]:
# Uncomment to download a dataset
# from src.kaggle_loader import download_dataset
# dataset_path = download_dataset('google/speech_commands', '../data/raw')
# print(f"Dataset downloaded to: {dataset_path}")

## 5. Test with Sample Audio

Let's test the engine with a sample audio file:

In [None]:
# Score one audio file end to end and print a short report.
# Point `audio_file` at a real recording before running this cell.
audio_file = "path/to/your/audio.wav"

if not os.path.exists(audio_file):
    # Nothing to score yet: tell the user what to change.
    print(f"⚠ Audio file not found: {audio_file}")
    print("Please update the audio_file variable with a valid path.")
else:
    result = engine.score_audio(audio_file, save_results=True)

    banner = "=" * 70
    print("\n" + banner)
    print("RESULTS")
    print(banner)

    # (label, result key, format spec, suffix) for each report line, in print order.
    report_rows = (
        ("Score", "score", ".2f", "/100"),
        ("Grade", "grade", "", ""),
        ("Errors Found", "error_count", "", ""),
        ("Word Count", "word_count", "", ""),
    )
    for label, key, spec, suffix in report_rows:
        print(f"{label}: {format(result[key], spec)}{suffix}")

## 6. Test Individual Components

You can also test components separately:

### 6.1 Test Audio Processing

In [None]:
# Test audio processor
# Builds a standalone processor with sample_rate=16000 (presumably Hz — confirm
# against AudioProcessor).
audio_processor = AudioProcessor(sample_rate=16000)

# If you have an audio file, get its info
# audio_info = audio_processor.get_audio_info("your_audio.wav")
# print(audio_info)

### 6.2 Test Grammar Scoring with Text

In [None]:
# Exercise the grammar scorer directly on text, with no audio or transcription step.
scorer = GrammarScorer(language="en-US", use_language_tool=True)

sample_text = """
This is a sample text for testing the grammar scoring engine.
The engine can detect various types of errors including spelling mistakes,
grammatical errors, and punctuation issues.
"""

# `result` is kept as a notebook-level name because the feedback cell reads it.
result = scorer.score_text(sample_text)

# Headline numbers
print("\nGrammar Analysis Results:")
print(f"Overall Score: {result['score']:.2f}/100")
print(f"Grade: {result['grade']}")

# Per-component breakdown
print("\nComponent Scores:")
for name, value in result['component_scores'].items():
    print(f"  {name.capitalize()}: {value:.2f}")

# Error summary; at most the first five messages are listed.
error_total = result['error_count']
print(f"\nErrors Found: {error_total}")
if error_total > 0:
    print("\nError Details:")
    first_errors = result['grammar_errors'][:5]
    for position, issue in enumerate(first_errors, start=1):
        print(f"  {position}. {issue['message']}")

### 6.3 Generate Detailed Feedback

In [None]:
# Generate human-readable feedback
# `result` is whichever scoring cell ran most recently: both the single-audio
# cell and the text-scoring cell assign it, so the feedback depends on run order.
feedback = scorer.generate_feedback(result)
print(feedback)

## 7. Batch Processing

Process multiple audio files at once:

In [None]:
def average_score(results):
    """Return the mean of the 'score' values in *results*, or None if there are none.

    Entries without a 'score' key are skipped (presumably files that failed to
    process — confirm against score_batch). Returning None instead of dividing
    avoids a ZeroDivisionError when no entry carries a score.
    """
    scores = [r['score'] for r in results if 'score' in r]
    return sum(scores) / len(scores) if scores else None


# Process all audio files in a directory
audio_directory = "../data/raw"  # Update with your directory

if os.path.exists(audio_directory):
    results = engine.score_batch(audio_directory)

    # Display summary
    if results:
        print(f"\nProcessed {len(results)} files")
        average = average_score(results)
        if average is None:
            # Every entry lacked a score, so there is nothing to average.
            print("No files were scored successfully; average score unavailable.")
        else:
            print(f"Average Score: {average:.2f}")
else:
    print(f"Directory not found: {audio_directory}")

## 8. Visualize Results (Optional)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# The commented-out code below plots the batch results from section 7.
# Uncomment it once `results` exists, i.e. after the batch cell has found and scored files.

# scores_data = [
#     {
#         'file': r['file_name'],
#         'score': r['score'],
#         'errors': r['error_count']
#     }
#     for r in results if 'score' in r
# ]

# df = pd.DataFrame(scores_data)

# # Plot scores
# fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# df['score'].plot(kind='bar', ax=ax1, color='skyblue')
# ax1.set_title('Grammar Scores by File')
# ax1.set_xlabel('File Index')
# ax1.set_ylabel('Score')
# ax1.axhline(y=75, color='r', linestyle='--', label='Good Threshold')
# ax1.legend()

# df['errors'].plot(kind='bar', ax=ax2, color='coral')
# ax2.set_title('Error Count by File')
# ax2.set_xlabel('File Index')
# ax2.set_ylabel('Number of Errors')

# plt.tight_layout()
# plt.show()

## 9. Export Results

In [None]:
# Results are automatically saved to the results directory
# You can also manually save specific results

# from src.utils import save_json, create_detailed_report

# if 'result' in locals():
#     save_json(result, '../results/custom_result.json')
#     create_detailed_report(result, '../results/custom_report.txt')
#     print("✓ Results exported!")

## Next Steps

1. **Customize Configuration**: Edit `config.yaml` to adjust weights and settings
2. **Add More Audio Files**: Place audio files in the `data/raw/` directory (the default used by the batch-processing cell)
3. **Explore Results**: Check the `results/` directory for detailed reports
4. **Fine-tune Scoring**: Adjust component weights based on your requirements

For more information, see the README.md file.