In [2]:
import azure.cognitiveservices.speech as speechsdk
import sys, os
sys.path.insert(0, '..')
from src.config_loader import Config

In [4]:
# Load project settings, then load credentials from the .env file.
# NOTE(review): load_creds() is assumed to export AZURE_TTS_KEY into the
# process environment -- confirm against src/config_loader.
config = Config('../config/settings.yaml', creds_path='../config/.env')
config.load_creds()

# Fail fast with an actionable message: handing None to SpeechConfig only
# produces a generic "cannot construct SpeechConfig" style error from the SDK.
# The key is deliberately not stored in a notebook variable so it cannot be
# displayed by accident.
if not os.getenv("AZURE_TTS_KEY"):
    raise ValueError(
        "AZURE_TTS_KEY is not set; check that ../config/.env defines it "
        "and that config.load_creds() loaded it."
    )
if not config.get_setting("azure_region"):
    raise ValueError(
        "Setting 'azure_region' is missing or empty in ../config/settings.yaml."
    )

# Speech service configuration shared by every recognizer created below.
speech_config = speechsdk.SpeechConfig(
    subscription=os.getenv("AZURE_TTS_KEY"),
    region=config.get_setting("azure_region"),
)

<h2>Documentation - PronunciationAssessmentConfig</h2>

https://learn.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.pronunciationassessmentconfig?view=azure-python&source=docs

<h2>Pronunciation Assessment Tests</h2>

In [None]:
# Assessment settings. The reference text is left empty at construction time
# and is expected to be assigned before the config is applied to a recognizer.
pronunciation_config = speechsdk.PronunciationAssessmentConfig(
    reference_text="",
    # Scores are reported on a 0-100 scale.
    grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
    # Evaluate at phoneme level (more specific than syllables or words).
    granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
    enable_miscue=False,
)

# Additionally request prosody scoring on this config.
pronunciation_config.enable_prosody_assessment()

In [None]:
# Run one pronunciation assessment: score the recording at `audio_file_path`
# against `reference_text` and print the scores.

# Set your reference text and audio file path
reference_text = "White attended South High School in Denver, Co."
audio_file_path = "../data/raw/audio_test.wav"  # Update with your actual file

# Fail early with a readable message instead of an opaque SDK error when the
# recording is missing (the path is relative to the kernel's working directory).
if not os.path.isfile(audio_file_path):
    raise FileNotFoundError(f"Audio file not found: {audio_file_path}")

# Update the pronunciation config with the reference text
pronunciation_config.reference_text = reference_text

# Create audio config and recognizer
audio_config = speechsdk.AudioConfig(filename=audio_file_path)
recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

# Apply pronunciation assessment config
pronunciation_config.apply_to(recognizer)

# Run a single-shot recognition and block until its result is available
result = recognizer.recognize_once_async().get()

# Report the scores, or explain why no assessment could be produced
if result.reason == speechsdk.ResultReason.RecognizedSpeech:
    pron_result = speechsdk.PronunciationAssessmentResult(result)
    print("Pronunciation Score:", pron_result.pronunciation_score)
    print("Accuracy Score:", pron_result.accuracy_score)
    print("Fluency Score:", pron_result.fluency_score)
    print("Completeness Score:", pron_result.completeness_score)
    print("Prosody Score:", pron_result.prosody_score)
elif result.reason == speechsdk.ResultReason.NoMatch:
    # Audio was processed but nothing in it was recognized as speech.
    print("No speech could be recognized:", result.no_match_details)
elif result.reason == speechsdk.ResultReason.Canceled:
    # Cancellation carries the actual cause (bad key, wrong region, network,
    # unreadable audio); surface it instead of only the bare reason enum.
    cancellation = result.cancellation_details
    print("Recognition canceled:", cancellation.reason)
    if cancellation.reason == speechsdk.CancellationReason.Error:
        print("Error details:", cancellation.error_details)
else:
    print("Speech not recognized or error occurred:", result.reason)

Pronunciation Score: 86.4
Accuracy Score: 88.0
Fluency Score: 91.0
Completeness Score: 88.0
Prosody Score: 82.5
