In [1]:
%load_ext autoreload
%autoreload 2
%load_ext jupyter_black

In [None]:
import azure_recognizer as ar
import azure.cognitiveservices.speech as sdk
from pathlib import Path
import random
import pandas as pd
import time

## Collect File Paths

In [5]:
def collect_pairs(roots):
    """Collect ``(wav_path, transcription)`` pairs from the given directories.

    Each directory is searched recursively for ``*.wav`` files. The
    transcription is the part of the file stem before the first space.

    Files are skipped when:
      * the stem contains ``"incorrect"``,
      * the transcription is a single character (a single phoneme),
      * the transcription contains ``"-"`` (a multi-word transcription).

    Args:
        roots: Iterable of directories (``Path`` or ``str``) to search, in the
            order their files should appear in the result.

    Returns:
        A list of ``(Path, str)`` tuples. Within each root the files are in
        sorted path order.
    """
    pairs = []
    for root in roots:
        # rglob yields entries in filesystem order, which is not stable across
        # machines. Sorting keeps any seeded random sampling over these pairs
        # reproducible.
        for path in sorted(Path(root).rglob("*.wav")):
            if "incorrect" in path.stem:
                continue
            transcription = path.stem.split(" ")[0]
            if len(transcription) == 1:
                # Skip single phoneme
                continue
            if "-" in transcription:
                # Skip multi-word transcriptions
                continue
            pairs.append((path, transcription))
    return pairs


l60 = Path("data/lesson_60")
l61 = Path("data/lesson_61")
j_pairs = collect_pairs([l60, l61])

## Match Correct and Incorrect

In [None]:
# Fixed seed so the mismatched transcriptions drawn below are repeatable.
random.seed(3)

# Every collected (wav, transcription) pair is a genuine match.
correct = pd.DataFrame(j_pairs, columns=["wav", "text"])
correct["correct"] = True


def _different_word(word):
    """Return a random transcription from ``correct`` that differs from ``word``."""
    candidates = [other for other in correct["text"] if other != word]
    return random.choice(candidates)


# Same recordings, each paired with a transcription that is not its own.
incorrect = correct.assign(
    correct=False,
    text=correct["text"].apply(_different_word),
)

# Stack matched and mismatched rows, then add empty score columns.
# NOTE: `input` shadows the Python builtin; the name is kept because the
# CSV-writing cell refers to it.
input = pd.concat([correct, incorrect], ignore_index=True)
input[["accuracy", "completeness", "fluency", "pronunciation", "prosody"]] = None

## Write to CSV and Fill CSV with Azure Results
Caching to CSV is necessary because Azure rate-limits and returns errors after too many requests. Writing the CSV after every successful request saves progress, so an interrupted run can resume where it left off.

In [20]:
# Path of the CSV cache: the input rows are written here, and the Azure scores
# are filled in and saved back to the same file.
FILE_NAME = "classifier_data.csv"

The following cell overwrites any existing cached data. Uncomment it to regenerate the CSV from scratch and fully reproduce the results.

In [23]:
# input.to_csv(FILE_NAME, index=False)

In [None]:
# Fill in the Azure pronunciation scores for every row that does not have them
# yet. The CSV is rewritten after each successful row, so re-running this cell
# resumes from where the previous run stopped.
MAX_ATTEMPTS = 5  # consecutive errors tolerated per row before giving up on it
RETRY_DELAY_S = 2  # pause between attempts, to back off from rate limiting

data = pd.read_csv(FILE_NAME)

# Visit each pending row exactly once. The previous `while ... idxmax()` form
# kept re-selecting the same row, so a file that always errored (or a score
# that came back missing) made the loop spin forever.
pending = data.index[data["accuracy"].isna()]
failed = []

for idx in pending:
    row = data.loc[idx]
    res = None
    for _attempt in range(MAX_ATTEMPTS):
        print(f"Processing {row['wav']} | {row['text']}")
        res = ar.recognize_from_wav(str(row["wav"]), row["text"])
        if not res.error_json:
            break
        print(f"Error processing {row['wav']} | {res.error_json}")
        time.sleep(RETRY_DELAY_S)
    else:
        # Every attempt failed: leave the row empty and move on.
        failed.append(idx)
        continue

    pa_result = sdk.PronunciationAssessmentResult(res)
    scores = {
        "accuracy": pa_result.accuracy_score,
        "completeness": pa_result.completeness_score,
        "fluency": pa_result.fluency_score,
        "pronunciation": pa_result.pronunciation_score,
        "prosody": pa_result.prosody_score,
    }
    for column, value in scores.items():
        data.loc[idx, column] = value
    # Persist progress immediately so an interruption loses at most one row.
    data.to_csv(FILE_NAME, index=False)

if failed:
    print(
        f"Gave up on {len(failed)} row(s) after {MAX_ATTEMPTS} attempts each; "
        "re-run this cell to retry them."
    )