In [5]:
from langchain_anthropic import ChatAnthropic
import phonemizer
from transformers import pipeline
import Levenshtein
from langchain_core.pydantic_v1 import BaseModel, Field



  from .autonotebook import tqdm as notebook_tqdm


In [7]:
ipa = "ɡ ʊ d ɑ f t ɚ n u n"
target = "ɡʊd aftənun"

# remove n spaces from the ipa to try to match the target
ipa_no_spaces = ipa.replace(" ", "")
target_no_spaces = target.replace(" ", "")
ops = Levenshtein.editops(ipa_no_spaces, target)
non_matching = Levenshtein.opcodes(ops, ipa, target)

print(ops)

[('insert', 3, 3), ('replace', 3, 4), ('replace', 6, 7)]


In [175]:
word = "How are you?"
ipa = phonemizer.phonemize(word, language="en-us", backend="espeak")
human = "h aʊ ɑːɹ j uː"

ipa, human

('haʊ ɑːɹ juː ', 'h aʊ ɑːɹ j uː')

In [176]:
ipa = ipa.replace("ː", "").replace("ˈ", "").replace("ˌ", "").removeprefix(" ").removesuffix(" ")
human = human.replace("ː", "").replace("ˈ", "").replace("ˌ", "").removeprefix(" ").removesuffix(" ")

human, ipa

('h aʊ ɑɹ j u', 'haʊ ɑɹ ju')

In [177]:
ratio = Levenshtein.ratio(human, ipa)
ratio

0.9

In [178]:
ops = Levenshtein.editops(human, ipa)
ops

[('delete', 1, 1), ('delete', 9, 8)]

In [179]:
matching = Levenshtein.matching_blocks(ops, human, ipa)
matching

[MatchingBlock(a=0, b=0, size=1),
 MatchingBlock(a=2, b=1, size=7),
 MatchingBlock(a=10, b=8, size=1),
 MatchingBlock(a=11, b=9, size=0)]

In [180]:
non_matching = Levenshtein.opcodes(ops, human, ipa)
non_matching
    

[('equal', 0, 1, 0, 1),
 ('delete', 1, 2, 1, 1),
 ('equal', 2, 9, 1, 8),
 ('delete', 9, 10, 8, 8),
 ('equal', 10, 11, 8, 9)]

In [181]:
# put the actual character in the string
contents = []
for tag, i1, i2, j1, j2 in non_matching:
    print(tag, human[i1:i2], ipa[j1:j2])
    if tag == 'equal':
        contents.append(('equal',human[i1:i2]))
    elif tag == 'replace':
        contents.append(('replace',human[i1:i2], ipa[j1:j2]))
    elif tag == 'delete':
        contents.append(('delete',human[i1:i2]))
    elif tag == 'insert':
        contents.append(('insert',ipa[j1:j2]))
contents

equal h h
delete   
equal aʊ ɑɹ j aʊ ɑɹ j
delete   
equal u u


[('equal', 'h'),
 ('delete', ' '),
 ('equal', 'aʊ ɑɹ j'),
 ('delete', ' '),
 ('equal', 'u')]

In [182]:
llm = ChatAnthropic(
    model="claude-3-5-sonnet-20240620",
)

In [183]:
from typing import List, Dict, Any

class Suggestions(BaseModel):
    word: str = Field(
        "",
        description="The segment of the text",
    )
    suggestions: List[str] = Field(
        [],
        description="List of suggestions for the word",
    )
    importance: int = Field(
        0,
        description="The importance of the suggestion from 1 to 100",
    )


class Feedback(BaseModel):
    general_feedback: List[str] = Field(
        "",
        description="Overall feedback on the text",
    )
    suggestions: List[Suggestions] = Field(
        [],
        description="Suggestions for each segment of the text",
    )


In [184]:
contents_str = "The key differences in the string are highlighted below: "
for tag, *args in contents:
    if tag == 'equal':
        contents_str += f" Same({args[0]}), "
    elif tag == 'replace':
        contents_str += f" Replace({args[0]} with {args[1]}), "
    elif tag == 'delete':
        contents_str += f" Delete({args[0]}), "
    elif tag == 'insert':
        contents_str += f" Insert({args[0]}), "
contents_str = contents_str.removesuffix(", ")
contents_str    

'The key differences in the string are highlighted below:  Same(h),  Delete( ),  Same(aʊ ɑɹ j),  Delete( ),  Same(u)'

In [186]:
prompt = f"""
Given the following ipa transcription that has been generated from the audio file: {human},
The user is trying to say the word: {word} and in ipa transcription, it is: {ipa}.
The provided IPA transcription is modified to include spaces between each character.
The similarity between the ipa transcription and the phonemes is: {ratio},
The user's native language is: English, and the user's age is: 12,,
The target language is: English, and the user's efficiency level is: Beginner.

See below the differences between the ipa transcription and the phonemes:
{contents_str}
"""
structured_feedback = llm.with_structured_output(Feedback)
structured_feedback.invoke(prompt)

Feedback(general_feedback=["Your pronunciation of 'How are you?' is very good! You're really close to perfect pronunciation. There are just a couple of small things we can work on to make it even better."], suggestions=[Suggestions(word='How are you?', suggestions=['Try to say the phrase as one smooth unit, without pauses between words.', "Pay attention to the ending 'u' sound in 'you'. Make sure it's clear and not too short."], importance=80), Suggestions(word='How', suggestions=["Your pronunciation of 'How' is excellent! Keep up the good work."], importance=60), Suggestions(word='are', suggestions=["Your pronunciation of 'are' is very good. Make sure to maintain the 'r' sound clearly."], importance=70), Suggestions(word='you', suggestions=["Try to make the 'u' sound at the end of 'you' a bit longer and clearer."], importance=75)])