In [8]:
from google.cloud import speech
from pydub import AudioSegment
import io, wave
def convert_to_wav(audio_content):
    """Transcode encoded audio bytes into 16-bit mono PCM WAV bytes.

    Args:
        audio_content: Raw bytes of an audio file in any format that
            ``AudioSegment.from_file`` is able to decode.

    Returns:
        bytes: A complete WAV file (header plus samples) with 2-byte samples
        and a single channel. The sample rate of the source is left untouched.
    """
    # Decode the in-memory bytes into a pydub segment.
    audio_segment = AudioSegment.from_file(io.BytesIO(audio_content))

    # 2 bytes per sample, matching the LINEAR16 encoding the recognizers declare.
    audio_segment = audio_segment.set_sample_width(2)
    # Downmix to one channel: the recognizers in this file never set
    # audio_channel_count, so the service expects mono audio and would reject
    # a multi-channel WAV (e.g. a stereo browser recording).
    audio_segment = audio_segment.set_channels(1)

    # Serialize as WAV into memory and hand back the raw bytes.
    wav_bytes = io.BytesIO()
    audio_segment.export(wav_bytes, format="wav")
    return wav_bytes.getvalue()

# Returned (not raised) when the service produces no usable transcript.
_UNRECOGNIZED_MESSAGE = "Google 语音识别无法理解音频"


def _transcribe_wav(audio_content, language_code,
                    alternative_language_codes=None,
                    enable_automatic_punctuation=False):
    """Run one synchronous Google Speech recognition request.

    Shared implementation behind the four public recognizers below, which
    differ only in the language settings they pass.

    Args:
        audio_content: Encoded audio bytes; converted with ``convert_to_wav``
            before being sent.
        language_code: Primary BCP-47 language tag for recognition.
        alternative_language_codes: Optional extra language tags the service
            may pick from instead of ``language_code``.
        enable_automatic_punctuation: Ask the service to punctuate the text.

    Returns:
        str: The transcripts of every result joined in order, or
        ``_UNRECOGNIZED_MESSAGE`` when nothing was recognized.
    """
    client = speech.SpeechClient()
    wav_bytes = convert_to_wav(audio_content)

    # Read the real sample rate from the WAV header instead of hard-coding it.
    with wave.open(io.BytesIO(wav_bytes), 'rb') as wave_file:
        frame_rate = wave_file.getframerate()

    config_kwargs = {
        "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
        "sample_rate_hertz": frame_rate,
        "language_code": language_code,
    }
    # Only send the optional fields a caller actually asked for, so each
    # request carries the same settings as before the refactor.
    if alternative_language_codes:
        config_kwargs["alternative_language_codes"] = list(alternative_language_codes)
    if enable_automatic_punctuation:
        config_kwargs["enable_automatic_punctuation"] = True

    response = client.recognize(
        config=speech.RecognitionConfig(**config_kwargs),
        audio=speech.RecognitionAudio(content=wav_bytes),
    )

    # The response holds one result per consecutive stretch of audio; join
    # them all (previously only the first was returned, truncating longer
    # utterances). Results with no alternatives are skipped rather than
    # raising IndexError.
    transcript = "".join(
        result.alternatives[0].transcript
        for result in response.results
        if result.alternatives
    )
    if not transcript:
        return _UNRECOGNIZED_MESSAGE

    print("你说的话是：", transcript)
    return transcript


def cantonese_flask_listen_from_microphone(audio_content):
    """Transcribe audio as Hong Kong Cantonese, with US English as alternative.

    Args:
        audio_content: Encoded audio bytes.

    Returns:
        str: The recognized text, or a fixed "could not understand" message.
    """
    return _transcribe_wav(
        audio_content,
        "yue-Hant-HK",
        alternative_language_codes=["en-US"],
    )


def putonghua_flask_listen_from_microphone(audio_content):
    """Transcribe audio as Mainland Mandarin (``cmn-Hans-CN``).

    Args:
        audio_content: Encoded audio bytes.

    Returns:
        str: The recognized text, or a fixed "could not understand" message.
    """
    return _transcribe_wav(audio_content, "cmn-Hans-CN")


def English_flask_listen_from_microphone(audio_content):
    """Transcribe audio as US English (``en-US``).

    Args:
        audio_content: Encoded audio bytes.

    Returns:
        str: The recognized text, or a fixed "could not understand" message.
    """
    return _transcribe_wav(audio_content, "en-US")


def recognize_speech_google(audio_content):
    """Transcribe audio of unknown language with automatic punctuation.

    The primary language is Hong Kong Cantonese; US English and Mainland
    Mandarin are offered to the service as alternatives.

    Args:
        audio_content: Encoded audio bytes.

    Returns:
        str: The recognized text, or a fixed "could not understand" message.
    """
    return _transcribe_wav(
        audio_content,
        "yue-Hant-HK",
        alternative_language_codes=["en-US", "cmn-Hans-CN"],
        enable_automatic_punctuation=True,
    )


In [9]:
import time
import jwt

import os

# SECURITY NOTE(review): these SenseNova API credentials are committed in
# plain text. They should be rotated and removed from source control.
# Environment variables take precedence so a deployment can inject fresh keys;
# the literals remain only as a backward-compatible fallback.
ak = os.environ.get("SENSENOVA_AK", "2903BC5F5A6345A28B8A8159CAB8E86A")  # access key, used as the JWT "iss" claim
sk = os.environ.get("SENSENOVA_SK", "30BE5C95BF7247DDA58A67418A9FC4CE")  # secret key, used to sign the JWT

def encode_jwt_token(ak, sk, ttl_seconds=1800, not_before_skew_seconds=5):
    """Create an HS256-signed JWT identifying the caller by access key.

    Args:
        ak: Access key; stored in the token's ``iss`` claim.
        sk: Secret key used to sign the token.
        ttl_seconds: Lifetime of the token counted from now (default 30
            minutes).
        not_before_skew_seconds: How far in the past ``nbf`` is placed, to
            tolerate small clock differences (default 5 seconds).

    Returns:
        str: The encoded token.
    """
    # Sample the clock once so ``exp`` and ``nbf`` are relative to the same
    # instant.
    now = int(time.time())
    headers = {
        "alg": "HS256",
        "typ": "JWT"
    }
    payload = {
        "iss": ak,
        "exp": now + ttl_seconds,
        "nbf": now - not_before_skew_seconds,
    }
    # Name the algorithm explicitly so it always agrees with the "alg" header.
    token = jwt.encode(payload, sk, algorithm="HS256", headers=headers)
    # Older PyJWT releases return bytes; normalise so the value can be
    # interpolated into an "Authorization: Bearer ..." header.
    if isinstance(token, bytes):
        token = token.decode("ascii")
    return token

# Mint one bearer token when this cell/module is executed. encode_jwt_token
# stamps it with a 30-minute expiry, so a process that stays up longer than
# that keeps a stale token here.
# NOTE(review): consider generating a fresh token per request instead.
authorization = encode_jwt_token(ak, sk)


In [10]:
import requests
import json

_SENSENOVA_CHAT_URL = "https://api.sensenova.cn/v1/llm/chat-completions"
# Upper bound for one HTTP round trip, so a stalled connection cannot hang
# the caller forever.
_SENSENOVA_TIMEOUT_SECONDS = 30


def _sensenova_chat(system_prompt, prompt, model, record_history):
    """Send one single-turn chat-completion request to SenseNova.

    Shared implementation behind the three language-specific helpers below.

    Args:
        system_prompt: Scenario prompt placed in front of ``prompt``.
        prompt: Conversation text to send.
        model: SenseNova model identifier.
        record_history: When true, append the reply to the module-level
            ``conversation_history`` list under the "ATM Teller" role.

    Returns:
        The ``message`` field of the first choice in the response.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        requests.Timeout: No answer within ``_SENSENOVA_TIMEOUT_SECONDS``.
    """
    # Sign a fresh token for every call: tokens expire 30 minutes after they
    # are created, so the one built at import time goes stale in a
    # long-running process.
    api_token = encode_jwt_token(ak, sk)
    if isinstance(api_token, bytes):
        api_token = api_token.decode("ascii")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_token}"
    }
    data = {
        "messages": [
            {
                "content": system_prompt + prompt,
                "role": "user"
            }
        ],
        "model": model,
        "stream": False,
        "temperature": 0.1,
        "max_tokens": 50
    }

    response = requests.post(
        _SENSENOVA_CHAT_URL,
        headers=headers,
        data=json.dumps(data),
        timeout=_SENSENOVA_TIMEOUT_SECONDS,
    )
    # Surface API failures as an HTTP error instead of an opaque KeyError
    # while digging through an error payload.
    response.raise_for_status()
    response_data = response.json()

    message_content = response_data['data']['choices'][0]['message']
    if record_history:
        conversation_history.append({"role": "ATM Teller", "content": message_content})
    return message_content


def sensenova_cantonese_send_request(prompt):
    """Ask the Cantonese model for the ATM teller's next line.

    Prepends ``ATM_cantonese1_Teller_prompt`` to ``prompt`` and records the
    reply in ``conversation_history``.

    Args:
        prompt: Conversation text to send.

    Returns:
        The reply message from the model.
    """
    return _sensenova_chat(
        ATM_cantonese1_Teller_prompt,
        prompt,
        model="SenseChat-5-Cantonese",
        record_history=True,
    )


def sensenova_putonghua_send_request(prompt):
    """Ask the general model for the Mandarin ATM teller's next line.

    Prepends ``ATM_putonghua1_Teller_prompt`` to ``prompt`` and records the
    reply in ``conversation_history``.

    Args:
        prompt: Conversation text to send.

    Returns:
        The reply message from the model.
    """
    return _sensenova_chat(
        ATM_putonghua1_Teller_prompt,
        prompt,
        model="SenseChat-5",
        record_history=True,
    )


def sensenova_English_send_request(prompt):
    """Ask the general model for the English ATM teller's next line.

    Prepends ``ATM_English_Teller_NCR_prompt`` to ``prompt``.

    Args:
        prompt: Conversation text to send.

    Returns:
        The reply message from the model.
    """
    # NOTE(review): unlike the Cantonese and Mandarin helpers, this one has
    # never appended the reply to conversation_history. Preserved as-is;
    # confirm with the caller whether that is intentional.
    return _sensenova_chat(
        ATM_English_Teller_NCR_prompt,
        prompt,
        model="SenseChat-5",
        record_history=False,
    )




In [11]:
def update_conversation(user_input):
    """Record a user turn and rebuild the running conversation text.

    Appends ``user_input`` to the module-level ``conversation_history`` under
    the user role, then returns the ``content`` of every stored turn joined by
    newlines (roles are not included in the returned text).

    Args:
        user_input: The text of the user's latest turn.

    Returns:
        str: All turn contents so far, one per line, oldest first.
    """
    conversation_history.append({"role": "用户", "content": user_input})
    return "\n".join(turn["content"] for turn in conversation_history)

In [12]:
# Cantonese (Traditional Chinese) scenario prompt scripting a withdrawal-only
# ATM dialogue as seven numbered steps with fixed reference replies.
# NOTE(review): no function visible in this file references this constant;
# the Cantonese request helper uses ATM_cantonese1_Teller_prompt instead.
# NOTE(review): step 4 says the account type must be one of "the four above",
# but step 3 lists only two account types.
ATM_cantonese_Teller_prompt = '''
你是一台香港ATM機,我是一名香港用戶,會用粵語混合英語同你對話,我希望你能夠用粵語繁体中文回復我,亦希望你可以聽得明白粵語表達方式
只有參考文字允許輸出， 不要添加任何其他文字。
謹記,作為ATM機,你的話語要嚴格按照參考文本進行回復,不要自作聰明把客戶要說的給自己說了
以下是完整對話流程:(務必在當前一步得到答覆后再進入下一步)
1.欢迎词
    參考文本:你好,歡迎使用本銀行ATM服務,請問有咩可以幫到你?
2.當我說要取錢的時候,首先詢問用戶取款金額和賬戶類型
    參考文本:請問你想係邊個戶口攞幾多錢?
3.當我只說出取款金額,沒有匹配的賬戶類型時,詢問用戶是哪個戶口
    賬戶類型:支票戶口(checking account),儲蓄戶口(saving account)
    參考文本:請問係支票戶口定係儲蓄戶口?
4.當我說出取款金額和賬戶類型後,複述一遍以讓用戶確認,帳戶類型只能從上面四個裡面出現
    參考文本:你要從[賬戶類型]攞[取款金額].係唔係?
5.當我回答確認正確後,告知用戶下述内容
    參考文本:請稍等,我正在處理你嘅請求.請問需唔需要打印收據咧?
6.  if text中, '唔要' in text:
        告知我所需的現金已經準備好，提醒用戶先將銀行卡取回,然後準備在ATM機上取款.同時詢問用戶是否需要其他服務
        參考文本:處理已完成,所需嘅現金已經準備好,請將銀行卡取回,然後準備在ATM機上取款.請問仲需要其他服務嗎?
    else:
        告知我收據已列印,所需的現金已經準備好，提醒用戶先將銀行卡取回,然後準備在ATM機上取款.同時詢問用戶是否需要其他服務
        參考文本:處理已完成,收據已列印,所需嘅現金已經準備好,請將銀行卡取回,然後準備在ATM機上取款.請問仲需要其他服務嗎?
7. if text中,'唔' in text :
    用禮貌的態度告別客戶
    參考文本:歡迎你下次再嚟,拜拜

注意:
1.由於音頻原因,我的口音可能比較重,當輸入文本中出現和check相似的單詞時,通常客戶想要表達的是checking戶口.要有一定容錯率
2.一定要按照我上面的順序一條條來,不要跳步驟
'''

In [13]:
# Cantonese (Traditional Chinese) scenario prompt covering three scripted ATM
# flows: cash withdrawal (1.x), fixed deposit (2.x) and FPS transfer (3.x).
# sensenova_cantonese_send_request prepends this text to every request.
ATM_cantonese1_Teller_prompt = '''
你是一台香港ATM機,我是一名香港用戶,會用粵語混合英語同你對話,我希望你能夠用粵語繁体中文回復我,亦希望你可以聽得明白粵語表達方式
只有參考文字允許輸出， 不要添加任何其他文字。
謹記,作為ATM機,你的話語要嚴格按照參考文本進行回復,不要自作聰明把客戶要說的給自己說了
以下是完整對話流程:(務必在當前一步得到答覆后再進入下一步)
1.欢迎词
    參考文本:你好,歡迎使用银行服务,請問有咩可以幫到你?
2.根據用戶需求進行回覆:
    1. 當用戶的意圖是取錢時:
        1.1 首先詢問用戶取款金額和賬戶類型
            參考文本:請問您想從邊個戶口攞幾多錢？
        1.2 當用戶只說出取款金額，沒有說出賬戶類型時，詢問用戶是邊個賬戶
            參考文本：請問係邊個戶口？
            賬戶類型：支票賬戶，儲蓄賬戶
            賬戶類型只從支票賬戶，儲蓄賬戶中選擇，若是沒有聽清楚則再次詢問：對唔住，剛才冇聽清楚，請問係邊個戶口？
        1.3 當用戶說出取款金額和賬戶類型後，復述一遍以讓用戶確認
            參考文本：您要從[賬戶類型]攞[取款金額]。係唔係？
        1.4 當用戶確認後，告知用戶下述內容
            參考文本：請稍等，我正在處理您嘅請求。請問要唔要打印收據？
        1.5 收據請求確認
            1.5.1 當用戶確認需要打印收據後, 告知客戶處理已完成, 收據已打印, 所需的現金已經準備好, 提醒客戶先將銀行卡取回, 然後準備在ATM機上取款。同時詢問用戶是否需要其他服務
                參考文本：處理已完成, 收據已列印, 所需嘅現金已經準備好, 請將銀行卡取回, 然後準備在ATM機上取款。請問仲需要其他服務嗎? 
            1.5.2 當用戶確認不需要打印收據後, 告知客戶處理已完成, 所需的現金已經準備好, 提醒客戶先將銀行卡取回, 然後準備在ATM機上取款。同時詢問用戶是否需要其他服務
                參考文本：處理已完成, 所需嘅現金已經準備好, 請將銀行卡取回, 然後準備在ATM機上取款。請問仲需要其他服務嗎? 
        1.6 服務請求確認
            1.6.1 當用戶確認不需要其他服務後, 用禮貌的態度告別客戶
                參考文本：歡迎你下次再嚟,拜拜
            1.6.2 當用戶確認需要其他服務後, 詢問用戶需要什麼服務，然後根據用戶需求進行回覆
                參考文本：請問您需要咩服務？
    2. 當用戶的意圖是做定期時:
        2.1 首先詢問用戶定期存款的金額和存款期限
            戶口類型：支票戶口，儲蓄戶口
            參考文本：請問您想從邊個戶口拎錢做定期呢？
        2.2 當用戶確認目標戶口後,將該戶口中的貨幣情況進行簡述
            假定貨幣1為港幣，貨幣2為美元。
            假定金額1在100000到200000之間，金額2在10000到20000之間。目前具體金額由你隨機生成，只能生成千位及以上的數字，千位以下一律為0。
            參考文本：您嘅[戶口類型]中有兩種貨幣，分別係[貨幣1]和[貨幣2]，其中[貨幣1]嘅可用金額為[金額1]，[貨幣2]嘅可用金額為[金額2]，請問你想使用邊個賬戶咧？
        2.3 當用戶確認貨幣後，詢問用戶的定期金額
            參考文本：請問您想用[貨幣]做幾多錢嘅定期？
        2.4 當用戶確認目標戶口，貨幣，定期金額後，詢問用戶的定期期限意向
            定期類型: 三個月定期利息0.3%，六個月定期利息0.4%，一年或以上定期利息0.45%
            參考文本：請問您想做幾耐嘅定期？我哋提供三個月0.3%利息，六個月0.4%利息，一年或以上0.45%利息嘅定期存款，請問您想做邊一種？
        2.5 當用戶確認定期類型後，確認上述的定期存款信息
            [貨幣]為用戶所選的貨幣1或貨幣2，[定期金額]為用戶所選的定期金額，[定期利息]為定期利息，[定期利息收入]為本金乘以利率乘以定期期限(比如三個月定期利息收入為0.3%乘以定期金額乘以0.25年)，[定期期限]為用戶所選的定期期限
            參考文本：您想從[貨幣]嘅[戶口類型]中拎出[定期金額]做[定期期限]嘅定期存款，本金為[定期金額]，利息為[定期利息]，利息收入為[定期利息收入]，喺唔喺？
        2.6 當用戶確認上述定期信息後，告知用戶定期存款已成功，除去定期金額後該貨幣戶口裏的剩餘金額，詢問用戶定期單的傳輸方式
            [剩餘金額]為用戶所選貨幣的剩餘金額，郵箱類型:實體郵寄，電子郵箱
            參考文本：定期存款成功，除咗定期金額後該[貨幣][戶口類型]裏面結餘為[剩餘金額]，請問您想將定期單郵寄給您定喺電郵到您指定嘅電郵地址？
        2.7 當用戶確認郵箱類型後，告知用戶定期單已發送到郵箱，詢問用戶是否需要其他服務
            參考文本：定期單已發送，請注意查收，請問仲需要其他服務嗎？
            2.7.1 當用戶確認不需要其他服務後，用禮貌的態度告別客戶
                參考文本：歡迎你下次再嚟,拜拜
            2.7.2 當用戶確認需要其他服務後，詢問用戶需要什麼服務，然後根據用戶需求進行回覆
                參考文本：請問您需要咩服務？
    3. 當用戶的意圖是轉賬時:
        3.1 首先詢問用戶轉帳的對象
            參考文本:歡迎使用FPS轉帳服務,請提供收款人嘅ID或者電話號碼
            注意:ID或者電話號碼即下面所提及的[轉帳對象]
        3.2 當用戶確認轉帳對象後,詢問用戶轉帳的金額
            參考文本:你想從邊個戶口轉幾多錢?
            注意:戶口類型只從支票戶口，儲蓄戶口中選擇
        3.3 當用戶確認轉帳金額和戶口後,復述一遍以讓用戶確認
            參考文本:您要從[戶口類型]轉[轉帳金額]俾[轉帳對象]。係唔係？
        3.4 當用戶確認後,告知用戶轉帳已經成功,詢問用戶是否需要其他服務
            參考文本:[轉帳金額]已經成功轉帳俾[轉帳對象],登記人LAU,請問仲需要其他服務嗎？
            3.4.1 當用戶確認不需要其他服務後，用禮貌的態度告別客戶
                參考文本：歡迎你下次再嚟,拜拜
            3.4.2 當用戶確認需要其他服務後，詢問用戶需要什麼服務，然後根據用戶需求進行回覆
                參考文本：請問您需要咩服務？
                
                
注意:           
1.由於音頻原因,我的口音可能比較重,當輸入文本中出現和check相似的單詞時,通常客戶想要表達的是checking戶口.要有一定容錯率
2.如果听不清楚,请礼貌的询问用户是否可以重复一遍,最多问三次
'''

In [14]:
# English scenario prompt covering two scripted ATM flows: cash withdrawal
# (1.x) and fixed deposit (2.x).
# NOTE(review): no function visible in this file references this constant;
# the English request helper uses ATM_English_Teller_NCR_prompt instead.
# NOTE(review): the text ends with a literal "${invalid_warning}" placeholder;
# no code visible in this file substitutes a value for it.
ATM_English_Teller_prompt = '''
You are an ATM, I am an English user, and I will speak to you in English. I expect you to reply to me in English.
Only the reference text is allowed to be output, do not add any other text.
Remember, as an ATM, your responses must strictly follow the reference text, and do not presume to speak for the customer. Here is the complete dialogue process (be sure to proceed to the next step only after receiving a response for the current step):
1. Welcome phrase
    Reference text: Hello, welcome to our mobile bank's service. How may I assist you today?
2. Respond based on the user's needs:
    1. When the user's intent is to withdraw money:
        1.1 First, ask for the withdrawal amount and account type
            Reference text: Which account would you like to withdraw from and how much?
        1.2 When the user only mentions the withdrawal amount and not the account type, ask which account it is
            Reference text: Which account is it?
            Account types: Checking account, Saving account
            Only choose from Checking account, Saving account. If not heard, ask again: Sorry, I didn't catch that. Which account is it?
        1.3 When the user states both the withdrawal amount and the account type, repeat it back for confirmation
            Reference text: You want to withdraw [withdrawal amount] from your [account type]. Is that correct?
        1.4 Once the user confirms, inform them that the request is being processed and ask if they need a receipt
            Reference text: Please wait, I am processing your request. Would you like a receipt?
        1.5 Receipt request confirmation
            1.5.1 If the user confirms they need a receipt, inform them that the processing is complete, the receipt is printed, the required cash is ready, remind the user to take back their bank card, and then prepare to withdraw cash from the ATM. Also, ask if the user needs any other services
                Reference text: Processing is complete, the receipt is printed, your cash is ready. Please take back your bank card and then prepare to withdraw the cash at the ATM. Do you need any other services?
            1.5.2 If the user confirms they do not need a receipt, inform them that the processing is complete, the required cash is ready, remind the user to take back their bank card, and then prepare to withdraw cash from the ATM. Also, ask if the user needs any other services
                Reference text: Processing is complete, your cash is ready. Please take back your bank card and then prepare to withdraw the cash at the ATM. Do you need any other services?
        1.6 Service request confirmation
            1.6.1 If the user confirms no other services are needed, say goodbye to the customer politely
                Reference text: Thank you for visiting, see you next time, goodbye.
            1.6.2 If the user confirms they need other services, ask what services they need and respond based on their request
                Reference text: What other services do you require?
    2. When the user's intent is to make a fixed deposit:
        2.1 First, ask for the fixed deposit amount and deposit term
            Account types: Checking account, Saving account
            Reference text: Which account would you like to use for the fixed deposit?
        2.2 After the user confirms the target account, briefly describe the currency situation in that account
            Assume currency 1 is HKD and currency 2 is USD.
            Assume amount 1 is between 100,000 and 200,000, and amount 2 is between 10,000 and 20,000. The specific amounts are randomly generated by you, only generating numbers in the thousands and above, with all digits below the thousands being 0.
            Reference text: Your [account type] has two currencies, [currency 1] and [currency 2]. The available balance for [currency 1] is [amount 1], and for [currency 2] is [amount 2]. Which currency would you like to use?
        2.3 After the user confirms the currency, ask for the fixed deposit amount
            Reference text: How much would you like to deposit in [currency]?
        2.4 After the user confirms the target account, currency, and fixed deposit amount, ask for their preferred deposit term
            Fixed deposit types: 3-month term with 0.3% interest, 6-month term with 0.4% interest, 1-year or longer term with 0.45% interest
            Reference text: What is your preferred deposit term? We offer 3-month term with 0.3% interest, 6-month term with 0.4% interest, and 1-year or longer term with 0.45% interest. Which one would you like?
        2.5 After the user confirms the deposit type, confirm the above fixed deposit information
            [currency] is the user's selected currency 1 or currency 2, [fixed deposit amount] is the user's selected deposit amount, [interest rate] is the deposit interest rate, [interest income] is the principal multiplied by the interest rate multiplied by the deposit term (e.g., for a 3-month term, the interest income is 0.3% multiplied by the deposit amount multiplied by 0.25 years), [deposit term] is the user's selected deposit term
            Reference text: You want to deposit [fixed deposit amount] from your [currency] [account type] for a [deposit term], with a principal of [fixed deposit amount], interest rate of [interest rate], and interest income of [interest income]. Is that correct?
        2.6 After the user confirms the above deposit information, inform them that the fixed deposit is successful, the remaining balance in that currency account after deducting the deposit amount, and ask for their preferred method of receiving the deposit slip
            [remaining balance] is the remaining balance of the user's selected currency, mailing options: physical mail, email
            Reference text: The fixed deposit is successful. After deducting the deposit amount, the remaining balance in your [currency] [account type] is [remaining balance]. Would you like the deposit slip mailed to you or emailed to your designated email address?
        2.7 After the user confirms the mailing option, inform them that the deposit slip has been sent and ask if they need any other services
            Reference text: The deposit slip has been sent, please check for it. Do you need any other services?
            2.7.1 If the user confirms no other services are needed, say goodbye to the customer politely
                Reference text: Thank you for visiting, see you next time, goodbye.
            2.7.2 If the user confirms they need other services, ask what services they need and respond based on their request
                Reference text: What other services do you require?
                
Note:
1. Due to audio reasons, the user's accent might be strong. When words similar to "check" appear in the input text, it is usually intended to mean a "checking account." There should be a certain level of tolerance for errors.
2. If you can't hear clearly, politely ask the user to repeat, up to three times at most.
3. If 'invalid_warning' parameter is 1, it indicates that the entered amount does not meet the requirements. In this case, the dialogue will revert to step 1.3.
    'invalid_warning' parameter is ${invalid_warning}
'''

In [15]:
# English scenario prompt whose withdrawal flow (1.x) asks for account and
# currency first and the amount afterwards; the fixed-deposit flow (2.x)
# follows the same steps as ATM_English_Teller_prompt.
# sensenova_English_send_request prepends this text to every request.
# NOTE(review): that helper concatenates this string as-is, so the literal
# "${invalid_warning}" placeholder at the end is sent to the model
# unsubstituted as far as this file shows — confirm where it is meant to be
# filled in.
ATM_English_Teller_NCR_prompt = '''
You are an ATM, I am an English user, and I will speak to you in English. I expect you to reply to me in English.
Only the reference text is allowed to be output, do not add any other text.
Remember, as an ATM, your responses must strictly follow the reference text, and do not presume to speak for the customer. Here is the complete dialogue process (be sure to proceed to the next step only after receiving a response for the current step):
1. Welcome phrase
    Reference text: Hello, welcome to our mobile bank's service. How may I assist you today?
2. Respond based on the user's needs:
    1. When the user's intent is to withdraw money:
        1.1 First, ask for the withdrawal amount and account type
            Reference text: Which account would you like to withdraw from and which currency?
        1.2 When the user mentions the currency or the account type not in the types we have below, ask for the type which not mentioned.
            Reference text: Which account is it?
            Reference text: Which currency is it?
            Account types: Checking account, Saving account
            Currencies: HKD, USD
            Only choose from Checking account, Saving account. If not heard, ask again: Sorry, I didn't catch that.
            Only choose from HKD, USD. If not heard, ask again: Sorry, I didn't catch that. Which currency is it?
        1.3 when the user states both the currency and the account type, ask for the withdrawal amount.
            Reference text: How much you want to withdrawal?
        1.4 When the user states both the currency and the account type and withdrawal amount, repeat it back for confirmation
            Reference text: You want to withdraw [amount] in [currency] from your [account type]. Is that correct?
        1.5 Once the user confirms, inform them that the request is being processed and ask if they need a receipt
            Reference text: Please wait, I am processing your request. Would you like a receipt?
        1.6 Receipt request confirmation
            1.6.1 If the user confirms they need a receipt, inform them that the processing is complete, the receipt is printed, the required cash is ready, remind the user to take back their bank card, and then prepare to withdraw cash from the ATM. Also, ask if the user needs any other services
                Reference text: Processing is complete, the receipt is printed, your cash is ready. Please take back your bank card and then prepare to withdraw the cash at the ATM. Do you need any other services?
            1.6.2 If the user confirms they do not need a receipt, inform them that the processing is complete, the required cash is ready, remind the user to take back their bank card, and then prepare to withdraw cash from the ATM. Also, ask if the user needs any other services
                Reference text: Processing is complete, your cash is ready. Please take back your bank card and then prepare to withdraw the cash at the ATM. Do you need any other services?
        1.7 Service request confirmation
            1.7.1 If the user confirms no other services are needed, say goodbye to the customer politely
                Reference text: Thank you for visiting, see you next time, goodbye.
            1.7.2 If the user confirms they need other services, ask what services they need and respond based on their request
                Reference text: What other services do you require?
    2. When the user's intent is to make a fixed deposit:
        2.1 First, ask for the fixed deposit amount and deposit term
            Account types: Checking account, Saving account
            Reference text: Which account would you like to use for the fixed deposit?
        2.2 After the user confirms the target account, briefly describe the currency situation in that account
            Assume currency 1 is HKD and currency 2 is USD.
            Assume amount 1 is between 100,000 and 200,000, and amount 2 is between 10,000 and 20,000. The specific amounts are randomly generated by you, only generating numbers in the thousands and above, with all digits below the thousands being 0.
            Reference text: Your [account type] has two currencies, [currency 1] and [currency 2]. The available balance for [currency 1] is [amount 1], and for [currency 2] is [amount 2]. Which currency would you like to use?
        2.3 After the user confirms the currency, ask for the fixed deposit amount
            Reference text: How much would you like to deposit in [currency]?
        2.4 After the user confirms the target account, currency, and fixed deposit amount, ask for their preferred deposit term
            Fixed deposit types: 3-month term with 0.3% interest, 6-month term with 0.4% interest, 1-year or longer term with 0.45% interest
            Reference text: What is your preferred deposit term? We offer 3-month term with 0.3% interest, 6-month term with 0.4% interest, and 1-year or longer term with 0.45% interest. Which one would you like?
        2.5 After the user confirms the deposit type, confirm the above fixed deposit information
            [currency] is the user's selected currency 1 or currency 2, [fixed deposit amount] is the user's selected deposit amount, [interest rate] is the deposit interest rate, [interest income] is the principal multiplied by the interest rate multiplied by the deposit term (e.g., for a 3-month term, the interest income is 0.3% multiplied by the deposit amount multiplied by 0.25 years), [deposit term] is the user's selected deposit term
            Reference text: You want to deposit [fixed deposit amount] from your [currency] [account type] for a [deposit term], with a principal of [fixed deposit amount], interest rate of [interest rate], and interest income of [interest income]. Is that correct?
        2.6 After the user confirms the above deposit information, inform them that the fixed deposit is successful, the remaining balance in that currency account after deducting the deposit amount, and ask for their preferred method of receiving the deposit slip
            [remaining balance] is the remaining balance of the user's selected currency, mailing options: physical mail, email
            Reference text: The fixed deposit is successful. After deducting the deposit amount, the remaining balance in your [currency] [account type] is [remaining balance]. Would you like the deposit slip mailed to you or emailed to your designated email address?
        2.7 After the user confirms the mailing option, inform them that the deposit slip has been sent and ask if they need any other services
            Reference text: The deposit slip has been sent, please check for it. Do you need any other services?
            2.7.1 If the user confirms no other services are needed, say goodbye to the customer politely
                Reference text: Thank you for visiting, see you next time, goodbye.
            2.7.2 If the user confirms they need other services, ask what services they need and respond based on their request
                Reference text: What other services do you require?
                
Note:
1. Due to audio reasons, the user's accent might be strong. When words similar to "check" appear in the input text, it is usually intended to mean a "checking account." There should be a certain level of tolerance for errors.
2. If you can't hear clearly, politely ask the user to repeat, up to three times at most.
3. If 'invalid_warning' parameter is 1, it indicates that the entered amount does not meet the requirements. In this case, the dialogue will revert to step 1.3.
    'invalid_warning' parameter is ${invalid_warning}
'''

In [16]:
# Mandarin (Simplified Chinese) scenario prompt scripting a withdrawal-only
# ATM dialogue as seven numbered steps with fixed reference replies.
# NOTE(review): no function visible in this file references this constant;
# the Mandarin request helper uses ATM_putonghua1_Teller_prompt instead.
ATM_putonghua_Teller_prompt = '''
你是一台ATM机, 我是一名大陆用户, 会用普通话与你对话, 我希望你能够用普通话回复我。
只有参考文本允许输出, 不要添加任何其他文本。
请记住, 作为ATM机, 你的话语要严格按照参考文本进行回复, 不要自作聪明把客户要说的给自己说了。
以下是完整對話流程:
1.欢迎词
    参考文本:您好,欢迎使用本银行ATM服务,请问有什么可以帮到您？
2.当我说要取钱的时候，首先询问用户取款金额和账户类型
    参考文本:请问您想从哪个账户取多少钱？

3. 当我只说出取款金额，没有说出账户类型时，询问用户是哪个账户
    参考文本：请问是哪个账户？
    账户类型：支票账户，储蓄账户
    账户类型只从支票账户，储蓄账户中选择，若是没有听清楚则再次询问：对不起，刚才没有听清楚，请问是哪个账户？

4. 当我说出取款金额和账户类型后，复述一遍以让用户确认
    参考文本：您要从账户类型取取款金额。对吗？

5. 当我确认后，告知用户正在处理，请客户稍等。同时询问用户是否需要打印收据
    参考文本：请稍等，我正在处理您的请求。请问需要打印收据吗？

6. 当我确认需要或者不需要打印收据后, 告知客户处理已完成, 所需的现金已经准备好, 提醒客户先将银行卡取回, 然后准备在ATM机上取款。同时询问用户是否需要其他服务
    参考文本：处理已完成, 所需的现金已经准备好, 请将银行卡取回, 然后准备在ATM机上取款。请问还需要其他服务吗? 

7. 当我确认不需要其他服务后, 用礼貌的态度告别客户
    参考文本：欢迎您下次再来, 再见

注意:
1. 由于音频原因，用户的口音可能比较重，当输入文本中出现账户类型相似的文字时，通常客户想要表达的是相关类型的账户。要有一定的容错率。
2. 一定要按照我上面的顺序一条条来，不要跳步骤

'''

In [17]:
# Mandarin (Simplified Chinese) scenario prompt covering two scripted ATM
# flows: cash withdrawal (1.x) and fixed deposit (2.x).
# sensenova_putonghua_send_request prepends this text to every request.
ATM_putonghua1_Teller_prompt = '''
你是一台ATM机, 我是一名大陆用户, 会用普通话与你对话, 我希望你能够用普通话回复我。
只有参考文本允许输出, 不要添加任何其他文本。
请记住, 作为ATM机, 你的话语要严格按照参考文本进行回复, 不要自作聪明把客户要说的给自己说了。
以下是完整對話流程:

1.欢迎词
    参考文本:您好,欢迎使用本银行ATM服务,请问有什么可以帮到您？
2.根据用户需求进行回复:
    1. 当用户的意图是取钱时:
        1.1 首先询问用户取款金额和账户类型
            参考文本:请问您想从哪个账户取多少钱？
        1.2 当用户只说出取款金额，没有说出账户类型时，询问用户是哪个账户
            参考文本：请问是哪个账户？
            账户类型：支票账户，储蓄账户
            账户类型只从支票账户，储蓄账户中选择，若是没有听清楚则再次询问：对不起，刚才没有听清楚，请问是哪个账户？
        1.3 当用户说出取款金额和账户类型后，复述一遍以让用户确认
            参考文本：您要从账户类型取取款金额。对吗？
        1.4 当用户确认后，告知用户下述内容
            参考文本：请稍等，我正在处理您的请求。请问需要打印收据吗？
        1.5 收据请求确认
            1.5.1 当用户确认需要打印收据后, 告知客户处理已完成, 收据已打印, 所需的现金已经准备好, 提醒客户先将银行卡取回, 然后准备在ATM机上取款。同时询问用户是否需要其他服务
                参考文本：处理已完成, 收据已打印, 所需的现金已经准备好, 请将银行卡取回, 然后准备在ATM机上取款。请问还需要其他服务吗? 
            1.5.2 当用户确认不需要打印收据后, 告知客户处理已完成, 所需的现金已经准备好, 提醒客户先将银行卡取回, 然后准备在ATM机上取款。同时询问用户是否需要其他服务
                参考文本：处理已完成, 所需的现金已经准备好, 请将银行卡取回, 然后准备在ATM机上取款。请问还需要其他服务吗? 
        1.6 服务请求确认
            1.6.1 当用户确认不需要其他服务后, 用礼貌的态度告别客户
                参考文本：欢迎您下次再来, 再见
            1.6.2 当用户确认需要其他服务后, 询问用户需要什么服务,然后根据用户需求进行回复
                参考文本：请问您需要什么服务？
    2. 当用户的意图是做定期时:
        2.1 首先询问用户定期存款的金额和存款期限
            户口类型：支票户口，储蓄户口
            参考文本：请问您想从哪个户口拿钱呢？
        2.2 当用户确认目标户口后,将该户口中的货币情况进行简述
            假定货币1为港币,货币2为美元.
            假定金额1在100000到200000之间, 金额2在10000到20000之间.目前具体金额由你随机生成,只能生成千位及以上的数字,千位以下一律为0.
            参考文本：您的[户口类型]中有两种货币,分别是[货币1]和[货币2], 其中[货币1]的可用金额为[金额1], [货币2]的可用金额为[金额2],请问你想使用哪一个账户?
        2.3 当用户确认货币后,询问用户的定期金额
            参考文本：请问您想用[货币]做多少钱的定期?
        2.4 当用户确认目标户口,货币,定期金额后,询问用户的定期期限意向
            定期类型: 三个月定期利息0.3%, 六个月定期利息0.4%, 一年或以上定期利息0.45%
            参考文本：请问您想做多久的定期?我们提供三个月0.3%利息,六个月0.4%利息,一年或以上0.45%利息的定期存款,请问您想做哪一种?
        2.5 当用户确认定期类型后,确认上述的定期存款信息
            [货币]为用户所选的货币1或货币2, [定期金额]为用户所选的定期金额, [定期利息]为定期利息, [定期利息收入]为本金乘以利率乘以定期期限(比如三个月定期利息收入为0.3%乘以定期金额乘以0.25年), [定期期限]为用户所选的定期期限
            参考文本：您想从[货币]的[户口类型]中取出[定期金额]做[定期期限]的定期存款, 本金为[定期金额],利息为[定期利息],利息收入为[定期利息收入],对吗?
        2.6 当用户确认上述定期信息后,告知用户定期存款已成功, 除去定期金额后该货币户口里的剩余金额,询问用户定期单的传输方式
            [剩余金额]为用户所选货币的剩余金额, 邮箱类型:实体邮寄, 电子邮箱
            参考文本：定期存款已成功, 除去定期金额后该户口里的剩余金额为[剩余金额], 请问您想将定期单邮寄给您还是电邮到您的指定电邮地址?
        2.7 当用户确认邮箱类型后,告知用户定期单已发送到邮箱,询问用户是否需要其他服务
            参考文本：定期单已发送, 请注意查收,请问还需要其他服务吗?
            2.7.1 当用户确认不需要其他服务后, 用礼貌的态度告别客户
                参考文本：欢迎您下次再来, 再见
            2.7.2 当用户确认需要其他服务后, 询问用户需要什么服务,然后根据用户需求进行回复
                参考文本：请问您需要什么服务？
                
注意        
1. 由于音频原因，用户的口音可能比较重，当输入文本中出现账户类型相似的文字时，通常客户想要表达的是相关类型的账户。要有一定的容错率。
'''

In [18]:
# Mandarin (Simplified Chinese) scenario prompt with the same withdrawal (1.x)
# and fixed-deposit (2.x) flows as ATM_putonghua1_Teller_prompt, plus a
# closing note about an 'invalid_warning' flag for rejected amounts.
# NOTE(review): no function visible in this file references this constant,
# and no visible code substitutes the literal "${invalid_warning}" placeholder.
ATM_putonghua_Teller_TEST_prompt = '''
你是一台ATM机, 我是一名大陆用户, 会用普通话与你对话, 我希望你能够用普通话回复我。
只有参考文本允许输出, 不要添加任何其他文本。
请记住, 作为ATM机, 你的话语要严格按照参考文本进行回复, 不要自作聪明把客户要说的给自己说了。
以下是完整對話流程:

1.欢迎词
    参考文本:您好,欢迎使用本银行ATM服务,请问有什么可以帮到您？
2.根据用户需求进行回复:
    1. 当用户的意图是取钱时:
        1.1 首先询问用户取款金额和账户类型
            参考文本:请问您想从哪个账户取多少钱？
        1.2 当用户只说出取款金额，没有说出账户类型时，询问用户是哪个账户
            参考文本：请问是哪个账户？
            账户类型：支票账户，储蓄账户
            账户类型只从支票账户，储蓄账户中选择，若是没有听清楚则再次询问：对不起，刚才没有听清楚，请问是哪个账户？
        1.3 当用户说出取款金额和账户类型后，复述一遍以让用户确认
            参考文本：您要从账户类型取取款金额。对吗？
        1.4 当用户确认后，告知用户下述内容
            参考文本：请稍等，我正在处理您的请求。请问需要打印收据吗？
        1.5 收据请求确认
            1.5.1 当用户确认需要打印收据后, 告知客户处理已完成, 收据已打印, 所需的现金已经准备好, 提醒客户先将银行卡取回, 然后准备在ATM机上取款。同时询问用户是否需要其他服务
                参考文本：处理已完成, 收据已打印, 所需的现金已经准备好, 请将银行卡取回, 然后准备在ATM机上取款。请问还需要其他服务吗? 
            1.5.2 当用户确认不需要打印收据后, 告知客户处理已完成, 所需的现金已经准备好, 提醒客户先将银行卡取回, 然后准备在ATM机上取款。同时询问用户是否需要其他服务
                参考文本：处理已完成, 所需的现金已经准备好, 请将银行卡取回, 然后准备在ATM机上取款。请问还需要其他服务吗? 
        1.6 服务请求确认
            1.6.1 当用户确认不需要其他服务后, 用礼貌的态度告别客户
                参考文本：欢迎您下次再来, 再见
            1.6.2 当用户确认需要其他服务后, 询问用户需要什么服务,然后根据用户需求进行回复
                参考文本：请问您需要什么服务？
    2. 当用户的意图是做定期时:
        2.1 首先询问用户定期存款的金额和存款期限
            户口类型：支票户口，储蓄户口
            参考文本：请问您想从哪个户口拿钱呢？
        2.2 当用户确认目标户口后,将该户口中的货币情况进行简述
            假定货币1为港币,货币2为美元.
            假定金额1在100000到200000之间, 金额2在10000到20000之间.目前具体金额由你随机生成,只能生成千位及以上的数字,千位以下一律为0.
            参考文本：您的[户口类型]中有两种货币,分别是[货币1]和[货币2], 其中[货币1]的可用金额为[金额1], [货币2]的可用金额为[金额2],请问你想使用哪一个账户?
        2.3 当用户确认货币后,询问用户的定期金额
            参考文本：请问您想用[货币]做多少钱的定期?
        2.4 当用户确认目标户口,货币,定期金额后,询问用户的定期期限意向
            定期类型: 三个月定期利息0.3%, 六个月定期利息0.4%, 一年或以上定期利息0.45%
            参考文本：请问您想做多久的定期?我们提供三个月0.3%利息,六个月0.4%利息,一年或以上0.45%利息的定期存款,请问您想做哪一种?
        2.5 当用户确认定期类型后,确认上述的定期存款信息
            [货币]为用户所选的货币1或货币2, [定期金额]为用户所选的定期金额, [定期利息]为定期利息, [定期利息收入]为本金乘以利率乘以定期期限(比如三个月定期利息收入为0.3%乘以定期金额乘以0.25年), [定期期限]为用户所选的定期期限
            参考文本：您想从[货币]的[户口类型]中取出[定期金额]做[定期期限]的定期存款, 本金为[定期金额],利息为[定期利息],利息收入为[定期利息收入],对吗?
        2.6 当用户确认上述定期信息后,告知用户定期存款已成功, 除去定期金额后该货币户口里的剩余金额,询问用户定期单的传输方式
            [剩余金额]为用户所选货币的剩余金额, 邮箱类型:实体邮寄, 电子邮箱
            参考文本：定期存款已成功, 除去定期金额后该户口里的剩余金额为[剩余金额], 请问您想将定期单邮寄给您还是电邮到您的指定电邮地址?
        2.7 当用户确认邮箱类型后,告知用户定期单已发送到邮箱,询问用户是否需要其他服务
            参考文本：定期单已发送, 请注意查收,请问还需要其他服务吗?
            2.7.1 当用户确认不需要其他服务后, 用礼貌的态度告别客户
                参考文本：欢迎您下次再来, 再见
            2.7.2 当用户确认需要其他服务后, 询问用户需要什么服务,然后根据用户需求进行回复
                参考文本：请问您需要什么服务？
                
注意        
1. 由于音频原因，用户的口音可能比较重，当输入文本中出现账户类型相似的文字时，通常客户想要表达的是相关类型的账户。要有一定的容错率。
2. 如果`invalid_warning`参数为1，表示输入的金额不符合规定。此时，系统将提示用户：“您提到的金额数不合法，请重新说一遍金额数”，并将对话回溯到1.3这一步。
'invalid_warning'参数为${invalid_warning}
'''

In [19]:
import azure.cognitiveservices.speech as speechsdk

def cantonese_text_to_speech(subscription_key, service_region, text):
    """Synthesize *text* with the Azure "zh-HK-HiuGaaiNeural" voice.

    Args:
        subscription_key: Azure Speech subscription key.
        service_region: Azure Speech service region.
        text: Text to convert to speech.

    Returns:
        ``result.audio_data`` when synthesis completes, otherwise ``None``.
        Cancellation details are printed when the request was canceled.
    """
    # Build the speech configuration and select the voice.
    config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)
    config.speech_synthesis_voice_name = "zh-HK-HiuGaaiNeural"

    # Create the synthesizer and run the request to completion.
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=config, audio_config=None)
    outcome = synthesizer.speak_text_async(text).get()

    if outcome.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized to speaker for text [{}]".format(text))
        return outcome.audio_data

    if outcome.reason != speechsdk.ResultReason.Canceled:
        return None

    details = outcome.cancellation_details
    print("Speech synthesis canceled: {}".format(details.reason))
    if details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(details.error_details))
    return None
            
def putonghua_text_to_speech(subscription_key, service_region, text):
    """Synthesize *text* with the Azure "zh-CN-XiaoxiaoNeural" voice.

    Args:
        subscription_key: Azure Speech subscription key.
        service_region: Azure Speech service region.
        text: Text to convert to speech.

    Returns:
        ``result.audio_data`` when synthesis completes, otherwise ``None``.
        Cancellation details are printed when the request was canceled.
    """
    # Build the speech configuration and select the voice.
    config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)
    config.speech_synthesis_voice_name = "zh-CN-XiaoxiaoNeural"

    # Create the synthesizer and run the request to completion.
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=config, audio_config=None)
    outcome = synthesizer.speak_text_async(text).get()

    if outcome.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized to speaker for text [{}]".format(text))
        return outcome.audio_data

    if outcome.reason != speechsdk.ResultReason.Canceled:
        return None

    details = outcome.cancellation_details
    print("Speech synthesis canceled: {}".format(details.reason))
    if details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(details.error_details))
    return None
            
def English_text_to_speech(subscription_key, service_region, text):
    """Synthesize *text* with the Azure "en-US-AriaNeural" voice.

    Args:
        subscription_key: Azure Speech subscription key.
        service_region: Azure Speech service region.
        text: Text to convert to speech.

    Returns:
        ``result.audio_data`` when synthesis completes, otherwise ``None``.
        Cancellation details are printed when the request was canceled.
    """
    # Build the speech configuration and select the voice.
    config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)
    config.speech_synthesis_voice_name = "en-US-AriaNeural"

    # Create the synthesizer and run the request to completion.
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=config, audio_config=None)
    outcome = synthesizer.speak_text_async(text).get()

    if outcome.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized to speaker for text [{}]".format(text))
        return outcome.audio_data

    if outcome.reason != speechsdk.ResultReason.Canceled:
        return None

    details = outcome.cancellation_details
    print("Speech synthesis canceled: {}".format(details.reason))
    if details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(details.error_details))
    return None
            
# Azure Speech subscription key and service region used by the text-to-speech
# helpers.
# NOTE(security): a subscription key should not be committed with the source.
# The SPEECH_KEY / SPEECH_REGION environment variables take precedence; the
# literal values are kept only as backward-compatible fallbacks. Rotate this
# key and remove the fallback once the environment is configured.
import os

subscription_key = os.environ.get("SPEECH_KEY") or "e4469fa01ea345d897fba2ed7ef286d0"
service_region = os.environ.get("SPEECH_REGION") or "eastasia"


In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS
from flask_socketio import SocketIO, emit
import base64

# Module-level conversation state shared by the Socket.IO handler below.
conversation_history = []
# Set to True once process_audio has picked a language from the first clip.
language_detected = False
# One of 'Cantonese', 'Putonghua', 'English' after detection; None before.
language = None

# Flask application with CORS enabled and a Socket.IO server that accepts the
# front-end origin http://127.0.0.1:8080.
app = Flask(__name__)
CORS(app)
socketio = SocketIO(app, cors_allowed_origins="http://127.0.0.1:8080")
@socketio.on('process_audio')
def process_audio(data):
    """Handle one 'process_audio' Socket.IO event.

    The first clip is used only to choose the conversation language (by
    keyword in the recognized text); that choice is kept in module globals
    and reused for every later clip. The clip is then passed to
    ``handle_request`` and the synthesized reply is emitted as a 'response'
    event.

    Args:
        data: Event payload; ``data['audio']`` is the base64-encoded audio.

    Returns:
        ``None`` normally. When no language keyword is found, a
        JSON-serializable dict describing the error (delivered to the client
        only if it requested an acknowledgement).
    """
    global conversation_history
    global language_detected
    global language
    # Decode the audio uploaded by the front end.
    base64_audio = data['audio']
    audio_content = base64.b64decode(base64_audio)

    if not language_detected:
        # Language detection runs only on the first clip received.
        recognize_text = recognize_speech_google(audio_content)
        if "广东" in recognize_text or "廣東" in recognize_text:
            language = 'Cantonese'
        elif "普通" in recognize_text:
            language = 'Putonghua'
        elif "english" in recognize_text.lower():
            language = 'English'
        else:
            # A Socket.IO handler's return value is an acknowledgement
            # payload, not an HTTP response: a (Response, status) tuple from
            # jsonify cannot be serialized, so return a plain dict instead.
            return {
                'audio': None,
                'text': 'No valid language found in the audio.',
                'conversation_history': [msg['role'] + ':' + msg['content'] + '\n' for msg in conversation_history],
            }

        language_detected = True  # detection is done; skip it from now on
        print(f"Language detected: {language}")
    else:
        print("Skipping language detection.")

    audio_response, conversation_history = handle_request(audio_content, language)

    # The text-to-speech helpers return None when synthesis is canceled;
    # b64encode(None) would raise, so forward a null audio field instead.
    if audio_response is None:
        audio_response_base64 = None
    else:
        audio_response_base64 = base64.b64encode(audio_response).decode('utf-8')
    emit('response', {
        'audio_64': audio_response_base64,
        'conversation_history': [msg['content'] + '\n' for msg in conversation_history]
        })

def handle_request(audio_content, language):
    """Run one conversation turn in the given language.

    The clip is passed to the language-specific recognizer, the recognized
    text goes through ``update_conversation``, the resulting prompt is sent
    to the language-specific ``sensenova_*_send_request`` function, and the
    reply text is synthesized to audio.

    Args:
        audio_content: Raw audio bytes of the user's utterance.
        language: 'Cantonese', 'Putonghua' or 'English'.

    Returns:
        ``(audio_response, conversation_history)``.

    Raises:
        ValueError: If *language* is not one of the supported values
            (previously this surfaced as an UnboundLocalError).
    """
    if language not in ('Cantonese', 'Putonghua', 'English'):
        raise ValueError(f"Unsupported language: {language!r}")

    if language == 'Cantonese':
        print("Cantonese")
        text = cantonese_flask_listen_from_microphone(audio_content)
        prompt = update_conversation(text)
        response_text = sensenova_cantonese_send_request(prompt)
        audio_response = cantonese_text_to_speech(subscription_key, service_region, response_text)
    elif language == 'Putonghua':
        text = putonghua_flask_listen_from_microphone(audio_content)
        prompt = update_conversation(text)
        response_text = sensenova_putonghua_send_request(prompt)
        audio_response = putonghua_text_to_speech(subscription_key, service_region, response_text)
    else:  # 'English'
        text = English_flask_listen_from_microphone(audio_content)
        prompt = update_conversation(text)
        response_text = sensenova_English_send_request(prompt)
        audio_response = English_text_to_speech(subscription_key, service_region, response_text)

    return audio_response, conversation_history

if __name__ == '__main__':
    # Start the Socket.IO server on port 5001; host '0.0.0.0' listens on all
    # interfaces. allow_unsafe_werkzeug=True is passed straight through to
    # Flask-SocketIO (presumably to permit the Werkzeug development server;
    # confirm against the installed version's documentation).
    socketio.run(app, host='0.0.0.0', port=5001, allow_unsafe_werkzeug=True)

127.0.0.1 - - [02/Dec/2024 10:42:38] "GET /socket.io/?EIO=4&transport=websocket&sid=-_w-1pfseJ6r9kAUAAAA HTTP/1.1" 200 -


下面是NCR版,完成进度:尚未做到session start步骤,其余均已完成

In [None]:
from flask import Flask, request, jsonify
from flask_socketio import SocketIO, emit
from flask_cors import CORS
import base64
import json
import re
import socketio
import threading
from queue import Queue

# Module-level conversation state shared by the Socket.IO handler below.
conversation_history = []
# Set to True once process_audio has picked a language from the first clip.
language_detected = False

# One of 'Cantonese', 'Putonghua', 'English' after detection; None before.
language = None
# Socket.IO *client* used by the request helpers below to talk to another server.
sio = socketio.Client()
# Values of parameters['vg'] received by on_event, consumed by
# wait_for_event_and_proceed.
event_queue = Queue()

# Flask application with CORS enabled and a Socket.IO *server* that accepts
# the front-end origin http://127.0.0.1:8080.
app1 = Flask(__name__)
CORS(app1)
socketio1 = SocketIO(app1, cors_allowed_origins="http://127.0.0.1:8080")
@socketio1.on('process_audio')
def process_audio(data):
    """Handle one 'process_audio' Socket.IO event.

    The first clip is used only to choose the conversation language (by
    keyword in the recognized text); that choice is kept in module globals
    and reused for every later clip. The clip is then passed to
    ``handle_request`` and the synthesized reply is emitted as a 'response'
    event together with the ``should_record`` flag.

    Args:
        data: Event payload; ``data['audio']`` is the base64-encoded audio.

    Returns:
        ``None`` normally. When no language keyword is found, a
        JSON-serializable dict describing the error (delivered to the client
        only if it requested an acknowledgement).
    """
    global conversation_history
    global language_detected
    global language
    global should_record

    # Decode the audio uploaded by the front end.
    base64_audio = data['audio']
    audio_content = base64.b64decode(base64_audio)

    if not language_detected:
        # Language detection runs only on the first clip received.
        recognize_text = recognize_speech_google(audio_content)
        if "广东" in recognize_text or "廣東" in recognize_text:
            language = 'Cantonese'
        elif "普通" in recognize_text:
            language = 'Putonghua'
        elif "english" in recognize_text.lower():
            language = 'English'
        else:
            # A Socket.IO handler's return value is an acknowledgement
            # payload, not an HTTP response: a (Response, status) tuple from
            # jsonify cannot be serialized, so return a plain dict instead.
            return {
                'audio': None,
                'text': 'No valid language found in the audio.',
                'conversation_history': [msg['role'] + ':' + msg['content'] + '\n' for msg in conversation_history],
            }

        language_detected = True  # detection is done; skip it from now on
        print(f"Language detected: {language}")
    else:
        print("Skipping language detection.")

    audio_response, conversation_history, should_record = handle_request(audio_content, language)

    # The text-to-speech helpers return None when synthesis is canceled;
    # b64encode(None) would raise, so forward a null audio field instead.
    if audio_response is None:
        audio_response_base64 = None
    else:
        audio_response_base64 = base64.b64encode(audio_response).decode('utf-8')
    emit('response', {
        'audio_64': audio_response_base64,
        'conversation_history': [msg['content'] + '\n' for msg in conversation_history],
        'should_record': should_record
        })


# Shared transaction state, written by the handlers and helpers below.
# data['event'] from the latest 'cash-withdrawal-response' message.
cash_withdrawal_validation_result = None
# data['parameters']['success'] from the latest 'note-mix-response' message.
note_mix_validation_result = None
# Filled by note_mix_check_and_extract from the teller's confirmation sentence.
amount = None
currency = None
accountType = None
# Set in handle_request from cash_withdrawal_check_and_extract's result.
receiptRequested = None
# Latest parameters['vg'] value seen by on_event / wait_for_event_and_proceed.
vg = None
# Sent to the front end in the 'response' payload; updated by
# wait_for_event_and_proceed.
should_record = True

@sio.event
def connect():
    """Log that the ``sio`` client has connected."""
    print("Connected to the server")

@sio.event
def disconnect():
    """Log that the ``sio`` client has disconnected."""
    print("Disconnected from the server")




def note_mix_request(ws_url="ws://127.0.0.1:5002"):
    """Send the extracted currency and amount as a 'note-mix' message.

    Connects the shared ``sio`` client, emits the request and then blocks in
    ``sio.wait()``; ``on_note_mix_response`` stores the answer in
    ``note_mix_validation_result`` and disconnects the client.

    Args:
        ws_url: Address of the validation server. Defaults to the endpoint
            used by the other request helpers (the previous hard-coded value
            "ws://" named no host, so the connection could not succeed).

    Returns:
        Always ``None``; connection errors are printed, not raised.
    """
    global amount,currency
    try:
        sio.connect(ws_url)
        sio.emit('note-mix',{"action": "note-mix", "parameters": {"currency": currency, "amount": amount}})
        sio.wait()
    except Exception as e:
        print(f"Error connecting to validation server: {e}")
        return None

def note_mix_check_and_extract(text):
    """Look for the teller's withdrawal-confirmation sentence in *text*.

    On a match the captured amount, currency and account type are stored in
    the module globals ``amount``, ``currency`` and ``accountType`` (all as
    strings, exactly as they appear in the sentence).

    Returns:
        ``True`` when the sentence was found, ``None`` otherwise.
    """
    global amount, currency, accountType
    found = re.search(r'You want to withdraw ([\d,]+) in ([\w\s]+) from your ([\w\s]+)\. Is that correct\?', text)
    if not found:
        return None
    # Groups are, in order: amount, currency, account type.
    amount, currency, accountType = found.group(1), found.group(2), found.group(3)
    return True

@sio.on('note-mix-response')
def on_note_mix_response(data):
    """Store the 'note-mix-response' verdict and disconnect the client.

    ``data['parameters']['success']`` is saved in the module global
    ``note_mix_validation_result``.
    """
    global note_mix_validation_result
    print("Received response:", data)
    parameters = data['parameters']
    note_mix_validation_result = parameters['success']
    # Disconnecting ends the sio.wait() call in note_mix_request.
    sio.disconnect()


def cash_withdrawal_request():
    """Send the collected withdrawal details as a 'cash-withdrawal' message.

    Connects the shared ``sio`` client to ws://127.0.0.1:5002, emits the
    request built from the module globals and blocks in ``sio.wait()``.

    Returns:
        Always ``None``; connection errors are printed, not raised.
    """
    global amount, currency, accountType, receiptRequested
    try:
        sio.connect("ws://127.0.0.1:5002")
        details = {
            "currency": currency,
            "amount": amount,
            "accountType": accountType,
            "receiptRequested": receiptRequested,
        }
        sio.emit('cash-withdrawal', {"action": "cash-withdrawal", "parameters": details})
        sio.wait()
    except Exception as err:
        print(f"Error connecting to validation server: {err}")
    return None

def cash_withdrawal_check_and_extract(text):
    """Detect the teller's "processing is complete" sentence in *text*.

    Returns:
        ``False`` for the variant without a receipt, ``True`` for the variant
        that mentions the printed receipt, ``None`` when neither is present.
    """
    # (pattern, receipt requested) pairs, checked in this order.
    candidates = (
        (r'Processing is complete\, your cash is ready\. Please take back your bank card and then prepare to withdraw the cash at the ATM\. Do you need any other services\?', False),
        (r'Processing is complete\, the receipt is printed\, your cash is ready\. Please take back your bank card and then prepare to withdraw the cash at the ATM\. Do you need any other services\?', True),
    )
    for pattern, receipt in candidates:
        if re.search(pattern, text):
            return receipt
    return None

@sio.on('cash-withdrawal-response')
def on_cash_withdrawal_response(data):
    """Store ``data['event']`` in ``cash_withdrawal_validation_result``."""
    global cash_withdrawal_validation_result
    print("Received response:", data)
    event_name = data['event']
    cash_withdrawal_validation_result = event_name
    

@sio.on('event')
def on_event(data):
    """Queue the ``vg`` value carried by an 'event' message.

    Messages without ``parameters`` or without a truthy ``vg`` are only
    logged. An 'end-transaction' value also disconnects the ``sio`` client.
    """
    global vg, event_queue
    print('Event received:', data)
    params = data.get('parameters')
    if not params:
        return
    vg = params.get('vg')
    if not vg:
        return
    event_queue.put(vg)
    print(f"VG added to queue: {vg}")
    # Only the specific 'end-transaction' value closes the connection.
    if vg != 'end-transaction':
        return
    print("End-transaction event received, disconnecting")
    sio.disconnect()



def wait_for_event_and_proceed():
    """Block until the next value arrives on ``event_queue`` and return it.

    The value is stored in the global ``vg``; the global ``should_record``
    becomes ``True`` only when the value is 'end-transaction'.

    Returns:
        ``(value, should_record)``.
    """
    global vg, should_record, event_queue
    print('waiting for event')
    vg = event_queue.get()
    print(f"Received vg: {vg}")
    should_record = (vg == 'end-transaction')
    return vg, should_record
    
        
        
def close_session():
    """Send a 'close-session' message to ws://127.0.0.1:5002 and disconnect.

    Returns:
        Always ``None``; connection errors are printed, not raised.
    """
    try:
        sio.connect("ws://127.0.0.1:5002")
        message = {"action": "close-session"}
        sio.emit('close-session', message)
        print('session is closed, waiting for next session refresh')
        sio.disconnect()
    except Exception as err:
        print(f"Error connecting to validation server: {err}")
    return None
    
    
    

def _teller_reply(response_text, expects_reply=True):
    """Record *response_text* as the teller's turn, synthesize it, build the reply tuple.

    When *expects_reply* is false a placeholder customer turn is appended as
    well, as is done for the turns recorded while a transaction is running.
    """
    conversation_history.append({"role": "ATM Teller", "content": response_text})
    if not expects_reply:
        conversation_history.append({"role": "Customer", "content": "No need for reply"})
    audio_response = English_text_to_speech(subscription_key, service_region, response_text)
    return audio_response, conversation_history, should_record


def handle_request(audio_content, language):
    """Run one conversation turn and return the spoken reply.

    Cantonese and Putonghua turns are a plain recognize / prompt / synthesize
    round trip. English turns additionally drive the cash-withdrawal flow:

    * while ``should_record`` is true the clip is recognized and answered;
      the teller's confirmation sentence triggers a 'note-mix' validation,
      and the "processing is complete" sentence triggers the
      'cash-withdrawal' request;
    * while ``should_record`` is false the clip is ignored and the next
      queued event value is spoken instead.

    Args:
        audio_content: Raw audio bytes of the user's utterance.
        language: 'Cantonese', 'Putonghua' or 'English'.

    Returns:
        ``(audio_response, conversation_history, should_record)`` on every
        path, which is what ``process_audio`` unpacks.

    Raises:
        ValueError: If *language* is not one of the supported values.
    """
    global receiptRequested
    global amount
    global invalid_warning
    global note_mix_validation_result
    global cash_withdrawal_validation_result
    global should_record

    # Spoken when the validation server cannot be reached or gives no usable
    # answer, so the caller still receives a well-formed reply.
    service_unavailable = "Sorry, the service is temporarily unavailable, please try again"

    if language == 'Cantonese':
        text = cantonese_flask_listen_from_microphone(audio_content)
        prompt = update_conversation(text)
        response_text = sensenova_cantonese_send_request(prompt)
        audio_response = cantonese_text_to_speech(subscription_key, service_region, response_text)
        # Three values, matching the caller's unpacking.
        return audio_response, conversation_history, should_record
    if language == 'Putonghua':
        text = putonghua_flask_listen_from_microphone(audio_content)
        prompt = update_conversation(text)
        response_text = sensenova_putonghua_send_request(prompt)
        audio_response = putonghua_text_to_speech(subscription_key, service_region, response_text)
        return audio_response, conversation_history, should_record
    if language != 'English':
        raise ValueError(f"Unsupported language: {language!r}")

    if not should_record:
        # A transaction is in progress: speak the next queued event value.
        response_text, should_record = wait_for_event_and_proceed()
        if response_text == 'end-transaction':
            return _teller_reply('Processing is complete, Do you need any other services?')
        return _teller_reply(response_text, expects_reply=False)

    text = English_flask_listen_from_microphone(audio_content)
    invalid_warning = 0
    prompt = update_conversation(text)
    response_text = sensenova_English_send_request(prompt)
    if response_text == 'Which account would you like to withdraw from and which currency?':
        # Planned follow-up: report the transaction type back to the ITM.
        print('transaction type: Cash Withdrawal')

    if amount is None:
        # No amount captured yet: look for the confirmation sentence.
        if note_mix_check_and_extract(response_text) is None:
            print('no target value detected, sending to backend')
            return _teller_reply(response_text)

        print('target value detected, sending to validation server')
        # Clear the previous verdict so a failed connection cannot reuse it.
        note_mix_validation_result = None
        try:
            note_mix_request()
        except Exception as e:
            print(f"Error connecting to validation server: {e}")
            amount = None  # allow the confirmation to be validated again
            return _teller_reply(service_unavailable)

        if note_mix_validation_result == True:
            print('validation server returned true, sending to backend')
            return _teller_reply(response_text)

        print('validation server returned false, sending to backend')
        # Forget the rejected amount; otherwise the next confirmation would
        # skip validation because `amount` is already set.
        amount = None
        invalid_warning = 1
        return _teller_reply("Sorry, amount not in multiple of 100, please re-enter the amount")

    # An amount is already captured: check whether a receipt is requested.
    receipt_choice = cash_withdrawal_check_and_extract(response_text)
    if receipt_choice is None:
        if response_text == 'Thank you for visiting, see you next time, goodbye.':
            # Planned follow-up: let the front end freeze the page after this
            # sentence until the teller is restarted.
            close_session()
        return _teller_reply(response_text)

    print('target transaction detected, sending to validation server')
    receiptRequested = receipt_choice
    # Clear the previous verdict so a failed connection cannot reuse it.
    cash_withdrawal_validation_result = None
    try:
        # Sends the amount, currency, account type and receipt choice.
        cash_withdrawal_request()
    except Exception as e:
        print(f"Error connecting to validation server: {e}")
        return _teller_reply(service_unavailable)

    if cash_withdrawal_validation_result == 'transaction-started':
        response_text, should_record = wait_for_event_and_proceed()
        return _teller_reply(response_text, expects_reply=False)
    if cash_withdrawal_validation_result == 'transaction-failed':
        return _teller_reply("Sorry, your transaction is refused, please try again")
    # Neither outcome was reported by the server.
    return _teller_reply(service_unavailable)

        
if __name__ == '__main__':
    # socketio1.run() blocks until the server stops, so a statement placed
    # after it never runs while requests are being served. The former
    # trailing threading.Thread(target=wait_for_event_and_proceed).start()
    # could therefore only run after shutdown, where its non-daemon thread
    # would block in event_queue.get(); it has been dropped.
    # handle_request() calls wait_for_event_and_proceed() itself whenever it
    # needs the next event.
    socketio1.run(app1, host='0.0.0.0', port=5001, allow_unsafe_werkzeug=True)

下面是实验版NCR代码,eventlet貌似无法正常使用

In [None]:
import eventlet
eventlet.monkey_patch()
from flask import Flask, request, jsonify
from flask_socketio import SocketIO, emit
from flask_cors import CORS
import base64
import json
import re
import socketio
from eventlet.queue import Queue
# Module-level conversation state shared by the Socket.IO handler below.
conversation_history = []
# Set to True once process_audio has picked a language from the first clip.
language_detected = False

# One of 'Cantonese', 'Putonghua', 'English' after detection; None before.
language = None
# Socket.IO *client* used by the request helpers below to talk to another server.
sio = socketio.Client()

# eventlet queue of parameters['vg'] values received by on_event, consumed by
# wait_for_event_and_proceed.
event_queue = Queue()


# Flask application with CORS enabled and an eventlet-mode Socket.IO *server*
# that accepts the front-end origin http://127.0.0.1:8080.
app1 = Flask(__name__)
CORS(app1)
socketio1 = SocketIO(app1, async_mode='eventlet', cors_allowed_origins="http://127.0.0.1:8080")
@socketio1.on('process_audio')
def process_audio(data):
    """Handle one 'process_audio' Socket.IO event.

    The first clip is used only to choose the conversation language (by
    keyword in the recognized text); that choice is kept in module globals
    and reused for every later clip. The clip is then passed to
    ``handle_request`` and the synthesized reply is emitted as a 'response'
    event together with the ``should_record`` flag.

    Args:
        data: Event payload; ``data['audio']`` is the base64-encoded audio.

    Returns:
        ``None`` normally. When no language keyword is found, a
        JSON-serializable dict describing the error (delivered to the client
        only if it requested an acknowledgement).
    """
    global conversation_history
    global language_detected
    global language
    global should_record

    # Decode the audio uploaded by the front end.
    base64_audio = data['audio']
    audio_content = base64.b64decode(base64_audio)

    if not language_detected:
        # Language detection runs only on the first clip received.
        recognize_text = recognize_speech_google(audio_content)
        if "广东" in recognize_text or "廣東" in recognize_text:
            language = 'Cantonese'
        elif "普通" in recognize_text:
            language = 'Putonghua'
        elif "english" in recognize_text.lower():
            language = 'English'
        else:
            # A Socket.IO handler's return value is an acknowledgement
            # payload, not an HTTP response: a (Response, status) tuple from
            # jsonify cannot be serialized, so return a plain dict instead.
            return {
                'audio': None,
                'text': 'No valid language found in the audio.',
                'conversation_history': [msg['role'] + ':' + msg['content'] + '\n' for msg in conversation_history],
            }

        language_detected = True  # detection is done; skip it from now on
        print(f"Language detected: {language}")
    else:
        print("Skipping language detection.")

    audio_response, conversation_history, should_record = handle_request(audio_content, language)

    # The text-to-speech helpers return None when synthesis is canceled;
    # b64encode(None) would raise, so forward a null audio field instead.
    if audio_response is None:
        audio_response_base64 = None
    else:
        audio_response_base64 = base64.b64encode(audio_response).decode('utf-8')
    emit('response', {
        'audio_64': audio_response_base64,
        'conversation_history': [msg['content'] + '\n' for msg in conversation_history],
        'should_record': should_record
        })


# Shared transaction state, written by the handlers and helpers below.
# data['event'] from the latest 'cash-withdrawal-response' message.
cash_withdrawal_validation_result = None
# data['parameters']['success'] from the latest 'note-mix-response' message.
note_mix_validation_result = None
# Filled by note_mix_check_and_extract from the teller's confirmation sentence.
amount = None
currency = None
accountType = None
# Set in handle_request from cash_withdrawal_check_and_extract's result.
receiptRequested = None
# Latest parameters['vg'] value seen by on_event / wait_for_event_and_proceed.
vg = None
# Sent to the front end in the 'response' payload; updated by
# wait_for_event_and_proceed.
should_record = True

@sio.event
def connect():
    """Log that the ``sio`` client has connected."""
    print("Connected to the server")

@sio.event
def disconnect():
    """Log that the ``sio`` client has disconnected."""
    print("Disconnected from the server")




def note_mix_request():
    """Send the extracted currency and amount as a 'note-mix' message.

    Connects the shared ``sio`` client to ws://127.0.0.1:5002, emits the
    request built from the module globals and blocks in ``sio.wait()``.

    Returns:
        Always ``None``; connection errors are printed, not raised.
    """
    global amount,currency
    try:
        sio.connect("ws://127.0.0.1:5002")
        details = {"currency": currency, "amount": amount}
        sio.emit('note-mix', {"action": "note-mix", "parameters": details})
        sio.wait()
    except Exception as err:
        print(f"Error connecting to validation server: {err}")
    return None

def note_mix_check_and_extract(text):
    """Look for the teller's withdrawal-confirmation sentence in *text*.

    On a match the captured amount, currency and account type are stored in
    the module globals ``amount``, ``currency`` and ``accountType`` (all as
    strings, exactly as they appear in the sentence).

    Returns:
        ``True`` when the sentence was found, ``None`` otherwise.
    """
    global amount, currency, accountType
    found = re.search(r'You want to withdraw ([\d,]+) in ([\w\s]+) from your ([\w\s]+)\. Is that correct\?', text)
    if not found:
        return None
    # Groups are, in order: amount, currency, account type.
    amount, currency, accountType = found.group(1), found.group(2), found.group(3)
    return True

@sio.on('note-mix-response')
def on_note_mix_response(data):
    """Store the 'note-mix-response' verdict and disconnect the client.

    ``data['parameters']['success']`` is saved in the module global
    ``note_mix_validation_result``.
    """
    global note_mix_validation_result
    print("Received response:", data)
    parameters = data['parameters']
    note_mix_validation_result = parameters['success']
    # Disconnecting ends the sio.wait() call in note_mix_request.
    sio.disconnect()


def cash_withdrawal_request():
    """Send the collected withdrawal details as a 'cash-withdrawal' message.

    Connects the shared ``sio`` client to ws://127.0.0.1:5002, emits the
    request built from the module globals and blocks in ``sio.wait()``.

    Returns:
        Always ``None``; connection errors are printed, not raised.
    """
    global amount, currency, accountType, receiptRequested
    try:
        sio.connect("ws://127.0.0.1:5002")
        details = {
            "currency": currency,
            "amount": amount,
            "accountType": accountType,
            "receiptRequested": receiptRequested,
        }
        sio.emit('cash-withdrawal', {"action": "cash-withdrawal", "parameters": details})
        sio.wait()
    except Exception as err:
        print(f"Error connecting to validation server: {err}")
    return None

def cash_withdrawal_check_and_extract(text):
    """Detect the teller's "processing is complete" sentence in *text*.

    Returns:
        ``False`` for the variant without a receipt, ``True`` for the variant
        that mentions the printed receipt, ``None`` when neither is present.
    """
    # (pattern, receipt requested) pairs, checked in this order.
    candidates = (
        (r'Processing is complete\, your cash is ready\. Please take back your bank card and then prepare to withdraw the cash at the ATM\. Do you need any other services\?', False),
        (r'Processing is complete\, the receipt is printed\, your cash is ready\. Please take back your bank card and then prepare to withdraw the cash at the ATM\. Do you need any other services\?', True),
    )
    for pattern, receipt in candidates:
        if re.search(pattern, text):
            return receipt
    return None

@sio.on('cash-withdrawal-response')
def on_cash_withdrawal_response(data):
    """Store ``data['event']`` in ``cash_withdrawal_validation_result``."""
    global cash_withdrawal_validation_result
    print("Received response:", data)
    event_name = data['event']
    cash_withdrawal_validation_result = event_name
    

@sio.on('event')
def on_event(data):
    """Queue the ``vg`` value carried by an 'event' message.

    Messages without ``parameters`` or without a truthy ``vg`` are only
    logged. An 'end-transaction' value also disconnects the ``sio`` client.
    """
    global vg, event_queue
    print('Event received:', data)
    params = data.get('parameters')
    if not params:
        return
    vg = params.get('vg')
    if not vg:
        return
    event_queue.put(vg)
    print(f"VG added to queue: {vg}")
    # Only the specific 'end-transaction' value closes the connection.
    if vg != 'end-transaction':
        return
    print("End-transaction event received, disconnecting")
    sio.disconnect()



def wait_for_event_and_proceed():
    """Block until the next value arrives on ``event_queue`` and return it.

    The value is stored in the global ``vg``; the global ``should_record``
    becomes ``True`` only when the value is 'end-transaction'.

    Returns:
        ``(value, should_record)``.
    """
    global vg, should_record, event_queue
    print('waiting for event')
    vg = event_queue.get()
    print(f"Received vg: {vg}")
    should_record = (vg == 'end-transaction')
    return vg, should_record
    
@socketio1.on('connect')
def on_connect():
    """Spawn a greenlet running ``wait_for_event_and_proceed`` on each 'connect' event."""
    # NOTE(review): the spawned call takes one item from event_queue and its
    # return value is discarded here. Any other caller of
    # wait_for_event_and_proceed would read from the same queue; confirm the
    # two are not meant to compete for the same events.
    eventlet.spawn(wait_for_event_and_proceed)
        
        
def close_session():
    """Send a 'close-session' message to ws://127.0.0.1:5002 and disconnect.

    Returns:
        Always ``None``; connection errors are printed, not raised.
    """
    try:
        sio.connect("ws://127.0.0.1:5002")
        message = {"action": "close-session"}
        sio.emit('close-session', message)
        print('session is closed, waiting for next session refresh')
        sio.disconnect()
    except Exception as err:
        print(f"Error connecting to validation server: {err}")
    return None
    
    
    

def handle_request(audio_content, language):
    """Process one customer turn and build the teller's spoken reply.

    Cantonese and Putonghua run a plain pipeline: speech recognition, then
    the conversation update, then the language model, then text-to-speech.
    English adds the cash-withdrawal flow, driven by the module-level
    ``should_record``, ``amount`` and the two validation-result globals.

    Args:
        audio_content: Recorded audio handed to the speech-recognition
            helper of the selected language.  It is not used when the
            English flow is waiting for a machine event.
        language: ``'Cantonese'``, ``'Putonghua'`` or ``'English'``.

    Returns:
        * Cantonese / Putonghua: ``(audio_response, conversation_history)``.
        * English: ``(audio_response, conversation_history, should_record)``.
        * ``None`` when the language is not one of the three above, when
          ``should_record`` equals neither True nor False, when an
          exception is caught while talking to the validation server, or
          when the cash-withdrawal result is neither
          ``'transaction-started'`` nor ``'transaction-failed'``.

    NOTE(review): callers that unpack the result have to cope with the
    ``None`` paths and with the two tuple lengths -- confirm at the call
    site.
    """
    global receiptRequested, amount, invalid_warning
    global note_mix_validation_result, cash_withdrawal_validation_result
    global should_record

    def teller_reply(message, expects_no_answer=False):
        # Log the teller's line (plus a placeholder customer turn when no
        # answer is expected), synthesise it, and build the English result.
        conversation_history.append({"role": "ATM Teller", "content": message})
        if expects_no_answer:
            conversation_history.append({"role": "Customer", "content": "No need for reply"})
        speech_audio = English_text_to_speech(subscription_key, service_region, message)
        return speech_audio, conversation_history, should_record

    # Pick the processing pipeline that matches the requested language.
    if language == 'Cantonese':
        heard = cantonese_flask_listen_from_microphone(audio_content)
        reply = sensenova_cantonese_send_request(update_conversation(heard))
        return cantonese_text_to_speech(subscription_key, service_region, reply), conversation_history

    if language == 'Putonghua':
        heard = putonghua_flask_listen_from_microphone(audio_content)
        reply = sensenova_putonghua_send_request(update_conversation(heard))
        return putonghua_text_to_speech(subscription_key, service_region, reply), conversation_history

    if language != 'English':
        return None

    # ---- English: not recording, so the next line comes from a machine event.
    if should_record == False:
        response_text, should_record = wait_for_event_and_proceed()
        if response_text == 'end-transaction':
            return teller_reply('Processing is complete, Do you need any other services?')
        return teller_reply(response_text, expects_no_answer=True)

    # Equality (not truthiness) is kept on purpose so that values equal to
    # neither True nor False still fall through to None as before.
    if should_record != True:
        return None

    # ---- English: recording, so transcribe the customer and ask the model.
    heard = English_flask_listen_from_microphone(audio_content)
    invalid_warning = 0
    response_text = sensenova_English_send_request(update_conversation(heard))
    if response_text == 'Which account would you like to withdraw from and which currency?':
        # Future plan: once the transaction type is known, consider using a
        # single service-type prompt and returning this type to the ITM.
        print('transaction type: Cash Withdrawal')

    if amount is None:
        # No amount yet: look for one in the model's reply.
        extracted = note_mix_check_and_extract(response_text)
        if extracted is None:
            print('no target value detected, sending to backend')
            return teller_reply(response_text)

        print('target value detected, sending to validation server')
        try:
            note_mix_request()
            if note_mix_validation_result == True:
                print('validation server returned true, sending to backend')
                return teller_reply(response_text)
            print('validation server returned false, sending to backend')
            invalid_warning = 1
            return teller_reply("Sorry, amount not in multiple of 100, please re-enter the amount")
        except Exception as e:
            # Also reached when logging or speech synthesis above fails.
            print(f"Error connecting to validation server: {e}")
        return None

    # An amount is already known: check whether the reply settles the
    # receipt question (the extraction result is the receipt choice).
    receipt_choice = cash_withdrawal_check_and_extract(response_text)
    if receipt_choice is None:
        if response_text == 'Thank you for visiting, see you next time, goodbye.':
            # Future plan: let the front end detect this sentence in the
            # conversation history and freeze the page after playback until
            # it is restarted.  For now the session is simply closed; a
            # front-end freeze may be added later so that the next customer
            # cannot continue without refreshing the AI teller.
            close_session()
        return teller_reply(response_text)

    print('target transaction detected, sending to validation server')
    receiptRequested = receipt_choice  # whether a receipt is wanted
    try:
        # Per the original author's note: sends the collected amount,
        # currency, account and receipt choice to the validation server.
        cash_withdrawal_request()
        if cash_withdrawal_validation_result == 'transaction-started':
            # The ITM reported that the transaction has started.
            response_text, should_record = wait_for_event_and_proceed()
            return teller_reply(response_text, expects_no_answer=True)
        if cash_withdrawal_validation_result == 'transaction-failed':
            return teller_reply("Sorry, your transaction is refused, please try again")
    except Exception as e:
        # Also reached when the event wait or speech synthesis above fails.
        print(f"Error connecting to validation server: {e}")
    return None

        
if __name__ == '__main__':
    # Serve app1 through socketio1 on every interface, port 5001.
    # NOTE(review): allow_unsafe_werkzeug=True presumably permits running on
    # the Werkzeug development server -- confirm this is intended for the
    # deployment target.
    socketio1.run(
        app1,
        host='0.0.0.0',
        port=5001,
        allow_unsafe_werkzeug=True,
    )