In [1]:
!pip install openai gradio openai-whisper



In [2]:
# Set the OPENAI_API_KEY environment variable
import os
import getpass

# Prompt only when the key is not already present, so re-running this cell
# (or running in an environment that already exports the key) does not ask
# for it again. getpass keeps the typed key out of the cell output.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')

OpenAI API Key:··········


目标：使用OpenAI提供的Whisper库实现从语音到文本的转换功能，定义状态机进行对话，并使用Gradio来构建用户界面。

In [3]:
from openai import OpenAI
import gradio as gr
import whisper

# Create the shared service objects used by the functions below:
#   client -- OpenAI API client, constructed with no explicit arguments
#   model  -- Whisper speech-to-text model, "base" checkpoint
client = OpenAI()
model = whisper.load_model(name="base")

# Take an audio file and convert it to text
def transcribe(file):
    """Transcribe ``file`` with the module-level Whisper ``model``.

    Args:
        file: Value forwarded unchanged to ``model.transcribe`` (the UI below
            supplies a file path).

    Returns:
        The ``'text'`` entry of the transcription result.
    """
    print(file)  # log which file is being processed
    return model.transcribe(file)['text']


# Define the states and their transitions.
# Each key of `prompts` is a state name; its value is the instruction sent to
# the model when the conversation enters that state.
prompts = dict(
    START=(
        'Classify the intent of the next input. '
        'Is it: WRITE_EMAIL, QUESTION, OTHER? '
        'Only answer one word.'
    ),
    QUESTION=(
        'If you can answer the question: ANSWER, '
        'if you need more information: MORE, '
        'if you cannot answer: OTHER. '
        'Only answer one word.'
    ),
    ANSWER='Now answer the question as a polite assistant',
    MORE='Now ask for more information as a polite assistant',
    OTHER=(
        'Now tell me you cannot answer the question '
        'or do the action as a polite assistant'
    ),
    WRITE_EMAIL=(
        'If the subject or recipient or message is missing, answer "MORE". '
        'Else if you have all the information answer '
        '"ACTION_WRITE_EMAIL | subject:subject, recipient:recipient, message:message". '
    ),
)

# Each key of `actions` is an action name the model may emit; its value is the
# follow-up instruction sent once the action has been handled.
actions = dict(
    ACTION_WRITE_EMAIL=(
        "The mail has been sent. "
        "Now tell me the action is done in natural language."
    ),
)

# Conversation history, seeded with the START instruction.
messages = [dict(role="user", content=prompts["START"])]

# Call the Chat Completions endpoint
def generate_answer(messages, model_name="gpt-3.5-turbo"):
    """Send the conversation to the chat model and return its reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts, passed
            unchanged as the ``messages`` argument of the request.
        model_name: Identifier of the chat model to query. Defaults to
            ``"gpt-3.5-turbo"``, the value that used to be hard-coded, so
            existing one-argument calls behave as before.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=messages)
    return response.choices[0].message.content

# Entry point: begin from the START state
def start(user_input):
    """Record the user's input and run the state machine from ``'START'``.

    Args:
        user_input: Text appended to the module-level ``messages`` history as
            a ``"user"`` message.

    Returns:
        Whatever ``discussion`` returns for the updated history.
    """
    user_turn = {"role": "user", "content": user_input}
    messages.append(user_turn)
    return discussion(messages, 'START')

# Define the discussion function, which moves the system between states
def discussion(messages, last_step):
    """Ask the model for the next step and follow it until a final answer.

    The model's reply is interpreted in one of three ways:

    * it is a key of ``prompts``: a new state. The reply and that state's
      prompt are appended to ``messages`` and the function recurses;
    * its part before the first ``"|"`` is a key of ``actions``: the reply is
      handed to ``do_action``;
    * anything else: the END state, and the reply is returned as the answer.

    Args:
        messages: Conversation history (list of role/content dicts). It is
            mutated in place.
        last_step: Name of the step that led to this call; only compared
            against ``'MORE'`` when the END state is reached.

    Returns:
        The final answer text produced by the model.
    """
    answer = generate_answer(messages)
    print(answer)
    if answer in prompts:
        # A new state was found: add it to the message list
        messages.append({"role": "assistant", "content": answer})
        messages.append({"role": "user", "content": prompts[answer]})
        # Walk the state machine recursively
        return discussion(messages, answer)
    if answer.split("|")[0].strip() in actions:
        # The new state is an action
        return do_action(answer)
    # END state.
    # If the previous step was MORE, keep the history so the user's next input
    # continues the same request; otherwise start over.
    if last_step != 'MORE':
        # Reset IN PLACE: rebinding the name (`messages = []`) would only
        # change this local variable and leave the caller's list growing
        # forever. The START prompt is restored because the next turn must be
        # classified again from the START state.
        messages[:] = [{"role": "user", "content": prompts['START']}]
    return answer

# do_action is the place where a third-party API (such as the Gmail API)
# could be called
def do_action(answer):
    """Record an action reply from the model and continue the conversation.

    No external service is called here: the action reply and the matching
    follow-up prompt from ``actions`` are appended to the module-level
    ``messages`` history, then control returns to ``discussion``.

    Args:
        answer: Model reply whose text before the first ``"|"`` (stripped)
            names a key of ``actions``.

    Returns:
        The result of ``discussion`` for the updated history, with ``answer``
        passed as the last step.
    """
    print("Doing action " + answer)
    action_name = answer.partition("|")[0].strip()
    messages.append({"role": "assistant", "content": answer})
    follow_up = actions[action_name]
    messages.append({"role": "user", "content": follow_up})
    return discussion(messages, answer)


def start_chat(file):
    """Transcribe a recorded audio file and feed the text to the state machine.

    Args:
        file: Path of the audio file to transcribe, or ``None`` when no
            recording is available.

    Returns:
        The reply returned by ``start``, or an empty string when there is
        nothing to process (no file, or a blank transcription).
    """
    # The interface is created with live=True, so this callback may run
    # without a recording (presumably receiving None, e.g. after the audio is
    # cleared -- confirm against the Gradio version in use). Skip the work
    # instead of failing inside the transcription call.
    if file is None:
        return ""
    text = transcribe(file)  # named `text`, not `input`, to avoid shadowing the builtin
    print(text)
    # A blank transcription carries no request: do not add an empty user turn
    # to the conversation or spend an API call on it.
    if not text.strip():
        return ""
    return start(text)


# Build the UI: microphone audio in (delivered to the callback as a file
# path), plain text out, re-running `start_chat` live as the input changes.
audio_input = gr.Audio(sources="microphone", type="filepath")
demo = gr.Interface(
    fn=start_chat,
    inputs=audio_input,
    outputs="text",
    live=True,
    theme=gr.themes.Soft(),
)
demo.launch()


100%|████████████████████████████████████████| 139M/139M [00:00<00:00, 156MiB/s]


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://54f0126898bb701908.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


