In [1]:
import os
import requests
import base64
from bs4 import BeautifulSoup
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader, WebBaseLoader
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
import gradio as gr

USER_AGENT environment variable not set, consider setting it to identify your requests.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pyaudio
import dashscope
import soundfile as sf
from IPython.display import Audio
from dashscope.audio.asr import Recognition
from dashscope.audio.tts_v2 import *

# API keys must never be committed in the notebook. Each key is taken from the
# environment when it is already set; otherwise the user is prompted for it
# without echoing the value into the notebook output.
# NOTE(review): the keys that used to be hard-coded here should be treated as
# leaked and rotated with the respective providers.
import getpass

for _key_name in ("NVIDIA_API_KEY", "ALIYUN_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass.getpass(f"{_key_name}: ")

# DashScope reads its key from this module attribute.
dashscope.api_key = os.environ["ALIYUN_API_KEY"]

In [3]:
# Chat model, piped into a string parser so the chain yields plain text.
instruct_chat = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-70b-instruct")
instruct_llm = instruct_chat | StrOutputParser()

# Prompt template: retrieved context first, then the user's request.
prompt_template = ChatPromptTemplate.from_template(
    "以下是与问题相关的信息：{context}\n"
    "用户需求：{input}\n"
    "请给出对应的无人机的回答"
)

def fetch_text_from_url(url, timeout=10):
    """Download a web page and return its text content as a single line.

    :param url: Address of the page to fetch.
    :param timeout: Seconds to wait for the server before giving up; passed
        straight to ``requests.get``.
    :return: The page text with every run of whitespace collapsed to one space.
    :raises Exception: If the response status code is not 200.
    :raises requests.RequestException: On connection errors or timeouts.
    """
    # Without a timeout a stalled server would block the UI callback forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"请求失败，状态码: {response.status_code}")

    # When the server declares no charset, requests decodes text/* bodies as
    # ISO-8859-1, which garbles UTF-8/GBK pages. Fall back to the encoding
    # detected from the body in that case.
    content_type = response.headers.get("Content-Type", "")
    if "charset" not in content_type.lower():
        response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, 'html.parser')
    # split()/join collapses newlines, tabs and repeated spaces.
    return ' '.join(soup.get_text().split())

def load_local_knowledge(base_path):
    """Build a FAISS vector store from a local text file.

    The file is loaded with ``TextLoader``, split by ``CharacterTextSplitter``
    (chunk size 1000, no overlap), embedded with the NVIDIA embedding model and
    indexed with FAISS.

    :param base_path: Path of the text file to index.
    :return: The FAISS vector store built from the file's chunks.
    """
    raw_documents = TextLoader(base_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(raw_documents)
    embedder = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2")
    return FAISS.from_documents(chunks, embedder)

def load_web_knowledge(url):
    """Fetch a web page, save its text locally and index it.

    The page text is written to ``./txt/webpage_content.txt`` (overwriting any
    previous download) and then indexed via ``load_local_knowledge``.

    :param url: Address of the page to use as the knowledge source.
    :return: The vector store returned by ``load_local_knowledge``.
    """
    cache_path = './txt/webpage_content.txt'
    text_content = fetch_text_from_url(url)
    # open(..., 'w') does not create parent directories, so make sure ./txt
    # exists; otherwise a fresh checkout fails with FileNotFoundError.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    with open(cache_path, 'w', encoding='utf-8') as file:
        file.write(text_content)
    return load_local_knowledge(cache_path)

def retrieve_from_knowledgebase(vectorstore, query, k=3):
    """Return the text of the ``k`` most similar documents, newline-joined.

    :param vectorstore: Object exposing ``similarity_search(query, k=...)``.
    :param query: Text to search for.
    :param k: Number of documents to request from the store.
    :return: The ``page_content`` of each hit, joined with ``"\\n"``.
    """
    passages = []
    for hit in vectorstore.similarity_search(query, k=k):
        passages.append(hit.page_content)
    return "\n".join(passages)

def process_user_input(chat_history, source_type, source_path_or_url, input_text):
    """Answer a text query against the selected knowledge source.

    :param chat_history: Current chat history as a list of (user, bot) pairs;
        ``None`` is treated as an empty history. The list is never mutated.
    :param source_type: Either ``"URL"`` or ``"文件路径"``.
    :param source_path_or_url: The URL or file path matching ``source_type``.
    :param input_text: The user's question.
    :return: ``("", new_history)`` - an empty string to clear the input box
        and the history extended with the answer or with an error entry.
        A blank question returns the history unchanged.
    """
    # Copy up front so that both the success and the error path work even when
    # the caller passes None, and so the caller's list is never modified.
    history = list(chat_history or [])
    try:
        if source_type not in ["URL", "文件路径"]:
            raise ValueError("无效的知识来源类型，请选择 'URL' 或 '文件路径'。")

        # Nothing to ask: skip the expensive index build and model call.
        if not input_text or not input_text.strip():
            return "", history

        if source_type == "URL":
            vectorstore = load_web_knowledge(source_path_or_url)
        else:
            vectorstore = load_local_knowledge(source_path_or_url)

        context = retrieve_from_knowledgebase(vectorstore, input_text)
        prompt = prompt_template.format(context=context, input=input_text)
        answer = instruct_llm.invoke(prompt)
        return "", history + [(input_text, answer)]
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        return "", history + [(None, f"发生错误: {str(e)}")]
    
def transcribe_audio(audio_path):
    """Transcribe an audio file to text with DashScope speech recognition.

    :param audio_path: Path of the audio file to transcribe.
    :return: The recognised text of all sentences, joined with single spaces.
    :raises RuntimeError: If recognition produced no text.
    """
    # sf.info only inspects the file's metadata; sf.read would decode the whole
    # recording just to learn its sample rate.
    sample_rate = sf.info(audio_path).samplerate
    recognition = Recognition(
        model='paraformer-realtime-v2',
        format='wav',
        sample_rate=sample_rate,
        language_hints=['zh', 'en'],
        callback=None
    )
    result = recognition.call(audio_path)

    output = result.output
    sentences = output['sentence'] if output else []
    # Tolerate a single sentence being returned as a bare dict.
    if isinstance(sentences, dict):
        sentences = [sentences]

    # Keep every recognised sentence, not just the first one.
    texts = [sentence['text'] for sentence in sentences]
    texts = [text for text in texts if text]
    if not texts:
        raise RuntimeError("语音识别未返回任何文本")
    return " ".join(texts)

# Handle audio input
def process_audio_input(chat_history, source_type, source_path_or_url, audio_file):
    """Transcribe an uploaded audio file and answer it as a text query.

    :param chat_history: Current chat history as a list of (user, bot) pairs;
        ``None`` is treated as an empty history. The list is never mutated.
    :param source_type: Either ``"URL"`` or ``"文件路径"``.
    :param source_path_or_url: The URL or file path matching ``source_type``.
    :param audio_file: Path of the audio file, or a falsy value when there is
        no audio (for example after the upload was cleared).
    :return: ``("", new_history)``; the history is unchanged when there is no
        audio and gains an error entry when something fails.
    """
    # Work on a copy: the error path must not modify the caller's list, in
    # line with process_user_input, and must not crash on a None history.
    history = list(chat_history or [])
    try:
        # Validate the knowledge source type before doing any work.
        if source_type not in ["URL", "文件路径"]:
            raise ValueError("无效的知识来源类型，请选择 'URL' 或 '文件路径'。")

        if not audio_file:
            return "", history

        input_text = transcribe_audio(audio_file)
        return process_user_input(history, source_type, source_path_or_url, input_text)
    except Exception as e:
        # Report the failure as a bot message in the chat history.
        return "", history + [(None, f"发生错误: {str(e)}")]

def analyze_image(image_path):
    """Ask the NVIDIA-hosted vision model to assess a crop image.

    The image is sent inline as a base64 data URI together with a fixed
    English prompt about plant health.

    :param image_path: Path of the image file to analyse.
    :return: The model's answer, or a string starting with ``"Error: "`` when
        the request or response parsing fails.
    :raises OSError: If the image file cannot be opened (this happens before
        the request and is not converted into an error string).
    """
    import mimetypes

    invoke_url = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
    stream = False

    # Label the data URI with the file's real image type instead of always
    # claiming PNG; fall back to PNG when the type cannot be guessed.
    mime_type = mimetypes.guess_type(image_path)[0]
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    with open(image_path, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode()
    headers = {
        "Authorization": f"Bearer {os.environ.get('NVIDIA_API_KEY')}",
        "Accept": "text/event-stream" if stream else "application/json"
    }
    payload = {
        "model": 'meta/llama-3.2-11b-vision-instruct',
        "messages": [
            {
                "role": "user",
                "content": f'''Here is an image related to crop growth. Please examine the plants and their growing environment in this picture. If there are any issues, please briefly describe the problem and suggest a quick solution. If there are no problems, please state that the plants are growing well. <img src="data:{mime_type};base64,{image_b64}" />'''
            }
        ],
        "max_tokens": 512,
        "temperature": 1.00,
        "top_p": 1.00,
        "stream": stream
    }
    try:
        # A timeout keeps the UI from hanging on a stalled request.
        response = requests.post(invoke_url, headers=headers, json=payload, timeout=60)
        # Raise on HTTP errors so the message names the status rather than a
        # confusing KeyError on the missing 'choices' field.
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except Exception as e:
        return f"Error: {str(e)}"



In [4]:
def translate_to_chinese(text):
    """Translate text into Chinese via a chat model.

    The prompt also asks the model to summarise briefly and tidy the format.

    :param text: The text to translate (the docstring originally described it
        as English text).
    :return: The model's Chinese output as a string.
    """
    translator_prompt = ChatPromptTemplate.from_template("请把 {input} 翻译成中文，简单总结并优化格式。")
    # Alternative model previously tried here: thudm/chatglm3-6b
    translator_model = ChatNVIDIA(model="baichuan-inc/baichuan2-13b-chat")
    translation_chain = translator_prompt | translator_model | StrOutputParser()
    return translation_chain.invoke(text)

def analyze_and_translate(image_path):
    """Analyse an image and return the analysis translated into Chinese.

    :param image_path: Path of the image file.
    :return: The output of ``translate_to_chinese`` applied to the result of
        ``analyze_image``.
    """
    return translate_to_chinese(analyze_image(image_path))

In [5]:
welcome_message = "您好！欢迎使用无人机综合智能检索系统。请告诉我您的问题、上传音频文件或图片以开始。"

with gr.Blocks(css="""
    .footer {text-align: center;}
    .submit-btn {margin-left: 10px; padding: 5px 10px; font-size: 0.8em; height: 40px; width: 60px;}
    .clear-btn {margin-left: 10px; padding: 5px 10px; font-size: 0.8em; height: 40px; width: 120px;}
    .input-container {display: flex; align-items: flex-end;} /* 使输入框和按钮在同一行并且对齐 */
    .input-box {flex-grow: 1;} /* 使输入框占据更多的空间 */
""") as demo:
    gr.Markdown("## 🚀 无人机综合智能检索系统")
    gr.Markdown("输入URL或文件路径以加载知识并开始聊天。")

    # Knowledge source selection.
    with gr.Row():
        source_type = gr.Radio(choices=["URL", "文件路径"], label="知识来源类型", value="文件路径")
        source_path_or_url = gr.Textbox(label="知识来源URL或文件路径", value="./txt/dji_webpage_content.txt")

    # Conversation display, seeded with the welcome message.
    with gr.Row():
        chatbot = gr.Chatbot(value=[(None, welcome_message)], elem_classes="chatbox-container")

    # Text query input.
    with gr.Row():
        with gr.Column(elem_classes="input-container"):
            submit_btn = gr.Button("提交", variant="primary", size="sm", elem_classes="submit-btn")
            msg = gr.Textbox(label="用户查询", placeholder="在这里输入您的问题... 📝", elem_classes="input-box")
            # The stylesheet above already defines .clear-btn; this is the
            # button it was written for.
            clear_btn = gr.Button("清空聊天记录", size="sm", elem_classes="clear-btn")

    # Audio and image inputs.
    with gr.Row():
        audio_input = gr.Audio(label="上传音频", type="filepath", elem_classes="audio-uploader")
        image_input = gr.Image(type="filepath", label="上传图片", elem_classes="image-uploader")

    def process_image_input(chat_history, image_path):
        """Analyse an uploaded image and append the result to the chat.

        Returns ``("", new_history)``; failures are reported as a bot message
        rather than raised, matching the text and audio handlers.
        """
        history = list(chat_history or [])
        if not image_path:
            return "", history
        try:
            analysis_result = analyze_and_translate(image_path)
        except Exception as e:
            return "", history + [(None, f"发生错误: {str(e)}")]
        return "", history + [("分析结果:", analysis_result)]

    def clear_chat_history():
        """Reset the query box and restore the chat to the welcome message."""
        # First value feeds the textbox, so it must be a string, not a list.
        return "", [(None, welcome_message)]

    # Every handler receives the *live* component values through `inputs`.
    # Reading `component.value` inside a callback only yields the default set
    # at build time, which discarded the running chat history and ignored the
    # knowledge source chosen by the user.
    submit_btn.click(
        fn=process_user_input,
        inputs=[chatbot, source_type, source_path_or_url, msg],
        outputs=[msg, chatbot],
    )
    audio_input.change(
        fn=process_audio_input,
        inputs=[chatbot, source_type, source_path_or_url, audio_input],
        outputs=[msg, chatbot],
    )
    image_input.upload(
        fn=process_image_input,
        inputs=[chatbot, image_input],
        outputs=[msg, chatbot],
    )
    clear_btn.click(fn=clear_chat_history, inputs=None, outputs=[msg, chatbot])

demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




--------
