In [None]:
from aip import AipSpeech, AipOcr
import speech_recognition as sr
from playsound import playsound
import cv2
import os
import json  # 新增json模块

### Create the Baidu AIP clients
# NOTE(security): these credentials were hard-coded in source. They can be
# overridden through the BAIDU_APP_ID / BAIDU_API_KEY / BAIDU_SECRET_KEY
# environment variables; the literals remain only as a fallback so existing
# setups keep working. Rotate the keys and drop the fallbacks once the
# environment is configured.
APP_ID = os.environ.get('BAIDU_APP_ID') or '118123170'
API_KEY = os.environ.get('BAIDU_API_KEY') or 'p2iyWvcmf35Grq6cm4fqMm61'
SECRET_KEY = os.environ.get('BAIDU_SECRET_KEY') or 'OtPn0aOmNmFDvVBD36cAwoaK8KyIyVUW'
# client1 serves speech recognition and synthesis; client2 serves OCR.
client1 = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
client2 = AipOcr(APP_ID, API_KEY, SECRET_KEY)

### 加载药品JSON数据库
def load_medicine_db(json_path):
    """Read the medicine database from a UTF-8 encoded JSON file.

    Args:
        json_path: Path of the JSON file to load.

    Returns:
        The deserialized JSON content of the file.
    """
    with open(json_path, mode='r', encoding='utf-8') as db_file:
        raw_text = db_file.read()
    return json.loads(raw_text)

# Replace with the actual path to your JSON file.
MEDICINE_DB_PATH = 'medicine_db.json'
# Loaded once when this module runs; the file must exist at that point.
medicine_db = load_medicine_db(MEDICINE_DB_PATH)

### 相关函数
def get_text(wav_bytes):
    """Transcribe WAV audio through the Baidu speech client.

    Args:
        wav_bytes: WAV audio data; it is submitted as 16000 Hz audio.

    Returns:
        The first recognition candidate on success. Failures are reported
        in-band as strings as well: ``"Error: <err_msg>"`` when the service
        answers with a non-zero ``err_no``, or ``"Exception: <message>"``
        when anything raises while calling the service or reading its reply.
    """
    asr_options = {'dev_pid': 1537}
    try:
        response = client1.asr(wav_bytes, 'wav', 16000, asr_options)
        if response['err_no'] != 0:
            return f"Error: {response['err_msg']}"
        return response['result'][0]
    except Exception as exc:
        # Errors are deliberately turned into text so the caller's keyword
        # checks can run on the return value unconditionally.
        return f"Exception: {exc!s}"

def get_file_content(filePath):
    """Return the whole content of a file as bytes.

    Args:
        filePath: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes.
    """
    with open(filePath, mode='rb') as stream:
        payload = stream.read()
    return payload

def match_medicine_name(ocr_text, medicine_db):
    """Find the database medicine name that appears in the OCR text.

    Matching is a case-insensitive substring test. When several names occur
    in the text, the longest one wins, so a short name that is a substring
    of a longer name does not shadow the more specific entry. Names of equal
    length resolve to the one listed first in the database.

    Args:
        ocr_text: Text recognized from the image.
        medicine_db: Iterable of dict records; the ``'name'`` entry of each
            record is the candidate to look for.

    Returns:
        The matching name exactly as stored in the database, or ``None``
        when the text is empty or no name occurs in it.
    """
    if not ocr_text:
        return None

    haystack = ocr_text.lower()
    best_name = None
    for medicine in medicine_db:
        name = medicine.get('name')
        # A blank name would match every text ('' is a substring of any
        # string), so records without a usable name are skipped.
        if not isinstance(name, str) or not name.strip():
            continue
        if name.lower() not in haystack:
            continue
        if best_name is None or len(name) > len(best_name):
            best_name = name
    return best_name

def text_to_speech(text, output_path='audio.mp3'):
    """Synthesize speech for *text* and store the audio in a file.

    Args:
        text: Text to synthesize.
        output_path: Destination of the audio data; defaults to
            ``'audio.mp3'``.

    Returns:
        ``output_path`` once the audio has been written, or ``None`` when
        the speech client hands back a dict, which is treated as a failure
        report and printed.
    """
    voice_options = {'spd': 5, 'pit': 5, 'vol': 5, 'per': 4}
    audio = client1.synthesis(text, 'zh', 1, voice_options)

    if isinstance(audio, dict):
        print("TTS failed:", audio)
        return None

    with open(output_path, 'wb') as out_file:
        out_file.write(audio)
    return output_path

def process_ocr(image_path, medicine_db):
    """Run OCR on an image and look the recognized text up in the database.

    Args:
        image_path: Path of the image file to recognize.
        medicine_db: Iterable of medicine records (dicts with a ``'name'``
            entry) to match the recognized text against.

    Returns:
        The matching medicine record on success. Otherwise a message
        string: ``"OCR识别失败"`` when the OCR response carries no
        ``'words_result'``, or ``"未在数据库中匹配到该药品"`` when no
        database entry matches. Callers must therefore check the type of
        the result before indexing into it.
    """
    ocr_result = client2.basicGeneral(get_file_content(image_path))
    if 'words_result' not in ocr_result:
        return "OCR识别失败"

    recognized_lines = []
    for entry in ocr_result['words_result']:
        recognized_lines.append(entry['words'])
    ocr_text = '\n'.join(recognized_lines)
    print("识别到的文字:", ocr_text)

    matched_name = match_medicine_name(ocr_text, medicine_db)
    if matched_name:
        # Fetch the full record that belongs to the matched name.
        for candidate in medicine_db:
            if candidate['name'] == matched_name:
                if candidate:
                    return candidate
                break
    return "未在数据库中匹配到该药品"

### 主程序
# Voice-driven main loop: listen for a spoken command, transcribe it and
# react to the keywords 拍照 (take a photo), 是什么 (what is it),
# 怎么吃 (usage and dosage), 怎么用 (usage) and 退出 (quit).
r = sr.Recognizer()
mic = sr.Microphone()
cap = cv2.VideoCapture(0)
# Record of the most recently recognized medicine, or None while no photo
# has been recognized successfully.
medicine_list = None


def _speak(message):
    """Synthesize *message* and play it; do nothing if synthesis fails."""
    tts_file = text_to_speech(message)
    if tts_file:
        playsound(tts_file)


def _prompt_for_photo():
    """Tell the user, on screen and by voice, to take a photo first."""
    print("没有找到已拍摄的图片")
    # The prompt has to be played, not just synthesized, to reach the user.
    _speak("请先拍照")


try:
    while True:
        print("Listening...")
        with mic as source:
            r.adjust_for_ambient_noise(source)
            audio = r.listen(source)
        audio_data = audio.get_wav_data(convert_rate=16000)

        print("Recognizing...")
        text = get_text(audio_data)
        print("Recognized text:", text)

        if "拍照" in text:
            print("Take picture")
            ret, frame = cap.read()
            if ret:
                image_path = 'captured_image.jpg'
                cv2.imwrite(image_path, frame)
                print("Picture saved:", image_path)
                if os.path.exists(image_path):
                    ocr_outcome = process_ocr(image_path, medicine_db)
                    if isinstance(ocr_outcome, dict):
                        medicine_list = ocr_outcome
                    else:
                        # process_ocr reports failures as a message string;
                        # keeping it would break the record lookups below.
                        print(ocr_outcome)
                        medicine_list = None
            else:
                print("Failed to capture a frame from the camera")

        if "是什么" in text:
            if medicine_list:
                print("识别药品")
                print(medicine_list['name'])
                # Announce the recognition result by voice.
                _speak(f"识别到的药品是：{medicine_list['name']}")
            else:
                _prompt_for_photo()

        if "怎么吃" in text:
            print("获取药品用法和用量")
            if medicine_list:
                print(medicine_list['usage'], medicine_list['dosage'])
                # 用法 is the usage and 用量 is the dosage; each label is
                # paired with its own field.
                _speak(
                    f"药品用法是：{medicine_list['usage']}"
                    f"药品用量是：{medicine_list['dosage']}"
                )
            else:
                _prompt_for_photo()

        if "怎么用" in text:
            print("获取药品用法")
            if medicine_list:
                print(medicine_list['usage'])
                # Announce the usage by voice.
                _speak(f"药品用法是：{medicine_list['usage']}")
            else:
                _prompt_for_photo()

        if "退出" in text:
            print("Exiting...")
            break

except Exception as e:
    print("Error:", e)

finally:
    # Always free the camera, whether the loop ended normally or not.
    cap.release()
    cv2.destroyAllWindows()