# Voice Control

In [4]:
import os
import torch
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

# 本地模型路径（替换为你的实际路径）
model_dir = r"C:\Users\DJH的Y9000K\.cache\modelscope\hub\models\iic\SenseVoiceSmall"

# 验证路径是否存在
if not os.path.exists(model_dir):
    raise FileNotFoundError(f"模型路径不存在: {model_dir}")

# 加载模型（使用本地路径）
model = AutoModel(
    model=model_dir,  # 直接传入本地绝对路径
    trust_remote_code=True,
    remote_code="./model.py",  # 确保model.py在当前工作目录
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0" if torch.cuda.is_available() else "cpu"
)

# 后续识别和解析代码保持不变
def recognize_cantonese_audio(audio_path):
    res = model.generate(
        input=audio_path,
        cache={},
        language="yue",  # 指定粤语
        use_itn=True,
        batch_size_s=60,
        merge_vad=True,
        merge_length_s=15
    )
    return rich_transcription_postprocess(res[0]["text"])

# 固定指令集
command_set = ["开窗", "关窗", "开灯", "关灯"]

# 音频路径（替换为你的文件）
audio_path = "audio/开灯.m4a"

# 执行识别和解析
if os.path.exists(audio_path):
    recognized_text = recognize_cantonese_audio(audio_path)
    print(f"识别结果: {recognized_text}")
    
    matched_command = next((cmd for cmd in command_set if cmd in recognized_text), "")
    print(f"匹配指令: {matched_command if matched_command else '未匹配'}")
else:
    print(f"音频文件不存在: {audio_path}")

funasr version: 1.2.6.
Check update of funasr, and it would cost few times. You may disable it by set `disable_update=True` in AutoModel
You are using the latest version of funasr-1.2.6
Loading remote code successfully: ./model.py


OSError: Not found: "C:\Users\DJH的Y9000K\.cache\modelscope\hub\models\iic\SenseVoiceSmall\chn_jpn_yue_eng_ko_spectok.bpe.model": No such file or directory Error #2

In [None]:
import os
import torch
import shutil
import tempfile
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

# 创建临时目录（英文路径）
temp_dir = tempfile.mkdtemp()
print(f"创建临时目录: {temp_dir}")

# 原始模型目录（含中文路径）
source_dir = r"C:\Users\DJH的Y9000K\.cache\modelscope\hub\models\iic\SenseVoiceSmall"

# 检查模型目录是否存在
if not os.path.exists(source_dir):
    raise FileNotFoundError(f"模型路径不存在: {source_dir}")

# 将模型复制到临时目录（纯英文路径）
print("正在复制模型文件到临时目录...")
model_temp_dir = os.path.join(temp_dir, "SenseVoiceTemp")
shutil.copytree(source_dir, model_temp_dir, dirs_exist_ok=True)

# 预加载VAD模型避免下载
vad_dir = os.path.join(temp_dir, "speech_fsmn_vad_zh-cn-16k-common-pytorch")
os.makedirs(vad_dir, exist_ok=True)
# 从本地缓存复制VAD模型文件
vad_source_dir = r"C:\Users\DJH的Y9000K\.cache\modelscope\hub\models\iic\speech_fsmn_vad_zh-cn-16k-common-pytorch"
if os.path.exists(vad_source_dir):
    shutil.copytree(vad_source_dir, vad_dir, dirs_exist_ok=True)
else:
    print("警告：未找到本地VAD模型缓存，将从网络下载")

# 加载模型
print("加载模型中...")
model = AutoModel(
    model=model_temp_dir,  # 使用英文路径
    trust_remote_code=True,
    vad_model="fsmn-vad",
    vad_model_path=vad_dir,  # 指定本地VAD模型路径
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0" if torch.cuda.is_available() else "cpu",
    disable_update=True  # 禁止检查更新
)

def recognize_cantonese_audio(audio_path):
    """处理音频识别的函数"""
    try:
        res = model.generate(
            input=audio_path,
            cache={},
            language="yue",  # 指定粤语
            use_itn=True,
            batch_size_s=60,
            merge_vad=True,
            merge_length_s=15
        )
        return rich_transcription_postprocess(res[0]["text"])
    except Exception as e:
        print(f"语音识别失败: {str(e)}")
        return ""

# 固定指令集
command_set = ["开窗", "关窗", "开灯", "关灯"]

# 已经转换好的WAV文件路径
wav_path = "audio/开灯_test.wav"  # 根据实际文件名修改

# 确保WAV文件存在
if os.path.exists(wav_path):
    print(f"开始处理音频: {wav_path}")
    
    # 语音识别
    recognized_text = recognize_cantonese_audio(wav_path)
    print(f"识别结果: {recognized_text}")
    
    # 命令匹配
    matched_command = ""
    max_common_characters = 0
    
    for cmd in command_set:
        # 计算共有字符数量
        common_count = sum(1 for char in cmd if char in recognized_text)
        
        # 如果完全包含某个命令
        if cmd in recognized_text:
            matched_command = cmd
            break
        # 否则选择共有字符最多的命令
        elif common_count > max_common_characters:
            matched_command = cmd
            max_common_characters = common_count
            
    if matched_command:
        print(f"匹配命令: {matched_command}")
    else:
        print("未匹配到有效指令")
else:
    print(f"音频文件不存在: {wav_path}")

# 清理临时目录
print("清理临时文件...")
shutil.rmtree(temp_dir, ignore_errors=True)

创建临时目录: D:\TEMPFI~1\tmpupayp77a
正在复制模型文件到临时目录...
加载模型中...
funasr version: 1.2.6.
Loading remote code successfully: model
Downloading Model from https://www.modelscope.cn to directory: C:\Users\DJH的Y9000K\.cache\modelscope\hub\models\iic\speech_fsmn_vad_zh-cn-16k-common-pytorch
开始处理音频: audio/开灯_test.wav


rtf_avg: 0.163: 100%|[34m██████████[0m| 1/1 [00:00<00:00,  1.52it/s]                                                                                          
rtf_avg: 0.190: 100%|[34m██████████[0m| 1/1 [00:00<00:00,  3.74it/s]
rtf_avg: 0.071, time_speech:  4.032, time_escape: 0.285: 100%|[31m██████████[0m| 1/1 [00:00<00:00,  3.37it/s]

识别结果: 开灯。😔
匹配命令: 开灯
清理临时文件...





: 