### 0. 导入 NeMo 工具库及相关工具类

In [None]:
import nemo
import nemo.collections.asr as nemo_asr
import nemo.collections.tts as nemo_tts

In [None]:
import librosa
import IPython.display as ipd

# Load the sample recording and play it back inline.
example_file = "/root/ms_chatbot/test.wav"
# sr=None keeps the file's native sampling rate; librosa otherwise resamples
# to its 22050 Hz default, so the returned rate would not describe the file.
audio, sample_rate = librosa.load(example_file, sr=None)
# Play the decoded samples: IPython only honours `rate` for raw sample data,
# not when it is handed a file path.
ipd.Audio(audio, rate=sample_rate)

### 1. 构建机器的“耳朵”，实现听写的过程

#### 加载语音识别模型

In [None]:
# Load the pretrained ASR (speech recognition) model.
citrinet = nemo_asr.models.EncDecCTCModel.from_pretrained(
    model_name="stt_zh_citrinet_512",
)

#### 传入语音文件进行语音识别

In [None]:
# Transcribe the recording referenced by example_file rather than a
# working-directory-relative "test.wav", so the file that is played back and
# the file that is recognized are guaranteed to be the same one.
# The file list is passed positionally because the keyword name differs
# between NeMo releases (paths2audio_files vs. audio).
asr_result = citrinet.transcribe([example_file])
# Newer NeMo releases may return Hypothesis objects rather than plain strings;
# fall back to the item itself when it has no .text attribute.
asr_result = " ".join(getattr(item, "text", item) for item in asr_result)
print(asr_result)

### 2. 构建机器的“大脑”，实现聊天的过程

In [None]:
from transformers import AutoTokenizer, AutoModel
# Tokenizer and chat model both come from the same ChatGLM3 checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "THUDM/chatglm3-6b",
    trust_remote_code=True,
)
# Load the model, quantize it with quantize(8), move it to the GPU and
# switch it to evaluation mode.
model = (
    AutoModel.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
    .quantize(8)
    .cuda()
    .eval()
)
# The suffix appended to the transcript asks for a reply of at most ten
# characters; the conversation starts with an empty history.
response, history = model.chat(
    tokenizer,
    asr_result + "10个字以内",
    history=[],
)
print(response)

### 3. 构建机器的“嘴巴”，将文字用声音说出来

In [None]:
from nemo.collections.tts.models import FastPitchModel
from matplotlib.pyplot import imshow
from matplotlib import pyplot as plt

#### 加载 FastPitch 声学模型

In [None]:
# Load the pretrained FastPitch model used to turn text into a spectrogram.
spec_generator = FastPitchModel.from_pretrained(
    model_name="tts_zh_fastpitch_sfspeech",
)

In [None]:
# Show the chatbot reply; this is the text handed to spec_generator.parse()
# in the synthesis step.
response

#### 通过 FastPitch 声学模型将文字转换成对应频谱图

In [None]:
# Parse the reply text into the token input of the acoustic model.
parsed = spec_generator.parse(response)
# Call the model's generate_spectrogram() to produce the spectrogram.
spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
# Convert the tensor to a NumPy array for plotting; index 0 along the first
# axis is shown (presumably the batch dimension - TODO confirm).
plt.imshow(spectrogram[0].detach().cpu().numpy(), origin="lower")
# Render the spectrogram that corresponds to the text.
plt.show()

#### 加载 HiFi-GAN 声码器模型

In [None]:
from nemo.collections.tts.models import HifiGanModel
# Restore the HiFi-GAN vocoder from the local "tts_hifigan.nemo" checkpoint.
Hifigan = HifiGanModel.restore_from(restore_path="tts_hifigan.nemo")

#### 使用 HiFi-GAN 声码器将频谱图合成出语音

In [None]:
import IPython
# Call the vocoder's convert_spectrogram_to_audio() to turn the spectrogram
# into an audio waveform tensor.
audio = Hifigan.convert_spectrogram_to_audio(spec=spectrogram)
# Move the waveform to the CPU as a NumPy array and play it inline.
# NOTE(review): the 22050 Hz playback rate is hard-coded; presumably it is
# the vocoder's output sampling rate - confirm against the checkpoint.
IPython.display.Audio(audio.detach().cpu().numpy(), rate=22050)

---