add edge-tts, add proxy config

SchneeHertz · Apr 10, 2023 · e3d293a · e3d293a
1 parent 37f94a7
commit e3d293a
Show file tree

Hide file tree

Showing 6 changed files with 71 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -16,10 +16,28 @@
 4. 将libmp3lame.dll放入sox所在文件夹
 5. 打开chat-xiuliu, 点击Open Config, 编辑配置文件
 5. 获取一个openai的API key, 填入配置文件中
-6. 创建一个Azure的语音资源 https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices , 区域见配置文件，将取得的API key 填入配置文件中
+6. 安装Python，然后安装edge-tts `pip install edge-tts`
 7. 修改配置文件的其他部分（可选）
 8. 保存配置文件后重启chat-xiuliu
 
+### 配置文件参考
+```
+{
+  "OPENAI_API_KEY": "sk-",
+  "USE_MODEL": "gpt-3.5-turbo",
+  "SpeechSynthesisVoiceName": "zh-CN-XiaoyiNeural",
+  "ADMIN_NAME": "Chell",
+  "AI_NAME": "休留",
+  "systemPrompt": "你是女高中生休留",
+  "proxy": {
+    "type": "http",
+    "host": "127.0.0.1",
+    "port": 7890
+  }
+}
+```
+
+
 ## 其他
 关注休留喵，关注休留谢谢喵~
 上舰或者[爱发电](https://afdian.net/a/xiuliu)捐赠等额可进舰长群727536542, 管理员提供附加的有限的技术支持
diff --git a/index.js b/index.js
@@ -3,18 +3,19 @@ const path = require('node:path')
 const fs = require('node:fs')
 const { format } = require('node:util')
 const { nanoid } = require('nanoid')
-const { SpeechConfig, AudioConfig, SpeechSynthesizer } = require('microsoft-cognitiveservices-speech-sdk')
 const sound = require('sound-play')
 const _ = require('lodash')
 const { Configuration, OpenAIApi } = require('openai')
 
 const { config, history, STORE_PATH, LOG_PATH, AUDIO_PATH, SPEECH_AUDIO_PATH } = require('./utils/initFile.js')
 const { getSpeechText } = require('./modules/speech.js')
+const { ttsPromise } = require('./modules/edge-tts.js')
 const {
   OPENAI_API_KEY, USE_MODEL,
-  SPEECH_KEY, SPEECH_AREA, SpeechSynthesisLanguage, SpeechSynthesisVoiceName,
+  SpeechSynthesisVoiceName,
   ADMIN_NAME, AI_NAME,
   systemPrompt,
+  proxy
 } = config
 
 let logFile = fs.createWriteStream(path.join(LOG_PATH, `log-${new Date().toLocaleString('zh-CN').replace(/[\/:]/gi, '-')}.txt`), {flags: 'w'})
@@ -80,27 +81,18 @@ const STATUS = {
 
 let speechList = []
 
-const speakPrompt = (text, audioFilename, triggerRecord) => {
-  if (!audioFilename) audioFilename = nanoid()
-  let audioPath = path.join(AUDIO_PATH, `${audioFilename}.wav`)
-  const speechConfig = SpeechConfig.fromSubscription(SPEECH_KEY, SPEECH_AREA)
-  speechConfig.speechSynthesisLanguage = SpeechSynthesisLanguage
-  speechConfig.speechSynthesisVoiceName = SpeechSynthesisVoiceName
-  const audioConfig = AudioConfig.fromAudioFileOutput(audioPath)
-  const synthesizer = new SpeechSynthesizer(speechConfig, audioConfig)
-  synthesizer.speakTextAsync(
-    text,
-    async result => {
-      synthesizer.close()
-      await sound.play(audioPath)
-      if (triggerRecord && STATUS.isSpeechTalk) triggerSpeech()
-      resolveSpeakTextList()
-    },
-    error => {
-      console.log(error)
-      synthesizer.close()
-    }
-  )
+const speakPrompt = async (text, audioFilename, triggerRecord) => { // Synthesize text to an mp3 through ttsPromise, play it, then hand control back to resolveSpeakTextList.
+  try {
+    if (!audioFilename) audioFilename = nanoid() // no filename supplied: fall back to a generated nanoid
+    let audioPath = path.join(AUDIO_PATH, `${audioFilename}.mp3`)
+    await ttsPromise(text, audioPath, SpeechSynthesisVoiceName) // voice name is destructured from config at the top of the file
+    await sound.play(audioPath)
+    if (triggerRecord && STATUS.isSpeechTalk) triggerSpeech() // triggerSpeech is async and is not awaited here
+    resolveSpeakTextList()
+  } catch (e) {
+    console.log(e)
+    resolveSpeakTextList() // also invoked after a synthesis or playback failure, so an error is logged but the list is still resolved
+  }
+}
 
 const resolveSpeakTextList = async () => {
@@ -132,7 +124,7 @@ const resloveAdminPrompt = async ({prompt, triggerRecord})=> {
   openai.createChatCompletion({
     model: USE_MODEL,
     messages,
-  })
+  }, { proxy })
   .then(res=>{
     let resText = res.data.choices[0].message.content
     history.conversationHistory.push({
@@ -176,7 +168,7 @@ const updateMemory = ()=>{
   openai.createChatCompletion({
     model: USE_MODEL,
     messages
-  })
+  }, { proxy })
   .then(async res=>{
     history.memory = res.data.choices[0].message.content.slice(0, history.limitHistory.memoryLength)
     fs.writeFileSync(path.join(STORE_PATH, 'history.json'), JSON.stringify(history, null, '  '), {encoding: 'utf-8'})
@@ -186,7 +178,7 @@ const updateMemory = ()=>{
 const triggerSpeech = async ()=>{
   STATUS.isRecording = true
   mainWindow.setProgressBar(100, {mode: 'indeterminate'})
-  let adminTalk = await getSpeechText(openai, SPEECH_AUDIO_PATH)
+  let adminTalk = await getSpeechText(openai, SPEECH_AUDIO_PATH, proxy)
   STATUS.isRecording = false
   mainWindow.setProgressBar(-1)
   messageLogAndSend({

diff --git a/modules/edge-tts.js b/modules/edge-tts.js
@@ -0,0 +1,23 @@
+const { spawn } = require('node:child_process')
+
+let ttsPromise = (text, audioPath, SpeechSynthesisVoiceName = 'zh-CN-XiaoyiNeural')=>{ // Spawn the edge-tts command to write speech audio for text to audioPath; returns a Promise.
+  let vttPath = audioPath + '.vtt' // subtitle path is the audio path with .vtt appended
+  return new Promise((resolve, reject)=>{
+    const spawned = spawn('edge-tts', [ // arguments are passed as an array, not through a shell command string
+      '-v', SpeechSynthesisVoiceName, '--text', text, '--write-media', audioPath, '--write-subtitles', vttPath
+    ])
+    spawned.on('error', data=>{ // child process error event, e.g. the edge-tts executable could not be started
+      reject(data)
+    })
+    spawned.on('exit', code=>{
+      if (code === 0) {
+        return resolve(vttPath) // resolves with the subtitle path, not the audio path
+      }
+      return reject('close code is ' + code) // rejection value is a plain string, not an Error; code is null if the child was ended by a signal
+    })
+  })
+}
+
+module.exports = {
+  ttsPromise
+}
diff --git a/modules/speech.js b/modules/speech.js
@@ -20,11 +20,13 @@ let recordPromise = (SPEECH_AUDIO_PATH)=>{
   })
 }
 
-let getSpeechText = async (openai, SPEECH_AUDIO_PATH)=>{
+let getSpeechText = async (openai, SPEECH_AUDIO_PATH, proxy)=>{ // Await recordPromise for an audio file path, send that file for whisper-1 transcription, and return resp.data.text.
   let audioFilePath = await recordPromise(SPEECH_AUDIO_PATH)
   const resp = await openai.createTranscription(
     fs.createReadStream(audioFilePath),
-    'whisper-1'
+    'whisper-1',
+    undefined,undefined,undefined,undefined, // four positional arguments left unset so the proxy object lands in the 7th slot -- presumably the request-options parameter; TODO confirm against the installed openai SDK version
+    { proxy }
   )
   return resp.data.text
 }

diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "chat-xiuliu",
   "private": true,
-  "version": "1.0.0",
+  "version": "1.1.0",
   "scripts": {
     "dev": "vite",
     "build": "vite build",

diff --git a/utils/initFile.js b/utils/initFile.js
@@ -33,13 +33,15 @@ try {
   config = {
     OPENAI_API_KEY: '',
     USE_MODEL: 'gpt-3.5-turbo',
-    SPEECH_KEY: '',
-    SPEECH_AREA: 'eastasia',
-    SpeechSynthesisLanguage: 'zh-CN',
     SpeechSynthesisVoiceName: 'zh-CN-XiaoyiNeural',
     ADMIN_NAME: 'Chell',
     AI_NAME: '休留',
-    systemPrompt: '你是女高中生休留'
+    systemPrompt: '你是女高中生休留',
+    proxy: {
+      type: 'http',
+      host: '127.0.0.1',
+      port: 7890
+    }
   }
   fs.writeFileSync(path.join(STORE_PATH, 'config.json'), JSON.stringify(config, null, '  '), {encoding: 'utf-8'})
 }