RVC-Boss · KakaruHayate · Jan 22, 2024 · Jan 24, 2024 · Jan 26, 2024 · Mar 21, 2024
diff --git a/GPT_SoVITS/AR/models/t2s_lightning_module.py b/GPT_SoVITS/AR/models/t2s_lightning_module.py
@@ -7,6 +7,10 @@
 from typing import Dict
 
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from pytorch_lightning import LightningModule
 from AR.models.t2s_model import Text2SemanticDecoder
 from AR.modules.lr_schedulers import WarmupCosineLRSchedule

diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py
@@ -1,6 +1,10 @@
 # modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py
 # reference: https://github.com/lifeiteng/vall-e
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from tqdm import tqdm
 
 from AR.models.utils import make_pad_mask

diff --git a/GPT_SoVITS/AR/models/utils.py b/GPT_SoVITS/AR/models/utils.py
@@ -1,6 +1,10 @@
 # modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/utils.py
 # reference: https://github.com/lifeiteng/vall-e
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 import torch.nn.functional as F
 from typing import Tuple
 

diff --git a/GPT_SoVITS/AR/modules/activation.py b/GPT_SoVITS/AR/modules/activation.py
@@ -2,6 +2,10 @@
 from typing import Optional
 from typing import Tuple
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import Tensor
 from torch.nn import Linear
 from torch.nn import Module

diff --git a/GPT_SoVITS/AR/modules/embedding.py b/GPT_SoVITS/AR/modules/embedding.py
@@ -2,6 +2,10 @@
 import math
 
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 
 

diff --git a/GPT_SoVITS/AR/modules/patched_mha_with_cache.py b/GPT_SoVITS/AR/modules/patched_mha_with_cache.py
@@ -7,6 +7,10 @@
 )
 from torch.nn import functional as F
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 # Tensor = torch.Tensor
 # from typing import Callable, List, Optional, Tuple, Union
 

diff --git a/GPT_SoVITS/AR/modules/scaling.py b/GPT_SoVITS/AR/modules/scaling.py
@@ -21,6 +21,10 @@
 from typing import Union
 
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 import torch.nn as nn
 from torch import Tensor
 

diff --git a/GPT_SoVITS/AR/modules/transformer.py b/GPT_SoVITS/AR/modules/transformer.py
@@ -10,6 +10,10 @@
 from typing import Union
 
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from AR.modules.activation import MultiheadAttention
 from AR.modules.scaling import BalancedDoubleSwish
 from torch import nn

diff --git a/GPT_SoVITS/feature_extractor/cnhubert.py b/GPT_SoVITS/feature_extractor/cnhubert.py
@@ -2,6 +2,10 @@
 
 import librosa
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 import torch.nn.functional as F
 import soundfile as sf
 import logging

diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py
@@ -18,6 +18,19 @@
 import pdb
 import torch
 
+device = "cpu"
+# Switch to a Moore Threads (MUSA) device when torch_musa imports and reports one as available.
+use_torch_musa = True
+try:
+    import torch_musa
+except ImportError:
+    use_torch_musa = False
+if use_torch_musa:
+    if "_MUSA_VISIBLE_DEVICES" in os.environ:
+        os.environ["MUSA_VISIBLE_DEVICES"] = os.environ["_MUSA_VISIBLE_DEVICES"]
+    if torch.musa.is_available():
+        device = "musa"
+
 if os.path.exists("./gweight.txt"):
     with open("./gweight.txt", 'r', encoding="utf-8") as file:
         gweight_data = file.read()
@@ -73,8 +86,6 @@
 
 if torch.cuda.is_available():
     device = "cuda"
-else:
-    device = "cpu"
 
 tokenizer = AutoTokenizer.from_pretrained(bert_path)
 bert_model = AutoModelForMaskedLM.from_pretrained(bert_path)

diff --git a/GPT_SoVITS/module/attentions.py b/GPT_SoVITS/module/attentions.py
@@ -1,5 +1,9 @@
 import math
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 from torch.nn import functional as F
 

diff --git a/GPT_SoVITS/module/commons.py b/GPT_SoVITS/module/commons.py
@@ -1,5 +1,9 @@
 import math
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch.nn import functional as F
 
 

diff --git a/GPT_SoVITS/module/core_vq.py b/GPT_SoVITS/module/core_vq.py
@@ -34,6 +34,10 @@
 
 from einops import rearrange, repeat
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 import torch.nn.functional as F
 from tqdm import tqdm

diff --git a/GPT_SoVITS/module/mel_processing.py b/GPT_SoVITS/module/mel_processing.py
@@ -2,6 +2,10 @@
 import os
 import random
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 import torch.nn.functional as F
 import torch.utils.data

diff --git a/GPT_SoVITS/module/models.py b/GPT_SoVITS/module/models.py
@@ -1,6 +1,10 @@
 import copy
 import math
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 from torch.nn import functional as F
 

diff --git a/GPT_SoVITS/module/modules.py b/GPT_SoVITS/module/modules.py
@@ -1,6 +1,10 @@
 import math
 import numpy as np
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 from torch.nn import functional as F
 

diff --git a/GPT_SoVITS/module/mrte_model.py b/GPT_SoVITS/module/mrte_model.py
@@ -1,6 +1,10 @@
 # This is Multi-reference timbre encoder
 
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 from torch.nn.utils import remove_weight_norm, weight_norm
 from module.attentions import MultiHeadAttention

diff --git a/GPT_SoVITS/module/quantize.py b/GPT_SoVITS/module/quantize.py
@@ -11,6 +11,10 @@
 import typing as tp
 
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch import nn
 
 from module.core_vq import ResidualVectorQuantization

diff --git a/GPT_SoVITS/module/transforms.py b/GPT_SoVITS/module/transforms.py
@@ -1,4 +1,8 @@
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 from torch.nn import functional as F
 
 import numpy as np

diff --git a/api.py b/api.py
@@ -13,7 +13,7 @@
 `-dt` - `默认参考音频文本`
 `-dl` - `默认参考音频语种, "中文","英文","日文","zh","en","ja"`
 
-`-d` - `推理设备, "cuda","cpu"`
+`-d` - `推理设备, "cuda","cpu","musa"`
 `-a` - `绑定地址, 默认"127.0.0.1"`
 `-p` - `绑定端口, 默认9880, 可在 config.py 中指定`
 `-fp` - `覆盖 config.py 使用全精度`
@@ -124,6 +124,10 @@
 import LangSegment
 from time import time as ttime
 import torch
+try:
+    import torch_musa
+except ImportError:
+    pass
 import librosa
 import soundfile as sf
 from fastapi import FastAPI, Request, HTTPException
@@ -570,7 +574,7 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cu
 parser.add_argument("-dr", "--default_refer_path", type=str, default="", help="默认参考音频路径")
 parser.add_argument("-dt", "--default_refer_text", type=str, default="", help="默认参考音频文本")
 parser.add_argument("-dl", "--default_refer_language", type=str, default="", help="默认参考音频语种")
-parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu")
+parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu / musa")
 parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0")
 parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880")
 parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度")

diff --git a/config.py b/config.py
@@ -17,10 +17,23 @@
 
 exp_root = "logs"
 python_exec = sys.executable or "python"
+
+infer_device = "cpu"
+
+# Select a Moore Threads (MUSA) GPU when the torch_musa package imports and reports a device.
+try:
+    import torch_musa
+except ImportError:
+    use_torch_musa = False
+else:
+    use_torch_musa = True
+if use_torch_musa and torch.musa.is_available():
+    infer_device = "musa"
+    is_half = False  # half precision is switched off whenever a MUSA device is found
+    print("GPT-SoVITS running on MUSA!")
+
 if torch.cuda.is_available():
     infer_device = "cuda"
-else:
-    infer_device = "cpu"
 
 webui_port_main = 9874
 webui_port_uvr5 = 9873

diff --git a/docs/cn/README.md b/docs/cn/README.md
@@ -129,6 +129,42 @@ docker compose -f "docker-compose.yaml" up -d
 docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx
 ```
 
+### 在摩尔线程显卡（MUSA）运行
+
+仅支持在物理机（非虚拟机）上运行：系统需为 Ubuntu 20.04 LTS（内核版本 5.4.X–5.15.X），CPU 需为 Intel Core 系列；**目前只支持推理**。
+
+1. 前往[摩尔线程应用商店](https://developer.mthreads.com/sdk/download/musa?equipment=&os=Ubuntu&driverVersion=&version=)下载并按顺序安装`musa driver`、`musa_toolkit`、`mudnn`、`mccl`
+
+2. 前往[torch_musa](https://github.com/MooreThreads/torch_musa/releases/tag/v1.1.0)，根据你的显卡和 Python 版本下载对应的 `torch`、`torch_musa` wheel 文件，将文件名中 `-linux_x86_64` 之后的部分删除（保留 `.whl` 扩展名），然后使用以下命令安装：
+
+```
+pip install torch-2.0.0-cp39-cp39-linux_x86_64.whl
+pip install torch_musa-1.1.0-cp39-cp39-linux_x86_64.whl
+```
+
+3. 安装环境
+
+```
+conda install -c conda-forge gcc
+conda install -c conda-forge gxx
+conda install ffmpeg cmake=3.18 ninja
+```
+
+之后需要从源码编译安装 torchaudio，因为摩尔线程官方没有提供预编译的 wheel：
+```
+git clone https://github.com/pytorch/audio
+cd audio
+USE_CUDA=0 python setup.py install
+```
+
+安装其他依赖
+
+```
+pip install -r requirements.txt
+```
+
+4. 运行`python webui.py`
+
 ## 预训练模型
 
 从 [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) 下载预训练模型，并将它们放置在 `GPT_SoVITS\pretrained_models` 中。

diff --git a/webui.py b/webui.py
@@ -79,6 +79,22 @@
 #     gpu_infos.append("%s\t%s" % ("0", "Apple GPU"))
 #     mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存
 
+# Detect Moore Threads (MUSA) GPUs; each one is registered under its own device index with its memory size.
+try:
+    import torch_musa
+    use_torch_musa = True
+except ImportError:
+    use_torch_musa = False
+if use_torch_musa:
+    ngpu = torch.musa.device_count()
+    if torch.musa.is_available():
+        for i in range(ngpu):
+            if_gpu_ok = True
+            gpu_name = torch.musa.get_device_name(i)
+            gpu_infos.append("%s\t%s" % (i, gpu_name))  # use the real index; a constant "0" gave every card the same id
+            mem.append(int(torch.musa.get_device_properties(i).total_memory/ 1024/ 1024/ 1024+ 0.4))
+        print("GPT-SoVITS running on MUSA!")
+
 if if_gpu_ok and len(gpu_infos) > 0:
     gpu_info = "\n".join(gpu_infos)
     default_batch_size = min(mem) // 2