diff --git a/GPT_SoVITS/AR/models/t2s_lightning_module.py b/GPT_SoVITS/AR/models/t2s_lightning_module.py index 2dd3f3928..adbb72065 100644 --- a/GPT_SoVITS/AR/models/t2s_lightning_module.py +++ b/GPT_SoVITS/AR/models/t2s_lightning_module.py @@ -7,6 +7,10 @@ from typing import Dict import torch +try: + import torch_musa +except ImportError: + pass from pytorch_lightning import LightningModule from AR.models.t2s_model import Text2SemanticDecoder from AR.modules.lr_schedulers import WarmupCosineLRSchedule diff --git a/GPT_SoVITS/AR/models/t2s_model.py b/GPT_SoVITS/AR/models/t2s_model.py index c8ad3d825..5805a6ce4 100644 --- a/GPT_SoVITS/AR/models/t2s_model.py +++ b/GPT_SoVITS/AR/models/t2s_model.py @@ -1,6 +1,10 @@ # modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/t2s_model.py # reference: https://github.com/lifeiteng/vall-e import torch +try: + import torch_musa +except ImportError: + pass from tqdm import tqdm from AR.models.utils import make_pad_mask diff --git a/GPT_SoVITS/AR/models/utils.py b/GPT_SoVITS/AR/models/utils.py index 9678c7e13..706338eab 100644 --- a/GPT_SoVITS/AR/models/utils.py +++ b/GPT_SoVITS/AR/models/utils.py @@ -1,6 +1,10 @@ # modified from https://github.com/yangdongchao/SoundStorm/blob/master/soundstorm/s1/AR/models/utils.py # reference: https://github.com/lifeiteng/vall-e import torch +try: + import torch_musa +except ImportError: + pass import torch.nn.functional as F from typing import Tuple diff --git a/GPT_SoVITS/AR/modules/activation.py b/GPT_SoVITS/AR/modules/activation.py index 5ca888b5e..89b4cddf8 100644 --- a/GPT_SoVITS/AR/modules/activation.py +++ b/GPT_SoVITS/AR/modules/activation.py @@ -2,6 +2,10 @@ from typing import Optional from typing import Tuple import torch +try: + import torch_musa +except ImportError: + pass from torch import Tensor from torch.nn import Linear from torch.nn import Module diff --git a/GPT_SoVITS/AR/modules/embedding.py b/GPT_SoVITS/AR/modules/embedding.py index 
3a382f93d..f1736bd32 100644 --- a/GPT_SoVITS/AR/modules/embedding.py +++ b/GPT_SoVITS/AR/modules/embedding.py @@ -2,6 +2,10 @@ import math import torch +try: + import torch_musa +except ImportError: + pass from torch import nn diff --git a/GPT_SoVITS/AR/modules/patched_mha_with_cache.py b/GPT_SoVITS/AR/modules/patched_mha_with_cache.py index 7be241dad..9c4729b4a 100644 --- a/GPT_SoVITS/AR/modules/patched_mha_with_cache.py +++ b/GPT_SoVITS/AR/modules/patched_mha_with_cache.py @@ -7,6 +7,10 @@ ) from torch.nn import functional as F import torch +try: + import torch_musa +except ImportError: + pass # Tensor = torch.Tensor # from typing import Callable, List, Optional, Tuple, Union diff --git a/GPT_SoVITS/AR/modules/scaling.py b/GPT_SoVITS/AR/modules/scaling.py index 9256a8cbf..20679d630 100644 --- a/GPT_SoVITS/AR/modules/scaling.py +++ b/GPT_SoVITS/AR/modules/scaling.py @@ -21,6 +21,10 @@ from typing import Union import torch +try: + import torch_musa +except ImportError: + pass import torch.nn as nn from torch import Tensor diff --git a/GPT_SoVITS/AR/modules/transformer.py b/GPT_SoVITS/AR/modules/transformer.py index 7921f48e7..4d3d286c0 100644 --- a/GPT_SoVITS/AR/modules/transformer.py +++ b/GPT_SoVITS/AR/modules/transformer.py @@ -10,6 +10,10 @@ from typing import Union import torch +try: + import torch_musa +except ImportError: + pass from AR.modules.activation import MultiheadAttention from AR.modules.scaling import BalancedDoubleSwish from torch import nn diff --git a/GPT_SoVITS/feature_extractor/cnhubert.py b/GPT_SoVITS/feature_extractor/cnhubert.py index dc155bddb..a23293f11 100644 --- a/GPT_SoVITS/feature_extractor/cnhubert.py +++ b/GPT_SoVITS/feature_extractor/cnhubert.py @@ -2,6 +2,10 @@ import librosa import torch +try: + import torch_musa +except ImportError: + pass import torch.nn.functional as F import soundfile as sf import logging diff --git a/GPT_SoVITS/inference_webui.py b/GPT_SoVITS/inference_webui.py index 4fe8045d1..48541c171 100644 --- 
a/GPT_SoVITS/inference_webui.py +++ b/GPT_SoVITS/inference_webui.py @@ -18,6 +18,19 @@ import pdb import torch +device = "cpu" + +try: + import torch_musa + use_torch_musa = True +except ImportError: + use_torch_musa = False +if use_torch_musa: + if "_MUSA_VISIBLE_DEVICES" in os.environ: + os.environ["MUSA_VISIBLE_DEVICES"] = os.environ["_MUSA_VISIBLE_DEVICES"] + if torch.musa.is_available(): + device = "musa" + if os.path.exists("./gweight.txt"): with open("./gweight.txt", 'r', encoding="utf-8") as file: gweight_data = file.read() @@ -73,8 +86,6 @@ if torch.cuda.is_available(): device = "cuda" -else: - device = "cpu" tokenizer = AutoTokenizer.from_pretrained(bert_path) bert_model = AutoModelForMaskedLM.from_pretrained(bert_path) diff --git a/GPT_SoVITS/module/attentions.py b/GPT_SoVITS/module/attentions.py index a2e9e5155..1f57d8901 100644 --- a/GPT_SoVITS/module/attentions.py +++ b/GPT_SoVITS/module/attentions.py @@ -1,5 +1,9 @@ import math import torch +try: + import torch_musa +except ImportError: + pass from torch import nn from torch.nn import functional as F diff --git a/GPT_SoVITS/module/commons.py b/GPT_SoVITS/module/commons.py index e96cf9235..5e7703d19 100644 --- a/GPT_SoVITS/module/commons.py +++ b/GPT_SoVITS/module/commons.py @@ -1,5 +1,9 @@ import math import torch +try: + import torch_musa +except ImportError: + pass from torch.nn import functional as F diff --git a/GPT_SoVITS/module/core_vq.py b/GPT_SoVITS/module/core_vq.py index a5e22d667..557ec9278 100644 --- a/GPT_SoVITS/module/core_vq.py +++ b/GPT_SoVITS/module/core_vq.py @@ -34,6 +34,10 @@ from einops import rearrange, repeat import torch +try: + import torch_musa +except ImportError: + pass from torch import nn import torch.nn.functional as F from tqdm import tqdm diff --git a/GPT_SoVITS/module/mel_processing.py b/GPT_SoVITS/module/mel_processing.py index 503825ec6..80e1bdd7a 100644 --- a/GPT_SoVITS/module/mel_processing.py +++ b/GPT_SoVITS/module/mel_processing.py @@ -2,6 +2,10 @@ import os 
import random import torch +try: + import torch_musa +except ImportError: + pass from torch import nn import torch.nn.functional as F import torch.utils.data diff --git a/GPT_SoVITS/module/models.py b/GPT_SoVITS/module/models.py index 29676f43f..9e2aeb1d9 100644 --- a/GPT_SoVITS/module/models.py +++ b/GPT_SoVITS/module/models.py @@ -1,6 +1,10 @@ import copy import math import torch +try: + import torch_musa +except ImportError: + pass from torch import nn from torch.nn import functional as F diff --git a/GPT_SoVITS/module/modules.py b/GPT_SoVITS/module/modules.py index f44474558..06905fde6 100644 --- a/GPT_SoVITS/module/modules.py +++ b/GPT_SoVITS/module/modules.py @@ -1,6 +1,10 @@ import math import numpy as np import torch +try: + import torch_musa +except ImportError: + pass from torch import nn from torch.nn import functional as F diff --git a/GPT_SoVITS/module/mrte_model.py b/GPT_SoVITS/module/mrte_model.py index b0cd242c3..cf29c9e6f 100644 --- a/GPT_SoVITS/module/mrte_model.py +++ b/GPT_SoVITS/module/mrte_model.py @@ -1,6 +1,10 @@ # This is Multi-reference timbre encoder import torch +try: + import torch_musa +except ImportError: + pass from torch import nn from torch.nn.utils import remove_weight_norm, weight_norm from module.attentions import MultiHeadAttention diff --git a/GPT_SoVITS/module/quantize.py b/GPT_SoVITS/module/quantize.py index f9a5c632d..1d2354f29 100644 --- a/GPT_SoVITS/module/quantize.py +++ b/GPT_SoVITS/module/quantize.py @@ -11,6 +11,10 @@ import typing as tp import torch +try: + import torch_musa +except ImportError: + pass from torch import nn from module.core_vq import ResidualVectorQuantization diff --git a/GPT_SoVITS/module/transforms.py b/GPT_SoVITS/module/transforms.py index a11f799e0..d859e49b5 100644 --- a/GPT_SoVITS/module/transforms.py +++ b/GPT_SoVITS/module/transforms.py @@ -1,4 +1,8 @@ import torch +try: + import torch_musa +except ImportError: + pass from torch.nn import functional as F import numpy as np diff --git a/api.py 
b/api.py index ea0e39d04..4f44d0b61 100644 --- a/api.py +++ b/api.py @@ -13,7 +13,7 @@ `-dt` - `默认参考音频文本` `-dl` - `默认参考音频语种, "中文","英文","日文","zh","en","ja"` -`-d` - `推理设备, "cuda","cpu"` +`-d` - `推理设备, "cuda","cpu","musa"` `-a` - `绑定地址, 默认"127.0.0.1"` `-p` - `绑定端口, 默认9880, 可在 config.py 中指定` `-fp` - `覆盖 config.py 使用全精度` @@ -124,6 +124,10 @@ import LangSegment from time import time as ttime import torch +try: + import torch_musa +except ImportError: + pass import librosa import soundfile as sf from fastapi import FastAPI, Request, HTTPException @@ -570,7 +574,7 @@ def handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cu parser.add_argument("-dr", "--default_refer_path", type=str, default="", help="默认参考音频路径") parser.add_argument("-dt", "--default_refer_text", type=str, default="", help="默认参考音频文本") parser.add_argument("-dl", "--default_refer_language", type=str, default="", help="默认参考音频语种") -parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu") +parser.add_argument("-d", "--device", type=str, default=g_config.infer_device, help="cuda / cpu / musa") parser.add_argument("-a", "--bind_addr", type=str, default="0.0.0.0", help="default: 0.0.0.0") parser.add_argument("-p", "--port", type=int, default=g_config.api_port, help="default: 9880") parser.add_argument("-fp", "--full_precision", action="store_true", default=False, help="覆盖config.is_half为False, 使用全精度") diff --git a/config.py b/config.py index 1f741285f..d53789ec6 100644 --- a/config.py +++ b/config.py @@ -17,10 +17,23 @@ exp_root = "logs" python_exec = sys.executable or "python" + +infer_device = "cpu" + +# 判断是否有摩尔线程显卡可用 +try: + import torch_musa + use_torch_musa = True +except ImportError: + use_torch_musa = False +if use_torch_musa: + if torch.musa.is_available(): + infer_device = "musa" + is_half=False + print("GPT-SoVITS running on MUSA!") + if torch.cuda.is_available(): infer_device = "cuda" -else: - infer_device = "cpu" webui_port_main = 9874
webui_port_uvr5 = 9873 diff --git a/docs/cn/README.md b/docs/cn/README.md index 5ff8a763b..1d0318f8e 100644 --- a/docs/cn/README.md +++ b/docs/cn/README.md @@ -129,6 +129,42 @@ docker compose -f "docker-compose.yaml" up -d docker run --rm -it --gpus=all --env=is_half=False --volume=G:\GPT-SoVITS-DockerTest\output:/workspace/output --volume=G:\GPT-SoVITS-DockerTest\logs:/workspace/logs --volume=G:\GPT-SoVITS-DockerTest\SoVITS_weights:/workspace/SoVITS_weights --workdir=/workspace -p 9880:9880 -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 --shm-size="16G" -d breakstring/gpt-sovits:xxxxx ``` +### 在摩尔线程显卡(MUSA)运行 + +只能运行在Ubuntu 20.04 LTS(内核版本5.4.X-5.15.X)下,非虚拟机,Intel CORE系列CPU,**目前只支持推理** + +1. 前往[摩尔线程应用商店](https://developer.mthreads.com/sdk/download/musa?equipment=&os=Ubuntu&driverVersion=&version=)下载并按顺序安装`musa driver`、`musa_toolkit`、`mudnn`、`mccl` + +2. 前往[torch_musa](https://github.com/MooreThreads/torch_musa/releases/tag/v1.1.0),根据你的显卡和python版本下载`torch`、`torch_musa`,将文件名`-linux_x86_64`后部分删除,使用以下命令安装 + +``` +pip install torch-2.0.0-cp39-cp39-linux_x86_64.whl +pip install torch_musa-1.1.0-cp39-cp39-linux_x86_64.whl +``` + +3. 安装环境 + +``` +conda install -c conda-forge gcc +conda install -c conda-forge gxx +conda install ffmpeg cmake=3.18 ninja +``` + +之后你需要通过source安装torchaudio,因为摩尔线程官方并没有放出编译好的wheel +``` +git clone https://github.com/pytorch/audio +cd audio +USE_CUDA=0 python setup.py install +``` + +安装其他依赖 + +``` +pip install -r requirements.txt +``` + +4. 
运行`python webui.py` + ## 预训练模型 从 [GPT-SoVITS Models](https://huggingface.co/lj1995/GPT-SoVITS) 下载预训练模型，并将它们放置在 `GPT_SoVITS\pretrained_models` 中。 diff --git a/webui.py b/webui.py index e1c36e1ed..3a6f2109f 100644 --- a/webui.py +++ b/webui.py @@ -79,6 +79,22 @@ # gpu_infos.append("%s\t%s" % ("0", "Apple GPU")) # mem.append(psutil.virtual_memory().total/ 1024 / 1024 / 1024) # 实测使用系统内存作为显存不会爆显存 +# 判断是否有摩尔线程显卡可用 +try: + import torch_musa + use_torch_musa = True +except ImportError: + use_torch_musa = False +if use_torch_musa: + ngpu = torch.musa.device_count() + if torch.musa.is_available(): + for i in range(ngpu): + if_gpu_ok = True + gpu_name = torch.musa.get_device_name(i) + gpu_infos.append("%s\t%s" % (i, gpu_name)) + mem.append(int(torch.musa.get_device_properties(i).total_memory/ 1024/ 1024/ 1024+ 0.4)) + print("GPT-SoVITS running on MUSA!") + if if_gpu_ok and len(gpu_infos) > 0: gpu_info = "\n".join(gpu_infos) default_batch_size = min(mem) // 2