In [None]:
# Install the notebook-side dependencies into the kernel's environment (%pip, not !pip).
# NOTE: Suno's Bark is NOT the PyPI package named "bark" (that name belongs to an
# unrelated project), so it is installed from its GitHub repository instead.
# huggingface_hub is listed explicitly because this notebook imports snapshot_download from it.
%pip install -qU sagemaker boto3 scipy huggingface_hub git+https://github.com/suno-ai/bark.git

In [None]:
%%time

import io
import os
import boto3
import sagemaker
import time
import shutil
import json
from huggingface_hub import snapshot_download

# AWS context for this notebook: the region of the default boto3 session and
# the IAM role SageMaker will assume.
region = boto3.Session().region_name
role = sagemaker.get_execution_role()

# S3 location for code and model artifacts. The SageMaker default bucket is
# used; assign another bucket name here if you prefer.
bucket = sagemaker.Session().default_bucket()
prefix = "bark"

In [None]:
# Download the s5cmd binary used for the S3 sync below.
!curl -L https://github.com/peak/s5cmd/releases/download/v2.0.0/s5cmd_2.0.0_Linux-64bit.tar.gz | tar -xz
# Only the execute bit is needed; do not make the binary world-writable.
!chmod +x s5cmd

# Hugging Face repository to deploy.
repo_id="suno/bark-small"  #change this to yours
# The snapshot is stored in a relative directory named after the repo id
# (e.g. ./suno/bark-small), which is also reused as the S3 key prefix.
local_dir=repo_id
# Create the target directory from repo_id (not a hard-coded org name) and
# tolerate it already existing so the cell can be re-run.
os.makedirs(local_dir, exist_ok=True)

# Download the model files from Hugging Face, skipping the Flax (*.msgpack)
# and TensorFlow (*.h5) weight files.
model_download_path = snapshot_download(repo_id=repo_id,local_dir=local_dir,ignore_patterns=["*.msgpack","*.h5"])
print(model_download_path)
!ls $local_dir

# Upload the model files to s3://<bucket>/<repo_id>/ and list the result.
!./s5cmd sync $local_dir/ s3://$bucket/$repo_id/
!aws s3 ls s3://$bucket/$repo_id/

Prepare the inference code and deploy it to SageMaker

In [None]:
# Directory that will hold the inference code (requirements.txt and
# inference.py) shipped to the endpoint.
source_dir = 'code'

# Start from an empty directory: drop any previous copy, then recreate it.
if os.path.exists(source_dir):
    shutil.rmtree(source_dir)
os.mkdir(source_dir)

In [None]:
%%writefile code/requirements.txt
# Extra Python packages for the inference code in this directory.
# transformers: provides AutoProcessor and BarkModel, imported by inference.py.
transformers
# s3fs: used by inference.py to copy the model files from S3.
s3fs
# NOTE(review): nvgpu and pynvml are not imported by inference.py in this
# notebook - confirm they are still needed.
nvgpu
pynvml

In [None]:
%%writefile code/inference.py
import json
import os
import torch
import s3fs

from transformers import AutoProcessor, BarkModel


# Module-level handler state. `processor` is filled in by model_fn / predict_fn.
model = None
processor = None

# Use the first GPU when CUDA is available, otherwise the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"

# Log the working directory and its contents when the module is imported.
cwd = os.getcwd()
print(f"cwd:{cwd}")
dir_list = os.listdir(cwd)
print("Files and directories in '", cwd, "' :")
print(dir_list)

# Local directory the model files are copied to from S3.
model_local_path = "/tmp/bark/"

def model_fn(model_dir):
    """
    Copy the Bark model files from S3 and load them for inference.

    ``model_dir`` is only logged: the files are not read from it. They are
    copied from the S3 prefix given by the ``model_s3`` environment variable
    into ``model_local_path`` and loaded from there.

    Args:
        model_dir: Model directory passed in by the serving container.

    Returns:
        The ``BarkModel`` loaded in float16 and moved to ``device``. The
        matching processor is stored in the module-level ``processor``.
    """
    global processor
    print(f"model_dir: {model_dir}")

    fs = s3fs.S3FileSystem()
    # The notebook passes model_s3 through the endpoint environment; the
    # literal below is only the fallback used when the variable is missing.
    model_s3 = os.environ.get("model_s3", "s3://sagemaker-us-east-1-845524701534/suno/bark-small/")

    print(f"need copy {model_s3} to {model_local_path}")
    # exist_ok=True: the directory can already exist when another worker
    # process or a reload has called model_fn before; without it the second
    # call fails with FileExistsError.
    os.makedirs(model_local_path, exist_ok=True)
    fs.get(model_s3, model_local_path, recursive=True)
    dir_list = os.listdir(model_local_path)
    print("Files and directories in '", model_local_path, "' :")
    print(dir_list)

    print("download completed")

    print("model_fn start")
    # local_files_only=True: load strictly from the files just copied.
    processor = AutoProcessor.from_pretrained(model_local_path, local_files_only=True)
    print("model_fn start - loaded AutoProcessor")
    model = BarkModel.from_pretrained(model_local_path, local_files_only=True, torch_dtype=torch.float16).to(device)
    print("model_fn start - loaded BarkModel")
    return model


def predict_fn(input_data, model):
    """
    Generate speech for one request.

    Args:
        input_data: Dict with a "text" entry and an optional "voice_preset"
            entry. ``None`` selects a built-in default request.
        model: The model returned by ``model_fn``.

    Returns:
        ``{"output": audio}`` where ``audio`` is the generated waveform moved
        to the CPU, converted to a NumPy array and squeezed.
    """
    global processor
    print("predict_fn start")
    if input_data is None:
        input_data = {"voice_preset":"v2/en_speaker_6","text":"Hello, this is the default text"}

    if model is None or processor is None:
        print("model is None or processor is None. Auto loading")
        # Load the same way model_fn does (local files only, float16) so the
        # fallback path behaves like the normal one.
        processor = AutoProcessor.from_pretrained(model_local_path, local_files_only=True)
        model = BarkModel.from_pretrained(model_local_path, local_files_only=True, torch_dtype=torch.float16).to(device)

    print("inputs start")
    # voice_preset is optional: a request without it passes None to the
    # processor instead of failing with KeyError.
    inputs = processor(input_data["text"], voice_preset=input_data.get("voice_preset")).to(device)

    print("model.generate start")
    audio_array = model.generate(**inputs)
    print("output start")
    output = audio_array.cpu().numpy().squeeze()
    return {"output":output}

def input_fn(request_body, request_content_type):
    """
    Decode the request payload as JSON.

    Args:
        request_body: JSON document sent by the client.
        request_content_type: Content type of the request; not inspected.

    Returns:
        The parsed JSON value.
    """
    print("input_fn start")
    return json.loads(request_body)

In [None]:

# Show which local directory is about to be synced.
print("local_dir=",local_dir)
# Original comment, translated: "sync the files under the code directory to s3".
# NOTE(review): the command below syncs $local_dir (the downloaded model
# snapshot), not the code directory - confirm which was intended.
!./s5cmd sync $local_dir/ s3://$bucket/$repo_id/
# Change to the home directory, then into its SageMaker subdirectory.
# NOTE(review): later cells use relative paths such as ./code/ - presumably the
# notebook is expected to live directly in ~/SageMaker; TODO confirm.
%cd
%cd SageMaker
# Remove any leftover file or directory named "dummy".
!rm -rf dummy

In [None]:
# Build a placeholder model.tar.gz that contains a single empty file and
# upload it to S3; its S3 URI is kept in `model_data`.
!touch dummy
!tar czvf model.tar.gz dummy

# PyTorch framework and Python versions used when the model object is created.
framework_version = '2.1.0'
py_version = 'py310'

model_data = f's3://{bucket}/suno/bark-small/model.tar.gz'
!aws s3 cp model.tar.gz $model_data



In [None]:
from sagemaker.pytorch.model import PyTorchModel
# S3 prefix that inference.py copies the model files from at load time.
# It is derived from repo_id so it always matches the prefix the model files
# were uploaded to (s3://<bucket>/<repo_id>/), even when repo_id is changed.
model_s3 = 's3://{0}/{1}/'.format(bucket, repo_id)
print("model_s3:",model_s3)

# Environment variables set on the endpoint container.
env = {
    'SAGEMAKER_MODEL_SERVER_TIMEOUT':'6000', 
    'SAGEMAKER_MODEL_SERVER_WORKERS': '2', 
    # Response size limits - presumably raised so the JSON-encoded audio fits.
    'MMS_MAX_RESPONSE_SIZE':'65535000',
    'TS_MAX_RESPONSE_SIZE':'65535000',
    # Read by model_fn in inference.py via os.environ.
    'model_s3':model_s3,
}

# Model definition: placeholder archive as model_data, with the handler code
# taken from ./code/ (entry point inference.py).
pytorchModel = PyTorchModel(
    name = None,
    model_data = model_data,
    entry_point = 'inference.py',
    source_dir = "./code/",
    role = role,
    framework_version = framework_version, 
    py_version = py_version,
    env = env
)


In [None]:
%%time
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Deploy the model to a single ml.g5.2xlarge instance; the predictor sends
# requests as JSON and decodes JSON responses.
endpoint_config = dict(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)
predictor = pytorchModel.deploy(**endpoint_config)

In [None]:
# Keep the endpoint name; the boto3 runtime call later in this notebook uses it.
endpoint_name = predictor.endpoint_name
print("endpoint_name: {}".format(endpoint_name)) #copy this endpoint_name

In [None]:
from bark import SAMPLE_RATE, generate_audio, preload_models
from IPython.display import Audio

In [None]:
%%time
# First test: a short English sentence with the v2/en_speaker_9 voice preset.
input_data = dict(
    voice_preset="v2/en_speaker_9",
    text="Hello, this is the default text",
)
predictions = predictor.predict(data=input_data)

In [None]:
# Show the type of the returned waveform, then render an audio player for it.
waveform = predictions["output"]
print("type of output:",type(waveform))
Audio(waveform, rate=SAMPLE_RATE)

In [None]:
%%time
# Invoke the endpoint through the low-level SageMaker runtime client, with
# extended read/connect timeouts (in seconds).
from botocore.config import Config
timeout = 300
config = Config(
    read_timeout=timeout,
    connect_timeout=timeout,
    # Use the session region resolved at the top of the notebook instead of a
    # hard-coded one, so the client looks for the endpoint where it was deployed.
    region_name=region
)
runtime = boto3.Session().client('sagemaker-runtime',config=config)
input_data = {"voice_preset":"v2/zh_speaker_3","text":"你还小看这件事！"}
payload = json.dumps(input_data).encode('utf-8')
response = runtime.invoke_endpoint(EndpointName=endpoint_name,
                                   ContentType='application/json',
                                   Body=payload)
print(response)
# The response body is a stream holding the JSON document produced by the endpoint.
audio_data = response["Body"].read()
print("type:",type(audio_data))
json_obj = json.loads(audio_data)
print("type2",type(json_obj))
wav = json_obj["output"]
print("type3",type(wav))
# Last expression: render an audio player for the returned waveform.
Audio(wav, rate=SAMPLE_RATE)

In [None]:
import json

# json.loads expects JSON *text*. The previous value was a Python set literal
# containing lists, which raises TypeError (lists are unhashable) before any
# parsing happens. Use a JSON array of arrays encoded as a string instead.
input_string = '[["123","1232"],["111","222"]]'
json_obj = json.loads(input_string)
first_object = json_obj[0]

print(first_object)

In [None]:
%%time
# Second test sentence (exclamations), still with the v2/en_speaker_9 preset.
# The last expression renders an audio player for the result.
input_data = dict(
    voice_preset="v2/en_speaker_9",
    text="Are you Crazy!!! What are you going to do!",
)
predictions = predictor.predict(data=input_data)
Audio(predictions["output"], rate=SAMPLE_RATE)

In [None]:
%%time
# Second test sentence (exclamations), still with the v2/en_speaker_9 preset.
# The last expression renders an audio player for the result.
input_data = dict(
    voice_preset="v2/en_speaker_9",
    text="Are you Crazy!!! What are you going to do!",
)
predictions = predictor.predict(data=input_data)
Audio(predictions["output"], rate=SAMPLE_RATE)

In [None]:
%%time
# Second test sentence (exclamations), still with the v2/en_speaker_9 preset.
# The last expression renders an audio player for the result.
input_data = dict(
    voice_preset="v2/en_speaker_9",
    text="Are you Crazy!!! What are you going to do!",
)
predictions = predictor.predict(data=input_data)
Audio(predictions["output"], rate=SAMPLE_RATE)