In [5]:
from botocore.exceptions import BotoCoreError, ClientError
from contextlib import closing
from tempfile import gettempdir
import boto3, requests, base64, os, fnmatch, sys, subprocess

In [20]:
class TextToVideo:
    def __init__(self, bucket_name, webui_url, voice_id):
        self.bucket_name = bucket_name
        self.webui_url = webui_url
        self.voice_id = voice_id
        self.s3 = boto3.client("s3")
        self.polly = boto3.client("polly")
    
    def download(self, file_name, output_path):
        try:
            self.s3.download_file(self.bucket_name, file_name, output_path)
        except (BotoCoreError, ClientError) as error:
            print(error)
            sys.exit(-1)
        print(f"Download to {output_path}")
    
    def download_to_tmp(self, file_name, output_file_name):
        output_path = os.path.join(gettempdir(), output_file_name)
        try:
            self.s3.download_file(self.bucket_name, file_name, output_path)
            return output_path
        except (BotoCoreError, ClientError) as error:
            print(error)
            sys.exit(-1)
        print(f"Download to {output_path}")
    
    def tts(self, txt, output_file_name):
        try:
            response = self.polly.synthesize_speech(Text=txt, OutputFormat="mp3", VoiceId=self.voice_id)
        except (BotoCoreError, ClientError) as error:
            print(error)
            sys.exit(-1)
        if "AudioStream" in response:
            with closing(response["AudioStream"]) as stream:
                output = os.path.join(gettempdir(), output_file_name)
                try:
                    with open(output, "wb") as file:
                        file.write(stream.read())
                        self.s3.upload_file(output, self.bucket_name, output_file_name)
                        s3_output_link = f"s3://{self.bucket_name}/{output_file_name}"
                        print(f"Written to {s3_output_link}")
                        return output_file_name
                except IOError as error:
                    print(error)
                    sys.exit(-1)

        else:
            print("could not stream audio")
            sys.exit(-1)

    def gen_img(self, prompt, neg_prompt, output_file_name):
        # webui example: https://346f3750a6a0bc47f8.gradio.live/
        url = self.webui_url + 'sdapi/v1/txt2img'
        data = '{"prompt":"' + prompt + '", "negative_prompt":"' + neg_prompt + '", "width":256, "height":256}'
        headers = {'Content-Type': 'application/json'}
        response = requests.post(url, headers=headers, data=data)
        img_64 = response.json()['images'][0]
        img_data = base64.b64decode(img_64)
        output = os.path.join(gettempdir(), output_file_name)
        try:
            with open(output, "wb") as file:
                file.write(img_data)
                self.s3.upload_file(output, self.bucket_name, output_file_name)
                s3_output_link = f"s3://{self.bucket_name}/{output_file_name}"
                print(f"Written to {s3_output_link}")
                return output_file_name
        except IOError as error:
            print(error)
            sys.exit(-1)
            
    def find(self, pattern, path):
        result = []
        for root, dirs, files in os.walk(path):
            for name in files:
                if fnmatch.fnmatch(name, pattern):
                    result.append(os.path.join(root, name))
        return result

    def gen_sad_talker(self, audio_path, img_path, output_file_name):
        # 绝对路径
        output_dir = os.path.join(gettempdir(), "result/")
        os.chdir('/root/stable-diffusion-webui/extensions/SadTalker')
        ret = os.system('python inference.py --enhancer gfpgan --driven_audio ' + audio_path + ' --source_image ' + img_path + ' --result_dir ' + output_dir)
        if ret == 0:
            mp4_list = self.find('*.mp4', output_dir)
            if len(mp4_list) == 0:
                print('output mp4 file not found')
                sys.exit(-1)
            else:
                output = mp4_list[0]
                print(output)
                self.s3.upload_file(output, self.bucket_name, output_file_name)
                s3_output_link = f"s3://{self.bucket_name}/{output_file_name}"
                print(f"Written to {s3_output_link}")
                return output_file_name
        else:
            print('sadtalker error')
            sys.exit(-1)
    
    def ttv(self, prompt, neg_prompt, transcript):
        self.clear_tmp()
        audio_s3 = self.tts(transcript, "audio.mp3")
        audio_local = self.download_to_tmp(audio_s3, "audio.mp3")
        img_s3 = self.gen_img(prompt, neg_prompt, "img.png")
        img_local = self.download_to_tmp(img_s3, "img.png")
        result_s3 = self.gen_sad_talker(audio_local, img_local, "result.mp4")
        return result_s3
    
    def clear_tmp(self):
        try:
            for root, dirs, files in os.walk(gettempdir()):
                for file in files:
                    file_path = os.path.join(root, file)
                    os.remove(file_path)
            print('/tmp cleared')
        except OSError:
            print('error clearing tmp')
            sys.exit(-1)
        

In [7]:
ttv = TextToVideo("aigc-bj-team1", "http://127.0.0.1:7860/", "Joanna")

In [117]:
!ls /tmp

In [19]:
ttv.download("result.mp4", "/root/Polly/result_1.mp4")
ttv.download("audio.mp3", "/root/Polly/audio_1.mp3")
ttv.download("img.png", "/root/Polly/img_1.png")

Download to /root/Polly/result_1.mp4
Download to /root/Polly/audio_1.mp3
Download to /root/Polly/img_1.png


In [39]:
polly.gen_img("A dog", "A cat", "test.png")

Written to s3://aigc-bj-team1/test.png


's3://aigc-bj-team1/test.png'

In [None]:
prompt = 'full frontal face, complete facial view, no obstructions, close-up portrait, slender German noblewoman, blonde hair, blue eyes, aura of elegance, timeless grace, best quality, ultra detailed, perfect lighting, masterpiece, extremely detailed face, detailed eyes, 8K High definition, Ultra Detailed, High quality texture, intricate details, detailed texture, finely detailed, high detail, extremely detailed cg, High quality shadow, Depth of field, Ray tracing, beautiful, ultra high res'
neg_prompt = 'partial face, side view, cropped face, profile view, full body, distant, NSFW, cartoon, lowres, bad anatomy, text, error, missing facial features, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name, young, loli, elf, 3d, illustration ng_deepnegative_v1_75t, low quality face, low quality eyes, low quality body, low detail clothes'
ttv.ttv(prompt, neg_prompt, 'Hello, world!')

/tmp cleared
Written to s3://aigc-bj-team1/audio.mp3
Written to s3://aigc-bj-team1/img.png
using safetensor as default
3DMM Extraction for source image


landmark Det:: 100%|██████████| 1/1 [00:00<00:00, 10.81it/s]
3DMM Extraction In Video:: 100%|██████████| 1/1 [00:00<00:00, 11.53it/s]
mel:: 100%|██████████| 31/31 [00:00<00:00, 31582.08it/s]
audio2exp:: 100%|██████████| 4/4 [00:00<00:00, 36.43it/s]
Face Renderer:: 100%|██████████| 16/16 [00:08<00:00,  1.98it/s]


In [92]:
shutil.rmtree(gettempdir())

In [103]:
!mkdir /tmp

In [14]:
ttv.tts('hello world', '1.mp3')

Written to s3://aigc-bj-team1/1.mp3


'1.mp3'

In [15]:
ttv.download("1.mp3", "/root/Polly/1.mp3")

Download to /root/Polly/1.mp3


In [12]:
from IPython.display import Audio

# Replace 'path_to_your_mp3.mp3' with the path to your mp3 file
audio_file = '1.mp3'

Audio(audio_file)

ValueError: rate must be specified when data is a numpy array or list of audio samples.