# Import Packages

In [None]:
import os
import re
import requests

from openai import OpenAI
from pytube import YouTube
from pydub import AudioSegment

from moviepy.editor import *
from moviepy.video.io.VideoFileClip import VideoFileClip
import moviepy.video.fx.crop as crop_vid

# Set Up Environment Variables

In [None]:
def check_directory(path):
    """Create the directory ``path`` (with parents) when nothing exists there.

    A confirmation line is printed only when the directory is created; if the
    path already exists the function returns without doing anything.
    """
    if os.path.exists(path):
        return

    os.makedirs(path)
    print(f"{path} created successfully")

In [None]:
# OpenAI client used by the transcription, chat, and image helpers below.
client = OpenAI()

# Output folders, one per kind of artifact the notebook produces.
VIDEO_PATH = "./video/"
AUDIO_PATH = "./audio/"
SERMON_PATH = "./sermon/"
SOCIAL_POST_PATH = "./socialpost/"
SUBTITLE_PATH = "./subtitle/"
SHORTS_PATH = "./shorts/"
IMAGE_PATH = "./image/"

# Make sure every output folder exists before any step writes into it.
for _output_dir in (
    VIDEO_PATH,
    AUDIO_PATH,
    SERMON_PATH,
    SOCIAL_POST_PATH,
    SUBTITLE_PATH,
    SHORTS_PATH,
    IMAGE_PATH,
):
    check_directory(_output_dir)

# Function Definition

In [None]:
def download_video(video_url):
    """Download the highest-resolution stream of a YouTube video to VIDEO_PATH.

    The download is best-effort: any failure (bad URL, no stream, network
    error) is reported on stdout instead of being raised.

    Args:
        video_url: URL of the YouTube video to fetch.

    Returns:
        The value returned by the stream's ``download`` call on success,
        or ``None`` when the download failed.
    """
    print("Downloading...")
    try:
        stream = YouTube(video_url).streams.get_highest_resolution()
        if stream is None:
            raise ValueError("no downloadable stream found")
        saved_path = stream.download(output_path=VIDEO_PATH)
    except Exception as error:
        # Keep the notebook running, but never fall through to the success
        # message after a failed download.
        print(f"Error has occured video can not be downloaded: {error}")
        return None

    print(f"Download is completed successfully for {video_url}")
    return saved_path

In [None]:
def extract_audio():
    """Extract the audio track of the first file in VIDEO_PATH as an MP3.

    The audio file is written to AUDIO_PATH under the video's base name with
    a ``.mp3`` extension.

    Raises:
        FileNotFoundError: If VIDEO_PATH contains no files.
        ValueError: If the video has no audio track.
    """
    video_files = os.listdir(VIDEO_PATH)
    if not video_files:
        raise FileNotFoundError(f"No video file found in {VIDEO_PATH}")

    video_name = video_files[0]
    # Swap only the real extension; a plain str.replace("mp4", "mp3") would
    # also rewrite "mp4" inside the title and would ignore other containers.
    audio_name = os.path.splitext(video_name)[0] + ".mp3"

    clip = VideoFileClip(VIDEO_PATH + video_name)
    try:
        if clip.audio is None:
            raise ValueError(f"{video_name} has no audio track")
        clip.audio.write_audiofile(AUDIO_PATH + audio_name)
    finally:
        # Release the reader resources held by the clip.
        clip.close()

In [None]:
def compress_audio(max_bytes=25000000):
    """Shrink the first audio file in AUDIO_PATH when it exceeds ``max_bytes``.

    Files at or below the limit are left untouched. Larger files are
    re-encoded as mono, 16 kHz, 32 kbit/s MP3 and written next to the
    original with a ``compressed_`` prefix.

    Args:
        max_bytes: Largest size, in bytes, that is left uncompressed.
            NOTE(review): the default presumably mirrors the 25 MB upload
            limit of the transcription API - confirm.

    Returns:
        The name (without directory) of the file to transcribe: the original
        name when no compression was needed, otherwise the compressed name.

    Raises:
        FileNotFoundError: If AUDIO_PATH contains no files.
    """
    audio_files = os.listdir(AUDIO_PATH)
    if not audio_files:
        raise FileNotFoundError(f"No audio file found in {AUDIO_PATH}")

    audio_name = audio_files[0]
    source_path = AUDIO_PATH + audio_name
    if os.path.getsize(source_path) <= max_bytes:
        return audio_name

    audio = AudioSegment.from_file(source_path)

    # Set output parameters
    channels = 1  # mono
    frame_rate = 16000  # sample rate
    bit_rate = "32k"  # bit rate

    # Audio transcode
    output_audio = audio.set_channels(channels).set_frame_rate(frame_rate)

    # Save output audio
    output_name = "compressed_" + audio_name
    output_audio.export(AUDIO_PATH + output_name, format="mp3", bitrate=bit_rate)

    # Return the name on this path too, so both branches behave alike.
    return output_name


In [None]:
def get_transcript():
    """Transcribe the audio in AUDIO_PATH to SRT text with Whisper.

    A ``compressed_*`` file is preferred when one exists; otherwise an
    ``.mp3`` file is used. Only one file is uploaded.

    Returns:
        The SRT transcript with every space replaced by "，".

    Raises:
        FileNotFoundError: If AUDIO_PATH holds no suitable audio file.
    """
    names = sorted(os.listdir(AUDIO_PATH))
    compressed = [name for name in names if name.startswith("compressed_")]
    originals = [
        name for name in names
        if name.endswith(".mp3") and not name.startswith("compressed_")
    ]

    # Prefer the compressed copy so the oversized original is not uploaded too.
    candidates = compressed or originals
    if not candidates:
        raise FileNotFoundError(f"No audio file found in {AUDIO_PATH}")

    # The context manager closes the handle even if the API call raises.
    with open(os.path.join(AUDIO_PATH, candidates[-1]), "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            language="zh",
            response_format="srt"
        )

    # NOTE: this also turns " --> " on the timing lines into "，-->，";
    # extract_text_from_srt matches that form, so it is kept unchanged.
    return transcript.replace(" ", "，")

In [None]:
def save_file(text, path, filename):
    """Write ``text`` to the file ``path + filename`` as UTF-8.

    Args:
        text: String content to write.
        path: Directory prefix; it is concatenated as-is, so it must already
            end with a path separator.
        filename: Name of the file to create or overwrite.
    """
    # Explicit UTF-8 keeps Chinese text writable on platforms whose default
    # encoding cannot represent it.
    with open(path + filename, "w", encoding="utf-8") as file:
        file.write(text)

In [None]:
def extract_srt_text(srt_string):
    """Collapse the cue text of an SRT string into one string.

    Cues are recognised by an index line, a ``start --> end`` timing line and
    the text that follows, up to the next line starting with a digit or the
    end of the input. Line breaks inside a cue become "，" and the cues are
    concatenated without a separator.

    Returns:
        A one-element list holding the concatenated text.
    """
    cue_pattern = r'\d+\n(\d+:\d+:\d+,\d+) --> (\d+:\d+:\d+,\d+)\n(.+?)(?=\n\d+|$)'

    merged = ''.join(
        cue.group(3).replace('\n', '，')
        for cue in re.finditer(cue_pattern, srt_string, re.DOTALL)
    )
    return [merged]

In [None]:
def extract_text_from_srt(srt_string):
    """Return the spoken text of an SRT string, joined by single spaces.

    Accepts both the standard " --> " separator on timing lines and the
    "，-->，" form produced when every space in the transcript has been
    replaced by "，". The final cue is captured even when the input does not
    end with a blank line.

    Args:
        srt_string: SRT-formatted subtitle text.

    Returns:
        The stripped text of every cue joined with " "; an empty string when
        no cue is found.
    """
    pattern = re.compile(
        r'\d+\n\d{2}:\d{2}:\d{2},\d{3}(?: |，)-->(?: |，)\d{2}:\d{2}:\d{2},\d{3}\n'
        # A cue ends at the first blank line, or at end of input for the last one.
        r'(.*?)(?:\n\n|\Z)',
        re.DOTALL,
    )
    matches = pattern.findall(srt_string)

    cleaned_matches = [match.strip() for match in matches]

    return ' '.join(cleaned_matches)

In [None]:
def generate_sermon(text):
    """Turn a raw transcript into a structured sermon via the chat model.

    The system prompt asks the model to add punctuation, organise the text
    into readable sections, give the result a title and give each section a
    subheading.

    Args:
        text: Transcript text sent as the user message.

    Returns:
        The content of the first choice in the chat completion.
    """
    system_prompt = "你是一位牧師，下面將提供逐字稿，請加上適當的標點符號，整理成有結構且易讀的內容，並且為產出的內容適當的命名，同時為每一段下一個小標題。"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def generate_social_post(text, style):
    """Generate social media posts from a sermon via the chat model.

    The system prompt asks for five posts with hashtags, written in the
    requested style.

    Args:
        text: Sermon text sent as the user message.
        style: Free-form style description embedded in the system prompt.

    Returns:
        The content of the first choice in the chat completion.
    """
    system_prompt = f"你是一個厲害的社群媒體經營者，下面將提供牧師的講章，請產生五篇社群貼文，並加上主題標籤。風格：{style}"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def generate_clip(text, max_attempts=5):
    """Ask the chat model for short-video time ranges taken from an SRT file.

    The model is asked for five segments of about 60 seconds. Its reply is
    scanned for ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` ranges; when none are
    found the request is repeated, up to ``max_attempts`` times in total.

    Args:
        text: Subtitle text sent as the user message.
        max_attempts: Maximum number of requests before giving up.

    Returns:
        A list of time-range strings found in the model's reply.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        RuntimeError: If no attempt produced a time range.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    time_range = re.compile(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}')

    # A bounded loop replaces unbounded recursion, so a model that never
    # follows the format cannot cause endless API calls.
    for attempt in range(1, max_attempts + 1):
        response = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=[
                {"role": "system", "content": "你是一個厲害的短影音剪輯師，下面將提供一份字幕檔，請根據字幕檔的內容，給予五個你認為可以剪輯成長度為60秒的短影音段落。時間段落只是請依照SRT字幕檔的格式：時間 --> 時間"},
                {"role": "user", "content": text},
            ],
        )
        clips = response.choices[0].message.content

        matches = time_range.findall(clips)
        if matches:
            return matches

        if attempt < max_attempts:
            print("Clips not found, trying again")

    raise RuntimeError(f"No clip time ranges found after {max_attempts} attempts")

In [None]:
def generate_shorts(matches):
    """Cut vertical (1080:1920) short videos out of the first file in VIDEO_PATH.

    Each entry of ``matches`` is a ``"start --> end"`` time range. The range
    is cut from the source video, centre-cropped to the target aspect ratio,
    and written to SHORTS_PATH as ``shorts_<n>.mp4`` with ``n`` starting at 1.

    Args:
        matches: Iterable of ``"HH:MM:SS,mmm --> HH:MM:SS,mmm"`` strings.

    Raises:
        FileNotFoundError: If VIDEO_PATH contains no files.
    """
    if not matches:
        return

    video_files = os.listdir(VIDEO_PATH)
    if not video_files:
        raise FileNotFoundError(f"No video file found in {VIDEO_PATH}")

    target_ratio = 1080 / 1920

    # Open the source once and reuse it for every clip, rather than opening a
    # new reader per time range and never closing it.
    source = VideoFileClip(os.path.join(VIDEO_PATH, video_files[0]))
    try:
        for file_num, match in enumerate(matches, start=1):
            start_time, end_time = (part.strip() for part in match.split("-->"))
            video = source.subclip(start_time, end_time)

            w, h = video.size
            if w / h > target_ratio:
                # The video is wider than the desired aspect ratio, crop the width
                crop_width, crop_height = int(h * target_ratio), h
            else:
                # The video is taller than the desired aspect ratio, crop the height
                crop_width, crop_height = w, int(w / target_ratio)

            video = crop_vid.crop(
                video,
                width=crop_width,
                height=crop_height,
                x_center=w / 2,
                y_center=h / 2,
            )

            output_file = SHORTS_PATH + f"shorts_{file_num}.mp4"
            video.write_videofile(
                output_file,
                codec='mpeg4',
                audio_codec='aac',
                temp_audiofile='temp-audio.m4a',
                remove_temp=True
            )
            # Report the name that was actually written.
            print(f"Saved file {output_file}")
    finally:
        source.close()

In [None]:
def generate_social_post_image(prompt, style):
    """Generate one social-post image with DALL-E 3 and save it locally.

    The image URL returned by the API is printed, then downloaded to
    ``IMAGE_PATH + "image.png"`` (an existing file there is overwritten).
    Download failures are reported on stdout rather than raised.

    Args:
        prompt: Description of the desired image content.
        style: Style description embedded in the generation prompt.
    """
    response = client.images.generate(
        model="dall-e-3",
        prompt=f"請針對以下內容，設計適合的社群貼文圖，但不要出現文字。\n 風格：{style}。\n" + prompt,
        size="1024x1024",
        quality="standard",
        n=1
    )
    image_url = response.data[0].url
    print(image_url)

    try:
        # Without a timeout a stalled connection would block indefinitely.
        image = requests.get(image_url, timeout=60)
    except requests.RequestException as error:
        print("Failed to download image.", error)
        return

    if image.status_code == 200:
        with open(IMAGE_PATH + "image.png", "wb") as f:
            f.write(image.content)
        print("Image downloaded successfully.")
    else:
        print("Failed to download image. Status code:", image.status_code)

# Main

In [None]:
# Fetch the video, pull its audio track, then shrink the audio if it is too
# large. compress_audio() returns early for small files, so the call is safe
# for every input; without it no "compressed_" file is ever produced for
# get_transcript to pick up.
download_video(input("Please Enter The YouTube URL: "))
extract_audio()
compress_audio()

In [None]:
# Transcribe the audio in AUDIO_PATH to SRT text and keep a copy as
# subtitle.srt under SUBTITLE_PATH.
transcript = get_transcript()
save_file(transcript, SUBTITLE_PATH, "subtitle.srt")

In [None]:
# Drop cue numbers and timing lines, keeping only the spoken text joined by spaces.
srt_text = extract_text_from_srt(transcript)

In [None]:
# Have the chat model restructure the plain text into a sermon and save it
# as sermon.txt under SERMON_PATH.
sermon = generate_sermon(srt_text)
save_file(sermon, SERMON_PATH, "sermon.txt")

In [None]:
# Ask the user for a post style, generate social posts from the sermon, and
# save them as social_post.txt under SOCIAL_POST_PATH.
social_post_style = input("請輸入您想生成的社群貼文風格。例如：有趣、幽默等")
social_post = generate_social_post(sermon, social_post_style)
save_file(social_post, SOCIAL_POST_PATH, "social_post.txt")

In [None]:
# Ask the chat model for clip candidates; the result is a list of
# "HH:MM:SS,mmm --> HH:MM:SS,mmm" strings.
times = generate_clip(transcript)

In [None]:
# Cut each time range from the video in VIDEO_PATH, crop it to a 1080:1920
# aspect ratio, and write the results under SHORTS_PATH.
generate_shorts(times)

In [None]:
# Ask the user for image content and style, then generate one image and
# download it to IMAGE_PATH + "image.png".
image_prompt = input("請輸入您想生成的圖片內容。")
image_style = input("請輸入您想要生成的圖片風格。例如：簡約、卡通等")
generate_social_post_image(image_prompt, image_style)