Use this cell to process multiple videos (batch pipeline).

In [None]:
import whisper
import ffmpeg
import os 
import subprocess
from pytube import YouTube
from whisper.utils import WriteSRT,get_writer
from alive_progress import alive_bar
#subtitle to video using moviepy
import sys
import pysrt
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip


def extract_audio(vid_file, aud_file):
    """Convert the media file ``vid_file`` into ``aud_file`` through ffmpeg.

    The conversion runs synchronously with ffmpeg's log level set to
    "quiet"; once it returns, the destination path is printed.
    """
    # ``y=None`` is passed as a value-less option -- presumably ffmpeg's
    # ``-y`` (overwrite existing output); confirm against ffmpeg-python.
    job = ffmpeg.input(vid_file).output(aud_file, loglevel="quiet", y=None)
    job.run()
    print("find audio mp3 at  ", aud_file)

class YTD:
    """Download a YouTube video to a fixed path and extract its audio track."""

    def __init__(self, vid_path):
        # Destination of the downloaded video; also the ffmpeg input used by
        # ``extract_audio``.
        self.save_path = vid_path

    def extract_audio(self, aud_file):
        """Convert the video at ``self.save_path`` into ``aud_file`` via ffmpeg."""
        source = ffmpeg.input(self.save_path)
        source.output(aud_file, loglevel="quiet", y=None).run()

    def __call__(self, url, resolution='720'):
        """Download the mp4 stream of ``url`` at ``resolution`` to ``self.save_path``.

        Args:
            url: Video URL handed to ``pytube.YouTube``.
            resolution: Vertical resolution without the trailing ``p``.

        Returns:
            None. Failures are reported on stdout instead of being raised, so
            a batch run can carry on with the next link.
        """
        try:
            yt = YouTube(url, use_oauth=False, allow_oauth_cache=True)
            # Kept inside the ``try``: the stream lookup can fail as well
            # (pytube may defer network access until ``streams`` is read --
            # TODO confirm for the installed version).
            stream_list = yt.streams.filter(file_extension='mp4')
        except Exception as err:
            # Previously execution fell through to an unbound ``yt`` here.
            print("Connection Error.Could  not download!", err)
            return

        stream = stream_list.get_by_resolution(f'{resolution}p')
        if stream is None:
            print(f"No mp4 stream at {resolution}p for {url}")
            return
        stream.download(filename=self.save_path)

class audio2text:
    """Bind a Whisper model to one audio file and expose its transcription.

    The model is loaded during construction; ``transcribe`` stores the raw
    result, which the ``get_*`` accessors hand back.
    """

    def __init__(self, audio_path, model_type=None):
        self.audio_path = audio_path
        # A falsy ``model_type`` (None, "") falls back to the 'base' model.
        self.model_type = model_type or 'base'
        self.model = None
        self.result = None
        self.load_model()

    def load_model(self):
        """Load ``self.model_type`` through whisper unless a model is already set."""
        if self.model:
            print("found cached model: skipping")
            return
        self.model = whisper.load_model(self.model_type)

    def get_model(self):
        """Return the loaded model; print a notice and return None if there is none."""
        if not self.model:
            print("model N/A")
            return None
        return self.model

    def transcribe(self):
        """Transcribe ``self.audio_path`` and keep the output in ``self.result``."""
        options = dict(language='en', word_timestamps=True, task="transcribe")
        self.result = self.model.transcribe(audio=self.audio_path, **options)

    def get_text_result(self):
        """Return the ``'text'`` entry of the stored result."""
        return self.result['text']

    def get_raw_result(self):
        """Return the stored result unchanged (None before ``transcribe``)."""
        return self.result

    def get_result_only_please(self):
        """Run ``transcribe`` and return the fresh raw result."""
        self.transcribe()
        return self.result

class SUBS:
    """Write a transcription result to subtitle files with whisper's writers."""

    def __init__(self, result, output_dir="./"):
        self.result = result
        self.output_dir = output_dir
        # Name used when ``create_srt`` / ``create_vtt`` get no ``fname``.
        self.fname = "SUBS."
        # Options forwarded to the writer on every call.
        self.word_options = dict(
            highlight_words=False,
            max_line_count=1,
            max_line_width=24,
        )

    def _writer_for(self, fmt):
        # Build a whisper writer for ``fmt`` targeting ``self.output_dir``.
        return get_writer(output_format=fmt, output_dir=self.output_dir)

    def _write(self, writer, fname):
        # Shared body of create_srt / create_vtt: pick the name, invoke the
        # writer on the joined path, and report the name that was used.
        target = self.fname if fname is None else fname
        writer(self.result, os.path.join(self.output_dir, target), self.word_options)
        return target

    def get_vtt_writer(self):
        """Return a whisper writer for the 'vtt' format."""
        return self._writer_for('vtt')

    def get_srt_writer(self):
        """Return a whisper writer for the 'srt' format."""
        return self._writer_for('srt')

    def create_srt(self, fname=None):
        """Write the result with the SRT writer and return the name used."""
        return self._write(self.get_srt_writer(), fname)

    def create_vtt(self, fname=None):
        """Write the result with the VTT writer and return the name used."""
        return self._write(self.get_vtt_writer(), fname)


class OVERLAY:
    """Composite the subtitles of an SRT file onto a video with moviepy."""

    def __init__(self, video_fname, srt_fname):
        self.srt_fname = srt_fname
        self.video_fname = video_fname
        # splitext strips only the final extension, so paths containing other
        # dots (e.g. "./obama/obama.mp4") no longer break the unpacking that
        # ``split('.')`` required.
        stem, _ext = os.path.splitext(self.video_fname)
        self.output_video_file = stem + '_subtitled_class_.mp4'

    def __call__(self):
        """Render the subtitled video and return the path it was written to."""
        self.video = VideoFileClip(self.video_fname)
        self.subtitles = pysrt.open(self.srt_fname)
        subtitle_clips = self.create_subtitle_clips(self.subtitles, self.video.size)
        final_video = CompositeVideoClip([self.video] + subtitle_clips)
        final_video.write_videofile(self.output_video_file, logger=None)
        return self.output_video_file

    def time_to_seconds(self, time_obj):
        """Convert an object with hours/minutes/seconds/milliseconds to seconds."""
        return (
            time_obj.hours * 3600
            + time_obj.minutes * 60
            + time_obj.seconds
            + time_obj.milliseconds / 1000
        )

    def create_subtitle_clips(self, subtitles, videosize, fontsize=46,
                              font='Komika-Axis', color='white', debug=False):
        """Build one positioned ``TextClip`` per subtitle entry.

        Args:
            subtitles: Iterable of entries exposing ``start``, ``end`` and ``text``.
            videosize: ``(width, height)`` of the target video.
            fontsize: Font size passed to ``TextClip``.
            font: Font name passed to ``TextClip``. It used to be ignored in
                favour of a hard-coded "Komika-Axis"; the default is now that
                same font so existing callers render unchanged.
            color: Text colour passed to ``TextClip`` (previously ignored).
            debug: Unused; kept for signature compatibility.

        Returns:
            List of clips, each with start, duration and position set.
        """
        # Loop-invariant layout: captions take half the frame width and sit
        # horizontally centred at three fifths of the frame height.
        video_width, video_height = videosize
        caption_size = (video_width * 2 / 4, None)
        text_position = ('center', video_height * 3 / 5)

        subtitle_clips = []
        for subtitle in subtitles:
            start_time = self.time_to_seconds(subtitle.start)
            duration = self.time_to_seconds(subtitle.end) - start_time
            text_clip = TextClip(
                subtitle.text,
                fontsize=fontsize,
                font=font,
                color=color,
                bg_color='black',
                size=caption_size,
                method='caption',
            ).set_start(start_time).set_duration(duration)
            subtitle_clips.append(text_clip.set_position(text_position))
        return subtitle_clips



if __name__ == "__main__":
    # Batch pipeline: download every link and extract its audio to
    # <root>/wav/<fname>_<index>.wav.
    embed_subs = False
    fname = "mkbhd"
    root = f"{fname}"

    # The wav files live in a sub-directory that nothing else creates; make
    # the whole tree up front so the audio extraction has somewhere to write.
    os.makedirs(os.path.join(root, 'wav'), exist_ok=True)

    # Index given to the first audio file (mkbhd_8.wav, mkbhd_9.wav, ...).
    first_index = 8

    links = [
        'https://www.youtube.com/shorts/VlIeANvowso',
        'https://www.youtube.com/shorts/pHhpXrIOwsY',
        'https://www.youtube.com/shorts/wpBu5ej7m8g',
        'https://www.youtube.com/shorts/FNnK1J-BdiM',
        'https://www.youtube.com/shorts/tciqW8VKfOU',
        'https://www.youtube.com/shorts/UXB9bnuyZb4',
        'https://www.youtube.com/shorts/DO1PJYAQj0c',
        'https://www.youtube.com/shorts/REVveXIMk7c',
        'https://www.youtube.com/shorts/K6gtEGnbR6I',
        'https://www.youtube.com/shorts/KG6Nu72Avqg',
        'https://www.youtube.com/shorts/4PZSB9bV1Zg',
        'https://www.youtube.com/shorts/NEZSa5NEREs',
        'https://www.youtube.com/shorts/a01edYZIfjc',
        'https://www.youtube.com/shorts/bi1XS6s1rBY',
        'https://www.youtube.com/shorts/2m_y5d68xMM',
    ]

    # bar() is called twice per link (download, audio extraction), so the
    # total must scale with the number of links. Raise this when re-enabling
    # the commented-out steps below, which tick the bar as well.
    steps_per_link = 2

    with alive_bar(steps_per_link * len(links), force_tty=True) as bar:
        for index, url in enumerate(links, start=first_index):
            audio_fname = f"{fname}_{index}.wav"
            # NOTE: the video path is the same for every link, so only the
            # per-link wav files are distinct.
            video_fname = f"{fname}.mp4"
            vid_path = os.path.join(root, video_fname)
            audio_path = os.path.join(root, 'wav', audio_fname)

            # download video from YT
            yt = YTD(vid_path)
            yt(url, "720")
            bar()

            # convert the downloaded video to audio
            yt.extract_audio(audio_path)
            bar()

            # Disabled transcription / subtitle steps:
            # sub_fname = f"{fname}.srt"
            # sub_path = os.path.join(root,sub_fname)
            # #whisper to transcribe audio_path mp3 to text!

            # converter = audio2text(audio_path,'small')
            # bar()

            # result = converter.get_result_only_please()
            # #make a srt/vtt from transcribed rawfile

            # subtitle = SUBS(result, output_dir = root)
            # subtitle.create_srt(sub_fname)
            # bar()

            # if embed_subs:
            #     sub_overlayer = OVERLAY(vid_path,sub_path)
            #     result_fname  = sub_overlayer()

            # print("Finished")
            # bar()
    



Use this cell for the normal single-file pipeline.

In [9]:
import whisper
import ffmpeg
import os 
import subprocess
from pytube import YouTube
from whisper.utils import WriteSRT,get_writer
from alive_progress import alive_bar
#subtitle to video using moviepy
import sys
import pysrt
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip


def extract_audio(vid_file, aud_file):
    """Convert the media file ``vid_file`` into ``aud_file`` through ffmpeg.

    The conversion runs synchronously with ffmpeg's log level set to
    "quiet"; once it returns, the destination path is printed.
    """
    # ``y=None`` is passed as a value-less option -- presumably ffmpeg's
    # ``-y`` (overwrite existing output); confirm against ffmpeg-python.
    job = ffmpeg.input(vid_file).output(aud_file, loglevel="quiet", y=None)
    job.run()
    print("find audio mp3 at  ", aud_file)

class YTD:
    """Download a YouTube video to a fixed path and extract its audio track."""

    def __init__(self, vid_path):
        # Destination of the downloaded video; also the ffmpeg input used by
        # ``extract_audio``.
        self.save_path = vid_path

    def extract_audio(self, aud_file):
        """Convert the video at ``self.save_path`` into ``aud_file`` via ffmpeg."""
        source = ffmpeg.input(self.save_path)
        source.output(aud_file, loglevel="quiet", y=None).run()

    def __call__(self, url, resolution='720'):
        """Download the mp4 stream of ``url`` at ``resolution`` to ``self.save_path``.

        Args:
            url: Video URL handed to ``pytube.YouTube``.
            resolution: Vertical resolution without the trailing ``p``.

        Returns:
            None. Failures are reported on stdout instead of being raised, so
            the calling script can decide how to continue.
        """
        try:
            yt = YouTube(url, use_oauth=False, allow_oauth_cache=True)
            # Kept inside the ``try``: the stream lookup can fail as well
            # (pytube may defer network access until ``streams`` is read --
            # TODO confirm for the installed version).
            stream_list = yt.streams.filter(file_extension='mp4')
        except Exception as err:
            # Previously execution fell through to an unbound ``yt`` here.
            print("Connection Error.Could  not download!", err)
            return

        stream = stream_list.get_by_resolution(f'{resolution}p')
        if stream is None:
            print(f"No mp4 stream at {resolution}p for {url}")
            return
        stream.download(filename=self.save_path)

class audio2text:
    """Bind a Whisper model to one audio file and expose its transcription.

    The model is loaded during construction; ``transcribe`` stores the raw
    result, which the ``get_*`` accessors hand back.
    """

    def __init__(self, audio_path, model_type=None):
        self.audio_path = audio_path
        # A falsy ``model_type`` (None, "") falls back to the 'base' model.
        self.model_type = model_type or 'base'
        self.model = None
        self.result = None
        self.load_model()

    def load_model(self):
        """Load ``self.model_type`` through whisper unless a model is already set."""
        if self.model:
            print("found cached model: skipping")
            return
        self.model = whisper.load_model(self.model_type)

    def get_model(self):
        """Return the loaded model; print a notice and return None if there is none."""
        if not self.model:
            print("model N/A")
            return None
        return self.model

    def transcribe(self):
        """Transcribe ``self.audio_path`` and keep the output in ``self.result``."""
        options = dict(language='en', word_timestamps=True, task="transcribe")
        self.result = self.model.transcribe(audio=self.audio_path, **options)

    def get_text_result(self):
        """Return the ``'text'`` entry of the stored result."""
        return self.result['text']

    def get_raw_result(self):
        """Return the stored result unchanged (None before ``transcribe``)."""
        return self.result

    def get_result_only_please(self):
        """Run ``transcribe`` and return the fresh raw result."""
        self.transcribe()
        return self.result

class SUBS:
    """Write a transcription result to subtitle files with whisper's writers."""

    def __init__(self, result, output_dir="./"):
        self.result = result
        self.output_dir = output_dir
        # Name used when ``create_srt`` / ``create_vtt`` get no ``fname``.
        self.fname = "SUBS."
        # Options forwarded to the writer on every call.
        self.word_options = dict(
            highlight_words=False,
            max_line_count=1,
            max_line_width=24,
        )

    def _writer_for(self, fmt):
        # Build a whisper writer for ``fmt`` targeting ``self.output_dir``.
        return get_writer(output_format=fmt, output_dir=self.output_dir)

    def _write(self, writer, fname):
        # Shared body of create_srt / create_vtt: pick the name, invoke the
        # writer on the joined path, and report the name that was used.
        target = self.fname if fname is None else fname
        writer(self.result, os.path.join(self.output_dir, target), self.word_options)
        return target

    def get_vtt_writer(self):
        """Return a whisper writer for the 'vtt' format."""
        return self._writer_for('vtt')

    def get_srt_writer(self):
        """Return a whisper writer for the 'srt' format."""
        return self._writer_for('srt')

    def create_srt(self, fname=None):
        """Write the result with the SRT writer and return the name used."""
        return self._write(self.get_srt_writer(), fname)

    def create_vtt(self, fname=None):
        """Write the result with the VTT writer and return the name used."""
        return self._write(self.get_vtt_writer(), fname)


class OVERLAY:
    """Composite the subtitles of an SRT file onto a video with moviepy."""

    def __init__(self, video_fname, srt_fname):
        self.srt_fname = srt_fname
        self.video_fname = video_fname
        # splitext strips only the final extension, so paths containing other
        # dots (e.g. "./obama/obama.mp4") no longer break the unpacking that
        # ``split('.')`` required.
        stem, _ext = os.path.splitext(self.video_fname)
        self.output_video_file = stem + '_subtitled_class_.mp4'

    def __call__(self):
        """Render the subtitled video and return the path it was written to."""
        self.video = VideoFileClip(self.video_fname)
        self.subtitles = pysrt.open(self.srt_fname)
        subtitle_clips = self.create_subtitle_clips(self.subtitles, self.video.size)
        final_video = CompositeVideoClip([self.video] + subtitle_clips)
        final_video.write_videofile(self.output_video_file, logger=None)
        return self.output_video_file

    def time_to_seconds(self, time_obj):
        """Convert an object with hours/minutes/seconds/milliseconds to seconds."""
        return (
            time_obj.hours * 3600
            + time_obj.minutes * 60
            + time_obj.seconds
            + time_obj.milliseconds / 1000
        )

    def create_subtitle_clips(self, subtitles, videosize, fontsize=46,
                              font='Komika-Axis', color='white', debug=False):
        """Build one positioned ``TextClip`` per subtitle entry.

        Args:
            subtitles: Iterable of entries exposing ``start``, ``end`` and ``text``.
            videosize: ``(width, height)`` of the target video.
            fontsize: Font size passed to ``TextClip``.
            font: Font name passed to ``TextClip``. It used to be ignored in
                favour of a hard-coded "Komika-Axis"; the default is now that
                same font so existing callers render unchanged.
            color: Text colour passed to ``TextClip`` (previously ignored).
            debug: Unused; kept for signature compatibility.

        Returns:
            List of clips, each with start, duration and position set.
        """
        # Loop-invariant layout: captions take half the frame width and sit
        # horizontally centred at three fifths of the frame height.
        video_width, video_height = videosize
        caption_size = (video_width * 2 / 4, None)
        text_position = ('center', video_height * 3 / 5)

        subtitle_clips = []
        for subtitle in subtitles:
            start_time = self.time_to_seconds(subtitle.start)
            duration = self.time_to_seconds(subtitle.end) - start_time
            text_clip = TextClip(
                subtitle.text,
                fontsize=fontsize,
                font=font,
                color=color,
                bg_color='black',
                size=caption_size,
                method='caption',
            ).set_start(start_time).set_duration(duration)
            subtitle_clips.append(text_clip.set_position(text_position))
        return subtitle_clips



if __name__ == "__main__":
    # Single-file pipeline: download one video, extract its audio and,
    # optionally, transcribe it to an SRT file and overlay the subtitles.
    embed_subs = False
    get_subs = False

    # One tick per bar() call below: download, audio extraction, two for the
    # subtitle stage (model loaded, SRT written) and one for the overlay.
    n = 2 + 2 * get_subs + embed_subs

    with alive_bar(n, force_tty=True) as bar:
        fname = "obama"
        root = f"{fname}"
        os.makedirs(root, exist_ok=True)

        url = "https://www.youtube.com/watch?v=YJzLC-AAWHw"
        audio_fname = f"{fname}.wav"
        video_fname = f"{fname}.mp4"
        vid_path = os.path.join(root, video_fname)
        audio_path = os.path.join(root, audio_fname)
        # Defined unconditionally: the overlay step reads sub_path even when
        # get_subs is off (e.g. reusing an SRT left by an earlier run).
        sub_fname = f"{fname}.srt"
        sub_path = os.path.join(root, sub_fname)

        # download video from YT
        yt = YTD(vid_path)
        yt(url, "720")
        bar()

        # convert the downloaded video to audio
        yt.extract_audio(audio_path)
        bar()

        if get_subs:
            # whisper transcribes the extracted audio to text
            converter = audio2text(audio_path, 'small')
            bar()

            result = converter.get_result_only_please()

            # make an srt file from the raw transcription
            subtitle = SUBS(result, output_dir=root)
            subtitle.create_srt(sub_fname)
            bar()

        if embed_subs:
            sub_overlayer = OVERLAY(vid_path, sub_path)
            result_fname = sub_overlayer()
            bar()

        print("Finished")




on 2: Finished                                                                  
|████████████████████████████████████████| 2/2 [100%] in 30.4s (0.04/s)         


In [None]:
import google.generativeai as palm
import os
# SECURITY: an API key used to be hard-coded on this line. It is now read from
# the environment; the key that was committed should be revoked and rotated.
api_key = os.environ.get('PALM_API_KEY')
if not api_key:
    raise RuntimeError("Set the PALM_API_KEY environment variable before running this cell.")
palm.configure(api_key=api_key)

# Prompt sent to the model; fill in before running.
p = ""

response = palm.generate_text(prompt=p)
print(response.result) #  'cold.'