In [2]:
import whisper
import torch
from deep_translator import GoogleTranslator
from pytube import YouTube
import ffmpeg
import pandas as pd
import os
import moviepy.editor as mp
from moviepy.editor import *
import cv2
from moviepy.video.tools.subtitles import SubtitlesClip
from moviepy.config import change_settings
# Tell MoviePy where the ImageMagick executable is installed.
IMAGEMAGICK_BINARY = r"C:\Program Files\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
change_settings({"IMAGEMAGICK_BINARY": IMAGEMAGICK_BINARY})

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Load the shared Whisper "medium" checkpoint; weights are looked up / stored
# under WHISPER_MODEL_DIR.
WHISPER_MODEL_DIR = r"D:\WHISPER MODELS"
model = whisper.load_model("medium", download_root=WHISPER_MODEL_DIR)

In [3]:
class Transcriptor:
    """Language detection and single-window decoding using the shared Whisper ``model``.

    The audio passed in is padded/trimmed to fit 30 seconds, so only that
    window is analysed by ``detect_language`` and ``transcribe``.
    """

    # Kept for backward compatibility with code that reads ``Transcriptor.DEVICE``.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

    def __init__(self, audio):
        """Prepare the log-Mel spectrogram for ``audio``.

        Args:
            audio: waveform as accepted by ``whisper.pad_or_trim``
                (e.g. the return value of ``whisper.load_audio``).
        """
        # load audio and pad/trim it to fit 30 seconds
        self.audio = whisper.pad_or_trim(audio)
        # Make the log-Mel spectrogram and move it to the device the model is
        # actually on; a separately computed device string could disagree with
        # where the model was loaded.
        self.mel = whisper.log_mel_spectrogram(self.audio).to(model.device)

    def detect_language(self):
        """Detect the spoken language, print it and return its language code."""
        _, probs = model.detect_language(self.mel)
        language = max(probs, key=probs.get)
        print(f"Detected language: {language}")
        return language

    def transcribe(self):
        """Decode the prepared audio window, print the text and return it.

        Returns:
            The recognized text (also printed).
        """
        # Reuse the detected language so decode() does not have to detect it again.
        language = self.detect_language()
        options = whisper.DecodingOptions(language=language, fp16=False)
        result = whisper.decode(model, self.mel, options)
        print(result.text)
        return result.text

In [4]:
class Custom_Translator:
    """Copy a video into a project folder, transcribe it with Whisper and burn captions.

    Construction does the heavy work: it creates ``BASE_DIR/<name>``, re-writes
    the uploaded video and its audio track there, and runs
    ``model.transcribe(..., task='translate')`` on the audio.

    Attributes set by ``__init__``:
        name, uploaded_vid, input_file, audio_file, output_name: user inputs.
        result: the dictionary returned by ``model.transcribe``.
        untranslated: ``result['text']`` (the Whisper translate-task output).
        language: language code; empty until ``find_language`` or
            ``convert_to_tamil`` sets it.
        translated: Tamil text; empty until ``convert_to_tamil`` fills it.
    """

    # Root folder that holds one sub-folder per project.
    BASE_DIR = "D:/captiontesting"

    def __init__(self, name=None, uploaded_vid=None, input_file=None,
                 audio_file=None, output_name=None):
        """Collect inputs, stage the media files and run the transcription.

        Every argument is optional; any argument left as ``None`` is asked for
        interactively with ``input()``, in the same order as before, so
        ``Custom_Translator()`` behaves exactly as it always did.

        Args:
            name: project name (sub-folder of ``BASE_DIR``).
            uploaded_vid: path of the video file to translate.
            input_file: file name (with extension) for the staged video copy.
            audio_file: file name for the extracted audio.
            output_name: file name for the captioned video.

        Raises:
            ValueError: if the uploaded video has no audio track.
        """
        self.name = name if name is not None else input("Name of the project: ")
        self.uploaded_vid = (uploaded_vid if uploaded_vid is not None
                             else input("Path of the video file you want to translate: "))
        self.input_file = (input_file if input_file is not None
                           else input("Name of the video file along with the extension: "))
        self.audio_file = (audio_file if audio_file is not None
                           else input("Name of the audio file: "))
        self.output_name = (output_name if output_name is not None
                            else input("Name of the captioned video: "))

        # Forward slashes avoid the invalid "\c" / "\{" escapes the old
        # back-slashed f-string relied on.
        project_dir = f"{self.BASE_DIR}/{self.name}"
        if not os.path.exists(project_dir):
            os.makedirs(project_dir)
            print(f"{self.name} created.")

        audio_file_path = self._project_path(self.audio_file)
        my_clip = mp.VideoFileClip(self.uploaded_vid)
        try:
            if my_clip.audio is None:
                raise ValueError(
                    f"{self.uploaded_vid} has no audio track to transcribe."
                )
            my_clip.write_videofile(self._project_path(self.input_file))
            my_clip.audio.write_audiofile(audio_file_path)
        finally:
            # Release the readers held by the clip.
            my_clip.close()

        print("Starting Transcription")
        self.result = model.transcribe(audio_file_path, task='translate', fp16=False)
        self.untranslated = self.result['text']
        self.language = ""
        self.translated = ""
        print("Transcription done successfully!")

    def _project_path(self, filename):
        """Return the path of ``filename`` inside this project's folder."""
        return f"{self.BASE_DIR}/{self.name}/{filename}"

    @staticmethod
    def _segments_to_subtitles(segments):
        """Turn transcription segments into ``((start, end), text)`` tuples.

        Timestamps are kept as floats; truncating them to whole seconds shifts
        captions and gives sub-second segments a zero-length interval.
        """
        return [
            ((float(seg['start']), float(seg['end'])), seg['text'])
            for seg in segments
        ]

    @staticmethod
    def _frame_size(video_path):
        """Return ``(width, height)`` of the first frame of ``video_path``.

        Raises:
            OSError: if no frame can be read from the file.
        """
        vidcap = cv2.VideoCapture(video_path)
        try:
            success, image = vidcap.read()
        finally:
            vidcap.release()
        if not success:
            raise OSError(f"Could not read a frame from {video_path}")
        height, width = image.shape[:2]
        return width, height

    def find_language(self):
        """Detect the language of the extracted audio and store it in ``self.language``."""
        samples = whisper.load_audio(self._project_path(self.audio_file))
        detector = Transcriptor(samples)
        self.language = detector.detect_language()

    def convert_to_tamil(self):
        """Translate ``self.untranslated`` to Tamil and store it in ``self.translated``.

        Does nothing except print a message when ``self.language`` is already "ta".
        """
        # NOTE(review): when find_language() reported "ta", self.untranslated
        # still holds the Whisper translate-task output and self.translated
        # stays empty -- confirm that is intended.
        if self.language == "ta":
            print("Already in Tamil")
        else:
            self.language = "ta"
            # NOTE(review): the translation backend may reject very long
            # inputs -- confirm limits before using full-length transcripts.
            self.translated = GoogleTranslator(source="en", target="ta").translate(self.untranslated)

    def caption(self):
        """Overlay the transcribed segments on the uploaded video and write the result.

        The output is written to ``BASE_DIR/<name>/<output_name>``.

        Raises:
            ValueError: if the transcription produced no segments.
            OSError: if the staged video copy cannot be read for its frame size.
        """
        subs = self._segments_to_subtitles(self.result['segments'])
        if not subs:
            raise ValueError("No transcribed segments available; nothing to caption.")

        width, height = self._frame_size(self._project_path(self.input_file))

        def generator(txt):
            # Caption box spans the full width and the bottom quarter of the frame.
            return TextClip(txt, font='P052-Bold', fontsize=width/50, stroke_width=.7,
                            color='white', stroke_color='black',
                            size=(width, height*.25), method='caption')

        subtitles = SubtitlesClip(subs, generator)
        video = VideoFileClip(self.uploaded_vid)
        try:
            final = CompositeVideoClip([video, subtitles.set_pos(('center', 'bottom'))])
            final.write_videofile(self._project_path(self.output_name), fps=video.fps,
                                  remove_temp=True, codec="libx264", audio_codec="aac")
        finally:
            # Release the readers held by the source clip.
            video.close()
        print("File captioned successfully")

In [7]:
# Build the pipeline object: prompts for the project/file names, stages the
# video and audio in the project folder, and runs the Whisper transcription.
ct2=Custom_Translator()


Name of the project: epics
Path of the video file you want to translate: C:\Users\Suresh\Desktop\EPICS TESTING\out.mp4
Name of the video file along with the extension: russia_ukraine.mp4
Name of the audio file: russia_ukraine.mp3
Name of the captioned video: captioned_russia_ukraine.mp4
epics created.
Moviepy - Building video D:/captiontesting/epics/russia_ukraine.mp4.
MoviePy - Writing audio in russia_ukraineTEMP_MPY_wvf_snd.mp3


                                                                                                                                      

MoviePy - Done.
Moviepy - Writing video D:/captiontesting/epics/russia_ukraine.mp4



                                                                                                                                      

Moviepy - Done !
Moviepy - video ready D:/captiontesting/epics/russia_ukraine.mp4
MoviePy - Writing audio in D:/captiontesting/epics/russia_ukraine.mp3


                                                                                                                                      

MoviePy - Done.
Starting Transcription
Transcription done successfully!


In [8]:
# Overlay the transcribed segments as subtitles and write the captioned video.
ct2.caption()

Moviepy - Building video D:/captiontesting/epics/captioned_russia_ukraine.mp4.
MoviePy - Writing audio in captioned_russia_ukraineTEMP_MPY_wvf_snd.mp4


                                                                                                                                      

MoviePy - Done.
Moviepy - Writing video D:/captiontesting/epics/captioned_russia_ukraine.mp4



                                                                                                                                      

Moviepy - Done !
Moviepy - video ready D:/captiontesting/epics/captioned_russia_ukraine.mp4
File captioned successfully
