In [1]:
#for summarization
from youtube_transcript_api import YouTubeTranscriptApi
import re
from transformers import pipeline

def extract_video_id(url):
    # Extract video ID from the YouTube URL
    match = re.search(r"(?:v=|\/)([a-zA-Z0-9_-]{11}).*", url)
    if match:
        return match.group(1)
    else:
        return None

def get_auto_captions(youtube_url):
    video_id = extract_video_id(youtube_url)
    transcript_text = ""
    
    if video_id:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            transcript_text = " ".join([entry['text'] for entry in transcript])
            transcript_length = len(transcript_text)
            return transcript_text, transcript_length
        except Exception as e:
            print(f"Error fetching auto-generated captions: {e}")
            return None, None
    else:
        print("Invalid YouTube URL. Please provide a valid video URL.")
        return None, None



In [2]:
summarizer = pipeline('summarization',max_length=56)

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.





In [3]:
def summarize(youtube_url, summarizer, max_length=56):
    # Call get_auto_captions to retrieve transcript text and length
    transcript, transcript_length = get_auto_captions(youtube_url)
    
    if transcript:
        num_iters = int(transcript_length / 1000)
        summarized_text = []

        for i in range(0, num_iters + 1):
            start = i * 1000
            end = (i + 1) * 1000
            input_text = transcript[start:end]
            #print("Input text:\n", input_text)

            out = summarizer(input_text, max_length=max_length)
            out = out[0]
            out = out['summary_text']

            #print("Summarized text:\n", out)
            summarized_text.append(out)
        
        return str(summarized_text)
    else:
        print("Failed to retrieve transcript from the YouTube video.")
        return None




In [4]:
from transformers import pipeline
import warnings


In [5]:
# Suppress warnings
warnings.filterwarnings("ignore")

In [6]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline


In [7]:
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

In [8]:
translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang="eng_Latn", tgt_lang='tam_Taml', max_length = 400)

In [9]:
def translate(text, tgt_lang):
    translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang="eng_Latn", tgt_lang=tgt_lang)
    translated_text = translator(text[:1024])[0]['translation_text']
    return translated_text

In [10]:
def summarize_video(youtube_url):
    # Define the summarizer
    summarizer = pipeline('summarization', max_length=56)
    
    # Call summarize_video function to retrieve transcript and summarize
    summarized_text = summarize(youtube_url, summarizer)
    
    if summarized_text:
        return str(summarized_text)
    else:
        return "Failed to retrieve or summarize the video captions."


In [11]:
import gradio as gr

In [16]:
with gr.Blocks() as demo:
    with gr.Row():
        heading = gr.HTML("<h1 style='text-align:center; margin-top: 50px; margin-bottom: 20px; font-family:Bebas Neue; font-size: 40px; color:#126180;'>LANGUAGE SHUFFLE</h1>" + "<br>")
        with gr.Column():
            youtube_url = gr.components.Textbox(lines=1, placeholder='YouTube video URL')
            submit_btn = gr.components.Button("Submit")
            output = gr.components.Textbox(lines=2, label='Captions')

            submit_btn.click(fn=summarize_video, inputs=youtube_url, outputs=output)

        with gr.Column():
            tgt_lang = gr.components.Dropdown(choices=[('hindi', 'hin_Deva'), ('telugu', 'tel_Telu'), ('kanada', 'kan_Knda'), ('tamil', 'tam_Taml'), ('malayalam', 'mal_Mlym')], label='Target Language', visible=True)
            trans= gr.components.Textbox(lines=2, label='Translated Text')
           
            translate_btn = gr.Button('Translate')

            translate_btn.click(translate, inputs=[output, tgt_lang], outputs=trans)


IMPORTANT: You are using gradio version 4.21.0, however version 4.29.0 is available, please upgrade.
--------


In [18]:
iface = demo.launch(share=True, inbrowser=True)

Running on local URL:  http://127.0.0.1:7881

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.
