In [None]:
!pip install streamlit
!pip install pyngrok




In [None]:
# %%writefile app.py
# import streamlit as st

# # Mock functions for demonstration (replace with actual implementations)
# def translate_to_urdu(text):
#     return "یہ اردو میں ترجمہ شدہ متن ہے"  # Example translation

# def summarize_text(text):
#     return "یہ اردو متن کا خلاصہ ہے"  # Example summary

# # Frontend
# st.title("Urdu News Translator and Summarizer")
# input_text = st.text_area("Enter news text (English):")
# if st.button("Translate and Summarize"):
#     # Backend
#     if input_text:
#         translated_text = translate_to_urdu(input_text)
#         summary = summarize_text(translated_text)

#         # Display results
#         st.subheader("Translated Text (Urdu):")
#         st.write(translated_text)
#         st.subheader("Summary (Urdu):")
#         st.write(summary)
#     else:
#         st.error("Please enter some text to process!")


In [None]:
!pip install youtube-transcript-api
!pip show youtube-transcript-api
!pip install googletrans==4.0.0-rc1


In [None]:
%%writefile app.py
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from googletrans import Translator
from transformers import pipeline

import re
import os

# Load Hugging Face summarization pipeline.
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the loader is wrapped in st.cache_resource to build the
# model once and reuse it across reruns instead of rebuilding it each time.
@st.cache_resource
def _load_summarizer():
    """Build the text2text-generation pipeline used by summarize_text."""
    return pipeline("text2text-generation", model="mrm8488/t5-base-finetuned-summarize-news")


pipe = _load_summarizer()

# Function to extract YouTube video ID
def get_video_id(youtube_url):
    """Return the 11-character video ID found in *youtube_url*.

    The URL is searched for a youtube.com path (``/v/``, ``/e/``,
    ``/embed/``, or any query string carrying ``v=``) or a ``youtu.be/``
    short link. Returns ``None`` when nothing matches.
    """
    id_regex = re.compile(
        r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})'
    )
    found = id_regex.search(youtube_url)
    if found is None:
        return None
    # Group 1 is the only capturing group: the ID itself.
    return found.group(1)

# Function to download Hindi transcript
def download_transcript(video_id):
    """Fetch the auto-generated Hindi transcript of a video as plain text.

    Args:
        video_id: YouTube video ID (as returned by ``get_video_id``).

    Returns:
        The transcript formatted by ``TextFormatter``, or ``""`` if any step
        fails; in that case the error is shown in the UI via ``st.error``.
    """
    try:
        # The package is installed unpinned. Releases from 1.0 on expose
        # transcript listing as the instance method `list`, and the old
        # static `list_transcripts` is deprecated/removed there. Prefer the
        # new entry point when present and fall back for older releases.
        if hasattr(YouTubeTranscriptApi, "list"):
            transcript_list = YouTubeTranscriptApi().list(video_id)
        else:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        # Only auto-generated Hindi captions are requested.
        transcript = transcript_list.find_generated_transcript(['hi'])
        formatter = TextFormatter()
        return formatter.format_transcript(transcript.fetch())
    except Exception as e:
        # UI boundary: surface the failure to the user and let the caller
        # treat the empty string as "nothing to process".
        st.error(f"Error downloading transcript: {e}")
        return ""

# Function to translate Hindi transcript to English
def _split_at_whitespace(text, limit=500):
    """Split *text* into pieces of at most *limit* characters, ending each piece on whitespace when possible."""
    pieces = []
    start = 0
    total = len(text)
    while start < total:
        end = min(start + limit, total)
        if end < total:
            # Back up to the last space/newline inside the window so a word
            # (or a Devanagari consonant + vowel-sign cluster) is not cut.
            window = text[start:end]
            cut = max(window.rfind(' '), window.rfind('\n'))
            if cut > 0:
                end = start + cut + 1
            # No whitespace in the window: fall back to a hard cut at `limit`.
        pieces.append(text[start:end])
        start = end
    return pieces


def translate_to_english(hindi_text):
    """Translate Hindi text to English with googletrans, chunk by chunk.

    The text is sent in chunks of at most 500 characters. Chunks are split
    on whitespace rather than at fixed offsets, because slicing through the
    middle of a word corrupts the translation at every chunk boundary.

    Args:
        hindi_text: Hindi source text.

    Returns:
        The translated chunks joined with single spaces, or ``""`` if the
        input is empty or any chunk fails (error shown via ``st.error``).
    """
    try:
        translator = Translator()
        # Skip whitespace-only chunks: there is nothing to translate in them.
        chunks = [chunk for chunk in _split_at_whitespace(hindi_text, 500) if chunk.strip()]
        translated_chunks = [translator.translate(chunk, src='hi', dest='en').text for chunk in chunks]
        return ' '.join(translated_chunks)
    except Exception as e:
        st.error(f"Error translating transcript: {e}")
        return ""

# Function to summarize the English text
def summarize_text(text, max_length=150, min_length=40):
    """Summarize *text* with the module-level ``pipe`` pipeline.

    Args:
        text: Text to summarize.
        max_length: Forwarded to the pipeline as ``max_length``.
        min_length: Forwarded to the pipeline as ``min_length``.

    Returns:
        The ``generated_text`` of the first pipeline result, or ``""`` if the
        pipeline raises (the error is shown via ``st.error``).
    """
    generation_options = {
        "max_length": max_length,
        "min_length": min_length,
        "do_sample": False,
    }
    try:
        outputs = pipe(text, **generation_options)
        first_result = outputs[0]
        return first_result['generated_text']
    except Exception as e:
        st.error(f"Error during summarization: {e}")
        return ""

# Streamlit App
def _show_stage_output(success_message, label, content, height):
    """Show a success banner followed by *content* in a labelled text area."""
    st.success(success_message)
    st.text_area(label, content, height=height)


def _run_pipeline(url):
    """Run fetch -> translate -> summarize for *url*, stopping at the first stage that yields nothing."""
    vid = get_video_id(url)
    if not vid:
        st.error("Invalid YouTube URL. Please check and try again.")
        return

    with st.spinner("Fetching subtitles..."):
        subtitles = download_transcript(vid)
    if not subtitles:
        # download_transcript has already reported the failure.
        return
    _show_stage_output("Subtitles fetched successfully!", "Subtitles:", subtitles, 300)

    with st.spinner("Translating to English..."):
        english = translate_to_english(subtitles)
    if not english:
        return
    _show_stage_output("Translation to English completed!", "English Transcript:", english, 300)

    with st.spinner("Summarizing transcript..."):
        digest = summarize_text(english)
    if not digest:
        return
    _show_stage_output("Summarization completed!", "Summary:", digest, 200)


st.title("YouTube Urdu News Translator and Summarizer")

# User Input
youtube_url = st.text_input("Enter YouTube Video URL:")
if youtube_url:
    _run_pipeline(youtube_url)


In [None]:
# %%writefile app.py
# import streamlit as st
# from youtube_transcript_api import YouTubeTranscriptApi
# from youtube_transcript_api.formatters import TextFormatter
# from googletrans import Translator
# import re
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# # Load summarization pipeline
# pipe = pipeline("text2text-generation", model="mrm8488/t5-base-finetuned-summarize-news")

# # Helper Functions
# def get_video_id(youtube_url):
#     """Extract the video ID from a YouTube URL."""
#     pattern = r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})'
#     match = re.search(pattern, youtube_url)
#     return match.group(1) if match else None

# def get_video_title(video_id):
#     """Get the title of the YouTube video."""
#     url = f"https://www.youtube.com/watch?v={video_id}"
#     try:
#         response = requests.get(url)
#         response.raise_for_status()
#         matches = re.findall(r'<title>(.*?)</title>', response.text)
#         return matches[0].replace(" - YouTube", "") if matches else "Unknown"
#     except requests.RequestException as e:
#         return "Unknown"

# def download_transcript(video_id):
#     """Download the Hindi transcript and return as a string."""
#     try:
#         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
#         transcript = transcript_list.find_generated_transcript(['hi'])
#         formatter = TextFormatter()
#         transcript_text = formatter.format_transcript(transcript.fetch())
#         return transcript_text
#     except Exception as e:
#         return ""

# def translate_to_english(hindi_text):
#     """Translate Hindi text to English using Google Translate."""
#     translator = Translator()
#     chunks = [hindi_text[i:i+500] for i in range(0, len(hindi_text), 500)]
#     translated_chunks = []
#     for chunk in chunks:
#         try:
#             translated_chunks.append(translator.translate(chunk, src='hi', dest='en').text)
#         except Exception as e:
#             translated_chunks.append("")  # Add empty string for failed chunks
#     return ' '.join(translated_chunks)

# def summarize_text(text, max_length=150, min_length=40):
#     """Summarize the given text."""
#     try:
#         summary = pipe(text, max_length=max_length, min_length=min_length, do_sample=False)
#         return summary[0]['generated_text']
#     except Exception as e:
#         return "Error during summarization."

# # Streamlit Frontend
# def main():
#     st.title("YouTube Hindi Transcript Translator & Summarizer")
#     st.markdown("""
#         This app extracts the Hindi transcript from a YouTube video, translates it into English,
#         and provides a summarized version.
#     """)

#     youtube_url = st.text_input("Enter YouTube Video URL:")
#     if youtube_url:
#         video_id = get_video_id(youtube_url)

#         if not video_id:
#             st.error("Invalid YouTube URL. Please enter a valid link.")
#             return

#         # Fetch Hindi transcript
#         st.write("Fetching Hindi transcript...")
#         hindi_transcript = download_transcript(video_id)
#         if not hindi_transcript:
#             st.error("Unable to download Hindi transcript.")
#             return

#         st.write("Hindi Transcript:")
#         st.text_area("Hindi Transcript:", hindi_transcript, height=200)

#         # Translate to English
#         st.write("Translating transcript to English...")
#         english_transcript = translate_to_english(hindi_transcript)
#         if not english_transcript:
#             st.error("Unable to translate Hindi transcript to English.")
#             return

#         st.write("English Transcript:")
#         st.text_area("English Transcript:", english_transcript, height=200)

#         # Summarize transcript
#         st.write("Summarizing transcript...")
#         summarized_text = summarize_text(english_transcript)
#         if summarized_text == "Error during summarization.":
#             st.error("Failed to summarize the transcript.")
#             return

#         st.write("Summary:")
#         st.text_area("Summary:", summarized_text, height=200)

# # Run Streamlit App
# if __name__ == "__main__":
#     main()


In [None]:
!ngrok config add-authtoken 2pvz1CKzjHKNf0iZmcT9DncCcz2_7CZ74jqQtHRMif68KBbaS


In [None]:
# Ask the ngrok CLI to validate its configuration file before opening a tunnel.
!ngrok config check

In [None]:
# NOTE(review): pyngrok is already installed by the first cell; this repeat is
# harmless but redundant.
!pip install pyngrok # Install the pyngrok library

from pyngrok import ngrok # Now you can import and use pyngrok
# Start Streamlit in the background; stdout and stderr are discarded so the
# cell returns immediately instead of blocking on the server process.
!streamlit run app.py &>/dev/null&

# Create an Ngrok tunnel, explicitly specifying HTTP
# Port 8501 is Streamlit's default port; the command above does not override it.
public_url = ngrok.connect(proto="http", addr=8501)
# NOTE(review): ngrok.connect presumably returns a tunnel object rather than a
# plain string, so this prints its repr — confirm against the pyngrok docs.
print(f"Streamlit app is live at: {public_url}")
