In [1]:
!pip install -q streamlit youtube-transcript-api langchain langchain-huggingface langchain-community faiss-cpu sentence-transformers pyngrok

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.0/485.0 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m39.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [5]:
# --- Write your Streamlit app to a file ---
with open("app.py", "w") as f:
    f.write('''
import streamlit as st
import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from google.colab import userdata

# Browser-tab title/icon and wide page layout for the app.
st.set_page_config(
    layout="wide",
    page_icon="📺",
    page_title="YouTube Video Q&A",
)

# Only consult the Colab secret store when the launching process did not
# already export the Hugging Face token into the environment.
if os.environ.get("HUGGINGFACEHUB_API_TOKEN") is None:
    try:
        hf_secret = userdata.get("HF_TOKEN")
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_secret
    except Exception as e:
        # Without a token the app cannot proceed: report the problem and halt this run.
        st.error(f"Hugging Face API token not found! Error: {e}", icon="🔒")
        st.stop()

def _extract_video_id(video_url):
    """Return the video ID contained in a YouTube URL, or None if none is found.

    Handles watch URLs (?v=ID), youtu.be short links, and /embed/, /shorts/,
    /live/ and /v/ paths. Any other string containing "v=" falls back to the
    plain split so inputs that used to work keep working.
    """
    from urllib.parse import parse_qs, urlparse  # local import keeps this block self-contained

    url = (video_url or "").strip()
    if not url:
        return None

    # urlparse only recognises the host when a scheme is present.
    parsed = urlparse(url if "://" in url else "https://" + url)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if host == "youtu.be":
        return segments[0] if segments else None
    if host.endswith("youtube.com") or host.endswith("youtube-nocookie.com"):
        ids = parse_qs(parsed.query).get("v")
        if ids:
            return ids[0]
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]
    if "v=" in url:
        return url.split("v=")[1].split("&")[0]
    return None


@st.cache_data
def _fetch_transcript_text(video_id):
    """Download the English transcript for video_id and join it into one string.

    Errors are raised instead of being turned into a None return value, so a
    failed lookup is not stored in the cache and can be retried.
    """
    ytt_api = YouTubeTranscriptApi()
    transcript_list = ytt_api.list(video_id=video_id)
    target_transcript = transcript_list.find_transcript(["en"])
    transcript_data = target_transcript.fetch()
    return " ".join(chunk.text for chunk in transcript_data)


def get_transcript(video_url):
    """Fetch the English transcript text for a YouTube video URL.

    Args:
        video_url: URL of the video (watch, youtu.be, embed, shorts or live form).

    Returns:
        The transcript as a single space-joined string, or None when it could
        not be retrieved; in that case an error is shown in the Streamlit UI.
    """
    try:
        video_id = _extract_video_id(video_url)
        if not video_id:
            raise ValueError("no video ID found in the URL")
        # Cached per video ID, so different URL spellings of one video share an entry.
        return _fetch_transcript_text(video_id)
    except TranscriptsDisabled:
        st.error("Transcripts are disabled for this video.")
        return None
    except Exception as e:
        st.error(f"Could not retrieve transcript. Error: {e}")
        return None

@st.cache_resource
def _build_rag_chain(transcript):
    """Build and cache the retrieval-augmented QA chain for one transcript.

    The parameter name has no leading underscore on purpose: Streamlit skips
    underscore-prefixed parameters when computing the cache key, which would
    make every transcript map to the same cached chain.
    """
    # Split the transcript into overlapping chunks and index them in FAISS.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.create_documents([transcript])
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_documents(chunks, embeddings)
    retriever = vector_store.as_retriever()

    # Chat model served through the Hugging Face endpoint.
    endpoint = HuggingFaceEndpoint(repo_id="HuggingFaceH4/zephyr-7b-beta", max_new_tokens=512, temperature=0.1)
    llm = ChatHuggingFace(llm=endpoint)

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant. Answer the question based ONLY on the provided context."),
        ("human", "CONTEXT:\\n{context}\\n\\nQUESTION:\\n{question}")
    ])

    def format_docs(docs):
        # Concatenate the retrieved chunks into one context string.
        return "\\n\\n".join(doc.page_content for doc in docs)

    # The question is passed through unchanged and also drives retrieval.
    return (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )


def create_rag_chain(_transcript):
    """Return a RAG chain that answers questions about the given transcript.

    Args:
        _transcript: Full transcript text of the video.

    Returns:
        A runnable chain; invoke it with a question string to get the answer
        text. Chains are cached per transcript, so analysing a different
        video yields a chain built from that video's transcript.
    """
    return _build_rag_chain(_transcript)

st.title("📺 YouTube Video Q&A Assistant")
st.markdown("Enter a YouTube video URL, and I'll answer your questions about it.")

# The chain lives in session state so it survives Streamlit's script reruns.
if "rag_chain" not in st.session_state:
    st.session_state["rag_chain"] = None

# --- Sidebar: choose and analyse a video ---
with st.sidebar:
    st.header("Video Input")
    url_input = st.text_input("Enter YouTube URL:", placeholder="https://www.youtube.com/watch?v=...")
    analyze_clicked = st.button("Analyze Video", type="primary")
    if analyze_clicked:
        if not url_input:
            st.warning("Please enter a YouTube URL.")
        else:
            with st.spinner("Fetching transcript and building knowledge base..."):
                video_text = get_transcript(url_input)
                if video_text:
                    st.session_state["rag_chain"] = create_rag_chain(video_text)
                    st.success("Video analysis complete! You can now ask questions.")

# --- Main area: question box, shown only once a chain is available ---
active_chain = st.session_state["rag_chain"]
if not active_chain:
    st.info("Please analyze a video to start asking questions.")
else:
    st.header("Ask a Question")
    user_question = st.text_input("What would you like to know?", placeholder="e.g., What is the main topic of the video?")
    if user_question:
        with st.spinner("Thinking..."):
            reply = active_chain.invoke(user_question)
            st.markdown("### Answer")
            st.write(reply)
''')


In [6]:
# Kill any ngrok process that is still running before the next cell opens a new tunnel
!pkill ngrok

# Verify that the process is no longer running (pgrep prints nothing when no ngrok process is found)
!pgrep ngrok

In [7]:
import os
from pyngrok import ngrok
from google.colab import userdata

# --- Automatically fetch secrets ---
# Ensure your secrets are named 'HF_TOKEN' and 'NGROK_TOKEN' in the Colab Secrets (🔑) tab
try:
    hf_token = userdata.get('HF_TOKEN')
    ngrok_token = userdata.get('ngrok_token')

    if not hf_token or not ngrok_token:
        raise ValueError("One or both tokens are not set in Colab Secrets.")

except Exception as e:
    print(f"Error fetching secrets: {e}")
    print("Please ensure you have saved 'HF_TOKEN' and 'NGROK_TOKEN' in the Colab Secrets manager.")
else:
    # Set the tokens for your app and ngrok
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
    ngrok.set_auth_token(ngrok_token)

    # Run streamlit in the background
    !nohup streamlit run app.py &

    # Create a public URL to the streamlit app
    public_url = ngrok.connect(8501)
    print(f"Click this link to open your app: {public_url}")

nohup: appending output to 'nohup.out'
Click this link to open your app: NgrokTunnel: "https://3555f39cbc42.ngrok-free.app" -> "http://localhost:8501"


In [1]:
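# %%writefile app.py
# # NOTE(review): every line of this cell is commented out, so running it does
# # nothing. The app.py that Streamlit serves is the one written by the
# # `with open("app.py", "w")` cell; this copy is kept for reference only.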

# import streamlit as st
# import os
# from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace, HuggingFaceEndpoint
# from langchain_community.vectorstores import FAISS
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_core.runnables import RunnablePassthrough
# from langchain_core.output_parsers import StrOutputParser
# from google.colab import userdata

# # --- App Configuration ---
# st.set_page_config(
#     page_title="YouTube Video Q&A",
#     page_icon="📺",
#     layout="wide"
# )

# # --- Hugging Face API Token ---
# # For Streamlit Community Cloud, set the HF_TOKEN in the secrets manager
# # For local development, you can use an environment variable
# if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
#     try:
#         # Get the token from Colab secrets
#         os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get("HF_TOKEN")
#     except Exception as e:
#         st.error(f"Hugging Face API token not found! Please set it in your Colab secrets. Error: {e}", icon="🔒")
#         st.stop()

# # --- Caching Functions ---
# # Use Streamlit's caching to avoid re-running expensive functions
# @st.cache_data
# def get_transcript(video_url):
#     """Fetches the transcript for a given YouTube video URL."""
#     try:
#         # Extract video ID from URL
#         video_id = video_url.split("v=")[1].split("&")[0]
#         ytt_api = YouTubeTranscriptApi()
#         transcript_list = ytt_api.list(video_id=video_id)
#         target_transcript = transcript_list.find_transcript(['en'])
#         transcript_data = target_transcript.fetch()
#         return " ".join(chunk.text for chunk in transcript_data)
#     except TranscriptsDisabled:
#         st.error("Transcripts are disabled for this video.")
#         return None
#     except Exception as e:
#         st.error(f"Could not retrieve transcript. Please check the URL. Error: {e}")
#         return None

# @st.cache_resource
# def create_rag_chain(_transcript):
#     """Creates a RAG chain from the transcript text."""
#     # 1. Split text into chunks
#     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
#     chunks = splitter.create_documents([_transcript])

#     # 2. Create embeddings and vector store
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     vector_store = FAISS.from_documents(chunks, embeddings)
#     retriever = vector_store.as_retriever()

#     # 3. Define the LLM
#     # --- CORRECTED CODE ---
#     # First, create the endpoint for the model
#     endpoint = HuggingFaceEndpoint(
#         repo_id="HuggingFaceH4/zephyr-7b-beta",
#         max_new_tokens=512,
#         temperature=0.1
#     )
#     # Then, wrap it in the ChatHuggingFace class
#     llm = ChatHuggingFace(llm=endpoint)
#     # --- END OF CORRECTION ---

#     # 4. Define the prompt template
#     prompt = ChatPromptTemplate.from_messages([
#         ("system", "You are a helpful assistant. Answer the question based ONLY on the provided context. If the context is insufficient, just say you don't know."),
#         ("human", "CONTEXT:\n{context}\n\nQUESTION:\n{question}")
#     ])

#     # 5. Build the RAG chain
#     def format_docs(docs):
#         return "\n\n".join(doc.page_content for doc in docs)

#     rag_chain = (
#         {"context": retriever | format_docs, "question": RunnablePassthrough()}
#         | prompt
#         | llm
#         | StrOutputParser()
#     )
#     return rag_chain

# # --- Streamlit App UI ---
# st.title("📺 YouTube Video Q&A Assistant")
# st.markdown("Enter a YouTube video URL, and I'll answer your questions about it.")

# # Initialize session state for the RAG chain
# if 'rag_chain' not in st.session_state:
#     st.session_state.rag_chain = None

# with st.sidebar:
#     st.header("Video Input")
#     youtube_url = st.text_input("Enter YouTube URL:", placeholder="https://www.youtube.com/watch?v=...")

#     if st.button("Analyze Video", type="primary"):
#         if youtube_url:
#             with st.spinner("Fetching transcript and building knowledge base..."):
#                 transcript = get_transcript(youtube_url)
#                 if transcript:
#                     st.session_state.rag_chain = create_rag_chain(transcript)
#                     st.success("Video analysis complete! You can now ask questions.")
#         else:
#             st.warning("Please enter a YouTube URL.")

# # --- Q&A Section ---
# if st.session_state.rag_chain:
#     st.header("Ask a Question")
#     question = st.text_input("What would you like to know?", placeholder="e.g., What is the main topic of the video?")

#     if question:
#         with st.spinner("Thinking..."):
#             answer = st.session_state.rag_chain.invoke(question)
#             st.markdown("### Answer")
#             st.write(answer)
# else:
#     st.info("Please analyze a video to start asking questions.")

Writing app.py
