# **Multimodal RAG**

In [1]:
%%capture
pip install -U langchain-huggingface  langchain_community youtube-transcript-api LanceDB sentence-transformers faiss-gpu gradio

In [2]:
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from youtube_transcript_api import YouTubeTranscriptApi
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import gradio as gr

In [3]:
#Load models
print("Loading models...")
llm_pipeline = pipeline("text2text-generation", model="google/flan-t5-small", device=0)
llm = HuggingFacePipeline(pipeline=llm_pipeline)
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
print("Models loaded!")

Loading models...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=llm_pipeline)
  embeddings = SentenceTransformerEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Models loaded!


### Define Helper Functions

In [4]:
def fetch_youtube_transcript(video_url):
    """Fetch transcript from YouTube video."""
    video_id = video_url.split("v=")[-1]
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text = " ".join([entry["text"] for entry in transcript])
        return text
    except Exception as e:
        print(f"Error fetching transcript: {e}")
        return None

def main():
    video_url = input("Enter YouTube video URL: ").strip()
    transcript = fetch_youtube_transcript(video_url)

    if not transcript:
        print("Unable to fetch transcript. Exiting...")
        return


    transcript_docs = [transcript]
    vector_store = FAISS.from_texts(transcript_docs, embeddings)

    print("You can start asking questions about the video. Type 'quit' or 'exit' to end.")

    while True:
        query = input("Ask a question: ").strip()
        if query.lower() in ["quit", "exit"]:
            print("Exiting. Goodbye!")
            break


       # Retriever
        docs = vector_store.similarity_search(query, k=5)

        if not docs:
            print("The question doesn't seem relevant to the video content. Try rephrasing.")
            continue


        context = " ".join([doc.page_content for doc in docs])         # Combine the retrieved docs into a single context
        print(f"Debug Context: {context}")


        prompt_template = PromptTemplate(
            input_variables=["context", "query"],
            template="You are an assistant answering questions based on the following video transcript:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:",
        )
        chain = LLMChain(llm=llm, prompt=prompt_template)

        try:
            response = chain.run({"context": context, "query": query})
            print(f"Response: {response}")
        except Exception as e:
            print(f"Error generating response: {e}")

if __name__ == "__main__":
    main()


Enter YouTube video URL: https://www.youtube.com/watch?v=7Hcg-rLYwdM
You can start asking questions about the video. Type 'quit' or 'exit' to end.
Ask a question: What do the astronauts feel about their work?
Debug Context: As I look back on the the mission that we've had here on the International Space Station,
I'm proud to have been a part of much of the science activities that happened over the last two months. The view is always amazing I didn't think I would do another spacewalk and to now have the chance to have done four more was just icing on the cake for a a wonderful mission. Does the 10th one feel like the first one? No, a little more comfortable on the tenth one. It's hard to put into words just what it was like to be a part of
this expedition, expedition 63. It'll be kind of a memory that will last a lifetime for me. It's been a true honor. Dragon SpaceX undock sequence commanded. Thrusters
looking good. The hardest part was getting us launched, but the most important part

  chain = LLMChain(llm=llm, prompt=prompt_template)
  response = chain.run({"context": context, "query": query})


Response: They feel proud
Ask a question: what happend in the video?
Debug Context: As I look back on the the mission that we've had here on the International Space Station,
I'm proud to have been a part of much of the science activities that happened over the last two months. The view is always amazing I didn't think I would do another spacewalk and to now have the chance to have done four more was just icing on the cake for a a wonderful mission. Does the 10th one feel like the first one? No, a little more comfortable on the tenth one. It's hard to put into words just what it was like to be a part of
this expedition, expedition 63. It'll be kind of a memory that will last a lifetime for me. It's been a true honor. Dragon SpaceX undock sequence commanded. Thrusters
looking good. The hardest part was getting us launched, but the most important part is bringing us home. Rise and shine Daddy. We love you. Hurry home so we can go get my dog. Splashdown! Welcome back to planet Earth and th

I tried on this YouTube video:  
[https://www.youtube.com/watch?v=7Hcg-rLYwdM](https://www.youtube.com/watch?v=7Hcg-rLYwdM)



## Gradio App

In [5]:
def fetch_youtube_transcript(video_url):
    video_id = video_url.split("v=")[-1]
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text = " ".join([entry["text"] for entry in transcript])
        return text
    except Exception as e:
        return f"Error fetching transcript: {e}"


def create_vector_store(transcript):
    transcript_docs = [transcript]
    return FAISS.from_texts(transcript_docs, embeddings)


def process_query(video_url, query):
    transcript = fetch_youtube_transcript(video_url)
    if "Error" in transcript:
        return transcript, ""

    vector_store = create_vector_store(transcript)

    docs = vector_store.similarity_search(query, k=5)

    if not docs:
        return "No relevant context found for your query.", ""

    context = " ".join([doc.page_content for doc in docs])
    prompt_template = PromptTemplate(
        input_variables=["context", "query"],
        template="You are an assistant answering questions based on the following video transcript:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:",
    )
    chain = LLMChain(llm=llm, prompt=prompt_template)

    try:
        response = chain.run({"context": context, "query": query})
        return response, video_url
    except Exception as e:
        return f"Error generating response: {e}", video_url


In [6]:
def main_interface(video_url, query):
    response, video_display = process_query(video_url, query)
    video_embed = f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{video_url.split("v=")[-1]}" frameborder="0" allowfullscreen></iframe>'
    return response, video_embed


with gr.Blocks(css="""
    body {
        background-color:black !important; /* Ensures the black background is applied */
        color: white !important; /* Ensures text is white */
    }
    button {
        background-color: #1a73e8 !important; /* Blue buttons */
        color: black !important; /* Button text color */
    }
    button:hover {
        background-color: green !important; /* Darker blue on hover */
    }
""") as app:
    gr.Markdown("Chat with YouTube Video")
    with gr.Row():
        video_url = gr.Textbox(label="YouTube Video URL", placeholder="Enter the video URL here")
    with gr.Row():
        query = gr.Textbox(label="Ask a Question", placeholder="Ask something about the video transcript")
    with gr.Row():
        response = gr.Textbox(label="Response", placeholder="The assistant's response will appear here", interactive=False)
    with gr.Row():
        video_display = gr.HTML(value="")
    with gr.Row():
        submit_btn = gr.Button("Submit")

    submit_btn.click(main_interface, inputs=[video_url, query], outputs=[response, video_display])

if __name__ == "__main__":
    app.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://461259201c50425ad7.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
