# Extract information from Youtube link using LLM Model

Import all the necessary libraries

In [28]:
import os
from langchain_community.llms import HuggingFaceHub
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate 
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.document_loaders import YoutubeLoader
from transformers import AutoTokenizer
from langchain_text_splitters import CharacterTextSplitter
from transformers import AutoTokenizer
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from dotenv import load_dotenv

Load all the environmental variables

In [29]:
load_dotenv()

True

Load the embedding model

In [18]:
model_name = "BAAI/bge-base-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

Create a vector database using FAISS to store the content from Youtube link

In [30]:
def create_vector_db_from_youtube_url(video_url):
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()
    text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=100, chunk_overlap=0)
    docs = text_splitter.split_documents(transcript)
    embeddings = HuggingFaceInferenceAPIEmbeddings(api_key = os.environ["HUGGINGFACEHUB_API_TOKEN"], model_name = model_name)
    db =  FAISS.from_documents(docs,embeddings)
    return db

Define the LLM Model and Prompt

In [20]:
def get_response_from_query(db: FAISS ,query: str,k=4) -> FAISS:
    docs=db.similarity_search(query,k=k)
    docs_page_content = " ".join([d.page_content for d in docs])
    prompt = PromptTemplate(
        input_variables = ["question","docs"], template="""
        You are an Youtube extractor. Please be truthful and give direct answers. 
        Please tell 'I don't know' if user query is not in content.
        
        Answer the following question: \n{question}\n\n
        By searching the following transcript:\n{docs}.
        \nAnswer:"""
    )
    model = HuggingFaceHub(repo_id = "HuggingFaceH4/zephyr-7b-alpha", model_kwargs = {"temperature":0.5, "max_new_tokens":512, "max_length":64})
    chain = LLMChain(llm=model,prompt=prompt)
    result=chain.run(question = query, docs = docs_page_content)
    return result

Shoot your question here

youtube_url = "https://youtu.be/lG7Uxts9SXs?si=MuNRnw4XSMrz-Uc6"  

Query: What is discussed in the video?

In [33]:
# This video contains docker explaination. In this video, no one talks. Just display the content with music. 
db = create_vector_db_from_youtube_url(video_url="https://www.youtube.com/watch?v=Q1H6ez7Ryfk")
db

<langchain_community.vectorstores.faiss.FAISS at 0x1f84d406df0>

In [34]:
response = get_response_from_query(db,query="What is discussed in the video?")
response

"\n        You are an Youtube extractor. Please be truthful and give direct answers. \n        Please tell 'I don't know' if user query is not in content.\n        \n        Answer the following question: \nWhat is discussed in the video?\n\n\n        By searching the following transcript:\n[Music] so [Music] [Music] foreign [Music] you.\n        \nAnswer:\nThe transcript provided does not directly indicate what is being discussed in the video. It appears to be background music or sound effects."

### OUTPUT:

You are an Youtube extractor. Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in content.   

Answer the following question: \nWhat is discussed in the video?   

By searching the following transcript: [Music] so [Music] [Music] foreign [Music] you.

Answer:
The transcript provided does not directly indicate what is being discussed in the video. It appears to be background music or sound effects.

In [36]:
db = create_vector_db_from_youtube_url(video_url="https://www.youtube.com/watch?v=mxVkNGkzuxU")
db

<langchain_community.vectorstores.faiss.FAISS at 0x1f84d40c0d0>

In [37]:
response = get_response_from_query(db,query="What is discussed in the video?")
response

"\n        You are an Youtube extractor. Please be truthful and give direct answers. \n        Please tell 'I don't know' if user query is not in content.\n        \n        Answer the following question: \nWhat is discussed in the video?\n\n\n        By searching the following transcript:\nwhat is tucker containerization explained in a simple story in my previous show we learned that containerization technology helps software run easily freely and independently but how to put software in a container how to run the container to answer those questions let's meet the hero behind the scene docker a container runtime and its processes what is docker what is container runtime what are the processes it sounds complicated no worries let's simply watch a short story this is docky ducky works on a dock he packs and loads cargos on vessels he helps his clients to ship stuff all over the world it is an important job doki likes his job one day a client named jane comes to the dock jane urgently ne

### OUTPUT:

You are an Youtube extractor. Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in content.

Answer the following question: What is discussed in the video?

By searching the following transcript:
what is tucker containerization explained in a simple story in my previous show we learned that containerization technology helps software run easily freely and independently but how to put software in a container how to run the container to answer those questions let's meet the hero behind the scene docker a container runtime and its processes what is docker what is container runtime what are the processes it sounds complicated no worries let's simply watch a short story this is docky ducky works on a dock he packs and loads cargos on vessels he helps his clients to ship stuff all over the world it is an important job doki likes his job one day a client named jane comes to the dock jane urgently needs to ship many different packages how to do that quickly docky gets a shipping instruction list it lists everything on how to ship jane's stuff in a container what type of container how to upload her stuff to the container and how to ship the container on the boat etc [Music] this is just like software containerization first a container runtime such as docker needs to be available on the server next a dockerfile is prepared with all the steps to pack the software to the container and to run the container on the server [Music] following the instructions ducky works diligently on building chains container this is exactly what the software docker is doing building the container following the dockerfile instructions [Music] when the container is done docky sticks a label on it and pushes it to the container warehouse waiting for the next shipping time for the software docker the finished container is called container image just like a container template similarly docker tags and pushes the image to the image registry the warehouse it is time to ship daki pulls the container out of the warehouse and uploads it to the boat now jane's stuff is ready to go to the world at the same time the software docker pulls out the image runs container instances on the server to serve the world [Music] congratulations now you understand docker the container runtime and its processes simple [Music] in the real chipping world things can be more complicated on a large scale with so many containers and operations orchestration is needed it is the same in the software world [Music] do you know who is the best container orchestrator comment it below keep learning no matter how complex it seems remember it is just a docking and shipping business simple [Music] you.

Answer: 
The video discusses the concept of containerization in software and how Docker, a container runtime, helps with the process. The video uses a simple story to explain the process of building and shipping container images, highlighting the similarities between the real-world shipping business and software containerization. The video also briefly touches on container orchestration and mentions that it is needed for large-scale container operations.

Create a streamlit interface

In [10]:
import streamlit as st
st.title("Youtube Assistant")
with st.sidebar:
    with st.form(key='my_form'):
        youtube_url = st.sidebar.text_area(
            label="What is the Youtube video URL?",
            max_chars=50
        )
        query =st.sidebar.text_area(
            label="Ask me about the video?",
            max_chars=50,
            key = "query"
        )
        submit_button =st.form_submit_button(label="Submit")
if query and youtube_url:
    db = create_vector_db_from_youtube_url(youtube_url)
    response = get_response_from_query(db,query)
    st.subheader("Answer: ")
    st.text(response)