In [None]:
import textwrap

from dotenv import find_dotenv, load_dotenv
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

In [None]:
# Read settings from the discovered .env file into the process environment
# before constructing any client that may depend on them.
load_dotenv(dotenv_path=find_dotenv())

# Module-level embedding model; create_db_from_youtube_video_url reads this name.
embeddings = OpenAIEmbeddings()

In [None]:
video_url = "https://www.youtube.com/watch?v=L_Guz73e6fw"


def create_db_from_youtube_video_url(video_url, chunk_size=1000, chunk_overlap=100):
    """
    Build a FAISS vector store from the transcript of a YouTube video.

    The transcript is loaded, split into overlapping chunks, and each chunk is
    embedded with the module-level `embeddings` object.

    Args:
        video_url (str): URL of the YouTube video whose transcript is indexed.
        chunk_size (int, optional): maximum size of each chunk handed to the
            text splitter. Defaults to 1000.
        chunk_overlap (int, optional): amount shared between consecutive
            chunks so context is not lost at the boundaries. Defaults to 100.

    Returns:
        The vector store produced by FAISS.from_documents for the chunks.
    """
    # Load the video's transcript as a list of documents.
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    # Split the transcript so no single chunk is too large for the model.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(transcript)

    # Embed the chunks and index them for similarity search.
    return FAISS.from_documents(docs, embeddings)
    

In [None]:
def get_response_from_query(db, question, k=4):
    """
    Answer a question about a video using its most relevant transcript chunks.

    The k chunks most similar to the question are retrieved from the vector
    store, joined into one string, and supplied to gpt-3.5-turbo as the only
    context it may use for its answer.

    Keep k small enough that the retrieved text plus the prompt fits in the
    model's context window (the original note cites 4097 tokens for
    gpt-3.5-turbo).

    Args:
        db: vector store from the create_db_from_youtube_video_url function;
            must provide similarity_search(query, k=...).
        question (str): the question you want to ask about the video.
        k (int, optional): number of transcript chunks to retrieve. Defaults to 4.

    Returns:
        tuple: (response, docs) where response is the model's answer with
        newline characters removed and docs is the list of retrieved chunks.
    """
    # Find the chunks most similar to the question.
    docs = db.similarity_search(question, k=k)
    # Join the chunks into one string; the space separator keeps the last word
    # of one chunk from fusing with the first word of the next.
    docs_page_content = " ".join(d.page_content for d in docs)

    # Create the model
    chat = ChatOpenAI(model_name = "gpt-3.5-turbo", temperature = 0.6)

    # Defining/Designing the template for the prompt
    template = """ 
    You are an assistance that helps to answer questions based on a youtube video transcript:{docs}
    
    Only use factual information from the transcript to answer the question. If you feel that you do not have enough information to answer the question, say " I don't know". 
    
    Your answers should be verbose and detailed.
    """

    # System message: instructs the model about its role and behaviour.
    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human message: carries the user's question to the model.
    human_template = "Answer the following question: {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    # Putting it in a chain
    chain = LLMChain(llm=chat, prompt=chat_prompt)

    # Run the chain, filling both template variables.
    response = chain.run(question = question, docs=docs_page_content)
    response = response.replace("\n", "")

    return response, docs

We now have the building blocks we need, so we can start calling the functions.

In [None]:
# Build the vector store for the demo video.
video_url = "https://www.youtube.com/watch?v=L_Guz73e6fw"
db = create_db_from_youtube_video_url(video_url=video_url)

# Ask a question against the indexed transcript.
question = "What are they saying about AGI?"
response, docs = get_response_from_query(db=db, question=question)

# Wrap the answer at 85 columns for readable output.
print(textwrap.fill(text=response, width=85))