In [60]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import  RecursiveCharacterTextSplitter
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma
import os
from langchain.prompts import  PromptTemplate
from langchain.chains import LLMChain
import getpass

In [35]:
os.environ["GOOGLE_API_KEY"] = getpass.getpass("enter google ")

In [58]:

def youtube_url_to_db(url):
    loader = YoutubeLoader.from_youtube_url(url)
    transcript = loader.load()
    
    text_transcript = RecursiveCharacterTextSplitter(chunk_size = 1000 , chunk_overlap = 100)
    text_transcript = text_transcript.split_documents(transcript)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    db = Chroma.from_documents(text_transcript , embedding=embeddings )
    print(text_transcript)
    return db


[Document(metadata={'source': 'd4yCWBGFCEs'}, page_content="welcome to this generative AI mini course first we will understand the Gen AI fundamentals then we will learn Lang chain which is a python framework used for building gen application and in the end we will build two endtoend gen AI projects the first project will be using commercial GPT model where we will build equity news research tool the second project will be using open-source llm model where we will build a Q&A tool in retail industry let's start with the definition of gen ai ai can be categorized into two sections generative ai non-generative ai when you talk about non-generative AI you are dealing with problems such as you have a chest x-ray and you want to find out if this person has pneumonia or not or maybe you have some data on person's credit history and you want to figure out if the person should be given a loan or not in these problems you are not creating new content you have data and based on that data you are

<langchain_community.vectorstores.chroma.Chroma at 0x12e56f890>

In [62]:
from langchain_google_genai import ChatGoogleGenerativeAI
def get_resp_query(db , query , k = 10):
    docs = db.similarity_search(query , k = k)
    docs_page_content = ''.join([d.page_content for d in docs])
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    prompt = PromptTemplate(
        input_variables = ["question" , "docs"],
        template="""
                You are a helpful assistant that that can answer questions about youtube videos 
                based on the video's transcript.
                
                Answer the following question: {question}
                By searching the following video transcript: {docs}
                
                Only use the factual information from the transcript to answer the question.
                
                If you feel like you don't have enough information to answer the question, say "I don't know".
                
                Your answers should be verbose and detailed.
                """,   
    )
    chain = LLMChain(llm = llm ,prompt = prompt)
    response = chain.run(question = query , docs =docs_page_content)
    response = response.replace("\n" , "")
    return response

In [65]:
db = youtube_url_to_db("https://youtu.be/d4yCWBGFCEs?si=hBxp9DFrXRKuPJPj")
ans = get_resp_query(db , "what is a langchain as per content " , 10)

[Document(metadata={'source': 'd4yCWBGFCEs'}, page_content="welcome to this generative AI mini course first we will understand the Gen AI fundamentals then we will learn Lang chain which is a python framework used for building gen application and in the end we will build two endtoend gen AI projects the first project will be using commercial GPT model where we will build equity news research tool the second project will be using open-source llm model where we will build a Q&A tool in retail industry let's start with the definition of gen ai ai can be categorized into two sections generative ai non-generative ai when you talk about non-generative AI you are dealing with problems such as you have a chest x-ray and you want to find out if this person has pneumonia or not or maybe you have some data on person's credit history and you want to figure out if the person should be given a loan or not in these problems you are not creating new content you have data and based on that data you are

In [66]:
ans

'LangChain is a Python framework used to build applications on top of large language models (LLMs). It acts as a way to connect different LLMs, such as GPT or Lama 2, to an application, allowing for flexibility in choosing and switching between different models. This is particularly useful in situations where cost or availability of a specific LLM becomes a concern. '