# Install requirements

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
video_id = "M2UXb_bxQ2U"
# Fetch the English captions and flatten the timed snippets into one string.
# NOTE(review): `get_transcript` appears to be deprecated in newer
# youtube-transcript-api releases in favour of `YouTubeTranscriptApi().fetch(...)`
# — confirm against the installed version.
try:
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=["en"])
except TranscriptsDisabled as exc:
    # Stop here with a clear message: the following cells all read `transcript`,
    # so merely printing would resurface later as an unrelated NameError.
    raise RuntimeError(f"no caption available for video {video_id!r}") from exc
transcript = " ".join(chunk["text"] for chunk in transcript_list)
print(transcript)

[Music] I've been staying inside too much lately probably contributes to my anxiety doctor says I need to get more vitamin D so I take a supplement two times a week if you got inside my head you see there's no point in going to therapy at least that's what I tell myself cuz I don't need another purchase added to my monthly Fe I'm doing fine all right okay Sun keeps coming up each day see it do M no Shades it's all right just passing time I watch it fake can't do too much anyways in this small town it way I'm doing fun I swear I'm doing am I really fine or am I just really good at ly I don't really know lately I've been on autopilot in my mind keeps me awake every single night I stay up late I don't know how to sleep when I'm overthinking every little thing but I'm taking my pills and I'm paying my bills so I do I still feel the same guess I play this guitar hoping that tomorrow I can say that I'm doing fine all right okay Sun keeps coming up each day see it through M no it's allor righ

# Indexing (Text Splitting)

In [None]:
# Cut the transcript into chunks of at most 100 characters, overlapping by 20.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=100,
)
chunks = splitter.create_documents([transcript])
len(chunks)

In [6]:
# Peek at the first three chunks to sanity-check the split.
chunks[:3]

[Document(metadata={}, page_content="[Music] I've been staying inside too much lately probably contributes to my anxiety doctor says I"),
 Document(metadata={}, page_content='doctor says I need to get more vitamin D so I take a supplement two times a week if you got inside'),
 Document(metadata={}, page_content="if you got inside my head you see there's no point in going to therapy at least that's what I tell")]

In [11]:
# Embed every chunk with a sentence-transformers model and index the vectors in FAISS.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-MiniLM-L3-v2",
)
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding)

In [15]:
# Show the mapping from FAISS index position to docstore document ID.
vector_store.index_to_docstore_id

{0: '5ba5171c-d930-47cb-ba73-e643e0a1c0e2',
 1: '3100bf45-9f84-4b99-a007-16c968accc84',
 2: '3602151a-e88a-4bb8-87ad-10503e1c15ad',
 3: '77a21914-0c01-4b52-8b24-4ae01e728789',
 4: '220f0608-b12f-4690-9b6d-9780a6760d18',
 5: 'ac66b9ae-9c42-43a8-b5e6-3a66ad434836',
 6: '4ccfceb2-7159-4adb-a002-f1f766a67119',
 7: 'a91eb3e4-f319-4fca-80b7-dd5c62a0b91f',
 8: '695108f8-e84b-4bf8-8b08-552c2814244f',
 9: '8ce8bdbb-2d98-4c6c-bca6-4411527ec5fa',
 10: '68e3bfbd-f896-43dd-a997-91b08079423b',
 11: 'ed96d121-2b47-4d6d-9c3c-0a3a2ec17aac',
 12: 'cf2ff82c-bcc7-487c-9ced-e5ef12e27a35',
 13: '6a5d3140-0acd-4999-bb65-9b0ac938043e'}

In [16]:
# Look up the last two chunks by ID. The IDs are read from the store instead of
# being pasted in: they are auto-generated UUIDs when none are supplied, so
# hard-coded values stop matching as soon as the index is rebuilt.
last_two_ids = list(vector_store.index_to_docstore_id.values())[-2:]
vector_store.get_by_ids(last_two_ids)

[Document(id='cf2ff82c-bcc7-487c-9ced-e5ef12e27a35', metadata={}, page_content="up each day see it through M no it's allor right just passing time I'll watch it fake can't do too"),
 Document(id='6a5d3140-0acd-4999-bb65-9b0ac938043e', metadata={}, page_content="fake can't do too much anyways in this small town it away I'm doing fun")]

# Retrieval

In [None]:
# Wrap the FAISS index as a retriever returning the 2 most similar chunks.
retriver = vector_store.as_retriever(
    search_kwargs={"k": 2},
    search_type='similarity',
)
retriver

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000130B436DD10>, search_kwargs={'k': 2})

In [20]:
# Quick retrieval check with a sample question.
retriver.invoke("How is my life?")

[Document(id='a91eb3e4-f319-4fca-80b7-dd5c62a0b91f', metadata={}, page_content="just really good at ly I don't really know lately I've been on autopilot in my mind keeps me awake"),
 Document(id='8ce8bdbb-2d98-4c6c-bca6-4411527ec5fa', metadata={}, page_content="to sleep when I'm overthinking every little thing but I'm taking my pills and I'm paying my bills")]

# Augmentation

In [21]:
# Chat model used for the generation step.
# NOTE(review): presumably the API key comes from the environment populated by
# load_dotenv() — confirm. Also verify 'gemini-1.5-pro' is still a served model.
llm=ChatGoogleGenerativeAI(model='gemini-1.5-pro')

In [None]:
# Prompt that restricts the model to the retrieved transcript context.
# It has two slots: {context} (retrieved chunk text) and {question}.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    You are helpful assistant who give answer from the youtube transcript.
    Answer only from the provided transcript context.
    The context is retrived using RecursiveCharacterTextSplitter in langchain.
    If the context is insufficient try to manage it if possible otherwise just say you don't know.
    context: {context} \n
    question: {question}
""",
)

In [24]:
# Retrieve the chunks most similar to the user's question.
q = "How is the life?"
retrived_docs = retriver.invoke(input=q)
retrived_docs

[Document(id='a91eb3e4-f319-4fca-80b7-dd5c62a0b91f', metadata={}, page_content="just really good at ly I don't really know lately I've been on autopilot in my mind keeps me awake"),
 Document(id='ed96d121-2b47-4d6d-9c3c-0a3a2ec17aac', metadata={}, page_content="that tomorrow I can say that I'm doing fine all right okay Sun keeps coming up each day see it")]

In [26]:
# Stitch the retrieved chunk texts together, one chunk per line.
context_text = "\n".join([retrieved.page_content for retrieved in retrived_docs])
context_text

"just really good at ly I don't really know lately I've been on autopilot in my mind keeps me awake\nthat tomorrow I can say that I'm doing fine all right okay Sun keeps coming up each day see it"

In [27]:
# Fill the template's two slots with the retrieved context and the question.
final_prompt = prompt.invoke(dict(context=context_text, question=q))
final_prompt

StringPromptValue(text="\n    You are helpful assistant who give answer from the youtube transcript.\n    Answer only from the provided transcript context.\n    The context is retrived using RecursiveCharacterTextSplitter in langchain.\n    If the context is insufficient try to manage it if possible otherwise just say you don't know.\n    context: just really good at ly I don't really know lately I've been on autopilot in my mind keeps me awake\nthat tomorrow I can say that I'm doing fine all right okay Sun keeps coming up each day see it \n\n    question: How is the life?\n")

# Generation

In [28]:
# Send the filled-in prompt to the chat model and show the raw message object.
answer=llm.invoke(final_prompt)
answer

AIMessage(content='The speaker describes being on autopilot lately and having trouble sleeping, but also expresses hope that tomorrow will be better, saying they\'ll be "doing fine."  They also acknowledge the sun keeps rising each day.  So it seems like a mixed bag, with some struggles but also a sense of perseverance.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-a68785eb-4961-4a09-9f16-ac84b50b7de0-0', usage_metadata={'input_tokens': 125, 'output_tokens': 62, 'total_tokens': 187, 'input_token_details': {'cache_read': 0}})

In [29]:
# Print only the text of the model's reply.
print(answer.content)

The speaker describes being on autopilot lately and having trouble sleeping, but also expresses hope that tomorrow will be better, saying they'll be "doing fine."  They also acknowledge the sun keeps rising each day.  So it seems like a mixed bag, with some struggles but also a sense of perseverance.


# Building Chain

In [None]:
from langchain_core.runnables import RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
load_dotenv()

In [10]:
def format_docs(retrived_docs):
    """Join the ``page_content`` of each retrieved document, one per line.

    Args:
        retrived_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with the page contents separated by newlines.
    """
    page_texts = [document.page_content for document in retrived_docs]
    return "\n".join(page_texts)

In [11]:
video_id = "M2UXb_bxQ2U"
# Fetch the English captions and flatten the timed snippets into one string.
# NOTE(review): `get_transcript` appears to be deprecated in newer
# youtube-transcript-api releases in favour of `YouTubeTranscriptApi().fetch(...)`
# — confirm against the installed version.
try:
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=["en"])
except TranscriptsDisabled as exc:
    # Stop here with a clear message: the split below reads `transcript`, so
    # merely printing would fall through to a NameError on the next statement.
    raise RuntimeError(f"no caption available for video {video_id!r}") from exc
transcript = " ".join(chunk["text"] for chunk in transcript_list)
print(transcript)

# Cut the transcript into chunks of at most 100 characters, overlapping by 20.
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
chunks = splitter.create_documents([transcript])

[Music] I've been staying inside too much lately probably contributes to my anxiety doctor says I need to get more vitamin D so I take a supplement two times a week if you got inside my head you see there's no point in going to therapy at least that's what I tell myself cuz I don't need another purchase added to my monthly Fe I'm doing fine all right okay Sun keeps coming up each day see it do M no Shades it's all right just passing time I watch it fake can't do too much anyways in this small town it way I'm doing fun I swear I'm doing am I really fine or am I just really good at ly I don't really know lately I've been on autopilot in my mind keeps me awake every single night I stay up late I don't know how to sleep when I'm overthinking every little thing but I'm taking my pills and I'm paying my bills so I do I still feel the same guess I play this guitar hoping that tomorrow I can say that I'm doing fine all right okay Sun keeps coming up each day see it through M no it's allor righ

In [12]:
# Embed the chunks, index them in FAISS, and expose the index as a top-2 retriever.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-MiniLM-L3-v2",
)
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding)
retriver = vector_store.as_retriever(
    search_kwargs={"k": 2},
    search_type='similarity',
)

In [16]:
# Chat model used for the generation step.
llm = ChatGoogleGenerativeAI(model='gemini-1.5-pro')

# Prompt that restricts the model to the retrieved transcript context.
# It has two slots: {context} (retrieved chunk text) and {question}.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    You are helpful assistant who give answer from the youtube transcript.
    Answer only from the provided transcript context.
    The context is retrived using RecursiveCharacterTextSplitter in langchain.
    If the context is insufficient try to manage it if possible otherwise just say you don't know.
    context: {context} \n
    question: {question}
""",
)

In [13]:
# Fan the incoming question out into the two prompt inputs:
#   context  -> retrieve matching chunks, then join their text
#   question -> passed through unchanged
parallel_chain = RunnableParallel(
    context=retriver | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [15]:
# Check that the fan-out step yields both prompt inputs for a sample question.
parallel_chain.invoke('What am I doing')

{'context': "this small town it way I'm doing fun I swear I'm doing am I really fine or am I just really good at\nthat's what I tell myself cuz I don't need another purchase added to my monthly Fe I'm doing fine",
 'question': 'What am I doing'}

In [17]:
# Output parser placed at the end of the chain, after the chat model.
parser=StrOutputParser()

In [18]:
# Full pipeline: build prompt inputs -> fill prompt -> call the model -> parse the output.
main_chain=parallel_chain | prompt | llm | parser

In [19]:
# Run the whole chain on a question. The retriever is configured with k=2, so
# only two chunks of the transcript are passed to the model as context.
main_chain.invoke('Can you summarize the video')

'The speaker expresses feelings of futility and a sense of just "passing time." They mention faking something they "can\'t do," and believe there\'s no point in therapy, or at least that\'s what they tell themselves.'