In [48]:
import os
from dotenv import load_dotenv
from google import genai
# Pull API keys from a local .env file into the process environment.
load_dotenv()

# Either key is None when its variable is absent from the environment.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Keep a handle on the Gemini client instead of constructing it and
# immediately discarding the instance.
gemini_client = genai.Client(api_key=GEMINI_API_KEY)

# Video URL
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=qU3fmidNbJE"

In [49]:
#Model Setup
#defining LLM 
from langchain_openai.chat_models import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

# OpenAI chat model; built here, though the cells visible in this notebook
# route their prompts through `gemini_model`.
model = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model="gpt-3.5-turbo",
)

# Gemini chat model used by the chains in the cells that follow.
gemini_model = ChatGoogleGenerativeAI(
    google_api_key=GEMINI_API_KEY,
    model="gemini-2.0-flash-001",
)


In [50]:
# Smoke-test the model by asking a simple question before building any chain.
gemini_model.invoke(
    "What MLB team won the World Series during the COVID-19 pandemic?"
)

AIMessage(content='The Los Angeles Dodgers won the World Series in 2020, which was during the COVID-19 pandemic.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-001', 'safety_ratings': []}, id='run-d0899f43-8fc0-4687-9f94-aee80bc67877-0', usage_metadata={'input_tokens': 15, 'output_tokens': 26, 'total_tokens': 41, 'input_token_details': {'cache_read': 0}})

In [51]:
#we are using simple StrOutputParser to extract the answer as a string
from langchain_core.output_parsers import StrOutputParser

# StrOutputParser turns the model's message into a plain string.
parser = StrOutputParser()

# Pipe the model into the parser so invoking the chain yields text directly.
chain = gemini_model | parser

# Same question as the raw model test; the prompt previously started with
# "hat" instead of "What".
chain.invoke("What MLB team won the World Series during the COVID-19 pandemic?")

'The Los Angeles Dodgers won the World Series in 2020, which was during the COVID-19 pandemic.'

In [52]:
from langchain_core.prompts import ChatPromptTemplate
# Prompt with two placeholders: the grounding `context` and the user's
# `question`. The model is told to admit when the context has no answer.
template = """
Answer the question based on context below. If you can't answer the question, reply "I don't know.".

Context:{context}
Question:{question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Render the prompt with sample values to inspect the final text.
prompt.format(
    context="Dnya's sister is Praju",
    question="Who is Dnya's sister?",
)

'Human: \nAnswer the question based on context below. If you can\'t answer the question, reply "I don\'t know.".\n\nContext:Dnya\'s sister is Praju\nQuestion:Who is Dnya\'s sister?\n'

In [53]:
# prompt -> model -> string parser; the input dict supplies both template slots.
chain = prompt | gemini_model | parser

chain.invoke({
    "context": "Dnya's sister is Praju",
    "question": "Who is Dnya's sister?",
})

'Praju'

In [54]:
# Second-stage prompt: translate an `answer` into the requested `language`.
translation_prompt = ChatPromptTemplate.from_template("Translate {answer} to {language}")

In [55]:
from operator import itemgetter

# Compose two stages: `chain` answers the question, and that answer is fed,
# together with the caller's "language" value, into the translation prompt.
translation_chain = (
    {
        "answer": chain,
        "language": itemgetter("language"),
    }
    | translation_prompt
    | gemini_model
    | parser
)

translation_chain.invoke({
    "context": "Dnya's sister is Praju and one more sister Vaish.",
    "question": "How many sisters does Dnya have? and what are their names?",
    "language": "Marathi",
})

'The most accurate translation of "Dnya has two sisters: Praju and Vaish" to Marathi is:\n\n**ज्ञाला दोन बहिणी आहेत: प्राजू आणि वैश.**\n\nHere\'s a breakdown:\n\n*   **ज्ञाला (Dnyala):** "To Dnya" (dative case, indicating possession)\n*   **दोन (don):** Two\n*   **बहिणी (bahini):** Sisters\n*   **आहेत (aahet):** Are/have\n*   **प्राजू (Praju):** Praju\n*   **आणि (aani):** And\n*   **वैश (Vaish):** Vaish\n\nTherefore, the complete sentence means "Dnya has two sisters: Praju and Vaish."'

Transcribing YouTube Video

In [57]:
# NOTE: everything between the triple quotes below is a string literal, not
# executed code. It preserves a disabled transcription path that would download
# the video's audio stream and transcribe it with a Whisper "base" model into
# transcription.txt. The only effect of running this cell is echoing the string.
# NOTE(review): `YouTube`, `whisper` and `tempfile` are not imported in the
# visible cells, so the snippet would need those imports before being re-enabled.
'''/*
# if transcribe file not created 
if not os.path.exists("transcription.txt"):
    youtube=YouTube(YOUTUBE_VIDEO)
    audio=youtube.streams.filter(only_audio=True).first()

    #loading base model, not most accurate but its fast
    whisper_model=whisper.load_model("base")

    with tempfile.TemporaryDirectory() as tmpdir:
        file=audio.download(output_path=tmpdir)
        transcription=whisper_model.transcribe(file,fp16=False)["text"].strip()

        with open("transcription.txt","w") as file:
            file.write(transcription)
'''

'/*\n# if transcribe file not created \nif not os.path.exists("transcription.txt"):\n    youtube=YouTube(YOUTUBE_VIDEO)\n    audio=youtube.streams.filter(only_audio=True).first()\n\n    #loading base model, not most accurate but its fast\n    whisper_model=whisper.load_model("base")\n\n    with tempfile.TemporaryDirectory() as tmpdir:\n        file=audio.download(output_path=tmpdir)\n        transcription=whisper_model.transcribe(file,fp16=False)["text"].strip()\n\n        with open("transcription.txt","w") as file:\n            file.write(transcription)\n'

In [58]:
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs

def get_video_id(url):
    """Extract the YouTube video ID from a URL.

    Supported forms:
      * ``https://www.youtube.com/watch?v=<id>`` (any host, via the ``v`` query
        parameter, as before)
      * ``https://youtu.be/<id>``
      * ``https://www.youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``
        and ``/v/<id>`` (also on ``youtube-nocookie.com``)

    Args:
        url: The video URL as a string.

    Returns:
        The video ID string, or ``None`` when the URL is empty or no ID can be
        found in it.
    """
    if not url:
        return None

    parsed = urlparse(url)

    # Standard watch URLs carry the ID in the `v` query parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    segments = [segment for segment in parsed.path.split("/") if segment]

    # Short links: the ID is the first path segment.
    if host == "youtu.be":
        return segments[0] if segments else None

    # Path-based forms on YouTube domains: /<kind>/<id>.
    on_youtube = any(
        host == domain or host.endswith("." + domain)
        for domain in ("youtube.com", "youtube-nocookie.com")
    )
    if on_youtube and len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    return None

# Step 1: Set the video URL
url = "https://www.youtube.com/watch?v=FwOTs4UxQS4"

# Step 2: Get transcript text
video_id = get_video_id(url)
if not video_id:
    # Fail here with a clear message instead of handing None to the transcript API.
    raise ValueError(f"Could not extract a YouTube video ID from {url!r}")
transcript = YouTubeTranscriptApi.get_transcript(video_id)
# Each transcript entry is indexed by "text"; join the pieces into one string.
text = " ".join(t["text"] for t in transcript)

# Step 3: Save to a .txt file
filename = "transcript.txt"
with open(filename, "w", encoding="utf-8") as f:
    f.write(text)

print(f"Transcript saved to {filename}")



Transcript saved to transcript.txt


In [59]:
# Read the transcript back as UTF-8, the encoding it was written with, so the
# result does not depend on the platform's default text encoding.
with open("transcript.txt", encoding="utf-8") as transcript_file:
    transcription = transcript_file.read()

# Preview only the first 1000 characters.
transcription[:1000]

"ai ai ai ai ai ai you know more agentic agentic capabilities an AI agent agents agentic workflows agents agents agent agent agent agent agentic all right most explanations of AI agents is either too technical or too basic this video is meant for people like myself you have zero technical background but you use AI tools regularly and you want to learn just enough about AI agents to see how it affects you in this video we'll follow a simple one two three learning path by building on concepts you already understand like chatbt and then moving on to AI workflows and then finally AI agents all the while using examples you will actually encounter in real life and believe me when I tell you those intimidating terms you see everywhere like rag rag or react they're a lot simpler than you think let's get started kicking things off at level one large language models popular AI chatbots like CHBT Google Gemini and Claude are applications built on top of large language models LLMs and they're fant

In [60]:
# Ask a question with the entire transcript as context. The context must be the
# transcript text (`transcription`), not `transcript`, which is the raw list of
# per-segment entries returned by the transcript API.
try:
    answer = chain.invoke(
        {
            "context": transcription,
            "question": "What is AI agent?",
        }
    )
    # An expression inside `try` is not auto-displayed, so print the answer.
    print(answer)
except Exception as e:
    # Deliberately broad: this cell reports whatever goes wrong when the whole
    # transcript is sent in one request (presumably a size/context-limit error).
    print(e)

In [61]:

from langchain_community.document_loaders import TextLoader

# Load the saved transcript as LangChain documents. The file was written as
# UTF-8, so decode it explicitly rather than relying on the platform default.
loader = TextLoader("transcript.txt", encoding="utf-8")
text_documents = loader.load()
text_documents

[Document(metadata={'source': 'transcript.txt'}, page_content='ai ai ai ai ai ai you know more agentic agentic capabilities an AI agent agents agentic workflows agents agents agent agent agent agent agentic all right most explanations of AI agents is either too technical or too basic this video is meant for people like myself you have zero technical background but you use AI tools regularly and you want to learn just enough about AI agents to see how it affects you in this video we\'ll follow a simple one two three learning path by building on concepts you already understand like chatbt and then moving on to AI workflows and then finally AI agents all the while using examples you will actually encounter in real life and believe me when I tell you those intimidating terms you see everywhere like rag rag or react they\'re a lot simpler than you think let\'s get started kicking things off at level one large language models popular AI chatbots like CHBT Google Gemini and Claude are applica

In [62]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the transcript into chunks of up to 1000 characters with a 40-character
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=40)

# Split once and keep the result; previously the split ran twice and the first
# result was thrown away without being displayed.
documents = text_splitter.split_documents(text_documents)

# Preview the first five chunks.
documents[:5]

In [67]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Embedding model used for every vector store in the cells that follow.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=GEMINI_API_KEY,
    model="models/text-embedding-004",
)

# Embed one query to inspect the vector's size and its first few components.
embedded_query = embeddings.embed_query("Who is Dnya's sister?")

print(f"Embedding length: {len(embedded_query)}")
print(embedded_query[:10])

Embedding length: 768
[0.0234909038990736, -0.007969883270561695, -0.060786258429288864, -0.0255004670470953, -0.00024231472343672067, 0.00904453732073307, -0.0014154528034850955, -0.029917960986495018, 0.01235028076916933, 0.030848467722535133]


In [68]:
# Two candidate sentences to compare against the query embedding: the first
# mentions a sister, the second a mother.
sentence1 = embeddings.embed_query(
    "DNya's sister is Praju"
)
sentence2 = embeddings.embed_query(
    "Vaish's mother is a teacher"
)

In [69]:
from sklearn.metrics.pairwise import cosine_similarity

# cosine_similarity takes lists of vectors, so each embedding is wrapped in a
# one-element list and the single score is read back with [0][0].
query_sentence1_similarity = cosine_similarity(
    [embedded_query],
    [sentence1],
)[0][0]
query_sentence2_similarity = cosine_similarity(
    [embedded_query],
    [sentence2],
)[0][0]

# Show both scores side by side.
(query_sentence1_similarity, query_sentence2_similarity)

(0.7806410337241658, 0.39281267505323264)

In [70]:
from langchain_community.vectorstores import DocArrayInMemorySearch


# Toy in-memory vector store built from a handful of short facts, embedded with
# the `embeddings` model.
sample_sentences = [
    "Mary's sister is Susana",
    "John and Tommy are brothers",
    "Patricia likes white cars",
    "Pedro's mother is a teacher",
    "Lucia drives an Audi",
    "Mary has two siblings",
]

vectorstore1 = DocArrayInMemorySearch.from_texts(sample_sentences, embedding=embeddings)

In [71]:
# Fetch the three stored sentences closest to the query, with their scores.
vectorstore1.similarity_search_with_score(
    query="Who is Mary's sister?",
    k=3,
)

[(Document(metadata={}, page_content="Mary's sister is Susana"),
  0.7440929421471949),
 (Document(metadata={}, page_content='Mary has two siblings'),
  0.7013327194745267),
 (Document(metadata={}, page_content='John and Tommy are brothers'),
  0.5149859044829919)]

In [72]:
# Wrap the vector store as a retriever so it can be composed into a chain.
retriever1 = vectorstore1.as_retriever()

# Invoking the retriever with a question returns the matching documents.
retriever1.invoke(
    "Who is Mary's sister?"
)

[Document(metadata={}, page_content="Mary's sister is Susana"),
 Document(metadata={}, page_content='Mary has two siblings'),
 Document(metadata={}, page_content='John and Tommy are brothers'),
 Document(metadata={}, page_content='Patricia likes white cars')]

In [73]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Build the prompt inputs from a single question string: "context" comes from
# the retriever, "question" is the input passed through unchanged.
setup = RunnableParallel(
    {
        "context": retriever1,
        "question": RunnablePassthrough(),
    }
)

setup.invoke("What color is Patricia's car?")

{'context': [Document(metadata={}, page_content='Patricia likes white cars'),
  Document(metadata={}, page_content='Lucia drives an Audi'),
  Document(metadata={}, page_content="Pedro's mother is a teacher"),
  Document(metadata={}, page_content="Mary's sister is Susana")],
 'question': "What color is Patricia's car?"}

In [74]:
# Retrieval-augmented chain over the toy store: inputs -> prompt -> model -> text.
chain = (
    setup
    | prompt
    | gemini_model
    | parser
)

chain.invoke("What color is Patricia's car?")

'white'

In [75]:
# Second question against the same toy retrieval chain.
chain.invoke(
    "What car does Lucia drive?"
)

'Lucia drives an Audi.'

In [76]:
# In-memory vector store over the transcript chunks in `documents`.
vectorstore2 = DocArrayInMemorySearch.from_documents(
    documents,
    embedding=embeddings,
)

In [78]:
# Same retrieval chain shape as before, now backed by the transcript store.
transcript_retriever = vectorstore2.as_retriever()

chain = (
    {
        "context": transcript_retriever,
        "question": RunnablePassthrough(),
    }
    | prompt
    | gemini_model
    | parser
)

chain.invoke("What is AI Agents?")

"AI agents must reason and act. A key trait of AI agents is their ability to iterate. The one massive change that has to happen in order for an AI workflow to become an AI agent is for the human decision maker to be replaced by an LLM. In other words, the AI agent must reason what's the most efficient way to compile these news articles."

In [81]:
from langchain_pinecone import PineconeVectorStore

# Name of the Pinecone index that receives the transcript chunks.
index_name = "rag2"

# Embed `documents` and store them in the named index.
# NOTE(review): this runs on every execution of the cell; confirm whether
# re-running adds duplicate vectors to the index.
pinecone = PineconeVectorStore.from_documents(
    documents=documents,
    embedding=embeddings,
    index_name=index_name,
)

In [82]:
# Query the Pinecone-backed store and keep at most the first three hits.
pinecone.similarity_search(
    "Real life example of AI agents?"
)[:3]

[Document(metadata={'source': 'transcript.txt'}, page_content="drafted V1 of a LinkedIn post how do I make sure it's good oh I know i'll add another step where an LM will critique the post based on LinkedIn best practices and let's repeat this until the best practices criteria are all met and after a few cycles of that we have the final output that was a hypothetical example so let's move on to a real world AI agent example andrew is a preeeminent figure in AI and he created this demo website that illustrates how an AI agent works i'll link the full video down below but when I search for a keyword like skier enter the AI vision agent in the background is first reasoning what a skier looks like a person on skis going really fast in snow for example right i'm not sure and then it's acting by looking at clips in video footage trying to identify what it thinks a skier is indexing that clip and then returning that clip to us although this might not feel impressive remember that an AI agent 

In [83]:
# Retrieval chain backed by the Pinecone store instead of the in-memory one.
pinecone_retriever = pinecone.as_retriever()

chain = (
    {
        "context": pinecone_retriever,
        "question": RunnablePassthrough(),
    }
    | prompt
    | gemini_model
    | parser
)

chain.invoke("Real life example of AI agents?")

'Andrew created a demo website that illustrates how an AI agent works. When a keyword like "skier" is entered, the AI vision agent reasons what a skier looks like and then acts by looking at clips in video footage to identify what it thinks a skier is, indexing that clip, and then returning that clip to us.'