In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# API keys for the two external services used in this notebook.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail fast with a readable message: a missing key would otherwise be passed
# along as None and only surface later as an opaque authentication error.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("PINECONE_API_KEY", PINECONE_API_KEY),
        ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )


## Model

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

# Gemini chat model used by the chains built later in this notebook.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
)

# Turns the chat model's message output into a plain string.
parser = StrOutputParser()


'The Los Angeles Dodgers won the World Series in 2020, during the COVID-19 pandemic.'

Defining a prompt template:

In [15]:
from langchain.prompts import ChatPromptTemplate

# Instruction text for the model. The `{context}` and `{question}` placeholders
# are filled in when the chain is invoked; the model is told to answer only
# from the supplied context and to reply "I don't know." otherwise.
prompt_template = '''
Answer the question based on the context below. Respond in a full sentence. If you can't answer the question reply "I don't know."

Context = {context}

Question = {question}
'''

# Wrap the raw string in a chat prompt template so it can be piped into the model.
prompt = ChatPromptTemplate.from_template(prompt_template)


In [26]:
# Compose the pipeline: fill the prompt, call the model, parse the reply into a string.
chain = prompt | model | parser

In [27]:
# Sanity-check the chain with a hand-written context before any retrieval is wired in.
chain.invoke({"context":"Mary's sister is Anna", "question":"Who is Mary's sister"})

"Mary's sister is Anna."

## Getting the video transcript

We download the audio track from the video URL and transcribe it with Whisper.

In [None]:
import whisper
from pytubefix import YouTube
import tempfile

# Video to transcribe.
# Alternative video: https://www.youtube.com/watch?v=cdiD-9MMpb0
video_url = "https://www.youtube.com/watch?v=SGSOCuByo24&ab_channel=LexFridman"
yt = YouTube(video_url)

# Only the audio track is needed for transcription.
audio_stream = yt.streams.filter(only_audio=True).first()
if audio_stream is None:
    # `.first()` yields None when no stream matches the filter; stop here with a
    # clear message instead of failing on `.download` below.
    raise RuntimeError(f"No audio-only stream available for {video_url}")

# Load the Whisper "base" checkpoint used for transcription in the next cell.
transcription_model = whisper.load_model("base")

# Save the audio locally; the transcription cell reads and then deletes this file.
filename = "audio.mp4"
audio_stream.download(filename=filename)


In [None]:
# Transcribe first and only then open the output file: opening it up front would
# truncate transcription.txt even if the (long-running) transcription fails.
transcription = transcription_model.transcribe(filename, fp16=False, verbose=True)["text"].strip()

with open("transcription.txt", "w", encoding="utf-8") as f:
    f.write(transcription)

# The audio is only removed after the transcript has been written successfully,
# so a failed run can be retried without downloading again.
os.remove(filename)

print("Transcription complete and audio file deleted.")

Detecting language using up to the first 30 seconds. Use `--language` to specify the language
Detected language: English
[00:00.000 --> 00:06.320]  The following is a conversation in Yalekun. He's considered to be one of the fathers of deep learning,
[00:06.320 --> 00:11.520]  which, if you've been hiding under a rock, is the recent revolution in AI that's captivated
[00:11.520 --> 00:17.440]  the world with the possibility of what machines can learn from data. He's a professor in New
[00:17.440 --> 00:23.920]  York University, a vice president and chief AI scientist at Facebook, and co-recipient the Turing
[00:23.920 --> 00:28.960]  Award for his work on deep learning. He's probably best known as the founding father of
[00:28.960 --> 00:34.400]  convolutional neural networks. In particular, their application to optical character recognition
[00:34.400 --> 00:41.760]  and the famed M-NIST data set. He is also an outspoken personality, unafraid to speak his mind in
[00:41.760 --> 00:47.

## Splitting the transcript into chunks

In [8]:
from langchain_community.document_loaders import TextLoader

# The transcript is written as UTF-8, so read it back with the same encoding
# rather than relying on the platform's default (which differs on some systems).
loader = TextLoader("transcription.txt", encoding="utf-8")
text_documents = loader.load()
text_documents

[Document(metadata={'source': 'transcription.txt'}, page_content="The following is a conversation in Yalekun. He's considered to be one of the fathers of deep learning, which, if you've been hiding under a rock, is the recent revolution in AI that's captivated the world with the possibility of what machines can learn from data. He's a professor in New York University, a vice president and chief AI scientist at Facebook, and co-recipient the Turing Award for his work on deep learning. He's probably best known as the founding father of convolutional neural networks. In particular, their application to optical character recognition and the famed M-NIST data set. He is also an outspoken personality, unafraid to speak his mind in a distinctive French accent and explore provocative ideas both in the rigorous medium of academic research and the somewhat less rigorous medium of Twitter and Facebook. This is the Artificial Intelligence Podcast. If you enjoy it, subscribe on YouTube, give it 5 s

In [13]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the transcript into chunks of at most roughly 1500 characters, with a
# 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1500,
)
documents = text_splitter.split_documents(text_documents)

In [14]:
# Number of chunks produced by the splitter.
print(len(documents))

48


## Embedding and Vector Database

In [7]:
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

# Embedding model used both to index the chunks and to embed queries.
# The API key is passed explicitly, matching how the chat model is configured,
# instead of depending implicitly on the process environment.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-exp-03-07",
    google_api_key=GOOGLE_API_KEY,
)


Manually create an index named `youtube-rag-2` on [Pinecone](https://www.pinecone.io/) with dimension 3072 (to match the output dimension of the embedding model we are using).

In [17]:
from pinecone import Pinecone

# Pinecone client authenticated with the key loaded from the environment.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Open a handle to the manually created index and print its stats as a sanity
# check (dimension, metric and vector count appear in the output).
index = pc.Index("youtube-rag-2")
print(index.describe_index_stats())

{'dimension': 3072,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 48}},
 'total_vector_count': 48,
 'vector_type': 'dense'}


In [18]:
from langchain_pinecone import PineconeVectorStore

# Keep the index *name* in its own variable so that `index` (the Pinecone index
# handle created earlier) is not silently rebound to a plain string.
index_name = "youtube-rag-2"

# LangChain vector store backed by the existing Pinecone index.
vectorstore = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embeddings)


We process a limited number of documents per minute to respect the rate limit of the free tier of the Google GenAI API. Specifically, we delay each request to avoid exceeding the allowed number of requests per minute.


In [19]:
import time 

# Rate-limit settings: spread requests evenly across the one-minute window.
MAX_REQUESTS_PER_MINUTE = 4
REQUEST_INTERVAL = 60  # length of the rate-limit window, in seconds
DELAY_BETWEEN_DOCS = REQUEST_INTERVAL / MAX_REQUESTS_PER_MINUTE  # 15 seconds delay between documents

for idx, doc in enumerate(documents):
    print(f"Embedding document {idx + 1} of {len(documents)}...")

    # Embed and add to Pinecone. A deterministic ID per chunk means that
    # re-running this cell overwrites the same vectors instead of appending
    # a fresh set of duplicates under new random IDs.
    vectorstore.add_documents([doc], ids=[f"transcription-chunk-{idx}"])

    print(f"Processed document {idx + 1}/{len(documents)}.")

    # Delay to stay within MAX_REQUESTS_PER_MINUTE requests per minute
    if idx < len(documents) - 1:  # Avoid sleeping after the last document
        print(f"Waiting {DELAY_BETWEEN_DOCS} seconds before processing the next document...")
        time.sleep(DELAY_BETWEEN_DOCS)



Embedding document 1 of 48...
Processed document 1/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 2 of 48...
Processed document 2/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 3 of 48...
Processed document 3/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 4 of 48...
Processed document 4/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 5 of 48...
Processed document 5/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 6 of 48...
Processed document 6/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 7 of 48...
Processed document 7/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 8 of 48...
Processed document 8/48.
Waiting 15.0 seconds before processing the next document...
Embedding document 9 of 48...
Processed document 9/48.
Waiting 15.0 seconds befo

In [None]:
# Ask the vector store for the three closest chunks directly, rather than
# fetching the default number of results and slicing afterwards.
vectorstore.similarity_search(
    "What does Yann LeCun say about Model-based reinforcement learning?",
    k=3,
)

[Document(id='b3611c9e-0140-4228-9562-fbe32514d97f', metadata={'source': 'transcription.txt'}, page_content="idea of the effect of turning the wheel on the car and, you know, we know we need to stay on the road. So there's a lot of things that we bring to the table, which is basically our predictive model of the world. And that model allows us to not do stupid things and to basically stay within the context of things we need to do. We still face, you know, unpredictable situations and that's how we learn. But that allows us to learn really, really, really quickly. So that's called model-based reinforcement learning. There's some imitation and supervision because we have a driving instructor that tells us occasionally what to do. But most of the learning is learning the model, learning physics that we've done since we were babies. That's where almost all the learning physics is somewhat transferable from, it's transferable from syn descent. Stupid things are the same everywhere. Yeah. I

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Retriever view of the vector store; used as the context source of the chain.
retriever = vectorstore.as_retriever()


def format_docs(docs):
    """Join the text of the retrieved chunks into a single context string.

    Without this step the prompt would receive the list of Document objects
    and embed their repr (ids and metadata included) instead of only the
    transcript text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The input question is sent to the retriever (to build the context) and is
# also passed through unchanged as the `question` prompt variable.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt | model | parser
)
response = chain.invoke("What does Yann LeCun say about Model-based reinforcement learning?")

In [23]:
# Display the answer produced by the retrieval chain.
response

'Yann LeCun describes model-based reinforcement learning as incorporating a predictive model of the world that allows for quick learning by avoiding "stupid things," while still allowing for learning from unpredictable situations.'