In [None]:
import os
from dotenv import load_dotenv

load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")


## Model

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

model = ChatGoogleGenerativeAI(google_api_key=GOOGLE_API_KEY, model="gemini-1.5-flash")
parser = StrOutputParser()


Defining a prompt template:

In [4]:
from langchain.prompts import ChatPromptTemplate

prompt_template = '''
Answer the question based on the context below. Respond in a full sentence. If you can't answer the question reply "I don't know."

Context = {context}

Question = {question}
'''

prompt = ChatPromptTemplate.from_template(prompt_template)


In [5]:
chain = prompt | model | parser

In [6]:
# Testing the chain
chain.invoke({"context":"Mary's sister is Anna", "question":"Who is Mary's sister"})

"Mary's sister is Anna."

## Getting the video transcript

We use Whisper to transcribe get the audio from the video url.

In [5]:
import whisper
from pytubefix import YouTube
import tempfile

video_url = "https://www.youtube.com/watch?v=mxqOPdEUNTs&t=324s&ab_channel=PracticalEngineering" #"https://www.youtube.com/watch?v=SGSOCuByo24&ab_channel=LexFridman" # https://www.youtube.com/watch?v=cdiD-9MMpb0"
yt = YouTube(video_url)
audio_stream = yt.streams.filter(only_audio=True).first()


transcription_model = whisper.load_model("base")

filename = "audio.mp4"
audio_stream.download(filename=filename)


'd:\\LEGION\\Documents\\Learning Stuff\\Projects\\RAG-video-qa\\audio.mp4'

In [6]:
with open("transcription.txt", "w", encoding="utf-8") as f:
    transcription = transcription_model.transcribe(filename, fp16=False, verbose=True)["text"].strip()
    f.write(transcription)
    
os.remove(filename)

print("Transcription complete and audio file deleted.")

Detecting language using up to the first 30 seconds. Use `--language` to specify the language
Detected language: English
[00:00.000 --> 00:07.440]  This is the Carl's Bad desalination plant outside San Diego, California.
[00:07.440 --> 00:14.160]  It produces roughly 10% of the area's fresh water, around 50 million gallons or 23,000 cubic
[00:14.160 --> 00:15.680]  meters per day.
[00:15.680 --> 00:19.880]  Unlike most treatment plants that clean up water from rivers or lakes, the Carl's Bad
[00:19.880 --> 00:23.960]  Plant pulls its water directly from the ocean.
[00:23.960 --> 00:29.120]  Desalination, or the removal of salt from sea water, is one of those technologies that is
[00:29.120 --> 00:31.520]  always seemed right on the horizon.
[00:31.520 --> 00:36.360]  It might surprise you to learn that there are more than 18,000 desalination plants operating
[00:36.360 --> 00:37.920]  across the globe.
[00:37.920 --> 00:43.080]  But those plants provide less than a percent of global wa

## Splitting the transcript into chunks

In [7]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("transcription.txt")
text_documents = loader.load()
text_documents

[Document(metadata={'source': 'transcription.txt'}, page_content="This is the Carl's Bad desalination plant outside San Diego, California. It produces roughly 10% of the area's fresh water, around 50 million gallons or 23,000 cubic meters per day. Unlike most treatment plants that clean up water from rivers or lakes, the Carl's Bad Plant pulls its water directly from the ocean. Desalination, or the removal of salt from sea water, is one of those technologies that is always seemed right on the horizon. It might surprise you to learn that there are more than 18,000 desalination plants operating across the globe. But those plants provide less than a percent of global water needs, even though they consume a quarter of all the energy used in the water industry. I live like 100 miles away from the nearest sea, so it's easier for me to mix up my own batch of sea water right here in the studio. There are two main weights we use to desalinate water, and I've got some garage demonstrations to sh

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
documents = text_splitter.split_documents(text_documents)
print(len(documents))

21


## Embedding and Vector Database

In [9]:
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")


Manually create an index on [Pinecone](https://www.pinecone.io/) with dimension 3072 (to match the dimension of the embedding model we are using).

In [10]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "youtube-rag-3"

# Check if the index already exists
if index_name not in [index.name for index in pc.list_indexes()]:
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",       
            region="us-east-1" 
        )
    )
    print(f"Index '{index_name}' created.")
else:
    print(f"Index '{index_name}' already exists.")

index = pc.Index(index_name)

print(index.describe_index_stats())


Index 'youtube-rag-3' already exists.
{'dimension': 3072,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}


In [11]:
from langchain_pinecone import PineconeVectorStore

index="youtube-rag-3"
vectorstore = PineconeVectorStore.from_existing_index(index_name=index, embedding=embeddings)


We process a limited number of documents per minute to respect the rate limit of the free tier of the Google GenAI API. Specifically, we delay each request to avoid exceeding the allowed number of requests per minute.


In [12]:
import time 

MAX_REQUESTS_PER_MINUTE = 5
REQUEST_INTERVAL = 60  # 60 seconds
DELAY_BETWEEN_DOCS = REQUEST_INTERVAL / MAX_REQUESTS_PER_MINUTE  # 15 seconds delay between documents

for idx, doc in enumerate(documents):
    print(f"Embedding document {idx + 1} of {len(documents)}...")

    # Embed and add to Pinecone
    vectorstore.add_documents([doc])

    print(f"Processed document {idx + 1}/{len(documents)}.")

    # Delay to stay within 2 requests per minute
    if idx < len(documents) - 1:  # Avoid sleeping after the last document
        print(f"Waiting {DELAY_BETWEEN_DOCS} seconds before processing the next document...")
        time.sleep(DELAY_BETWEEN_DOCS)



Embedding document 1 of 21...
Processed document 1/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 2 of 21...
Processed document 2/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 3 of 21...
Processed document 3/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 4 of 21...
Processed document 4/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 5 of 21...
Processed document 5/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 6 of 21...
Processed document 6/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 7 of 21...
Processed document 7/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 8 of 21...
Processed document 8/21.
Waiting 12.0 seconds before processing the next document...
Embedding document 9 of 21...
Processed document 9/21.
Waiting 12.0 seconds befo

In [13]:
vectorstore.similarity_search("How much salt is in ocean saltwater?")[:3]

[Document(id='a2d8a47e-75ab-4761-b410-c29dc82a49d3', metadata={'source': 'transcription.txt'}, page_content="percent, but have you ever heard of per mil? That another circle blew the slash, and now instead of parts per hundred, this symbol means parts per thousand, which is the perfect unit to talk about salinity. The salinity of the ocean actually varies a little bit geographically and through the seasons, but in general, every liter of sea water usually has around 35 grams of dissolved salt. In other words, 35 parts per thousand or 35 per mil. That means for this bucket, I need about this much salt to match the salinity of sea water. I didn't get a dead-on, but this is close enough for our demo. Looks like a lot of salt, but I could dissolve about 10 times as much in the water before the solution becomes saturated and won't hold anymore. So compared to how salty it could be, sea water isn't that far from freshwater. But compared to how salty it should be in order to be okay to drink 

In [16]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

retriever = vectorstore.as_retriever()

chain = (
    {"context": vectorstore.as_retriever(), "question": RunnablePassthrough()}
    | prompt | model | parser
)
response = chain.invoke("How much salt is in ocean saltwater?")
response

'Ocean saltwater usually has around 35 grams of dissolved salt per liter, or 35 parts per thousand.'