In [None]:
import os
from dotenv import load_dotenv

load_dotenv()

# URL of the YouTube video to transcribe and query; change this to use a different video.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"

# OpenAI credential looked up in the process environment; None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [105]:
# ChatOpenAI is LangChain's client class for OpenAI chat models (e.g. GPT-3.5, GPT-4).
from langchain_openai.chat_models import ChatOpenAI

# Chat model instance reused by the chains built later in this notebook.
model = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=OPENAI_API_KEY,
)

In [106]:
# Uncomment the line below to check that the model responds:
# model.invoke("what is the best soccer team?")

In [107]:
# StrOutputParser reduces the chat model's output to a plain Python string.
from langchain_core.output_parsers import StrOutputParser

# Parser placed at the end of each chain so callers receive text.
parser = StrOutputParser()

# Minimal pipeline: user input -> OpenAI model -> parsed string output.
chain = model | parser

# Uncomment the line below to try the pipeline:
# chain.invoke('what is 2+2')

In [108]:
# ChatPromptTemplate defines reusable prompts with placeholders such as {context} and {question}.
# It is imported from langchain_core (already used by this notebook for StrOutputParser)
# rather than the legacy `langchain.prompts` re-export path.
from langchain_core.prompts import ChatPromptTemplate

# Instructions for the model: answer only from the supplied context, and fall back
# to a fixed reply ("Quack Quack") when the context does not contain the answer.
template = """
Answer the question based on the context below. If you can't answer the question, reply "Quack Quack".

Context: {context}

Question: {question}
"""

# Turn the raw template text into a ChatPromptTemplate that can be piped into a chain.
prompt = ChatPromptTemplate.from_template(template)


In [109]:
# Pipeline: input dict -> prompt -> model -> string parser -> final answer.
chain = prompt | model | parser

# Smoke-test the chain with concrete values for both template variables.
sample_inputs = {
    "context": "I love white cars",
    "question": "what color should my next car be?",
}
chain.invoke(sample_inputs)

'White'

In [110]:
import tempfile
import whisper
from pytubefix import YouTube

# Downloading and transcribing are slow, so only do the work when no cached
# transcript exists on disk. Delete transcription.txt to force a fresh run.
if not os.path.exists("transcription.txt"):
    # YOUTUBE_VIDEO is the URL set in the configuration cell at the top of the notebook.
    youtube = YouTube(YOUTUBE_VIDEO)

    # Audio-only stream: no video track, so the download is smaller and faster.
    audio = youtube.streams.filter(only_audio=True).first()
    if audio is None:
        # .first() yields None when no stream matches; fail with a clear message
        # instead of an AttributeError on the .download call below.
        raise RuntimeError(f"No audio-only stream found for {YOUTUBE_VIDEO}")

    # "base" is the small Whisper model: fast, but not the most accurate.
    whisper_model = whisper.load_model("base")

    # The downloaded audio lives in a temporary directory that is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = audio.download(output_path=tmpdir)
        # fp16=False keeps transcription working on CPU-only machines. The result is a
        # dict; only its "text" entry is kept, with surrounding whitespace stripped.
        transcription = whisper_model.transcribe(audio_path, fp16=False)["text"].strip()

    # Cache the transcript so later runs skip the download and transcription.
    # A separate name is used for the handle so the audio path is not shadowed.
    with open("transcription.txt", "w") as transcript_file:
        transcript_file.write(transcription)

In [111]:
# Load the cached transcript from disk as one string, so Whisper does not have to
# run again when the transcript was already generated.
with open("transcription.txt") as transcript_file:
    transcription = transcript_file.read()

# Preview the first 100 characters of the transcript.
transcription[:100]

"I think it's possible that physics has exploits and we should be trying to find them. arranging some"

In [112]:
# Pass the entire transcript as context. It is too large for the model, so the call
# is expected to fail; the error is caught and printed rather than stopping the notebook.
try:
    oversized_inputs = {
        "context": transcription,
        "question": "Is reading paper a good idea?",
    }
    chain.invoke(oversized_inputs)
except Exception as error:
    print(error)

Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 47046 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}


In [113]:
# The transcript must be split into smaller pieces. First, load the text file
# into LangChain so it can be handed to a splitter.
from langchain_community.document_loaders import TextLoader

loader = TextLoader(file_path="transcription.txt")
text_documents = loader.load()

# Uncomment the line below to inspect the loaded documents:
# text_documents

In [114]:
# RecursiveCharacterTextSplitter breaks long documents into smaller chunks that
# work well with language models and embeddings.
# NOTE(review): newer LangChain releases expose this class from the separate
# `langchain_text_splitters` package — confirm which import path the installed version expects.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings:
# - chunk_size=1000: each chunk holds up to roughly 1000 characters
# - chunk_overlap=20: consecutive chunks share 20 characters
# The overlap helps preserve context across chunk boundaries, improving retrieval quality.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)

# Split the loaded Document objects into many smaller Documents. Each one carries
# a chunk of text plus metadata referencing the source file.
documents = text_splitter.split_documents(text_documents)

# Show the first 5 chunks for inspection, reusing the result computed above
# instead of splitting the whole transcript a second time.
documents[:5]

[Document(metadata={'source': 'transcription.txt'}, page_content="I think it's possible that physics has exploits and we should be trying to find them. arranging some kind of a crazy quantum mechanical system that somehow gives you buffer overflow, somehow gives you a rounding error in the floating point. Synthetic intelligences are kind of like the next stage of development. And I don't know where it leads to. Like at some point, I suspect the universe is some kind of a puzzle. These synthetic AIs will uncover that puzzle and solve it. The following is a conversation with Andre Kappathi, previously the director of AI at Tesla. And before that, at OpenAI and Stanford, he is one of the greatest scientist engineers and educators in the history of artificial intelligence. This is the Lex Friedman podcast to support it. Please check out our sponsors and now to your friends. Here's Andre Kappathi. What is a neural network? And what does it seem to do such a surprisingly good job of learning

In [115]:
# OpenAIEmbeddings turns text into vectors; PineconeVectorStore lets LangChain talk
# to a Pinecone index through its standard VectorStore interface.
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Name of the Pinecone index. It must already exist in your Pinecone project.
index_name = "youtube-rag"

# Embedding model used for both the document chunks and the queries. This was
# previously referenced without being defined in any cell, so running the notebook
# on a fresh kernel failed here with a NameError.
# NOTE(review): the index dimension must match this model's vector size — confirm
# against how "youtube-rag" was created.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Build the vector store from the chunks. This step:
# 1. Embeds each chunk
# 2. Uploads the vectors and their metadata to Pinecone
# 3. Returns a VectorStore object wired to the index
# NOTE(review): this runs on every execution of the cell — check whether repeated
# runs leave duplicate vectors in the index.
pinecone = PineconeVectorStore.from_documents(
    documents, embeddings, index_name=index_name
)

In [116]:
# Ask the Pinecone vector store for the chunks whose embeddings are closest to the
# query's embedding. The result is a list of Document objects ranked by relevance;
# only the top 3 matches are displayed.
hollywood_query = "What is Hollywood going to start doing?"
pinecone.similarity_search(hollywood_query)[:3]

[Document(metadata={'source': 'transcription.txt'}, page_content="It's like high quality audio and you're speaking usually pretty clearly. I don't know what open AI's plans are either. Yeah, there's always fun projects basically. And stable diffusion also is opening up a huge amount of experimentation. I would say in the visual realm and generating images and videos and movies. I'll think like videos now. And so that's going to be pretty crazy. That's going to almost certainly work and it's going to be really interesting when the cost of content creation is going to fall to zero. You used to need a painter for a few months to paint a thing and now it's going to be speak to your phone to get your video. So Hollywood will start using it to generate scenes, which completely opens up. Yeah, so you can make a movie like Avatar eventually for under a million dollars. Much less. Maybe just by talking to your phone. I mean, I know it sounds kind of crazy. And then there'd be some voting mechan

In [118]:
# RunnablePassthrough forwards its input unchanged. It is imported from langchain_core
# (already used by this notebook) rather than the legacy `langchain.schema.runnable` path.
from langchain_core.runnables import RunnablePassthrough

# Full RAG chain built with LangChain's composable "Runnable" syntax; each step
# pipes its output into the next. The leading dict sends the incoming question to
# the retriever (filling {context}) and passes it through unchanged (filling {question}).
chain = (
    {"context": pinecone.as_retriever(), "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

# Run the whole pipeline by passing just a question.
# Under the hood:
#   1. The question is embedded
#   2. Pinecone finds similar chunks
#   3. Those chunks are inserted into the prompt
#   4. The model is instructed to answer from the retrieved context
chain.invoke("What is hollywood going to start doing? in details")

'Hollywood is going to start using stable diffusion to generate scenes, which will completely open up new possibilities for content creation. This means that Hollywood will be able to make movies like Avatar for under a million dollars, and possibly even less, just by talking to a phone. This technology will drastically reduce the cost of content creation and revolutionize the way movies are made.'