In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langsmith import traceable
import os

In [2]:
os.environ["LANGSMITH_PROJECT"] = "Youtube Project"

In [3]:
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

llm = ChatOpenAI(
    model_name="gpt-4o-mini",   
    openai_api_key=api_key,
    max_tokens=500             
)

In [4]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import YoutubeLoader


<h1>(a) Indexing Ingestion </h1>

In [5]:
from langchain.schema import Document

with open("script.txt", "r", encoding="utf-8") as f:
    content = f.read()


In [7]:
import re

cleaned = re.sub(r"[\d.:]|\n", "",content)

In [10]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([cleaned])

In [11]:
len(chunks)

318

In [12]:
chunks[10]

Document(metadata={}, page_content="to run it and there's my output i like pizza on the next line i'll type std two colons c out two left angle brackets for output i'll write a second line it's really good then i'll run this again you can press this icon to clear output uh oh we have a problem i like pizza it's really good all of this text is on one line what if you need the next line of text to be on well the next line when you need to move your cursor down to the next line you can follow some string of text with double left angle brackets for output std colon colon en dl that means end line and i'll do the same for my second line i'm going to save clear my output run this again yeah there we go i like pizza it's really good each line of text is on a different line another option for a new line that's better performance wise is to add a new line character within single quotes type backslash n and let's replace that here as well so i'm going to save clear my output run this again i lik

<h1>(b) Embedding + Vector Store</h1>

In [6]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")


In [None]:
vector_store = FAISS.from_documents(chunks, embeddings)


In [14]:
vector_store.index_to_docstore_id

{0: '84280cab-7aaa-4b82-8c87-f956ffda6ffd',
 1: '19558011-8f99-429f-8695-58b02a775f3a',
 2: '1024c529-05db-430f-8dfb-f73ff73cbe10',
 3: '6aa1a328-78ce-41f7-b174-f740c7b8f775',
 4: '46eb85ac-fc88-403c-894e-271c05e56b88',
 5: '9152d056-9e74-4e65-848a-1513e0e1f178',
 6: '1e3a49af-ae42-42b0-9c8e-64b99a954873',
 7: '7444643a-4aa9-4dc2-8e14-abeb434f523a',
 8: 'ed52116b-eddb-4689-9529-028249077d66',
 9: '60f7d3bb-f76f-440d-afa2-caf9ccb4fc62',
 10: 'b4349881-cfc1-443b-b182-42870cacdc92',
 11: '1de5bcf0-f98f-4444-959c-908e21d5d5c4',
 12: '3f8a6bec-0c75-4980-aaa4-a346c4c216be',
 13: '614f921f-577b-4302-abce-2dd7b691d08c',
 14: 'f4f6d5a7-6bb1-4b78-a277-9fc14ec2469e',
 15: 'e7795287-33bb-49f7-8a15-a069b584e61b',
 16: '791e10e3-0d94-46d7-8216-1ada8c07c34e',
 17: 'fc115f7a-6a52-4c36-b99e-5ca682ffc587',
 18: '7723b7c4-782f-4fd6-b4e3-a922701023e7',
 19: 'df05d38d-a1a6-49e2-a51f-076056f1f17e',
 20: '85d0bffb-cb7f-4705-9465-173873e78c3d',
 21: '5807f203-6453-4792-87cb-da80e342781c',
 22: '5c9fe4e1-7b5e-

In [15]:
vector_store.get_by_ids(['84280cab-7aaa-4b82-8c87-f956ffda6ffd'])

[Document(id='84280cab-7aaa-4b82-8c87-f956ffda6ffd', metadata={}, page_content="hey guys in this video you're going to write your first c plus program so sit back relax and well enjoy the show hey if you wouldn't mind please like comment and subscribe one like equals one prayer for the youtube algorithm i'm gonna tell you why you need to learn c plus plus c plus plus is a fast language like really fast it's commonly used in advanced graphics applications a few examples would include adobe applications video editing software anything that's graphics intensive c plus plus is considered a middle level language therefore it's commonly used with embedded systems and most importantly it's commonly used with creating video games i like video games like a lot compared to other programming languages you could say that c plus plus is a middle level language programming languages tend to be on a spectrum the higher level of programming languages the more it resembles human language languages that

In [18]:
vector_store.save_local("c++ script embedding")

In [7]:
vector_store = FAISS.load_local(
    "c++ script embedding", 
    embeddings, 
    allow_dangerous_deserialization=True
)


In [8]:
vector_store.get_by_ids(["84280cab-7aaa-4b82-8c87-f956ffda6ffd"])

[Document(id='84280cab-7aaa-4b82-8c87-f956ffda6ffd', metadata={}, page_content="hey guys in this video you're going to write your first c plus program so sit back relax and well enjoy the show hey if you wouldn't mind please like comment and subscribe one like equals one prayer for the youtube algorithm i'm gonna tell you why you need to learn c plus plus c plus plus is a fast language like really fast it's commonly used in advanced graphics applications a few examples would include adobe applications video editing software anything that's graphics intensive c plus plus is considered a middle level language therefore it's commonly used with embedded systems and most importantly it's commonly used with creating video games i like video games like a lot compared to other programming languages you could say that c plus plus is a middle level language programming languages tend to be on a spectrum the higher level of programming languages the more it resembles human language languages that

<h1>(c) Retrieval</h1>

In [9]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [10]:
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002035C073B60>, search_kwargs={'k': 5})

In [11]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)

In [17]:
question          = "Explain the use of if else"
retrieved_docs    = retriever.invoke(question)

In [19]:
retrieved_docs

[Document(id='4a53ec19-39c6-4092-942b-5f7c053c4e0c', metadata={}, page_content="will display this message let's try this one more time and to your age i am  years old i hit enter you are too old to enter the site so yeah that's an if statement you do something if a condition is true if not then you don't do it if you would rather do something else you can use an else statement if there's other conditions you would like to check before reaching the else statement you can use an else if statement and there's really no limit to these if you would like a copy of this code i'll post this in the comments section down below and well yeah those are if statements in c plus all right everybody today we're going to discuss switches a switch is an alternative to using many else if statements it compares one value against many matching cases here's an example of what you don't want to do i have this program we have variable month month is of the integer data type the user will type in a number  thr

In [20]:
context_text = "\n\n\n\n".join(doc.page_content for doc in retrieved_docs)
context_text

"will display this message let's try this one more time and to your age i am  years old i hit enter you are too old to enter the site so yeah that's an if statement you do something if a condition is true if not then you don't do it if you would rather do something else you can use an else statement if there's other conditions you would like to check before reaching the else statement you can use an else if statement and there's really no limit to these if you would like a copy of this code i'll post this in the comments section down below and well yeah those are if statements in c plus all right everybody today we're going to discuss switches a switch is an alternative to using many else if statements it compares one value against many matching cases here's an example of what you don't want to do i have this program we have variable month month is of the integer data type the user will type in a number  through  to represent the month if month is equal to  it is january else if month\

In [21]:
final_prompt = prompt.invoke({"context": context_text, "question": question})
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know.\n\n      will display this message let's try this one more time and to your age i am  years old i hit enter you are too old to enter the site so yeah that's an if statement you do something if a condition is true if not then you don't do it if you would rather do something else you can use an else statement if there's other conditions you would like to check before reaching the else statement you can use an else if statement and there's really no limit to these if you would like a copy of this code i'll post this in the comments section down below and well yeah those are if statements in c plus all right everybody today we're going to discuss switches a switch is an alternative to using many else if statements it compares one value against many matching cases here's an example of what you don't want to do i

In [22]:
answer = llm.invoke(final_prompt)
print(answer.content)

An if else statement is used to execute different blocks of code based on whether a specified condition is true or false. If the condition in the if statement is true, the code within that block executes. If the condition is false, the code within the else block executes instead. This structure allows for decision-making in programming, enabling the program to respond differently to varying inputs or situations.

For example, in the provided context, if a user enters an age and the condition checks if the age is greater than or equal to a certain number, it would execute one block if the condition is true (e.g., "welcome to the site"), and another block if the condition is false (e.g., "you are not old enough to enter"). The order of the if and else if statements matters, as it determines which code block is executed based on the evaluated conditions.


In [23]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [24]:
def format_docs(retrieved_docs):
  context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
  return context_text

In [25]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [27]:
parser = StrOutputParser()

In [28]:
main_chain = parallel_chain | prompt | llm | parser

In [29]:
main_chain.invoke('Can you summarize the video')

'The video serves as an introduction to C++ programming. It covers the basics of variables, explaining their declaration and assignment, and highlights the fill function to populate an array with predetermined values (pizza, hamburgers, hot dogs). Additionally, it emphasizes the performance of C++ as a fast, middle-level programming language commonly used in graphics applications, video games, and embedded systems. The video also includes an example of creating an object (a car) with attributes (make, model, year, color) and methods (accelerate, brake), illustrating object-oriented programming concepts. Viewers are encouraged to like, comment, and subscribe.'

In [30]:
main_chain.invoke("explain the loops")

'The transcript explains several types of loops in C++:\n\n1. **For Loop**: A for loop executes a block of code a specified number of times. It consists of three main components:\n   - An initialization statement (e.g., `int index = 1` or simply `int i = 1`).\n   - A stopping condition (e.g., `i <= 3`) that determines when the loop ends.\n   - An increment or decrement statement (e.g., `i++`) to update the counter after each iteration. The structure is `for(initialization; condition; increment) { //code to execute }`.\n\n2. **Do While Loop**: A do while loop executes a block of code once before checking a condition to determine if it should repeat. The structure is `do { //code to execute } while (condition);`. It guarantees that the code inside the loop runs at least once.\n\n3. **Nested Loops**: These are loops inside of other loops. The inner loop runs completely every time the outer loop runs once. It is important to use different index variables for each loop (commonly `i` for the

In [32]:
main_chain.invoke("what is pakistan")

"I don't know."