In [2]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

### Step 1a - Indexing (Document Ingestion)

In [22]:
video_id = "xt5Z0CBP2J4"  # only the ID, not full URL

try:
    # Ask specifically for the English transcript of this video.
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
except TranscriptsDisabled:
    # Reset both names so later cells never reuse a transcript left over from an
    # earlier run of this cell (or hit a NameError on a fresh kernel).
    transcript_list = []
    transcript = ""
    print("No Transcript available for this video")
else:
    # Flatten the timed segments into one plain-text string for splitting.
    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    print(transcript)

They drizzled it all across. Is that the right word? Drizzled. Megan Fox just just emailed me. Sorry. Okay, we'll read it for the class. It's for a beep. It's for a beeper party. Anyway, um No, no, no, no, no. Now we need to know. That's You can't just leave that. You need to read that email. Just park that in the side and move on. Megan Fox. Is it the real Megan Fox or is it like Well, she is real. Yo, what is up people of the internet? Welcome back to another episode of the Waveform Podcast. We're your hosts. I'm Marquez. I'm Andrew. And I'm David. This was WWDC week. Apple had their uh their their software announcement festivities slash an what? I don't know what you want to call it. It's a developer conference, but it's also like unveiling a bunch of new stuff. Ragger. And uh yeah, there's there's lots to talk about. There's liquid glass. There is iPad turning into a computer finally. You guys are stuff. I didn't do anything. You didn't say it yet. Uh fake workout buddies. Everythi

In [23]:
# Inspect the raw transcript segments returned by get_transcript.
transcript_list

[{'text': 'They drizzled it all across. Is that the',
  'start': 0.16,
  'duration': 5.04},
 {'text': 'right word? Drizzled.', 'start': 2.56, 'duration': 6.079},
 {'text': 'Megan Fox just just emailed me.', 'start': 5.2, 'duration': 5.6},
 {'text': 'Sorry.', 'start': 8.639, 'duration': 3.681},
 {'text': "Okay, we'll read it for the class. It's",
  'start': 10.8,
  'duration': 3.28},
 {'text': "for a beep. It's for a beeper party.",
  'start': 12.32,
  'duration': 4.799},
 {'text': 'Anyway, um No, no, no, no, no. Now we',
  'start': 14.08,
  'duration': 4.24},
 {'text': "need to know. That's You can't just",
  'start': 17.119,
  'duration': 2.881},
 {'text': 'leave that. You need to read that email.',
  'start': 18.32,
  'duration': 3.2},
 {'text': 'Just park that in the side and move on.',
  'start': 20.0,
  'duration': 3.68},
 {'text': 'Megan Fox. Is it the real Megan Fox or',
  'start': 21.52,
  'duration': 7.32},
 {'text': 'is it like Well, she is real.', 'start': 23.68, 'duration':

### Step 1b - Indexing (Text Splitting)

In [24]:
# Break the transcript into overlapping chunks so each piece can be embedded on its own.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.create_documents([transcript])

In [25]:
# How many chunks the splitter produced.
len(chunks)

123

In [26]:
# Peek at the first chunk.
chunks[0]

Document(metadata={}, page_content="They drizzled it all across. Is that the right word? Drizzled. Megan Fox just just emailed me. Sorry. Okay, we'll read it for the class. It's for a beep. It's for a beeper party. Anyway, um No, no, no, no, no. Now we need to know. That's You can't just leave that. You need to read that email. Just park that in the side and move on. Megan Fox. Is it the real Megan Fox or is it like Well, she is real. Yo, what is up people of the internet? Welcome back to another episode of the Waveform Podcast. We're your hosts. I'm Marquez. I'm Andrew. And I'm David. This was WWDC week. Apple had their uh their their software announcement festivities slash an what? I don't know what you want to call it. It's a developer conference, but it's also like unveiling a bunch of new stuff. Ragger. And uh yeah, there's there's lots to talk about. There's liquid glass. There is iPad turning into a computer finally. You guys are stuff. I didn't do anything. You didn't say it ye

### Step 1c and 1d - Indexing (Embedding Generation and Storing in Vector Store)

In [27]:
# Embed every chunk and store the vectors in a FAISS index.
# NOTE(review): no API key is passed here — presumably it is read from the environment; confirm.
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding)

In [28]:
# Show the mapping from index position to docstore ID.
vector_store.index_to_docstore_id

{0: 'f892ca06-db8f-4466-acf6-b70c78db74ed',
 1: '0f234cfe-0ea4-4bf4-a19d-7cacc06179b9',
 2: '7314792c-e1eb-4d81-9506-5bc22a0bd7d1',
 3: 'f0edf2cf-d6dc-43d5-b6cb-4cd300b4e9cf',
 4: 'afa15277-7044-4938-9936-bdcfa5397da1',
 5: 'c8721191-fcc5-487a-8e3d-7184f729be87',
 6: '269ea48b-9139-459b-938f-7cad936a31e9',
 7: '120a2c71-757f-443c-bdb9-64913fed2467',
 8: '2a7f38de-0bc0-40bb-b060-4a7960a280ed',
 9: 'a37b3293-b168-40d7-8889-989148a2dde3',
 10: 'bc614be9-7aee-4dee-bb6b-451661608cb5',
 11: 'e1bda58a-257f-4684-b81b-ec3978c9e436',
 12: 'b43ea247-2b83-4d85-a81a-7b058dcf962e',
 13: '0d1f2ca0-13bd-4b52-b760-87d1bdf4690d',
 14: '9c5f3052-6a70-4340-83ea-2f715ab7b04b',
 15: '4ce1543f-a839-4f08-916f-43821241fda0',
 16: 'b798e64c-755e-45ce-b3b6-4deae79f3d29',
 17: '1481e29c-957a-426b-b1c9-3b9a6b7c1eae',
 18: 'e1e1d24e-60b6-48bb-aa94-1afda83af45f',
 19: '39c93263-e9f9-424e-ad13-758434829620',
 20: '70ed8bab-d2ab-4188-9e41-d131ac3748e0',
 21: '59910ea8-7c7f-45e6-b46f-cfad514c4c26',
 22: '93691e20-232d-

In [None]:
# Example (disabled): look up stored chunks by docstore ID.
# vector_store.get_by_ids([' '])

### Step 2 - Retrieval

In [29]:
# Similarity-search retriever that returns the 4 closest chunks per query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [30]:
# Display the retriever's configuration.
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000280AC97D7D0>, search_kwargs={'k': 4})

In [32]:
# Sanity-check retrieval with a sample question.
retriever.invoke('What features did apple release in there overhaul iOS')

[Document(id='774f00ef-abdd-46dd-8e2a-8395cabe6eda', metadata={}, page_content='thing. It was kind of shocking. It was just teared up. Crazy. Like they did all they added full free form windows. Like you can just drag from the corner and make a bunch of windows as whatever size you want. uh multiple overlapping windows, oncreen, offscreen, full screen. They also added a menu bar at the top and window controls on those windows. And yeah, that the stoplight menu for like closing or minimizing or change the size of windows. There\'s a new redesigned files app. There is a new audio input selector, which I also confirmed will work with Bluetooth microphones. I used to have to I\'ve been paying for apps to do this for like years. Every time I test Bluetooth headphones, I\'m like, "How do I do this test? This is going to be the worst part." Yeah. Um, there are so many things in iPad OS. Uh, the pre the new preview app, you can export to different file sizes and types. Like all of this stuff, 

### Step 3 - Augmentation

In [33]:
# Chat model used for answer generation: low temperature, completion capped at 200 tokens.
model = ChatOpenAI(
    model="gpt-4.1-nano-2025-04-14",
    temperature=0.2,
    max_completion_tokens=200,
)

In [34]:
# Prompt that restricts the model to the retrieved transcript context.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
)

In [35]:
# Question used for the manual walkthrough of the augmentation step.
question = "What features did apple release in there overhaul iOS"
# Fetch the chunks the retriever considers most relevant to that question.
retrieved_docs = retriever.invoke(input=question)

In [36]:
# Inspect the documents retrieved for the question.
retrieved_docs

[Document(id='774f00ef-abdd-46dd-8e2a-8395cabe6eda', metadata={}, page_content='thing. It was kind of shocking. It was just teared up. Crazy. Like they did all they added full free form windows. Like you can just drag from the corner and make a bunch of windows as whatever size you want. uh multiple overlapping windows, oncreen, offscreen, full screen. They also added a menu bar at the top and window controls on those windows. And yeah, that the stoplight menu for like closing or minimizing or change the size of windows. There\'s a new redesigned files app. There is a new audio input selector, which I also confirmed will work with Bluetooth microphones. I used to have to I\'ve been paying for apps to do this for like years. Every time I test Bluetooth headphones, I\'m like, "How do I do this test? This is going to be the worst part." Yeah. Um, there are so many things in iPad OS. Uh, the pre the new preview app, you can export to different file sizes and types. Like all of this stuff, 

In [37]:
# Stitch the retrieved chunks into one context string, separated by blank lines.
chunk_texts = [document.page_content for document in retrieved_docs]
context_text = "\n\n".join(chunk_texts)
context_text

'thing. It was kind of shocking. It was just teared up. Crazy. Like they did all they added full free form windows. Like you can just drag from the corner and make a bunch of windows as whatever size you want. uh multiple overlapping windows, oncreen, offscreen, full screen. They also added a menu bar at the top and window controls on those windows. And yeah, that the stoplight menu for like closing or minimizing or change the size of windows. There\'s a new redesigned files app. There is a new audio input selector, which I also confirmed will work with Bluetooth microphones. I used to have to I\'ve been paying for apps to do this for like years. Every time I test Bluetooth headphones, I\'m like, "How do I do this test? This is going to be the worst part." Yeah. Um, there are so many things in iPad OS. Uh, the pre the new preview app, you can export to different file sizes and types. Like all of this stuff, uh, the new files app can let you sort by or organize by showing different type

In [38]:
# Fill the template with the retrieved context and the user's question.
final_prompt = prompt.invoke(
    {
        "context": context_text,
        "question": question,
    }
)

In [39]:
# Inspect the fully rendered prompt.
final_prompt

StringPromptValue(text='\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don\'t know.\n\n      thing. It was kind of shocking. It was just teared up. Crazy. Like they did all they added full free form windows. Like you can just drag from the corner and make a bunch of windows as whatever size you want. uh multiple overlapping windows, oncreen, offscreen, full screen. They also added a menu bar at the top and window controls on those windows. And yeah, that the stoplight menu for like closing or minimizing or change the size of windows. There\'s a new redesigned files app. There is a new audio input selector, which I also confirmed will work with Bluetooth microphones. I used to have to I\'ve been paying for apps to do this for like years. Every time I test Bluetooth headphones, I\'m like, "How do I do this test? This is going to be the worst part." Yeah. Um, there are so many things in iPad

### Step 4 - Generation

In [40]:
# Send the rendered prompt to the chat model.
answer = model.invoke(final_prompt)
# Only the text content of the response is printed.
print(answer.content)

Apple released several features in their overhaul of iOS, including a redesigned phone app with a contacts cards favorites list at the top, a simpler interface with only three options at the bottom, and a new liquid glass design with submenus for calls, missed calls, voicemails, and spam. They also introduced a new preview app that allows exporting to different file sizes and types, a new files app that can organize by different types, and a new audio input selector that works with Bluetooth microphones. Additionally, there is a new menu bar at the top, window controls on windows, and a stoplight menu for closing, minimizing, or resizing windows.


### Step 5 - Building a chain

In [41]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [42]:
def format_docs(retrieved_docs):
    """Join the ``page_content`` of each retrieved document into one string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents concatenated in order, separated by a blank line
        (``"\\n\\n"``); an empty string when no documents are given.
    """
    page_texts = [document.page_content for document in retrieved_docs]
    return "\n\n".join(page_texts)

In [43]:
# Fan the incoming question out into the two inputs the prompt needs:
#   context  -> retrieved chunks, formatted into one string
#   question -> the question itself, passed through unchanged
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [44]:
# Check what the parallel step produces for a sample question.
parallel_chain.invoke('What is the next iOS called')

{'context': 'one cutout of like that\'s pretty good, but the more you look at it, the more blocky and like chopped off like old bad portrait mode. Yeah. Yeah. Wait, Marquez, could you drop that video in the Slack for the big ballers already uploading? Sweet. Yes. So, yeah, I guess we can we can talk about any number of these OSs. They are all labeled version 26 now. So if you were thinking, "Oh, this is only the third Vision OS." Oh, we know. It\'s just they call it Vision OS 26 now because they\'re all 26. We talked about this last week how it\'s feels futuristic even though it comes out during 2025. I mean, who doesn\'t want iOS 26 at WWDC2? Exactly. On the Galaxy S25 Ultra. Someone brought up an interesting point. Are they going to call the new iPhone the iPhone 26 now? cuz it\'s going to be a little awkward to have the iPhone 17 running iOS 26. They\'re going to call it the iPhone 9. Oh god. Well, we Well, it didn\'t match before though. Yeah. True. Yeah. I don\'t think they\'re go

In [45]:
# Output parser placed at the end of main_chain.
parser = StrOutputParser()

In [46]:
# End-to-end pipeline: build the prompt inputs, render the prompt, call the model, parse the output.
main_chain = (
    parallel_chain
    | prompt
    | model
    | parser
)

In [48]:
# Run the full chain on a sample question.
main_chain.invoke('What is next iOS called')

'The next iOS is called iOS 26.'

### Streamlit Implementation