In [1]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaLLM,OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings

## 1. External Source


In [2]:
# 1. Indexing

video_id="Gfr50f6ZBvo"

try:
    transcript_list=YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])

    transcript="".join(chunk['text'] for chunk in transcript_list)

    print(transcript)

except TranscriptsDisabled:
    print("No Transcript File found")

the following is a conversation withdemus hasabisceo and co-founder of deepminda company that has published and buildssome of the most incredible artificialintelligence systems in the history ofcomputing including alfred zero thatlearnedall by itself to play the game of goldbetter than any human in the world andalpha fold two that solved proteinfoldingboth tasks considered nearly impossiblefor a very long timedemus is widely considered to be one ofthe most brilliant and impactful humansin the history of artificialintelligence and science and engineeringin generalthis was truly an honor and a pleasurefor me to finally sit down with him forthis conversation and i'm sure we willtalk many times again in the futurethis is the lex friedman podcast tosupport it please check out our sponsorsin the description and now dear friendshere's demishassabislet's start with a bit of a personalquestionam i an ai program you wrote tointerview people until i get good enoughto interview youwell i'll be imp

In [3]:
# 2. Text Splitting

splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

chunks=splitter.create_documents([transcript])

print(len(chunks))

print(chunks[101])

163
page_content='method asbeing one of the greatest ideashumanity's ever had and allowed us toprogress with our knowledgebut i think as a true scientist i thinkwhat you find is the more you find outuh you the more you realize we don'tknowand and i always think that it'ssurprising that more people don't aren'ttroubled you know every night i thinkabout all these things we interact withall the time that we have no idea howthey work timeconsciousness gravitylife we can't i mean these are all thefundamental things of nature i think theway we don't really know what they areto live life we uh pin certainassumptions on them and kind of treatour assumptions as if they're a factyeah that allows us to sort of box themoff somehow yeah box them offbut the reality is when you think oftimeyou should remind yourself you shouldput it off the shtake it off the shelf and realize likeno we have a bunch of assumptionsthere's still a lot of there's even nowa lot of debate there's a lot ofuncertainty about 

In [4]:
# 3. Embedding Generation and Vector Store

embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding)

vector_store.index_to_docstore_id

  from .autonotebook import tqdm as notebook_tqdm


{0: 'cb6213ea-e5b0-4d63-a29f-fb5456e9fd35',
 1: '22f7f7be-c314-48b6-a587-e0a8f4c421d9',
 2: 'c7459432-e339-40a1-85c6-47079e125e47',
 3: '3011811b-cb91-43f9-aef7-485d5369f6cc',
 4: '30fa3806-5961-4414-89c6-e2f515930f76',
 5: '0ab4269f-12e6-4387-a546-61d68d4fd286',
 6: 'da8a3d58-3c5e-4835-bf26-6eb6c537c983',
 7: '1d451df7-d0ba-4c77-9234-de23446e5c07',
 8: 'b7cd2b47-02bf-470f-bf35-084cc618de91',
 9: '4ee870d2-697d-4890-95c2-f6dd15aa87e4',
 10: '65bb016e-5c9a-4f6d-8288-e1542bf888a5',
 11: '2fcb96e0-a197-46ab-9b37-0ce4d7f4bd14',
 12: '142c8e69-d4da-4511-ad30-f5bfe89da950',
 13: 'bd601bdf-84b7-48e5-a216-383b717108a9',
 14: '29ccd825-0ba8-4e75-abf2-f5b4050f113e',
 15: 'a69286c5-2074-4764-938d-78271d6252a9',
 16: 'bec13dce-0e16-47cc-ab8c-5f4cee09816f',
 17: 'b118ed74-cafd-45f9-aca6-51d982ba8c4a',
 18: 'e9a2b40d-9f40-440f-be6b-074a5c36fc19',
 19: 'c1fb6f7f-e071-4141-88dc-a250b4a713fb',
 20: '08973977-c44a-4f3d-9117-b6d9a0b3b66b',
 21: '71d3ca66-6ec7-41ba-b42a-19c748656438',
 22: '43d180f9-a53f-

In [12]:
vector_store.get_by_ids(['ab1d938d-3742-4291-bfb5-8ec30bccd300'])[0].page_content

"giving youglimpses ofwhat things you totally missed in thephysics of todayexactly just here here's glimpses of nolike there's a muchuha much more elaborate world or a muchsimpler world or somethinga much deeper maybe simpler explanationyes of things right than the standardmodel of physics which we know doesn'twork but we still keep adding to so umand and that's how i think the beginningof an explanation would look and itwould start encompassing many of themysteries that we have wondered aboutfor thousands of years like you knowconsciousnessuh life and gravity all of these thingsyeah giving us a glimpses ofexplanations for those things yeahwell umdamas dear one of the specialhuman beings in this giant puzzle ofours and it's a huge honor that youwould take a pause from the biggerpuzzle to solve this small puzzle of aconversation with me today it's truly anhonor and a pleasure thank you thank youi really enjoyed it thanks lexthanks for listening to thisconversation with demas establish"

## 2. Retrival


In [13]:
retriver=vector_store.as_retriever(search_type='similarity',search_kwarge={"k":4})

In [14]:
retriver

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000017C343863C0>, search_kwargs={})

In [15]:
retriver.invoke("Tell me about the Aliens")

[Document(id='a6d4bc55-69d0-4012-81fe-55616eddd03a', metadata={}, page_content="search yet and maybewe're looking in the wrong bands and andwe've got the wrong devices and wewouldn't notice what an alien form waslike to be so different to what we'reused to but you know i'm not i don'treally buy that that it shouldn't be asdifficult as that like we i think we'vesearched enough there should be if itwere everywhere if it was it should beeverywhere we should see dyson's fearsbeing put up sun's blinking in and outyou know there should be a lot ofevidence for those things and then thereare other people argue well the sort ofsafari view of like well we're aprimitive species still because we'renot space faring yet and and and we'reyou know there's some kind of globe likeuniversal rule not to interfere startrek rule but like look look we can'teven coordinate humans to deal withclimate change and we're one specieswhat is the chance that of all of thesedifferent human civilization you knowalien c

## 3. Augmentation


In [16]:
prompt=PromptTemplate(
    template="""
You are a helpful assistant.
Answer ONLY from the Transcript Provided to you.
If Context is insufficient just say don't know.

{context}
Question:{question}
""",
input_variables=['context','question']
)

In [17]:
llm=OllamaLLM(model="llama3.2")

In [18]:
question = "Discussion about the Aliens happened in the video?"

retrived_docs = retriver.invoke(question)

In [19]:
retrived_docs

[Document(id='5bb19db1-a262-47ef-851c-a8a2dbb8d5bc', metadata={}, page_content="and you know we've people likedyson have thought about constructingdyson spheres around stars to collectall the energy coming out of the staryou know that there would beconstructions like that would be visibleacross base um probably even across agalaxy so and then you know if you thinkabout all of our radio television uhemissions that have gone out since sincethe you know 30s and 40s um imagine amillion years of that and now hundredsof civilizations doing that when weopened our ears at the point we gottechnologically sophisticated enough inthe space age we should haveheard a cacophony of voices we shouldhave joined that cacophony of voices andwhat we did we opened our ears and weheard nothingand many people who argue that there arealiens would say well we haven't reallydone exhaustive search yet and maybewe're looking in the wrong bands and andwe've got the wrong devices and wewouldn't notice what an alien 

In [20]:
context_text="\n\n".join(doc.page_content for doc in retrived_docs)
context_text

"and you know we've people likedyson have thought about constructingdyson spheres around stars to collectall the energy coming out of the staryou know that there would beconstructions like that would be visibleacross base um probably even across agalaxy so and then you know if you thinkabout all of our radio television uhemissions that have gone out since sincethe you know 30s and 40s um imagine amillion years of that and now hundredsof civilizations doing that when weopened our ears at the point we gottechnologically sophisticated enough inthe space age we should haveheard a cacophony of voices we shouldhave joined that cacophony of voices andwhat we did we opened our ears and weheard nothingand many people who argue that there arealiens would say well we haven't reallydone exhaustive search yet and maybewe're looking in the wrong bands and andwe've got the wrong devices and wewouldn't notice what an alien form waslike to be so different to what we'reused to but you know i'm not i\n\n

In [21]:
final_prompt=prompt.invoke({
    "context":context_text,
    "question":question
})

In [22]:
final_prompt

StringPromptValue(text="\nYou are a helpful assistant.\nAnswer ONLY from the Transcript Provided to you.\nIf Context is insufficient just say don't know.\n\nand you know we've people likedyson have thought about constructingdyson spheres around stars to collectall the energy coming out of the staryou know that there would beconstructions like that would be visibleacross base um probably even across agalaxy so and then you know if you thinkabout all of our radio television uhemissions that have gone out since sincethe you know 30s and 40s um imagine amillion years of that and now hundredsof civilizations doing that when weopened our ears at the point we gottechnologically sophisticated enough inthe space age we should haveheard a cacophony of voices we shouldhave joined that cacophony of voices andwhat we did we opened our ears and weheard nothingand many people who argue that there arealiens would say well we haven't reallydone exhaustive search yet and maybewe're looking in the wrong 

## 4. Generation


In [23]:
final_ans=llm.invoke(final_prompt)
final_ans

'Yes, discussion about aliens is present in the transcript. The speaker expresses their opinion that we are likely alone in the universe, citing a lack of evidence from extensive searches such as the SETI program, and suggesting that other civilizations should be visible if they exist. They also discuss various possibilities for extraterrestrial life forms and communication methods.'