In [1]:
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

  from .autonotebook import tqdm as notebook_tqdm


# Indexing

In [72]:
# Fetch the English transcript for the video and flatten it into one string.
# Defines `transcript`, which the splitting cell below consumes.
vid_id = 'lDg7OolCNmE'

try:
    api = YouTubeTranscriptApi()
    transcript_list = api.list(video_id=vid_id).find_transcript(['en']).fetch()
    # Each fetched snippet exposes its caption text as `.text`; join them with
    # spaces so the splitter sees a single continuous document.
    transcript = ' '.join(chunk.text for chunk in transcript_list)

except TranscriptsDisabled:
    print('no captions available for this video')
    # Re-raise so the notebook stops here. Swallowing the error would leave
    # `transcript` unbound (NameError in the next cell on a fresh kernel) or,
    # worse, silently reuse a stale transcript from an earlier run.
    raise

In [21]:
# Split the transcript into overlapping chunks (chunk_size=1000,
# chunk_overlap=120) and wrap each one in a Document.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=120,
    chunk_size=1000,
)
chunks = splitter.create_documents([transcript])

In [22]:
# Number of chunk Documents the splitter produced from the transcript.
len(chunks)

71

In [23]:
# Spot-check one chunk (the second Document) to see what the split text looks like.
chunks[1]

Document(metadata={}, page_content="whims are these two gentlemen here, Bob and Wade. Wade, why are you making that face? I splashed water on me, and I'm I can't unwet. I don't know what that means, but I don't like it. I went to take a drink and somehow No, that's not the part I don't know what that means. I know about the water. I just I can't stop feeling wet. My all of my head just feels wet. It won't stop, you know. And I'm Bob. Hi, Bob. How are you? Wet. Oh, I'm excited to be here. Dry. Dry as dry as can be. What did the philosopher say? The unwet life is not worth living. I'm pretty sure. I have a specific small talk question. I know this isn't a thing we've ever done before, but you mentioned you were going to be playing basketball. Is that happening? And how is it going? Continue. No, I've not Well, okay. Not by my choice. I would love to. I never heard from my friend who was trying to put the league together. So, I'm thinking about joining like a YMCA or something just so I c

In [25]:
# Embed every chunk with the Ollama 'mxbai-embed-large' model and index the
# resulting vectors in a FAISS vector store.
embeddings = OllamaEmbeddings(model='mxbai-embed-large')
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [26]:
# Mapping from FAISS index position (int) to the docstore ID (UUID string) of each chunk.
vector_store.index_to_docstore_id

{0: '2ee6f7b6-6205-40d7-b313-a4a6ec28f78f',
 1: 'fe0417da-4b59-4682-a3dc-f1949732c96e',
 2: '23eac3b4-727b-4abc-9f6b-6c1aa518bc8a',
 3: '7d2216b1-1c00-46da-9241-d7b618a4f3da',
 4: '2f350c2d-bd36-4d84-a3e6-4ba12129284f',
 5: '0490813f-15fe-44c8-bfc0-067c564e4f7d',
 6: '5e6b55cf-4645-4ce1-95c6-8e80e91a897b',
 7: 'e0f531da-ac86-458c-a478-e5087a2f142e',
 8: '813538d2-8eaa-4401-b45f-25bea913dcd6',
 9: '35a9270e-41da-4b4a-8cce-b2cbd4d71e66',
 10: '7ee41f19-cdb5-46b9-be71-806343cce84d',
 11: '7fdf88d9-258c-47e7-b6a1-2021ad42eb6a',
 12: '9b9ee50e-79f6-4a79-b751-cf8953f1582b',
 13: '682d1f3e-a588-459e-b552-4e84f749507d',
 14: '6a39515c-92df-49c9-8629-5805f048c21b',
 15: '2ed93bc3-98ca-42a7-859d-8cbebf8b11ba',
 16: '9e39590d-ec86-4590-808a-473e1dc175a4',
 17: '42a79312-47be-4e92-a7e5-48ca8e654ec2',
 18: '77e48b5e-b344-4d93-9f47-a763bd449a38',
 19: '17e3cff2-6871-4b22-a839-2ab36a4593a4',
 20: '7b072527-bc10-4e8e-8157-95e87ffeb27f',
 21: 'ac449071-4718-47dd-899f-a3e59a13fd18',
 22: '312fa79a-773e-

In [27]:
# Fetch one stored Document by ID. The docstore IDs are UUIDs assigned when the
# index is built, so an ID copied from an earlier run is not expected to exist
# after a kernel restart. Look the ID up from the store's own position -> ID
# mapping instead of hard-coding it; here we take the last indexed position.
vector_store.get_by_ids(
    [vector_store.index_to_docstore_id[len(vector_store.index_to_docstore_id) - 1]]
)

[Document(id='8f50f84e-9cc6-4ca4-876e-2b703ef088ff', metadata={}, page_content="cuz I'm dead. Death is the end we will all see one day. And therefore, I win by getting there now. Mmentoto mori. Well, uh I'm going to refine this idea. I want to get to more scenarios in the next one. Much like with N I'd win. I was I was going for that kind of vibe. I think I've got We've tested it once. The Perfect Crime Part Two will be better than this one. Even though this one was very funny. You guys did great. So, thank you so much uh for all of your hard work. Thank you all for all of your hard listening. For the listeners who came in third place viewers, you're on the [\xa0__\xa0] list. Keep up. You're not even on the scoreboard. What are you doing? Terrible record. Uh thank you everybody. Be sure to follow the podcast. Uh merch never shut up about it unless no. Uh follow these guys Lord Minion 777 and me Markiplier. Thank you. Podcast out.")]

# Retrieval

In [60]:
# Expose the vector store as a retriever: plain similarity search returning
# the 10 closest chunks per query.
retriever = vector_store.as_retriever(
    search_kwargs={'k': 10},
    search_type='similarity',
)

In [61]:
# Display the retriever's configuration (tags, backing store, search kwargs).
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f078a91c310>, search_kwargs={'k': 10})

In [62]:
# Smoke-test retrieval: the chunk that introduces the two guests should come back.
retriever.invoke('who are the two gentlemen?')

[Document(id='fe0417da-4b59-4682-a3dc-f1949732c96e', metadata={}, page_content="whims are these two gentlemen here, Bob and Wade. Wade, why are you making that face? I splashed water on me, and I'm I can't unwet. I don't know what that means, but I don't like it. I went to take a drink and somehow No, that's not the part I don't know what that means. I know about the water. I just I can't stop feeling wet. My all of my head just feels wet. It won't stop, you know. And I'm Bob. Hi, Bob. How are you? Wet. Oh, I'm excited to be here. Dry. Dry as dry as can be. What did the philosopher say? The unwet life is not worth living. I'm pretty sure. I have a specific small talk question. I know this isn't a thing we've ever done before, but you mentioned you were going to be playing basketball. Is that happening? And how is it going? Continue. No, I've not Well, okay. Not by my choice. I would love to. I never heard from my friend who was trying to put the league together. So, I'm thinking about 

# Augmentation

In [63]:
# Chat model served by Ollama; temperature is set to 0.2.
llm = ChatOllama(
    temperature=0.2,
    model='llama2',
)

In [64]:
# Prompt skeleton: system-style instructions, then the retrieved transcript
# context, then the user's question. The two placeholders are filled in at
# invoke time.
template = '''
You are a helpful assistant.
Answer ONLY from the provided transcript context.
If the context is insufficient, just say you dont know.

{context}
Question: {question}
'''

prompt = PromptTemplate(input_variables=['context', 'question'], template=template)

In [65]:
# Manual walk-through of the pipeline, step 1: retrieve the chunks most
# similar to the question.
question = 'summarize the process of prison break in this video'
retrived_docs = retriever.invoke(input=question)

In [66]:
# Inspect the Documents the retriever returned for the question.
retrived_docs

[Document(id='9c49c831-0f8e-45f8-ab39-9c458d17448e', metadata={}, page_content="work. Yeah, you're fine. You're back in. Pepper spray is activated and you're standing at the front the the entrance just like but like a mime. Like a mime. Yeah. For the listeners, I was looking around like a mime. Imagine a mime looking around. That's what I was doing. Oh yes, we should do a silent mime episode sometime for the listeners. Wait, due to the shouting and the fact that he's now set up a trap and cut off one escape route, I'm adding a adding a disadvantage point here. So you're back down to minus one. He's he's he's being smart about this. I shouted. Clearly, they bought it. I open the cell. I toss the former prison guard in. And now it's time to just waltz out the normal guard entrance exit. I pull out my fake security badges. Prepare to flash them to the other guards and I walk toward what I wherever I presume may be the uh guards come in and out for the day. The exit door that I'm miming ri

In [67]:
# Step 2: merge the retrieved chunks into one context string, separated by
# blank lines.
context_text = '\n\n'.join([retrieved.page_content for retrieved in retrived_docs])

In [68]:
# Step 3: fill the prompt template with the merged context and the question.
final_prompt = prompt.invoke(dict(context=context_text, question=question))

In [69]:
# Show the fully rendered prompt that will be sent to the model.
final_prompt

StringPromptValue(text='\nYou are a helpful assistant.\nAnswer ONLY from the provided transcript context.\nIf the context is insufficient, just say you dont know.\n\nwork. Yeah, you\'re fine. You\'re back in. Pepper spray is activated and you\'re standing at the front the the entrance just like but like a mime. Like a mime. Yeah. For the listeners, I was looking around like a mime. Imagine a mime looking around. That\'s what I was doing. Oh yes, we should do a silent mime episode sometime for the listeners. Wait, due to the shouting and the fact that he\'s now set up a trap and cut off one escape route, I\'m adding a adding a disadvantage point here. So you\'re back down to minus one. He\'s he\'s he\'s being smart about this. I shouted. Clearly, they bought it. I open the cell. I toss the former prison guard in. And now it\'s time to just waltz out the normal guard entrance exit. I pull out my fake security badges. Prepare to flash them to the other guards and I walk toward what I wher

# Generation

In [70]:
# Step 4: send the rendered prompt to the chat model.
answer = llm.invoke(final_prompt)
# print() (rather than a bare expression) so newlines in the reply render as line breaks.
print(answer.content)

In this video, Wade is attempting to break out of prison with the help of Bob, who is pretending to be a security guard. Here is a summary of the process:

1. Wade and Bob plan their escape by analyzing the prison's layout and security measures.
2. Bob sets up a trap for the prison guard by placing pepper spray in a location where the guard will trip and fall.
3. While the guard is distracted, Wade makes his way to the middle of the chamber between the two doors that lead out of the prison.
4. Wade sets his motion-activated pepper spray on the ground and waits for the guard to come through one of the doors.
5. When the guard enters the chamber, Wade activates the pepper spray, temporarily disabling him.
6. Wade then puts on a security guard uniform and pretends to be the knocked-out guard, allowing him to walk past the remaining guards without being detected.
7. Once outside, Wade must navigate through additional security measures, such as cameras and alarms, in order to reach freedom.

# Building a Chain

In [71]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [73]:
def format_docs(retrived_docs):
    """Merge retrieved documents into a single context string.

    Args:
        retrived_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with a blank line between them;
        an empty string when no documents are given.
    """
    separator = '\n\n'
    page_texts = [doc.page_content for doc in retrived_docs]
    return separator.join(page_texts)

In [74]:
# Fan the incoming question out to two branches:
#   question -> passed through unchanged
#   context  -> retrieved chunks, merged into one string by format_docs
parallel_chain = RunnableParallel(
    question=RunnablePassthrough(),
    context=retriever | RunnableLambda(format_docs),
)

In [76]:
# Smoke-test the fan-out: the result is a dict with the original 'question' and the merged 'context'.
parallel_chain.invoke('where is the prison break located?')

{'question': 'where is the prison break located?',
 'context': 'That\'s a robbery in progress app. Okay. And I\'m in prison trying to get out or what? Am I breaking into the prison to break someone out? That\'s an interesting What would you rather do? Would you rather be breaking out? Cuz that might be more interesting. You\'re trying to break out. He\'s trying to keep you in. Let\'s go. That That That\'s actually better than trying to break someone else out. Okay. You have a lot of stuff for a prisoner. I sure do. And these are the only items I have. I can\'t just Yep. You have You\'re I mean, especially if you\'re a prisoner, you got nothing. But I will say you can use the environment to your advantage. But it\'s it\'s barebones. This is Alcatraz level prison. Let\'s just say it is Alcatraz. Hardest prison to break out of. It\'s been done before, but it\'s also in the middle of the ocean. And you have the world\'s shortest breath. That\'s a huge disadvantage. I love this for me. It\'

In [77]:
# End-to-end RAG pipeline: build {question, context} -> render the prompt ->
# call the chat model -> parse the model output to a plain string.
parser = StrOutputParser()
main_chain = (
    parallel_chain
    | prompt
    | llm
    | parser
)

In [78]:
# Run the full chain on a question; the result is the model's answer as a string.
main_chain.invoke('what is the plot of this video?')

'The plot of this video appears to be a humorous retelling of a prison escape story, with the speaker playing the role of a warden attempting to catch a prisoner (played by Bob) who has escaped through a hole in the wall. The speaker uses various elements of humor and exaggeration throughout the story, such as referring to the prisoner as "the world\'s dumbest criminal" and using a color picker to compare the colors of different objects. The video ends with the speaker saying that Bob has escaped and the listener is left wondering what will happen next in the story.'