# Veritasium - Agents

## Downloading & Importing Libraries

In [1]:
!pip install langchain langchain_community langchain_openai langchain-pinecone pinecone-client wikipedia sentence-transformers



In [19]:
import os
import re
from google.colab import files
from google.colab import userdata
from google.colab import runtime

from pinecone import Pinecone, ServerlessSpec
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import format_document
from langchain.schema.runnable import RunnableMap, RunnableSequence, RunnablePassthrough
from langchain.chains import RetrievalQA, LLMChain
from langchain.vectorstores import Pinecone as LCPinecone
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from sentence_transformers import SentenceTransformer

import logging
import requests
from collections import defaultdict
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import wikipedia
import numpy as np


nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
OPENAI_API_KEY = userdata.get('Ironhack-GPT')
PC_API_KEY = userdata.get('PineCone')
HF_TOKEN = userdata.get('HF')


os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
os.environ['HF_TOKEN'] = HF_TOKEN

## Initialize and Retrieve Embeddings


In [4]:
# Initialize Pinecone
pc = Pinecone(api_key=PC_API_KEY)

# Initialize the Pinecone index
index_name = "veritasium-vs-final"
pinecone_index = pc.Index(index_name)

# Initialize embeddings
embeddings_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model='text-embedding-ada-002')

# Initialize LangChain Pinecone vector store with the summary as text_key
vector_store = LCPinecone(
    index=pinecone_index,
    embedding=embeddings_model,
    text_key="transcription"
)

  warn_deprecated(


In [5]:
# Initialize the Chat LLM with model_kwargs
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

# Define the prompt template
LLM_CONTEXT_PROMPT = ChatPromptTemplate.from_template(
    """You are an assistant for question-answering tasks. Use the following pieces of retrieved info from Veritasium videos to answer the question. If the info doesn't help, just say that you don't know and be concise in your response. else if the retrieved info is helpful, be as verbose and educational in your response as possible.

    Context: {context}
    Question: "{question}"
    Answer:
    """
)

# Create the LLM chain with the prompt template
llm_chain = LLMChain(prompt=LLM_CONTEXT_PROMPT, llm=llm)

# Create the retriever
retriever = vector_store.as_retriever()

# Set up the retrieval-based QA chain using RetrievalQA.from_chain_type
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

  warn_deprecated(


## Chat tests

In [6]:
# Test response
response = pinecone_index.fetch(ids=["vVKFBaaL4uM_1"])
print(response)

{'namespace': '',
 'usage': {'read_units': 1},
 'vectors': {'vVKFBaaL4uM_1': {'id': 'vVKFBaaL4uM_1',
                               'metadata': {'category': 'Physics',
                                            'chunk_id': 'vVKFBaaL4uM_1',
                                            'description': 'einstein s classic '
                                                           'thought experiment '
                                                           'involves sitting '
                                                           'on a train '
                                                           'travelling at the '
                                                           'speed of light if '
                                                           'you hold a mirror '
                                                           'in front of your',
                                            'published_at': '2011-11-01T20:05:36Z',
                                          

In [7]:
# Example query
query = "Which country has the lowest vaccination rate in the world?"

# Construct the input for the QA chain
qa_input = {
    "query": query
}

# Get the answer
try:
    answer = qa_chain.invoke(qa_input)
    print("---- Answer ----")
    print(answer)
except Exception as e:
    print("Error occurred:", str(e))

---- Answer ----
{'query': 'Which country has the lowest vaccination rate in the world?', 'result': "I don't have information to answer your question.", 'source_documents': [Document(metadata={'category': 'Biology', 'chunk_id': '7ziWrneMYss_3', 'description': 'this video is sponsored by brilliant the first 200 people to sign up via https brilliant org veritasium get 20 off a yearly', 'published_at': '2022-03-22T11:55:53Z', 'summary': "In 1870, a British military doctor, Edward Nicholson, was stationed in Burma. He noticed that the older snake handlers were less affected by accidental bites than the younger ones. 20 years later in Saigon, a French medical researcher named Albert Calmet was vaccinating local residents against smallpox. He wondered if it was possible to make a vaccine for snake bites. Back in Paris, he tried injecting rabbits with a tiny amount of cobra venom, starting with just.03 milligrams. After 8 months of this, the rabbits were receiving 15 times the normal lethal d

In [8]:
# Example query
query = "what us special about the number 37?"

# Construct the input for the QA chain
qa_input = {
    "query": query
}

# Get the answer
try:
    answer = qa_chain.invoke(qa_input)
    print("---- Answer ----")
    print(answer)
except Exception as e:
    print("Error occurred:", str(e))

---- Answer ----
{'query': 'what us special about the number 37?', 'result': 'The number 37 has several unique qualities that make it special. It appears in various forms and contexts in everyday life, mathematics, and even human behavior. Some interesting aspects of the number 37 include:\n- It is a prime number, which means it is only divisible by 1 and itself.\n- It has significance in random number selection, with many people perceiving it as a more random choice compared to other numbers.\n- It is a part of several interesting mathematical properties, such as being a permutable prime, a lucky prime, and a Padovan prime.\n- The number 37 is prominently featured in various scenarios and occurrences, leading to a sense of attraction or fascination with the number among individuals.\n\nOverall, the number 37 holds a special place in mathematics, human perception, and everyday occurrences, making it an intriguing and unique number.', 'source_documents': [Document(metadata={'category': 

In [9]:
# Example query
query = "what do you know about snake bites?"

# Construct the input for the QA chain
qa_input = {
    "query": query
}

# Get the answer
try:
    answer = qa_chain.invoke(qa_input)
    print("---- Answer ----")
    print(answer)
except Exception as e:
    print("Error occurred:", str(e))

---- Answer ----
{'query': 'what do you know about snake bites?', 'result': "I know that snake venom can be very dangerous and can have different effects on the human body depending on the type of snake. The venom can be neurotoxic, hemotoxic, cytotoxic, or myotoxic, causing various issues such as paralysis, internal bleeding, tissue damage, and more. The process of creating antivenom involves injecting a large organism like a horse with a dilute solution of the venom to produce antibodies that can neutralize the venom in case of a snake bite. It's also important to stay calm and immobilize the affected limb if bitten by a snake.", 'source_documents': [Document(metadata={'category': 'Biology', 'chunk_id': '7ziWrneMYss_2', 'description': 'this video is sponsored by brilliant the first 200 people to sign up via https brilliant org veritasium get 20 off a yearly', 'published_at': '2022-03-22T11:55:53Z', 'summary': 'A bite from the inland taipan has enough venom to kill half a million mice

In [10]:
# Example query
query = "who's the president of Spain??"

# Construct the input for the QA chain
qa_input = {
    "query": query
}

# Get the answer
try:
    answer = qa_chain.invoke(qa_input)
    print("---- Answer ----")
    print(answer)
except Exception as e:
    print("Error occurred:", str(e))

---- Answer ----
{'query': "who's the president of Spain??", 'result': "I don't know.", 'source_documents': [Document(metadata={'category': 'Space', 'chunk_id': '6YOz9Pxnzho_1', 'description': 'what it s like to see the earth from orbit special thanks to col chris hadfield for chatting with me http chrishadfield ca space', 'published_at': '2015-02-09T16:33:14Z', 'summary': '"I\'m confident this isn\'t the end of the world. This is just a problem that we\'re facing that is going to change things, but we\'re going to have to figure out a way to deal with it," he says. "It\'s us or me or I that has to make the change"', 'title': 'an astronaut s view of earth', 'url': 'https://www.youtube.com/watch?v=6YOz9Pxnzho', 'video_id': '6YOz9Pxnzho'}, page_content='individually That s who has to make the change You can t say they or him or her or it It s us or me or I that has to make the change and it s not going to be perfect and it s going to have to get a little bit critical before people are tr

In [11]:
# Example query
query = "give me 5 topics you know about physics??"

# Construct the input for the QA chain
qa_input = {
    "query": query
}

# Get the answer
try:
    answer = qa_chain.invoke(qa_input)
    print("---- Answer ----")
    print(answer)
except Exception as e:
    print("Error occurred:", str(e))

---- Answer ----
{'query': 'give me 5 topics you know about physics??', 'result': '1. Newtonian physics and the concept of force causing changes in speed.\n2. General relativity and how it explains why all objects fall at the same rate.\n3. Kinetic energy, its relationship to mass and velocity, and its significance in satellite technology.\n4. The physics behind phenomena like the phone flip, electric charge attraction, magnetic cereal, and tea bag rocket.\n5. The concept of gravitational mass and inertial mass and their relationship in general relativity.', 'source_documents': [Document(metadata={'category': 'Physics', 'chunk_id': '1Xp_imnO6WE_0', 'description': 'five cool physics tricks but how do they work explanations http youtu be jimihpdmbpy check out audible com', 'published_at': '2014-08-06T06:46:32Z', 'summary': "Five fun physics phenomena. Have you ever tried to spin your phone? If you do it in this direction, it's pretty easy. But if you try to flip your phone end over end l

In [12]:
# Example query
query = "who is Derek Muller?"

# Construct the input for the QA chain
qa_input = {
    "query": query
}

# Get the answer
try:
    answer = qa_chain.invoke(qa_input)
    print("---- Answer ----")
    print(answer)
except Exception as e:
    print("Error occurred:", str(e))

---- Answer ----
{'query': 'who is Derek Muller?', 'result': 'Derek Muller is a scientist, educator, and filmmaker known for his work in creating educational content and videos that explain scientific concepts and stories behind various places, including those seen on Google Earth.', 'source_documents': [Document(metadata={'category': 'Physics', 'chunk_id': 'liqF6EamiE4_1', 'description': 'when sunlight shines through a small hole it casts a circular image on the wall regardless of the shape of the hole the size of the', 'published_at': '2011-06-13T22:30:47Z', 'summary': "re seeing is... A projection of the sun. I've never thought about it before. Well, I did photography at school and we did pinhole cameras. The hole actually reflects what it's showing on the wall. So? So you see what you see on the other side.", 'title': 'can you solve this shadow illusion', 'url': 'https://www.youtube.com/watch?v=liqF6EamiE4', 'video_id': 'liqF6EamiE4'}, page_content='re seeing is A projection of the

In [13]:
# Example query
query = "can you fetch me some youtube video urls about physics??"

# Construct the input for the QA chain
qa_input = {
    "query": query
}

# Get the answer
try:
    answer = qa_chain.invoke(qa_input)
    print("---- Answer ----")
    print(answer)
except Exception as e:
    print("Error occurred:", str(e))

---- Answer ----
{'query': 'can you fetch me some youtube video urls about physics??', 'result': "I don't know.", 'source_documents': [Document(metadata={'category': 'Physics', 'chunk_id': '5THOUSvpCKk_0', 'description': 'veritasium is a channel of science and engineering videos featuring experiments expert interviews cool demos and discussions', 'published_at': '2013-02-11T06:09:19Z', 'summary': "Sometimes the simplest questions have the most amazing answers. Where does the Sun get that energy from? Where do they get the matter to make the tree? What is a candle flame really made of? Whoa! How does it do that? Go the laws of physics! I can't see the X. I guess the question is why not?", 'title': 'veritasium trailer', 'url': 'https://www.youtube.com/watch?v=5THOUSvpCKk', 'video_id': '5THOUSvpCKk'}, page_content='Sometimes the simplest questions have the most amazing answers Like is there a speed limit in the universe Where does the Sun get that energy from Where do they get the matter 

## Agents

### Retriever

In [14]:
chat_model = ChatOpenAI(api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo")

In [36]:
# Define the function to ask GPT with retriever
def ask_gpt_with_retriever(query, context=""):
    # Use the qa_chain to get the response and source documents
    result = qa_chain({"query": query})
    response = result["result"]
    source_documents = result["source_documents"]

    # Log retrieved documents for verification
    retrieved_texts = "\n\n".join(doc.page_content for doc in source_documents)
    print("Retrieved Documents:\n", retrieved_texts)

    # Combine retrieved texts with the existing context
    combined_context = context + "\n\nRetrieved documents:\n" + retrieved_texts

    messages = [
        SystemMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved info from Veritasium videos to answer the question. If the info doesn't help, just say that you don't know and be concise in your response. else if the retrieved info is helpful, be as verbose and educational in your response as possible."),
        HumanMessage(content="Here is some info retrieved from Veritasium videos:\n" + combined_context),
        HumanMessage(content="Based on this info, please answer the following question':"),
        HumanMessage(content=query)
    ]

    prompt = ChatPromptTemplate.from_messages(messages)
    llm_chain = LLMChain(llm=chat_model, prompt=prompt)
    gpt_response = llm_chain.run({})
    return gpt_response

In [None]:
# Define the function to simulate the conversation
def simulate_conversation(queries):
    context = ""
    for i, query in enumerate(queries):
        # Process the query using GPT-3.5 Turbo with retriever
        response = ask_gpt_with_retriever(query, context)

        # Update context with the current query and response
        context += f"\nUser Query {i+1}: {query}\nBot Response {i+1}: {response}\n"

        # Print the conversation
        print(f"User Query {i+1}: {query}")
        print(f"Bot Response {i+1}: {response}")
        print("-" * 50)

# Define a set of conversational queries for testing
test_queries = [
    "how are you?",
    "tell me about the number 37?",
    "where do you get this info from?",
    "What are some other fun math facts?",
    "Can you fetch me some YouTube video URLs about physics?",
    "Tell me about the speed limit in the universe.",
    "How does quantum entanglement work?",
    "Can you summarize the video about imaginary numbers?",
    "What are some fun physics phenomena?",
    "Who is the president of Spain?",
]

# Run the simulated conversation
simulate_conversation(test_queries)

In [37]:
test_queries = [
    "can you share a video url explaining how bikes work?",
    "who is Derek Muller??",
    "How many videos do you have?"
]

# Run the simulated conversation
simulate_conversation(test_queries)

Retrieved Documents:
 Most people don t know how bicycles actually work So we modified this bike to prove it This video is sponsored by KiwiCo More about them at the end of the show controller that allows him to lock out the steering to one side So what he s going to do is as I m biking he s going to pick whether I can turn either to the left or to the right So go for it I m giving it a left turn it pulls the pin out But you can see that you can still fully steer after I ve pulled the pin out I ve armed it There s where it locks Okay Now that s when your LED comes on That just says turn that way Turn left Yeah And if I try to turn right Can t I can t And if I try to turn left you can I can So the question is can I successfully execute this left hand turn Should we give it a shot I mean he s not going to tell me whether it s left or right so I have to look at the LED to know which way I can still turn You let me know when you re ready Okay No That was meant to be a turn to the right but

### Fetch Agent

In [38]:
class FetchAgent:
    def __init__(self, pinecone_index):
        self.pinecone_index = pinecone_index
        self.vectorizer = TfidfVectorizer()

    def fetch_all_video_ids(self):
        try:
            query_response = self.pinecone_index.query(
                vector=[0] * 1536,
                top_k=1500,
                include_metadata=True
            )
            all_ids = [match['id'] for match in query_response['matches']]
            print(f"Fetched {len(all_ids)} video IDs.")
            return all_ids
        except Exception as e:
            print(f"An error occurred while fetching video IDs: {e}")
            return []

    def fetch_video_metadata(self, video_ids):
        try:
            video_data = self.pinecone_index.fetch(ids=video_ids)
            return video_data['vectors']
        except Exception as e:
            print(f"An error occurred while fetching video metadata: {e}")
            return {}

    def fetch_video_urls(self, keyword_phrase, all_ids):
        results = defaultdict(list)
        if not all_ids:
            print("No video IDs found.")
            return results

        # Vectorize the query
        query_vector = self.vectorizer.fit_transform([keyword_phrase]).toarray()

        # Fetch metadata in batches
        batch_size = 100
        for i in range(0, len(all_ids), batch_size):
            batch_ids = all_ids[i:i+batch_size]
            video_metadata_batch = self.fetch_video_metadata(batch_ids)
            if not video_metadata_batch:
                print("No video metadata found.")
                continue

            for chunk_id, video_metadata in video_metadata_batch.items():
                metadata = video_metadata.get('metadata', {})
                title = metadata.get('title', '')
                description = metadata.get('description', '')
                transcription = metadata.get('transcription', '')
                base_video_id = chunk_id.split('_')[0]

                # Combine title, description, and transcription
                combined_text = f"{title} {description} {transcription}"

                # Vectorize the combined text
                text_vector = self.vectorizer.transform([combined_text]).toarray()

                # Calculate cosine similarity
                relevance_score = cosine_similarity(query_vector, text_vector)[0][0]

                if relevance_score > 0:
                    if base_video_id not in results or results[base_video_id][2] < relevance_score:
                        results[base_video_id] = (title, metadata['url'], relevance_score)

        # Sort results by relevance score in descending order
        sorted_results = sorted(results.values(), key=lambda x: x[2], reverse=True)
        return [(title, url) for title, url, _ in sorted_results]

    def extract_keywords(self, query):
        # Use NLTK stopwords and additional custom stopwords
        stop_words = set(stopwords.words('english'))
        custom_stopwords = {'can', 'you', 'share', 'a', 'video', 'url', 'explaining', 'the', 'about', 'is', 'are', 'and', 'in'}
        all_stopwords = stop_words.union(custom_stopwords)

        # Simple keyword extraction using regular expression and common words filtering
        query = re.sub(r'[^\w\s]', '', query)  # Remove punctuation
        words = query.lower().split()
        keywords = [word for word in words if word not in all_stopwords]
        return ' '.join(keywords)  # Join keywords into a single phrase

    def run(self, query):
        keyword_phrase = self.extract_keywords(query)
        print(f"Extracted Keyword Phrase: {keyword_phrase}")  # Print extracted keywords for debugging

        if not keyword_phrase:
            return "No relevant keywords found in the query."

        all_ids = self.fetch_all_video_ids()
        if not all_ids:
            return "No video IDs found."

        results = self.fetch_video_urls(keyword_phrase, all_ids)

        if results:
            unique_results = list(dict.fromkeys(results))  # Remove duplicates while maintaining order
            response = "Here are some video recommendations (while the video might not be strictly about your topic, it might be related):\n"
            for title, url in unique_results[:3]:  # Limit to top 3 results
                response += f"{title}: {url}\n"
        else:
            response = "No videos found for your query."
        return response

# Initialize Agents
fetch_agent = FetchAgent(pinecone_index)

In [20]:
# Example Usage
user_query = "can you share a video url explaining how bikes work?"
print(fetch_agent.run(user_query))

Extracted Keyword Phrase: bikes work
Fetched 1066 video IDs.
Here are some video recommendations (while the video might not be strictly about your topic, it might be related):
4 revolutionary riddles resolved: https://www.youtube.com/watch?v=72DCj3BztG4
most people don t know how bikes work: https://www.youtube.com/watch?v=9cNmUNHSBac
how bikes actually work: https://www.youtube.com/watch?v=scliyWrN7mk



In [21]:
user_query = "can you share a video about quantam computing??"
print(fetch_agent.run(user_query))

Extracted Keyword Phrase: quantam computing
Fetched 1066 video IDs.
Here are some video recommendations (while the video might not be strictly about your topic, it might be related):
the remarkable story behind the most important algorithm of all time: https://www.youtube.com/watch?v=nmgFG7PUHfo
how does a quantum computer work: https://www.youtube.com/watch?v=g_IaVepNDT4
how quantum computers break the internet starting now: https://www.youtube.com/watch?v=-UrdExQW0cs



In [22]:
user_query = "can you share a video about president Max??"
print(fetch_agent.run(user_query))

Extracted Keyword Phrase: president max
Fetched 1066 video IDs.
Here are some video recommendations (while the video might not be strictly about your topic, it might be related):
the real story of oppenheimer: https://www.youtube.com/watch?v=Xzv84ZdtlE0
parallel worlds probably exist here s why: https://www.youtube.com/watch?v=kTXTPe3wahc
when is a bungee jumper s acceleration max: https://www.youtube.com/watch?v=FhmLBxyX8Dw



In [23]:
user_query = "can you share a video about turbana silia??"
print(fetch_agent.run(user_query))

Extracted Keyword Phrase: turbana silia
Fetched 1066 video IDs.
No videos found for your query.


In [24]:
user_query = "can you share a video about spiders??"
print(fetch_agent.run(user_query))

Extracted Keyword Phrase: spiders
Fetched 1066 video IDs.
Here are some video recommendations (while the video might not be strictly about your topic, it might be related):
why life seems to speed up as we age: https://www.youtube.com/watch?v=aIx2N-viNwY
the stickiest non sticky substance: https://www.youtube.com/watch?v=vS0TuIPoeBs
what jumping spiders teach us about color: https://www.youtube.com/watch?v=nfAqTSjMBJk



### Video Summarizer

In [39]:
class VideoSummarizerAgent:
    def __init__(self, fetch_agent, qa_chain):
        self.fetch_agent = fetch_agent
        self.qa_chain = qa_chain
        self.vectorizer = TfidfVectorizer()

    def fetch_video_chunks(self, base_video_id):
        try:
            chunk_ids = [f"{base_video_id}_{i}" for i in range(30)]  # Adjust as needed
            video_metadata_batch = self.fetch_agent.fetch_video_metadata(chunk_ids)
            combined_transcriptions = []

            for chunk_id in sorted(video_metadata_batch.keys(), key=lambda x: int(x.split('_')[-1])):
                video_metadata = video_metadata_batch[chunk_id]['metadata']
                transcription = video_metadata.get('transcription', "")
                if isinstance(transcription, str):
                    combined_transcriptions.append(transcription)
                else:
                    combined_transcriptions.extend(transcription)

            combined_text = " ".join(combined_transcriptions)
            print(f"Fetched and combined transcription for video ID {base_video_id}")
            return combined_text
        except Exception as e:
            print(f"An error occurred while fetching video chunks for video ID {base_video_id}: {e}")
            return ""

    def filter_content(self, text):
        filtered_text = re.sub(r'This video is sponsored by.*?$', '', text, flags=re.MULTILINE)
        filtered_text = re.sub(r'Check out .* for more information', '', filtered_text, flags=re.MULTILINE)
        # Add more filtering rules as needed
        return filtered_text

    def extract_keywords(self, query):
        stop_words = set(stopwords.words('english'))
        custom_stopwords = {'can', 'you', 'share', 'a', 'video', 'url', 'explaining', 'the', 'about', 'is', 'are', 'and', 'in'}
        all_stopwords = stop_words.union(custom_stopwords)

        query = re.sub(r'[^\w\s]', '', query)  # Remove punctuation
        words = query.lower().split()
        keywords = [word for word in words if word not in all_stopwords]
        return ' '.join(keywords)  # Join keywords into a single phrase

    def search_similar_videos(self, query, all_ids):
        keyword_phrase = self.extract_keywords(query)
        query_vector = self.vectorizer.fit_transform([keyword_phrase]).toarray()

        best_match_id = None
        highest_similarity = 0

        for batch_start in range(0, len(all_ids), 100):
            batch_ids = all_ids[batch_start:batch_start+100]
            video_metadata_batch = self.fetch_agent.fetch_video_metadata(batch_ids)

            for chunk_id, video_metadata in video_metadata_batch.items():
                metadata = video_metadata.get('metadata', {})
                title = metadata.get('title', '')
                description = metadata.get('description', '')
                transcription = metadata.get('transcription', '')
                combined_text = f"{title} {description} {transcription}"

                text_vector = self.vectorizer.transform([combined_text]).toarray()
                similarity = cosine_similarity(query_vector, text_vector)[0][0]

                if similarity > highest_similarity:
                    highest_similarity = similarity
                    best_match_id = chunk_id.split('_')[0]

        return best_match_id

    def extract_video_id(self, video_url_or_title):
        match = re.search(r'(?:v=|video id |youtu\.be/)([\w-]+)', video_url_or_title)
        if match:
            video_id = match.group(1)
            print(f"Extracted video ID from query: {video_id}")
            return video_id
        else:
            all_ids = self.fetch_agent.fetch_all_video_ids()
            if not all_ids:
                return None

            best_match_id = self.search_similar_videos(video_url_or_title, all_ids)
            if best_match_id:
                print(f"Extracted video ID from title search: {best_match_id}")
                return best_match_id
            else:
                print("No matching video found.")
                return None

    def summarize_video(self, video_url_or_title):
        try:
            video_id = self.extract_video_id(video_url_or_title)
            if not video_id:
                return "Could not find the video in Veritasium's channel. Please make sure to send the video URL or title."

            combined_text = self.fetch_video_chunks(video_id)
            if not combined_text:
                return "Could not find the video in Veritasium's channel. Please make sure to send the video URL or title."

            # Filter out unnecessary content
            filtered_text = self.filter_content(combined_text)

            # Generate summary using ask_gpt_with_retriever with a refined prompt
            summary_prompt = f"Provide a comprehensive and concise summary of the following video, removing any promotional content or irrelevant details:\n\n{filtered_text}"
            summary = ask_gpt_with_retriever(summary_prompt)
            print(f"Generated summary for video ID {video_id}")
            return summary.strip()
        except Exception as e:
            print(f"An unexpected error occurred for video {video_url_or_title}: {e}")
            return "Error: Unable to summarize the video due to an unexpected issue."

    def run(self, query):
        summary = self.summarize_video(query)
        return summary

# Initialize the VideoSummarizerAgent with the fetch_agent and qa_chain
summarizer_agent = VideoSummarizerAgent(fetch_agent, qa_chain)

In [26]:
# Example usage
user_query = "summarize this video https://www.youtube.com/watch?v=vVKFBaaL4uM"
print(summarizer_agent.run(user_query))

Extracted video ID from query: vVKFBaaL4uM
Fetched and combined transcription for video ID vVKFBaaL4uM
Retrieved Documents:
 What would it look like to ride alongside a beam of light This is a question Einstein asked over a hundred years ago Now the trouble was he couldn t actually do the experiment so he had to use his imagination He had to do a thought experiment to figure out what a reasonable outcome would be Einstein s intuition told him that the world should feel the same in all frames of reference moving with constant velocity These are called inertial frames of reference So it doesn t matter if you re in a train or on a plane or in a car or in a room As long as you re moving with constant velocity the laws of physics should apply in exactly the same way And this is what s called the principle of relativity Is there a speed limit the universe I don t think so So you can go as fast as you want Probably Could we go as fast as light Sure eventually absolutely Can you compare the sp

In [27]:
user_query = "summarize this video id dvk2PQNcg8w"
print(summarizer_agent.run(user_query))

Extracted video ID from query: dvk2PQNcg8w
Fetched and combined transcription for video ID dvk2PQNcg8w
Retrieved Documents:
 This is the challenge of a YouTuber which is pushing that record button and actually filming something because you never know are people going to hate it Is it good enough Have you thought through what you re going to say I have not thought through what I m going to say Okay so I want to talk about the question of why is it that right now when it is really easy to get access to facts and information where you can just pull up your phone and look up anything and the world Why is it now that we have the most access to facts Do facts mean the least This is what I want to know Why does fake news spread now Why are we more polarized now than ever before And what I have to kind of admit to you is that I was a real optimist Maybe I was naive about the internet but my thinking about having an international communication system whereby anyone anywhere can share anything a

In [28]:
user_query = "summarize this video 'how bikes actually work'"
print(summarizer_agent.run(user_query))

Fetched 1066 video IDs.
Extracted video ID from title search: 9cNmUNHSBac
Fetched and combined transcription for video ID 9cNmUNHSBac
Retrieved Documents:
 Most people don t know how bicycles actually work So we modified this bike to prove it This video is sponsored by KiwiCo More about them at the end of the show controller that allows him to lock out the steering to one side So what he s going to do is as I m biking he s going to pick whether I can turn either to the left or to the right So go for it I m giving it a left turn it pulls the pin out But you can see that you can still fully steer after I ve pulled the pin out I ve armed it There s where it locks Okay Now that s when your LED comes on That just says turn that way Turn left Yeah And if I try to turn right Can t I can t And if I try to turn left you can I can So the question is can I successfully execute this left hand turn Should we give it a shot I mean he s not going to tell me whether it s left or right so I have to loo

In [29]:
user_query = "summarize this video flexi waszii"
print(summarizer_agent.run(user_query))

Fetched 1066 video IDs.
Extracted video ID from title search: yCsgoLc
Fetched and combined transcription for video ID yCsgoLc
Could not find the video in Veritasium's channel. Please make sure to send the video URL or title.


### External Knowledge Retrieval Agent (Wikipedia)

In [30]:
# TEST
class ExternalKnowledgeRetrievalAgent:
    def __init__(self, qa_chain, fetch_agent, summarizer_agent):
        self.qa_chain = qa_chain
        self.fetch_agent = fetch_agent
        self.summarizer_agent = summarizer_agent

    def fetch_veritasium_info(self, query):
        try:
            result = self.qa_chain({"query": query})
            response = result["result"]
            source_documents = result["source_documents"]

            # Check if the response is relevant
            if "I don't know" in response or len(source_documents) == 0:
                return None, None

            # Log retrieved documents for verification
            retrieved_texts = "\n\n".join(doc.page_content for doc in source_documents)
            print("Retrieved Documents:\n", retrieved_texts)

            # Refer to the document in the response
            response_with_source = f"{response}\n\nSource: Retrieved from Veritasium video."
            return response_with_source, "Veritasium"
        except Exception as e:
            print(f"An error occurred while fetching Veritasium info: {e}")
            return None, None

    def fetch_wikipedia_info(self, query):
        try:
            search_results = wikipedia.search(query)
            if not search_results:
                return None, None
            page = wikipedia.page(search_results[0])
            summary = page.summary
            return f"Information retrieved from Wikipedia:\n\n{summary}", "Wikipedia"
        except wikipedia.exceptions.DisambiguationError as e:
            print(f"Disambiguation error while fetching Wikipedia info: {e}")
            return "Error: Your query is ambiguous. Please provide more specific information.", "Wikipedia"
        except Exception as e:
            print(f"An error occurred while fetching Wikipedia info: {e}")
            return "Error: Unable to retrieve information from Wikipedia.", "Wikipedia"

    def answer_query(self, query):
        veritasium_answer, veritasium_source = self.fetch_veritasium_info(query)
        if veritasium_answer:
            return veritasium_answer, veritasium_source
        else:
            wikipedia_answer, wikipedia_source = self.fetch_wikipedia_info(query)
            if wikipedia_answer:
                return wikipedia_answer, wikipedia_source
            else:
                return "No relevant information found. Please provide more details or check your query.", "None"

def process_query(query):
    # Check if query is asking for a video URL
    if is_video_fetch_query(query):
        response = fetch_agent.run(query)
        source = "FetchAgent"
        return response, source

    # Check if query is asking to summarize a video
    elif is_video_summarization_query(query):
        response = summarizer_agent.run(query)
        source = "VideoSummarizerAgent"
        return response, source

    # Handle general questions
    else:
        response, source = external_knowledge_agent.answer_query(query)
        return response, source

def is_video_fetch_query(query):
    video_fetch_keywords = ["share a video", "video about", "video explaining"]
    return any(keyword in query.lower() for keyword in video_fetch_keywords)

def is_video_summarization_query(query):
    summarization_keywords = ["summarize this video", "video id", "video title"]
    return any(keyword in query.lower() for keyword in summarization_keywords)

# Example usage
external_knowledge_agent = ExternalKnowledgeRetrievalAgent(qa_chain, fetch_agent, summarizer_agent)

# Simulate different types of queries
queries = [
    "Can you share a video about quantum computing?",
    "Summarize this video https://www.youtube.com/watch?v=vVKFBaaL4uM",
    "What is the significance of the number 37?",
    "Tell me about Henrietta Leavitt",
    "Who is Jesus?"
]

for user_query in queries:
    response, source = process_query(user_query)
    print(f"Source: {source}\nResponse: {response}\n")



Extracted Keyword Phrase: quantum computing
Fetched 1066 video IDs.
Source: FetchAgent
Response: Here are some video recommendations (while the video might not be strictly about your topic, it might be related):
math s fundamental flaw: https://www.youtube.com/watch?v=HeQX2HjkcNo
the trillion dollar equation: https://www.youtube.com/watch?v=A5w-dEgIU1M
how does a quantum computer work: https://www.youtube.com/watch?v=g_IaVepNDT4


Extracted video ID from query: vVKFBaaL4uM
Fetched and combined transcription for video ID vVKFBaaL4uM
Retrieved Documents:
 What would it look like to ride alongside a beam of light This is a question Einstein asked over a hundred years ago Now the trouble was he couldn t actually do the experiment so he had to use his imagination He had to do a thought experiment to figure out what a reasonable outcome would be Einstein s intuition told him that the world should feel the same in all frames of reference moving with constant velocity These are called inertial

In [40]:
# Now that we know thst the fallback works, We can re-define the Agent to only fetch from wikipedia since the retriever will already look in the index for matching docs

class ExternalKnowledgeRetrievalAgent:
    def __init__(self):
        pass

    def fetch_wikipedia_info(self, query):
        try:
            search_results = wikipedia.search(query)
            if not search_results:
                print(f"No relevant search results found for query: '{query}'")
                return "No relevant information found. Please provide more details or check your query.", "None"

            page = wikipedia.page(search_results[0])
            summary = page.summary
            print(f"Successfully fetched information for '{query}' from Wikipedia.")
            return f"Information retrieved from Wikipedia:\n\n{summary}", "Wikipedia"

        except wikipedia.exceptions.DisambiguationError as e:
            print(f"Disambiguation error while fetching Wikipedia info for query '{query}': {e}")
            return "Error: Your query is ambiguous. Please provide more specific information.", "Wikipedia"

        except Exception as e:
            print(f"An error occurred while fetching Wikipedia info for query '{query}': {e}")
            return "Error: Unable to retrieve information from Wikipedia.", "Wikipedia"

    def answer_query(self, query):
        response, source = self.fetch_wikipedia_info(query)
        return response, source

# Initialize the agent
external_knowledge_agent = ExternalKnowledgeRetrievalAgent()

# Example queries
queries = [
    "Clinical significance in psychology",
    "Henrietta Leavitt",
    "Reincarnation of Jesus"
]

# Process each query and print the response
for user_query in queries:
    print(f"\nProcessing query: {user_query}")
    response, source = external_knowledge_agent.answer_query(user_query)
    print(f"Source: {source}\nResponse: {response}\n")



Processing query: Clinical significance in psychology
Successfully fetched information for 'Clinical significance in psychology' from Wikipedia.
Source: Wikipedia
Response: Information retrieved from Wikipedia:

In medicine and psychology, clinical significance is the practical importance of a treatment effect—whether it has a real genuine, palpable, noticeable effect on daily life.




Processing query: Henrietta Leavitt
Successfully fetched information for 'Henrietta Leavitt' from Wikipedia.
Source: Wikipedia
Response: Information retrieved from Wikipedia:

Henrietta Swan Leavitt (; July 4, 1868 – December 12, 1921) was an American astronomer. Her discovery of how to effectively measure vast distances to remote galaxies led to a shift in the scale and understanding of the scale and the nature of the universe. Nomination of Leavitt for the Nobel Prize had to be halted because of her death. 
A graduate of Radcliffe College, she worked at the Harvard College Observatory as a human comp