# Quickstart: Generating Insights using Langchain
Here is how you can set up your own insight generator.

Make sure `SERPER_API_KEY` and `OPEN_AI_API_KEY` are set in your environment.

In [1]:
import os

from langchain.agents.tools import Tool
from langchain.chat_models import ChatOpenAI

# Chat model (temperature=0) passed to the agents created later in this notebook.
# `os` has to be imported in this cell: the API key is read from the
# OPEN_AI_API_KEY environment variable.
llm = ChatOpenAI(temperature=0, openai_api_key=os.environ.get("OPEN_AI_API_KEY"), model="gpt-4-0613")

## Tools for our agent
We decided to give our agents the ability to:
- Search for a query using the web
- Scrape a page to find out more info

In [5]:
# custom search tool, we copied the serper integration on langchain but we prefer all the data to be displayed in one json message

from typing import Any, List, Literal
import requests

# Module-level search settings read by the search helpers defined in this cell.
k: int = 5  # sent to the API as `num` and used to cap how many results parse_snippets keeps
gl: str = "us"  # forwarded as the `gl` request parameter (presumably the country code -- confirm against the Serper docs)
hl: str = "en"  # forwarded as the `hl` request parameter (presumably the language code -- confirm against the Serper docs)
tbs = None  # forwarded as `tbs`; serper_search drops parameters whose value is None
serper_api_key=os.environ.get("SERPER_API_KEY")  # sent in the X-API-KEY header ("" when unset)
search_type: Literal["news", "search", "places", "images"] = "search"  # selects the endpoint path and which result list is parsed

def serper_search(
        search_term: str, search_type: str = "search", **kwargs: Any
    ) -> dict:
    """Send one query to the Serper API and return the decoded JSON response.

    Args:
        search_term: Text sent as the ``q`` request parameter.
        search_type: Path segment appended to ``https://google.serper.dev/``.
        **kwargs: Extra request parameters; entries whose value is ``None``
            are left out of the request.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the response carries an error status.
        requests.Timeout: If the request exceeds the 30 second timeout.
    """
    headers = {
        # Fall back to "" so the header value is always a string.
        "X-API-KEY": serper_api_key or "",
        "Content-Type": "application/json",
    }
    params = {
        "q": search_term,
        **{key: value for key, value in kwargs.items() if value is not None},
    }
    # Without a timeout a stalled connection would block the caller forever;
    # 30 seconds matches the timeout used by scrape_page.
    response = requests.post(
        f"https://google.serper.dev/{search_type}",
        headers=headers,
        params=params,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

def parse_snippets(results: dict) -> List[Any]:
    """Collect the useful pieces of a search response into a flat list.

    The list holds, in order: the answer-box text (if present), the
    knowledge-graph facts (if present), and then up to ``k`` raw result dicts
    taken from the result list that matches the module-level ``search_type``.
    Result dicts are appended unmodified so callers can read fields such as
    ``link`` and ``snippet`` themselves.

    Args:
        results: Decoded JSON response as returned by ``serper_search``.

    Returns:
        A non-empty list. Answer-box and knowledge-graph entries are stored as
        returned/formatted text, individual results as dicts. When nothing
        usable is found the list holds a single fallback message.
    """
    result_key_for_type = {
        "news": "news",
        "places": "places",
        "images": "images",
        "search": "organic",
    }
    snippets = []

    answer_box = results.get("answerBox")
    if answer_box:
        # Only the first available representation of the answer is kept.
        if answer_box.get("answer"):
            snippets.append(answer_box["answer"])
        elif answer_box.get("snippet"):
            snippets.append(answer_box["snippet"].replace("\n", " "))
        elif answer_box.get("snippetHighlighted"):
            snippets.append(answer_box["snippetHighlighted"])

    kg = results.get("knowledgeGraph")
    if kg:
        title = kg.get("title")
        entity_type = kg.get("type")
        if entity_type:
            snippets.append(f"{title}: {entity_type}.")
        description = kg.get("description")
        if description:
            snippets.append(description)
        for attribute, value in kg.get("attributes", {}).items():
            snippets.append(f"{title} {attribute}: {value}.")

    # A response that lacks the expected result list is treated as having no
    # results instead of raising KeyError.
    hits = results.get(result_key_for_type[search_type]) or []
    snippets.extend(result for result in hits[:k] if "snippet" in result)

    if not snippets:
        return ["No good Google Search Result was found"]
    return snippets

def parse_results(results: dict) -> str:
    """Render the parsed snippets as numbered ``Result <n>: ...`` lines.

    Each entry produced by ``parse_snippets`` is converted with ``str`` and
    terminated by a newline; numbering starts at 0.
    """
    numbered_lines = [
        f"Result {position}: {entry!s}\n"
        for position, entry in enumerate(parse_snippets(results))
    ]
    return "".join(numbered_lines)

def custom_search(query: str, parse=True, **kwargs: Any):
    """Search for ``query`` and return all results as one formatted string.

    Args:
        query: Search text.
        parse: Accepted but not used by this function.
        **kwargs: Extra request parameters forwarded to ``serper_search``.

    Returns:
        The string built by ``parse_results`` from the raw response.
    """
    return parse_results(
        serper_search(
            search_term=query,
            search_type=search_type,
            gl=gl,
            hl=hl,
            num=k,
            tbs=tbs,
            **kwargs,
        )
    )
    
def get_search_results(query: str, **kwargs: Any):
    """Search for ``query`` and return the parsed entries as a list.

    Uses the module-level search settings and requests ``k`` results; extra
    keyword arguments are forwarded to ``serper_search``.
    """
    raw_response = serper_search(
        search_term=query,
        search_type=search_type,
        gl=gl,
        hl=hl,
        num=k,
        tbs=tbs,
        **kwargs,
    )
    parsed_entries = parse_snippets(raw_response)
    return parsed_entries

def get_first_search_result(query: str, **kwargs: Any):
    """Search for ``query`` asking for one result and return the first parsed entry.

    The first entry is whatever ``parse_snippets`` puts first in its list.
    """
    raw_response = serper_search(
        search_term=query,
        search_type=search_type,
        gl=gl,
        hl=hl,
        num=1,
        tbs=tbs,
        **kwargs,
    )
    first_entry, *_ = parse_snippets(raw_response)
    return first_entry

# Smoke test: fetch the first parsed entry for a sample query and display it.
# Alternative that returns the formatted string instead:
# custom_search("statistics of deforestation in the amazon")
search_res = get_first_search_result("statistics of deforestation in the amazon")
search_res

{'title': 'Amazon Deforestation — How Much of the Rainforest is Left? - Sentient Media',
 'link': 'https://sentientmedia.org/amazon-deforestation/',
 'snippet': 'Cattle ranching is a leading driver of deforestation in the Amazon, accounting for around 80 percent of the destruction there, and the release of 340 million ...',
 'position': 1}

In [6]:
# Scraping tool
from bs4 import BeautifulSoup
import requests

banned_sites = ["calendar.google.com", "researchgate.net"]

def scrape_page(url: str):
    """Download ``url`` and return the text of its paragraphs and headings.

    The text of every ``<p>`` and ``<h1>``-``<h6>`` element is joined with
    single spaces and all ``|`` characters are removed.

    Returns ``None`` (after printing a message) when the URL contains one of
    the ``banned_sites`` entries or when the request fails.
    """
    for blocked in banned_sites:
        if blocked in url:
            print(f"Skipping site: {url}")
            return None

    # Browser-like request headers.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }
    text_tags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']

    try:
        response = requests.get(url, headers=browser_headers, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        pieces = (element.get_text() for element in soup.find_all(text_tags))
        joined_text = " ".join(pieces)
        return joined_text.replace('|', '')
    except requests.RequestException as err:
        print(f"Failed to fetch {url}. Error: {err}")
        return None
        
# Smoke test: scrape the page behind the search result fetched earlier.
# `search_res` must still be the result dict here (it needs a "link" key).
page = scrape_page(search_res["link"])
page

"\nAmazon Deforestation — How Much of the Rainforest Is Left?  The truth sounds unbelievable, and certainly counterintuitive, when talking about what should be the planet’s lungs: animal agriculture, logging, mining, infrastructure and urban development caused some parts of the Amazon rainforest to emit more carbon dioxide than it absorbs in 2021. Deforestation in the Amazon has eliminated thousands of species of wildlife and plants, put the lives of local communities at risk and crippled one of nature’s most important tools in storing carbon and staving off the climate crisis.\xa0According to the World Resources Institute, the Amazon rainforest remains a net carbon sink, just barely, and that’s thanks to strong protections in lands managed by Indigenous communities. Let’s take a look at how we got here, and what can be done to restore this crucial rainforest.\xa0 The Amazon Rainforest Is a Critical Tool for Climate Action Not only does the rainforest provide a habitat for thousands of

## [TODO] Improving Search Engine
The snippet returned by the search is really poor: it is too short and doesn't give the full context. We want to improve it by scraping the full page for more information.
Below is an example: it answers only the most relevant part of the query in 161 characters, while more insights could be found on the page.

In [7]:
# Show how short the search snippet is: print its length, then display the text.
print(len(search_res["snippet"]))
search_res["snippet"]

161


'Cattle ranching is a leading driver of deforestation in the Amazon, accounting for around 80 percent of the destruction there, and the release of 340 million ...'

But just scraping the whole page will run into character/rate limits, so we want to summarize the page first.
Summarization has trade-offs, though: it removes the stats we were looking for and doesn't even respond to our original query. See the example below.

In [10]:
# SBert Summarizer
from summarizer.sbert import SBertSummarizer
summarizer = SBertSummarizer('paraphrase-MiniLM-L6-v2')

def summarize(text, num_sentences=3):
    """Summarize ``text`` with the module-level SBert ``summarizer``.

    Args:
        text: Text to summarize. ``None`` or an empty string is accepted,
            because ``scrape_page`` returns ``None`` for pages that are
            skipped or cannot be fetched.
        num_sentences: Forwarded to the summarizer as ``num_sentences``.

    Returns:
        The summarizer's output, or ``""`` when there is no text to summarize.
    """
    if not text:
        return ""
    return summarizer(text, num_sentences=num_sentences)

  from .autonotebook import tqdm as notebook_tqdm


In [126]:
# Test the summarizer on the scraped `page` with different sentence counts.
# NOTE: the loop variable is the module-level name `num_sentences`; after this
# cell runs it holds the last value of the list.
num_sentences_variants = [3, 5, 10, 15]
for num_sentences in num_sentences_variants:
    print(f"\n--------------N={num_sentences}----------------\n", summarize(page, num_sentences) + "\n")

  super()._check_params_vs_input(X, default_n_init=10)



--------------N=3----------------
 Amazon Deforestation — How Much of the Rainforest Is Left? In addition to the issues caused by the clearing of such vast areas of the rainforest, agricultural practices such as the use of pesticides and fertilizers have implications for the native wildlife, plant species and Indigenous people of the region. How Deforestation in the Amazon Affects Plant and Animal Species As one of the most biodiverse places on earth, the Amazon is home to thousands of different species.



  super()._check_params_vs_input(X, default_n_init=10)



--------------N=5----------------
 Amazon Deforestation — How Much of the Rainforest Is Left? In addition to the issues caused by the clearing of such vast areas of the rainforest, agricultural practices such as the use of pesticides and fertilizers have implications for the native wildlife, plant species and Indigenous people of the region. As the forest becomes drier and contains less moisture, it becomes more susceptible to wildfires, which themselves cause critical damage to the environment and biodiversity. How Deforestation in the Amazon Affects Plant and Animal Species As one of the most biodiverse places on earth, the Amazon is home to thousands of different species. Related Posts Cosmetic Animal Testing Is Cruel – But There Are Alternatives Pigs Are Intelligent and Clean Animals, Actually The 30 Most Intelligent Animals in the World Might Surprise You The Pet Food Industry’s Future, from Plant-Based to Cultivated Meat The Promises and Pitfalls of Regenerative Agriculture, Exp

  super()._check_params_vs_input(X, default_n_init=10)



--------------N=10----------------
 Amazon Deforestation — How Much of the Rainforest Is Left? Much of the Amazon rainforest remains unexplored — and its remoteness helps limit its destruction. Mining The Amazon region is often mined for gold, copper, iron, manganese and other materials. In 2021, it was discovered that due to deforestation, parts of the rainforest started emitting more carbon than it held. While the natural moisture of a rainforest should provide protection from the spread of fire, increasingly dry conditions can impede the rainforest’s ability to protect itself. In order to protect the Amazon rainforest, and thus ourselves, it’s necessary to take a multi-faceted approach that engages governments, NGOs, local and Indigenous communities, and nature itself. Carbon credit programs, however, are only successful when the removals are real, additional, permanent and measurable; yet a recent investigation published in The Guardian revealed that 90 percent of certified rainfo

  super()._check_params_vs_input(X, default_n_init=10)



--------------N=15----------------
 Amazon Deforestation — How Much of the Rainforest Is Left? The truth sounds unbelievable, and certainly counterintuitive, when talking about what should be the planet’s lungs: animal agriculture, logging, mining, infrastructure and urban development caused some parts of the Amazon rainforest to emit more carbon dioxide than it absorbs in 2021. According to the World Resources Institute, the Amazon rainforest remains a net carbon sink, just barely, and that’s thanks to strong protections in lands managed by Indigenous communities. Brazil is also where the majority of deforestation took place in 2021. How Much of the Amazon Rainforest Has Been Explored? Research suggests that an increase in the number of smaller-scale agriculture operations started across Amazonia between 2001 to 2014 are an effort by landowners to deforest while evading government monitoring systems. Mining The Amazon region is often mined for gold, copper, iron, manganese and other 

N=10 seems like a good length

In [127]:
# Test the summarizer with n sentences on every page returned by a custom search.
n = 10
results = get_search_results("statistics of deforestation in the amazon")
print("Number of results", len(results))
for res in results:
    # Answer-box / knowledge-graph entries are plain text without a link, so
    # only result dicts can be scraped.
    if not isinstance(res, dict) or "link" not in res:
        continue
    page_text = scrape_page(res["link"])
    # scrape_page returns None for banned or unreachable pages; skip those
    # instead of passing None to the summarizer.
    if not page_text:
        print(f"\n--------------N={n}----------------\n", "(page could not be scraped)\n")
        continue
    print(f"\n--------------N={n}----------------\n", summarize(page_text, n) + "\n")

Number of results 5


  super()._check_params_vs_input(X, default_n_init=10)



--------------N=10----------------
 Amazon Deforestation — How Much of the Rainforest Is Left? Much of the Amazon rainforest remains unexplored — and its remoteness helps limit its destruction. Mining The Amazon region is often mined for gold, copper, iron, manganese and other materials. In 2021, it was discovered that due to deforestation, parts of the rainforest started emitting more carbon than it held. While the natural moisture of a rainforest should provide protection from the spread of fire, increasingly dry conditions can impede the rainforest’s ability to protect itself. In order to protect the Amazon rainforest, and thus ourselves, it’s necessary to take a multi-faceted approach that engages governments, NGOs, local and Indigenous communities, and nature itself. Carbon credit programs, however, are only successful when the removals are real, additional, permanent and measurable; yet a recent investigation published in The Guardian revealed that 90 percent of certified rainfo

  super()._check_params_vs_input(X, default_n_init=10)



--------------N=10----------------
 You are using a browser version with limited support for CSS. To obtain
            the best experience, we recommend you use a more up to date browser (or turn off compatibility mode in
            Internet Explorer). Advertisement The Brazilian Amazon deforestation rate in 2020 is the greatest of the decade 
Nature Ecology & Evolution
volume 5, pages 144–145 (2021)Cite this article
 78k Accesses 160 Citations 709 Altmetric Metrics details Subjects To the Editor — In 2012, Brazil achieved an unprecedented feat among tropical countries by reducing deforestation rates in Amazonia by 84% (4,571 km2) compared to the historical peak of 2004, when 27,772 km2 of forests were clear-cut1 (Fig. Although this is not the final number, previous years indicate an average difference of 58 ± 303 km2 between the first estimate and the final consolidated rate1, which will be presented in the first half of 2021. Brazil has clearly failed in its bold intention to redu

  super()._check_params_vs_input(X, default_n_init=10)



--------------N=10----------------
 How big is the Brazilian deforestation issue? Other statistics on the topicAmazon rainforest in Brazil Forestry Ten countries with most forest area 2020 Climate and Weather Number of wildfires in South America 2022, by country Geography & Nature Global tree cover loss 2022, by leading country Climate and Weather Number of wildfires in the Legal Amazon in Brazil 2000-2022 You only have access to basic statistics. Business Solutions including all features. Statistics on
                    
                    "
                    
                        Amazon Rainforest in Brazil
                    
                    "
                             Other statistics that may interest you Amazon Rainforest in Brazil 
                Overview
             
                Deforestation
             
                Wildfires
             
                Emissions
             
                Land use
             Further related statistics Furthe

  super()._check_params_vs_input(X, default_n_init=10)



--------------N=10----------------
 The Amazon saw record deforestation last year. Last year, the Amazon saw some of the highest rates of deforestation in over a decade, due to a combination of cattle ranching, agriculture, mining and road projects that cleared millions of acres of rainforest. An estimated 1.98 million hectares (4.89 million acres) of forest were cleared in 2022, a 21% increase from 2021 and the highest figure on record except for 2004, when over 2 million hectares (about 5 million acres) were lost. “ Satellite readings show that a lot of the forest loss is concentrated around roads, most notably the Trans-Amazonian Highway in the states of Amazonas, Pará, Rondônia and Acre. Fires also directly impacted another 106,922 hectares (264,210 acres). Peru is also struggling with a Mennonite problem in the central Amazon. And while that and gold mining have received the most attention from policymakers and conservationists, the data suggests that much of the 144,682 hectares

  super()._check_params_vs_input(X, default_n_init=10)


### New Search Tool
Honestly, I can't really tell how effective these extra details are for insight generation; we need an evaluation metric.
For now, maybe we can add both the snippet and a summary of the page, so that we have the direct answer to the query plus extra context, while not hitting the rate limit.

In [11]:
# duplicated code from above
from typing import Any, List, Literal
import requests

# Module-level search settings read by the search helpers redefined in this cell.
k: int = 5  # sent to the API as `num` and used to cap how many results parse_snippets keeps
gl: str = "us"  # forwarded as the `gl` request parameter (presumably the country code -- confirm against the Serper docs)
hl: str = "en"  # forwarded as the `hl` request parameter (presumably the language code -- confirm against the Serper docs)
tbs = None  # forwarded as `tbs`; serper_search drops parameters whose value is None
num_sentences = 10  # summary length passed to summarize() for every scraped page
serper_api_key=os.environ.get("SERPER_API_KEY")  # sent in the X-API-KEY header ("" when unset)
search_type: Literal["news", "search", "places", "images"] = "search"  # selects the endpoint path and which result list is parsed

def serper_search(
        search_term: str, search_type: str = "search", **kwargs: Any
    ) -> dict:
    """Send one query to the Serper API and return the decoded JSON response.

    Args:
        search_term: Text sent as the ``q`` request parameter.
        search_type: Path segment appended to ``https://google.serper.dev/``.
        **kwargs: Extra request parameters; entries whose value is ``None``
            are left out of the request.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the response carries an error status.
        requests.Timeout: If the request exceeds the 30 second timeout.
    """
    headers = {
        # Fall back to "" so the header value is always a string.
        "X-API-KEY": serper_api_key or "",
        "Content-Type": "application/json",
    }
    params = {
        "q": search_term,
        **{key: value for key, value in kwargs.items() if value is not None},
    }
    # Without a timeout a stalled connection would block the agent forever;
    # 30 seconds matches the timeout used by scrape_page.
    response = requests.post(
        f"https://google.serper.dev/{search_type}",
        headers=headers,
        params=params,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

def parse_snippets(results: dict) -> List[str]:
    """Turn a search response into a list of text entries for the agent.

    The list holds, in order: the answer-box text (if present), the
    knowledge-graph facts (if present), and then one formatted entry for each
    of the first ``k`` results that has a snippet. Every such entry combines
    the result title, the search snippet and a summary of the scraped page
    (``num_sentences`` sentences requested).

    Args:
        results: Decoded JSON response as returned by ``serper_search``.

    Returns:
        A non-empty list; when nothing usable is found it holds a single
        fallback message.
    """
    result_key_for_type = {
        "news": "news",
        "places": "places",
        "images": "images",
        "search": "organic",
    }
    snippets = []

    answer_box = results.get("answerBox")
    if answer_box:
        # Only the first available representation of the answer is kept.
        if answer_box.get("answer"):
            snippets.append(answer_box["answer"])
        elif answer_box.get("snippet"):
            snippets.append(answer_box["snippet"].replace("\n", " "))
        elif answer_box.get("snippetHighlighted"):
            snippets.append(answer_box["snippetHighlighted"])

    kg = results.get("knowledgeGraph")
    if kg:
        title = kg.get("title")
        entity_type = kg.get("type")
        if entity_type:
            snippets.append(f"{title}: {entity_type}.")
        description = kg.get("description")
        if description:
            snippets.append(description)
        for attribute, value in kg.get("attributes", {}).items():
            snippets.append(f"{title} {attribute}: {value}.")

    # A response that lacks the expected result list is treated as having no
    # results instead of raising KeyError.
    hits = results.get(result_key_for_type[search_type]) or []
    for result in hits[:k]:
        if "snippet" not in result:
            continue
        # scrape_page returns None for banned or unreachable pages; do not
        # hand that to the summarizer, just report that no details exist.
        page = scrape_page(result["link"])
        summarized_page = summarize(page, num_sentences=num_sentences) if page else ""
        if not summarized_page:
            summarized_page = "None"
        snippets.append(f"Title: {result['title']}\nPossible answers: {result['snippet']}\nExtra details: <p>{summarized_page}</p>")

    if not snippets:
        return ["No good Google Search Result was found"]
    return snippets

def parse_results(results: dict) -> str:
    """Wrap every parsed entry in numbered ``<resultN>`` ... ``</resultN>`` tags.

    Entries come from ``parse_snippets``; numbering starts at 0 and each
    wrapped entry is followed by a blank line.
    """
    return "".join(
        f"<result{position}>\n{entry}\n</result{position}>\n\n"
        for position, entry in enumerate(parse_snippets(results))
    )

def custom_search(query: str, parse=True, **kwargs: Any):
    """Search for ``query`` and return all results as one tagged string.

    Args:
        query: Search text.
        parse: Accepted but not used by this function.
        **kwargs: Extra request parameters forwarded to ``serper_search``.

    Returns:
        The string built by ``parse_results`` from the raw response.
    """
    raw_response = serper_search(
        search_term=query,
        search_type=search_type,
        gl=gl,
        hl=hl,
        num=k,
        tbs=tbs,
        **kwargs,
    )
    return parse_results(raw_response)
    
# Smoke test for the new search tool: run a sample query and print the tagged output.
# NOTE: this rebinds `search_res` to the formatted string returned by custom_search.
# custom_search("statistics of deforestation in the amazon")
search_res = custom_search("statistics of deforestation in the amazon")
print(search_res)

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


<result0>
Title: Amazon Deforestation — How Much of the Rainforest is Left? - Sentient Media
Possible answers: Cattle ranching is a leading driver of deforestation in the Amazon, accounting for around 80 percent of the destruction there, and the release of 340 million ...
Extra details: <p>Amazon Deforestation — How Much of the Rainforest Is Left? Much of the Amazon rainforest remains unexplored — and its remoteness helps limit its destruction. Mining The Amazon region is often mined for gold, copper, iron, manganese and other materials. In 2021, it was discovered that due to deforestation, parts of the rainforest started emitting more carbon than it held. While the natural moisture of a rainforest should provide protection from the spread of fire, increasingly dry conditions can impede the rainforest’s ability to protect itself. In order to protect the Amazon rainforest, and thus ourselves, it’s necessary to take a multi-faceted approach that engages governments, NGOs, local and Indig

  super()._check_params_vs_input(X, default_n_init=10)


## Preparing mock text input

In [12]:
# Way to generate a random test input using transcripts from Lex Fridman's podcast
# Make sure you have the transcripts downloaded in the folder lex_whisper_transcripts

import test_on_lex

# `transcripts` is used by generate_test_input as a mapping whose values are
# sliceable sequences of text chunks.
# NOTE(review): presumably random_n=10 loads ten randomly chosen episodes and
# chunk_time_seconds=20 sets the chunk length -- confirm in test_on_lex.
transcripts = test_on_lex.load_lex_transcripts(random_n=10, transcript_folder="./lex_whisper_transcripts/", chunk_time_seconds=20)

import random
def generate_test_input():
    """Return a random excerpt of the loaded transcripts as mock agent input.

    One transcript is picked at random from the module-level ``transcripts``
    mapping and six consecutive chunks are taken from a random position in
    it. The first five chunks are joined with commas (the "previous" context)
    and the sixth (the "current" chunk) follows after a line break.

    Returns:
        The five joined previous chunks, a newline, then the current chunk.

    Raises:
        ValueError: If no loaded transcript has at least 20 chunks.
    """
    # The start index is drawn from [10, len - 10] (both ends inclusive), so a
    # transcript needs at least 20 chunks to be usable.
    usable = [chunks for chunks in transcripts.values() if len(chunks) >= 20]
    if not usable:
        raise ValueError("no loaded transcript has at least 20 chunks")

    # random.choice cannot run past the end of the collection, unlike the
    # inclusive random.randint(0, 10) index it replaces.
    transcript = random.choice(usable)
    trans_idx = random.randint(10, len(transcript) - 10)
    latest = transcript[trans_idx:trans_idx + 6]
    # assumes every chunk is a str -- str.join would fail otherwise
    prev_transcripts, curr_transcripts = ",".join(latest[:5]), latest[5]
    return prev_transcripts + "\n" + curr_transcripts

generate_test_input()

Processing episode_165_large...
Processing episode_225_large...
Processing episode_218_large...
Processing episode_045_large...
Processing episode_029_large...
Processing episode_207_large...
Processing episode_205_large...
Processing episode_143_large...
Processing episode_153_large...
Processing episode_001_large...


" Right, so with Kyle, it was a three round fight. Three round fight. And did it went to decision? It went to decision. Well, Zabit won the decision, clearly. Did Kyle have a shot at winning the third round? I don't remember the exact score, but Kyle could have won the third round had he done a couple things differently. But I do believe in the fourth round, I think Kyle wouldn't have won a fourth round., And I think maybe even won the fight if there would have been a fifth round. And he was pressing forward, perhaps in a funny way that you could tell me I'm wrong, but it felt like he wasn't emphasizing head movement at that point. He went full Mike Tyson. There was a point at which, so it's funny that you say that. Which is a contradiction, actually, because., Mike Tyson had great head movement. I actually don't know exactly what I mean because he was in the pocket. I think he was trying to do the movement. He was just in the pocket and pressing forward. And the fuck you attitude of j

## Initialize master and worker agents

In [130]:
def generate_master_prompt(x):
    """Build the master agent's prompt around the transcript text ``x``.

    The prompt describes the "Convoscope" master-agent role, its worker
    agents, the planning steps and the required ``Insight: ...`` answer
    format, then embeds ``x`` between the transcript markers.
    """
    return f"""
You are the master agent of "Convoscope". "Convoscope" is a tool that listens to a user's live conversation and enhances their conversation by providing them with real time "Insights". The "Insights" you generate should aim to lead the user to deeper understanding, broader perspectives, new ideas, more accurate information, better replies, and enhanced conversations. 

[Your Objective]
"Convoscope" is a multi-agent system in which you are the master agent. You will be given direct access to a live stream of transcripts from the user's conversation. Your goal is to utilize your knowledge and tools to generate "Insights" for the user.

[Your Tools]
You have access to "Agents", which are like workers in your team that can help you do certain tasks. Imagine you are a human manager and your agents as human workers. You can assign tasks to your agents and they will help you complete the tasks. Speak to them like how you would speak to a human worker, give detailed context and instructions.

<Task start>
It's now time to generate an "Insight" for the following conversation transcript. The "Insight" should provide additional understanding beyond what is currently being said in the transcript, it shouldn't be plainly repeating what is being said in the transcripts. If a tool or agent fails to fulfill your request, don't run the same request on the same agent again. 

In your initial thought, you should first write down a plan to generate the "Insight". The plan should include
1. Read the incoming conversation transcript and identify the best "Insight" you could generate to enhance the user's conversation.  Come up with a general description of the "Insight" to generate.
2. What tool(s), agent(s), information you need to generate the "Insight".
3. A final step to almagamate your and your worker agent's work to generate the "Insight". The insight should be summarized within 12 words and be in the format `Insight: {{Insert your "Insight" here}}`
<Task end>

<Transcript start>{x}<Transcript end>
"""

In [131]:
from langchain.agents import initialize_agent
from langchain.agents import load_tools
from langchain.tools import StructuredTool
from langchain.agents import AgentType

# NOTE(review): `agents` is not referenced anywhere else in the visible notebook.
agents = [[]]

# Worker agent for statistics: its only tool is the custom_search function,
# and it runs on the shared `llm`.
statistician_agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Use this tool to search for statistics and facts about a topic. Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events.",
        ),
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

def statistician_agent_wrapper(command):
    """Run ``command`` on the statistician agent and return its answer.

    A one-line role statement is prepended to the command. The module-level
    ``statistician_agent`` is looked up when the wrapper is called.
    """
    role_preamble = "You are a statistician agent.\n"
    full_command = role_preamble + command
    return statistician_agent.run(full_command)

# Worker agent for counter-arguments: its only tool is the custom_search
# function (described to the model as a way to find contradicting facts),
# and it runs on the shared `llm`.
devils_advocate_agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Use this tool to search for facts that might contradict the user's current conversation. Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events.",
        ),
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

def devils_advocate_agent_wrapper(command):
    """Run ``command`` on the devil's advocate agent and return its answer.

    A one-line role statement is prepended to the command, in the same way
    the statistician and fact-checker wrappers do; previously only a bare
    newline was prepended, so this agent was never told its role. The
    module-level ``devils_advocate_agent`` is looked up when the wrapper is
    called.
    """
    system_prompt = "You are a devil's advocate agent.\n"
    return devils_advocate_agent.run(system_prompt + command)

# Worker agent for fact checking: its only tool is the custom_search
# function, and it runs on the shared `llm`.
fact_checker_agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Use this tool to search for statistics and facts about a topic. Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events.",
        ),
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

def fact_checker_agent_wrapper(command):
    """Run ``command`` on the fact-checker agent and return its answer.

    A one-line role statement is prepended to the command. The module-level
    ``fact_checker_agent`` is looked up when the wrapper is called.
    """
    role_preamble = "You are a fact checker agent.\n"
    full_command = role_preamble + command
    return fact_checker_agent.run(full_command)
    
# Master agent: its tools are the three worker-agent wrapper functions, so
# every tool call hands a text command to one of the worker agents. The tool
# descriptions tell the model when each worker is appropriate.
# max_iterations=10 is passed to initialize_agent for this agent only.
master_agent = initialize_agent([
        Tool(
            name="Statistician_Agent",
            func=statistician_agent_wrapper,
            description="""Call this agent when occurrences in a conversation where statistics and graphs would be useful to the user. It can help you do research for statistics and fetching data.""",
        ),
        Tool(
            name="Devils_Advocate_Agent",
            func=devils_advocate_agent_wrapper,
            description="""Call this agent when you detect a strong opinion in a sentence and think it would be useful for the user to see a devil's advocate opinion. It can help you do research for counter arguments.""",
        ),
        Tool(
            name="Fact_Checker_Agent",
            func=fact_checker_agent_wrapper,
            description="""Call this agent if a statement is made which you suspect might be false, and that statement is falsifiable with free and public knowledge. It can help you research for facts.""",
        )
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, max_iterations=10, verbose=True)

In [132]:
# Draw one random transcript excerpt and keep it in `test_transcript` so the
# agent cells that follow all work on the same input.
test_transcript = generate_test_input()
test_transcript

" it's almost like reframing what is exactly evolving. Maybe the interesting, the humans aren't the interesting thing as the contents of our minds is the interesting thing. And that's what's multiplying. And that's actually multiplying and evolving in a much faster timescale. And that maybe has more power on the trajectory of life on earth than does biological evolution, is the evolution of these ideas. Yes, and it's fascinating, like I said before, that we can keep up somehow biologically. We evolved to a point where we can keep up with this meme evolution, literature, internet. We understand DNA and we understand fundamental particles. We didn't start that way a thousand years ago., And we haven't evolved biologically very much, but somehow our minds are able to extend. And therefore AI can be seen also as one such step that we created and it's our tool. And it's part of that meme evolution that we created, even if our biological evolution does not progress as fast. And us humans mig

In [22]:
# Run the master agent on the sampled transcript.
# NOTE(review): the output recorded under this cell discusses COVID-19
# vaccines, while the `test_transcript` displayed earlier is about the
# evolution of ideas, and this cell's execution count (22) is lower than its
# neighbours' -- the recorded output looks stale; re-run the cell to refresh it.
master_agent.run(generate_master_prompt(test_transcript))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The conversation revolves around the development of the COVID-19 vaccine, the potential for other solutions, the concept of government overreach, and the role of individualism in American society. The participants also touch on the balance between individual rights and collective safety, and the role of government in maintaining this balance. The mention of ivermectin suggests a discussion on alternative treatments for COVID-19. An insightful addition to this conversation could be a comparison of the effectiveness of vaccines versus alternative treatments like ivermectin, or a statistical analysis of the impact of individualism on public health measures. 

1. The "Insight" to generate: A comparison of the effectiveness of vaccines versus alternative treatments like ivermectin, and a statistical analysis of the impact of individualism on public health measures.
2. Tools/Agents needed: Statistician_Agent to fetch data 

"Insight: Vaccines show high effectiveness; ivermectin's effectiveness unclear. Individualism can limit public health measures' effectiveness."

## Next steps
- Search engine => make it scrape and summarize pages
- Push agents to do realistic things
- Ideas
  - Generate realistic insights
  - Generate multiple insight ideas
  - Only ask for data that is easy to find
  - Make the plan less rigid: whom should I ask for help and ideas?

In [143]:
def statistician_agent_prompt_wrapper(x):
    """Build the statistician agent's prompt around the transcript text ``x``.

    The prompt asks for a very technical, data-relevant insight of at most
    12 words in the ``Insight: ...`` format and embeds ``x`` between the
    transcript markers.
    """
    return f"""
Based on the transcript below, you need to generate a very technical insight, it must be data relevant, use the tools if you need to. The insight should be summarized within 12 words and be in the format `Insight: {{Insert your "Insight" here}}`

<Transcript start>{x}<Transcript end>
"""
import os
# Chat model handed to initialize_agent below (temperature=0, model "gpt-4-0613").
# NOTE(review): `open_ai_api_key` must already be defined in the session when this runs.
llm = ChatOpenAI(temperature=0, openai_api_key=open_ai_api_key, model="gpt-4-0613")

def init_statistician_agent():
    """Create the agent used for statistician insights.

    Returns:
        The object returned by `initialize_agent`, configured with a single
        `Search_Engine` tool backed by `custom_search`, the module-level
        `llm`, at most 5 iterations and verbose output.
    """
    search_tool = Tool(
        name="Search_Engine",
        func=custom_search,
        description="Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events. It's a tool for understanding, navigating, and engaging with the digital world's vast knowledge.",
    )
    return initialize_agent(
        [search_tool],
        llm,
        agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        max_iterations=5,
        verbose=True,
    )

In [144]:
def fact_checker_agent_prompt_wrapper(x):
    """Build the fact-checker agent prompt for a transcript.

    NOTE(review): despite the function name, the prompt text asks for a
    "very subjective" and "liberal" insight; confirm this is intended.

    Args:
        x: Transcript text placed between the transcript start/end markers.

    Returns:
        The prompt string, which asks for an insight of at most 12 words in
        the `Insight: {...}` format.
    """
    return f"""
Based on the transcript below, you need to generate a very subjective insight, it must be liberal, use the tools if you need to. The insight should be summarized within 12 words and be in the format `Insight: {{Insert your "Insight" here}}`

<Transcript start>{x}<Transcript end>
"""

# Chat model handed to initialize_agent below (temperature=0, model "gpt-4-0613").
# NOTE(review): `open_ai_api_key` must already be defined in the session when this runs.
llm = ChatOpenAI(temperature=0, openai_api_key=open_ai_api_key, model="gpt-4-0613")

def init_fact_checker_agent():
    """Create the agent used for fact-checker insights.

    Returns:
        The object returned by `initialize_agent`, configured with a single
        `Search_Engine` tool backed by `custom_search`, the module-level
        `llm`, at most 5 iterations and verbose output.
    """
    search_tool = Tool(
        name="Search_Engine",
        func=custom_search,
        description="Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events. It's a tool for understanding, navigating, and engaging with the digital world's vast knowledge.",
    )
    return initialize_agent(
        [search_tool],
        llm,
        agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        max_iterations=5,
        verbose=True,
    )

In [149]:
import asyncio

statistician_agent = init_statistician_agent()
fact_checker_agent = init_fact_checker_agent()
tasks = [statistician_agent.arun(statistician_agent_prompt_wrapper(test_transcript)), fact_checker_agent.arun(fact_checker_agent_prompt_wrapper(test_transcript))]

results = await asyncio.run(asyncio.gather(*tasks))


RuntimeError: asyncio.run() cannot be called from a running event loop



[1m> Entering new AgentExecutor chain...[0m

[1m> Entering new AgentExecutor chain...[0m

[32;1m[1;3mThought: The transcript discusses the evolution of ideas and how they might have more impact on life on earth than biological evolution. It also talks about the role of AI in this process. To generate a liberal insight, I need to focus on the aspects of progress, inclusivity, and the potential for AI to overcome human limitations. 

Action:
```
{
  "action": "Final Answer",
  "action_input": "Insight: AI, our creation, accelerates idea evolution, transcending biological limitations for inclusive progress."
}
```[0m

[1m> Finished chain.[0m
[32;1m[1;3mThought: The transcript discusses the evolution of ideas and how they might have more impact on life on earth than biological evolution. It also talks about the role of AI in this process. To generate a technical insight, I need to summarize these points in a data-relevant way. 

Action:
```
{
  "action": "Final Answer",
  "act

In [148]:
# Display the list of insights stored in `results`.
results

['Insight: AI aids in understanding complex concepts, surpassing human cognitive limitations.',
 'Insight: AI, our creation, accelerates idea evolution, transcending biological limitations for inclusive progress.']

## Start here

In [2]:
# Scraping tool
from bs4 import BeautifulSoup
import requests

# URLs containing any of these substrings are never fetched.
banned_sites = ["calendar.google.com", "researchgate.net"]

def scrape_page(url: str):
    """Fetch a page and return the text of its paragraphs and headings.

    Args:
        url: Address of the page to download.

    Returns:
        The text of every <p> and <h1>-<h6> element joined with single
        spaces and with "|" characters removed, or None when the URL
        contains a banned site name or the request fails.
    """
    for blocked in banned_sites:
        if blocked in url:
            print(f"Skipping site: {url}")
            return None

    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }

    try:
        page = requests.get(url, headers=request_headers, timeout=30)
        page.raise_for_status()

        markup = BeautifulSoup(page.content, 'html.parser')
        text_tags = markup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
        joined = " ".join(tag.get_text() for tag in text_tags)
        return joined.replace('|', '')
    except requests.RequestException as e:
        # Network and HTTP-status failures are reported and swallowed.
        print(f"Failed to fetch {url}. Error: {e}")
        return None

In [3]:
# SBert Summarizer
from summarizer.sbert import SBertSummarizer
# Module-level summarizer, constructed once with the 'paraphrase-MiniLM-L6-v2' model name.
summarizer = SBertSummarizer('paraphrase-MiniLM-L6-v2')

def summarize(text, num_sentences=3):
    """Summarize `text` by calling the module-level `summarizer`.

    Args:
        text: Text to summarize.
        num_sentences: Forwarded to the summarizer as `num_sentences`.

    Returns:
        Whatever the summarizer call returns (presumably a string; confirm
        against the summarizer library).
    """
    return summarizer(text, num_sentences=num_sentences)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Search tool
from typing import Any, List, Literal
import requests
import os

# Search settings read as globals by the search functions in this cell.
k: int = 5  # sent to the API as `num` and used to cap how many results are parsed
gl: str = "us"  # forwarded to the API as `gl` (presumably a country code; confirm)
hl: str = "en"  # forwarded to the API as `hl` (presumably a language code; confirm)
tbs = None  # forwarded as `tbs`; None values are dropped before the request is sent
num_sentences = 7  # sentences requested from summarize() for each scraped page
serper_api_key=os.environ.get("SERPER_API_KEY")  # None when the variable is not set
search_type: Literal["news", "search", "places", "images"] = "search"  # selects the endpoint path and the result key

def serper_search(
        search_term: str, search_type: str = "search", **kwargs: Any
    ) -> dict:
    """Query the Serper API and return the decoded JSON response.

    Args:
        search_term: Query text, sent as the `q` parameter.
        search_type: Path segment appended to https://google.serper.dev/.
        **kwargs: Extra request parameters; entries whose value is None are
            dropped before the request is sent.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not respond within 30 seconds.
    """
    headers = {
        "X-API-KEY": serper_api_key or "",
        "Content-Type": "application/json",
    }
    params = {
        "q": search_term,
        **{key: value for key, value in kwargs.items() if value is not None},
    }
    # Without a timeout a stalled connection would block the calling agent
    # indefinitely; 30 s matches the page scraper in this notebook.
    response = requests.post(
        f"https://google.serper.dev/{search_type}",
        headers=headers,
        params=params,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

def parse_snippets(results: dict) -> List[str]:
    """Turn a Serper response into a list of text snippets.

    Snippets are collected in this order: the answer box (answer, snippet or
    highlighted snippet), the knowledge graph (type, description,
    attributes), then up to `k` result entries for the module-level
    `search_type`. Each result entry that has a snippet is scraped with
    `scrape_page`; when scraping succeeds the page is condensed with
    `summarize` and appended to the entry.

    Args:
        results: Decoded JSON response from the search API.

    Returns:
        The collected snippets, or a single-item list containing
        "No good Google Search Result was found" when nothing was collected.
    """
    result_key_for_type = {
        "news": "news",
        "places": "places",
        "images": "images",
        "search": "organic",
    }
    snippets = []

    answer_box = results.get("answerBox")
    if answer_box:
        if answer_box.get("answer"):
            snippets.append(answer_box.get("answer"))
        elif answer_box.get("snippet"):
            snippets.append(answer_box.get("snippet").replace("\n", " "))
        elif answer_box.get("snippetHighlighted"):
            snippets.append(answer_box.get("snippetHighlighted"))

    kg = results.get("knowledgeGraph")
    if kg:
        title = kg.get("title")
        entity_type = kg.get("type")
        if entity_type:
            snippets.append(f"{title}: {entity_type}.")
        description = kg.get("description")
        if description:
            snippets.append(description)
        for attribute, value in kg.get("attributes", {}).items():
            snippets.append(f"{title} {attribute}: {value}.")

    # A response without the expected result key is treated as having no
    # hits, so the fallback message below is returned instead of a KeyError.
    hits = results.get(result_key_for_type[search_type]) or []
    for result in hits[:k]:
        if "snippet" not in result:
            continue
        page = scrape_page(result["link"])
        entry = f"Title: {result['title']}\nLink: {result['link']}\nSnippet: {result['snippet']}\n"
        if page is None:
            # Scraping was skipped or failed: keep the search snippet only.
            snippets.append(entry)
        else:
            summarized_page = summarize(page, num_sentences=num_sentences) or "None"
            snippets.append(f"{entry}Summarized Page: <p>{summarized_page}</p>")

    if not snippets:
        return ["No good Google Search Result was found"]
    return snippets

def parse_results(results: dict) -> str:
    """Format the snippets of a search response as numbered result blocks.

    Args:
        results: Decoded JSON response, passed on to `parse_snippets`.

    Returns:
        One string in which snippet number i is wrapped as
        "<result{i}>\\n...\\n</result{i}>" followed by a blank line.
    """
    blocks = [
        f"<result{position}>\n{snippet}\n</result{position}>\n\n"
        for position, snippet in enumerate(parse_snippets(results))
    ]
    return "".join(blocks)

def custom_search(query: str, parse=True, **kwargs: Any):
    """Search the web for `query` and return the formatted results.

    The request uses the module-level settings `gl`, `hl`, `k` (sent as
    `num`), `tbs` and `search_type`.

    Args:
        query: Search text.
        parse: Accepted but not read by this function.
        **kwargs: Extra keyword arguments forwarded to `serper_search`.

    Returns:
        The string produced by `parse_results` for the search response.
    """
    fixed_options = {
        "gl": gl,
        "hl": hl,
        "num": k,
        "tbs": tbs,
        "search_type": search_type,
    }
    response = serper_search(search_term=query, **fixed_options, **kwargs)
    return parse_results(response)

In [1]:
# Way to generate a random test input using transcripts from Lex Fridman's podcast
# Make sure you have the transcripts downloaded in the folder lex_whisper_transcripts

import test_on_lex

# Load the podcast transcripts through the local test_on_lex helper. The code below
# treats the result as a mapping from episode key to a sequence of transcript chunks.
# random_n=10 presumably limits the load to 10 random episodes -- confirm in test_on_lex.
transcripts = test_on_lex.load_lex_transcripts(random_n=10, transcript_folder="./lex_whisper_transcripts/", chunk_time_seconds=20)

import random
def generate_test_input():
    """Return a random six-chunk excerpt from the loaded transcripts.

    Picks a random episode from the module-level `transcripts` mapping and a
    random start index between 10 and len(transcript) - 10. The first five
    chunks from that index are joined with "," as earlier context, and the
    sixth chunk follows on its own line as the newest utterance.

    Returns:
        A string of the form "<five chunks joined by ','>\\n<sixth chunk>".

    Raises:
        IndexError: If `transcripts` is empty.
        ValueError: If the chosen transcript has fewer than 20 chunks.
    """
    # random.choice can reach every loaded episode and nothing else; the
    # earlier random.randint(0, 10) is inclusive of 10 and overran a
    # 10-episode mapping.
    key = random.choice(list(transcripts.keys()))
    transcript = transcripts[key]
    trans_idx = random.randint(10, len(transcript) - 10)
    latest = transcript[trans_idx:trans_idx + 6]
    prev_transcripts, curr_transcripts = ",".join(latest[0:5]), latest[5]
    return prev_transcripts + "\n" + curr_transcripts

# Smoke-test the generator; the notebook displays the returned excerpt.
generate_test_input()

Processing episode_272_large...
Processing episode_177_large...
Processing episode_045_large...
Processing episode_311_large...
Processing episode_042_large...
Processing episode_093_large...
Processing episode_182_large...
Processing episode_052_large...
Processing episode_161_large...
Processing episode_278_large...


" It didn't have to be that way. It could have been messy. And so, Einstein thought of himself as a young boy entering this huge library for the first time, being overwhelmed by the simplicity, elegance, and beauty of this library, but all he could do was read the first page of the first volume., Well, that library is the universe, with all sorts of mysterious, magical things that we have yet to find. And then Galileo was asked about this. Galileo said that the purpose of science, the purpose of science is to determine how the heavens go. The purpose of religion is to determine, how to go to heaven. So in other words, science is about natural law, and religion is about ethics, how to be a good person, how to go to heaven. As long as we keep these two things apart, we're in great shape. The problem occurs when people from the natural sciences begin to pontificate about ethics,, and people from religion begin to pontificate about natural law. That's where we get into big trouble. You thi

In [9]:
# Prompt template shared by every agent; agent_prompt_maker fills it in with str.format().
# Placeholders: {agent_name}, {agent_insight_type}, {agent_plan}, {insight_num_words},
# {conversation_transcript}. The doubled braces around the "Insight" example are escapes
# that render as literal braces.
# NOTE(review): the text contains typos ("conversationn", "must {insight_num_words} words",
# an unbalanced quote before "null"). It is sent to the model verbatim, so fix deliberately.
agent_prompt_blueprint = """
"Convoscope" is a multi-agent system in which you are the {agent_name} agent. You are a highly skilled and highly intelligent expert {agent_name}.

You will be given direct access to a live stream of transcripts from the user's conversation. Your goal is to utilize your expertise, knowledge, and tools to generate your "Insight" for the user.

The types of "Insights" you provide strictly fall under your role as an expert {agent_name}. Only provide insights that would come from your role as the {agent_name}.

[Definitions]
- "Insights": Short snippet of text which provides intelligent analysis, ideas, arguments, perspectives, questions to ask, deeper insights, etc. that will improve the current conversation. "Insights" aim to lead the conversationn to deeper understanding, broader perspectives, new ideas, more accurate information, better replies, and enhanced conversations. Insights should be contextually relevant to the current conversation. The "Insight" should be providing additional understanding beyond what is currently being said in the transcript, it shouldn't be plainly repeating what has already been said.
- "Convoscope": an intelligence augmentation tool running on user's smart glasses or on their laptop that they use during conversations to improve conversations. Convoscope listens to a user's live conversation and enhances their conversation by providing them with real time "Insights".

[Your Expertise: {agent_name}]

As the {agent_name} agent, you {agent_insight_type}.

[Your Tools]
You have access to tools, which you should utilize to help you generate highly valuable, insightful, contextually relevant insights.

Limit your usage of the Search_Engine tool to 1 times. Mention your number of usage of the tool in your thoughts.

<Task start>
It's now time to generate an "Insight" for the following conversation transcript. The "Insight" should provide additional understanding beyond what is currently being said in the transcript, it shouldn't be plainly repeating what is being said in the transcripts. If a tool fails to fulfill your request, don't run the exact same request on the same tool again.

In your initial thought, you should first come up with a plan to generate the "Insight". The plan should include:

{agent_plan}

The plan should include a final step to generate the insight. The insight must {insight_num_words} words or less and be in the format `Insight: {{Insert your "Insight" here}}`. If you don't have a very valuable and useful insight for any reason, simply specify your "Insight as "null".
<Task end>

<Transcript start>{conversation_transcript}<Transcript end>"""

# One config dict per agent role. Each dict supplies the blueprint placeholders
# agent_name, insight_num_words, agent_insight_type and agent_plan.
agent_list = [
        {
            "agent_name": "Statistician", 
            "insight_num_words" : 10,
            "agent_insight_type" : """generate insights which focus on facts, figures, statistics, and hard data. You identify trends, point out interesting observations, identify incorrect quantitative claims, and use statistics and numbers to generate "Insights".""",
            "agent_plan" : """1. Identify what quantitative data, facts, statistics, etc. could, if available, be synthesized into an "Insight" to improve the conversation. Come up with a general description of the "Insight" to generate.\n2. What actions to take to get said data."""
        },
        {
            "agent_name": "FactChecker", 
            "insight_num_words" : 7,
            "agent_insight_type" : """fact check any claims made during a conversation. Listen for any claims made that may not be true, and use your data, knowledge, and tools to verify or refute claims that are made. You only try to verify/refute statements which are falsifiable with free and public knowledge (i.e. don't fact check personal statements or beliefs).""",
            "agent_plan" : """1. Find and write down individual factual claims from the conversation. Do not consider personal, belief-based, or unfalsifiable claims. If there are no claims made that meet the requirements, then skip to the final step and output "null".\n2. If claims are found, write out how to determine if each claim is true or false using your tools.\n3. Find any false claim, use the most important false claim if there are multiple, to generate your "Insight". If there are no claims or no false claims, your output is "null"."""
        },
        {
            "agent_name": "DevilsAdvocate", 
            "insight_num_words" : 12,
            "agent_insight_type" : """assess the point of view being taken in the conversation and steel-man a contrary position. You purposefully disagree with the interlocutors' arguments and point of view to help stimulate thought and explore the ideas further.""",
            "agent_plan" : """1. Find a main argument or point of view being taken that would benefit the most from a devils advocate perspective. Write down the original position. If no position/argument is found, skip to the final step and output "null".\n2. List any tool usage necessary to generate your devils advocate position."""
        }

    ]

def agent_prompt_maker(agent_config, conversation_transcript):
    """Render the prompt blueprint for one agent and one transcript.

    Args:
        agent_config: Mapping whose keys fill the blueprint placeholders.
        conversation_transcript: Text inserted at the
            {conversation_transcript} placeholder.

    Returns:
        The formatted prompt string.
    """
    return agent_prompt_blueprint.format(
        conversation_transcript=conversation_transcript,
        **agent_config,
    )

# Preview: print each agent config and the prompt rendered from it for a dummy transcript.
# Note that `agent` and `agent_prompt` remain bound at module level after the loop.
for agent in agent_list:
    print(agent)
    agent_prompt = agent_prompt_maker(agent, "this is a test transcript")
    print(agent_prompt)
    print("--------------\n\n\n")

{'agent_name': 'Statistician', 'insight_num_words': 10, 'agent_insight_type': 'generate insights which focus on facts, figures, statistics, and hard data. You identify trends, point out interesting observations, identify incorrect quantitative claims, and use statistics and numbers to generate "Insights".', 'agent_plan': '1. Identify what quantitative data, facts, statistics, etc. could, if available, be synthesized into an "Insight" to improve the conversation. Come up with a general description of the "Insight" to generate.\n2. What actions to take to get said data.'}

"Convoscope" is a multi-agent system in which you are the Statistician agent. You are a highly skilled and highly intelligent expert Statistician.

You will be given direct access to a live stream of transcripts from the user's conversation. Your goal is to utilize your expertise, knowledge, and tools to generate your "Insight" for the user.

The types of "Insights" you provide strictly fall under your role as an exper

In [17]:
from langchain.chat_models import ChatOpenAI
from langchain.agents.tools import Tool
from langchain.agents import initialize_agent
from langchain.agents import AgentType
import asyncio

# Draw a fresh random transcript excerpt and print it for reference.
agent_config_list = agent_list
test_transcript = generate_test_input()
print(test_transcript)
print("--------------\n\n\n")

# NOTE(review): `open_ai_api_key` must already be defined in the session when this runs.
llm = ChatOpenAI(temperature=0, openai_api_key=open_ai_api_key, model="gpt-4-0613")

# A single agent with one search tool; every agent config below is run through it.
agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events. It's a tool for understanding, navigating, and engaging with the digital world's vast knowledge.",
        ),
    ], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, max_iterations=2, early_stopping_method="generate", verbose=True)

async def arun_wrapper(agent_config, test_transcript):
    """Run the module-level agent for one config and label the result.

    Args:
        agent_config: Agent config dict; must contain "agent_name".
        test_transcript: Transcript text inserted into the agent prompt.

    Returns:
        A dict with the agent's name under "agent_name" and the value
        returned by `agent.arun` under "agent_insight".
    """
    name = agent_config["agent_name"]
    prompt = agent_prompt_maker(agent_config, test_transcript)
    insight = await agent.arun(prompt)
    return {"agent_name": name, "agent_insight": insight}
# Create one coroutine per agent config and await them all with asyncio.gather.
# The trailing bare `results` displays the list in the notebook.
tasks = [arun_wrapper(agent_config, test_transcript) for agent_config in agent_config_list]
results = await asyncio.gather(*tasks)
results


 or some shit like that. And it gets to the point, because I'm doing this tax fraud from inside the offices. Well, the debit card companies are pinging the cards. They start to realize that, hey, some son of a bitch is stealing money using our debit card. So they start to shut down the cards before I can pull cash out. So I start not to have the money to send to her. And I'm like, so she calls, and she's like, look,, I have to have money. And I was like, well, look, I'm doing what I can. You promised money. And I was like, look, if you knew what I was doing to get this money, you wouldn't be asking that. And she's like, I need money. My rent's behind by a month right now. And I'm like, your rent's behind? She's like, yeah. So I was like, OK, so I pick up the phone, call the rental office., And I was like, I just want to make sure that I'm sorry I'm behind on the rent for this apartment number. Oh, no, that rent's paid up three months. It's like, OK, hang up, call Elizabeth back. I was 

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


[32;1m[1;3mThought: The conversation transcript contains several factual claims. Here are the ones that stand out:

1. The speaker is involved in tax fraud.
2. The debit card companies are shutting down cards due to fraudulent activity.
3. The speaker's acquaintance, Elizabeth, claimed her rent was behind, but the rental office confirmed it was paid up for three months.
4. The speaker taught a person named Sean Mims how to commit tax fraud.
5. Sean Mims was set to be arrested in March as part of Operation Rolling Stone.
6. Sean Mims brought in $30,000 worth of Italian tile to his apartment.

To verify these claims, I would need to:

1. Search for any public records or news reports about a tax fraud case involving the speaker. This might be difficult due to privacy laws and the potential for the speaker to be using a pseudonym.
2. Look for any public announcements or news reports from debit card companies about a widespread shutdown of cards due to fraud.
3. This claim is difficult to

  super()._check_params_vs_input(X, default_n_init=10)



Observation: [36;1m[1;3m<result0>
Title: What Is the Average Cost to Install Tile Floors? – Rubi Blog USA
Link: https://www.rubi.com/us/blog/average-cost-to-install-tile-floors/
Snippet: The national average to install ceramic or porcelain tile ranges between $13.50 and $63 per square foot. If by chance, you're looking to install ...
Summarized Page: <p>Building the future together What Is the Average Cost to Install Tile Floors? Finally, the national average families may pay on either extreme is $400 on the low end and $5,600 on the high end. Let’s take a look at other factors that can affect the average cost to install tile floors. Here are some things that can also affect the price of installing tile floors:  Considering these factors, you can develop an idea of the price you may pay for your tile flooring. That’s especially true when you factor in the cost of living and wages in different areas. Cancel Your email address will not be published. DOWNLOAD THE 2023 RUBI CATALOG FOR 

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


[32;1m[1;3mThe search results indicate that the average cost of tile (including installation) can range from around $10 to $63 per square foot. Given this range, $30,000 could cover a significant area, suggesting that the modifications made to the apartment were extensive. This could provide an insight into the extent of the modifications made to the apartment and the level of investment involved.

Action:
```
{
  "action": "Final Answer",
  "action_input": "Insight: $30,000 in tiles suggests extensive, high-end modifications."
}
```[0m

[1m> Finished chain.[0m


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)



Observation: [36;1m[1;3m<result0>
Title: United States v. Mims, Criminal No. 3:07CR150 | Casetext Search + Citator
Link: https://casetext.com/case/united-states-v-mims-19
Snippet: MEMORANDUM OPINION. Sean Aaron Mims, a federal inmate proceeding pro se, brings this motion pursuant to 28 U.S.C. § 2255 ("§ 2255 Motion," ECF ...
Summarized Page: <p>Case Details Full title:UNITED STATES OF AMERICA v. SEAN AARON MIMS Court:UNITED STATES DISTRICT COURT FOR THE EASTERN DISTRICT OF VIRGINIA Richmond Division Date published: Oct 1, 2015 CitationsCopy Citation United States v. Mims Opinion  Criminal No. See  United States v. Sosa, 364 F.3d 507, 509 (4th Cir. Because Mims did not file his § 2255 Motion until May 30, 2013, the motion is untimely pursuant to 28 U.S.C. § 2255(f)(1). Id.)  Whether a petitioner has exercised due diligence to warrant a belated commencement of the limitation period pursuant to 28 U.S.C. § 2255(f)(4) is a fact-specific inquiry unique to each case. Simply put, [Mims] fa

[{'agent_name': 'Statistician',
  'agent_insight': 'Insight: $30,000 in tiles suggests extensive, high-end modifications.'},
 {'agent_name': 'FactChecker',
  'agent_insight': "Insight: Sean Mims' legal troubles confirmed."},
 {'agent_name': 'DevilsAdvocate',
  'agent_insight': 'Insight: Consider the ethical implications and harm caused by your actions.'}]

In [19]:
from langchain.chat_models import ChatOpenAI
from langchain.agents.tools import Tool
from langchain.agents import initialize_agent
from langchain.agents import AgentType
import asyncio

# Draw a fresh random transcript excerpt and print it for reference.
agent_config_list = agent_list
test_transcript = generate_test_input()
print(test_transcript)
print("--------------\n\n\n")

# NOTE(review): `open_ai_api_key` must already be defined in the session when this runs.
llm = ChatOpenAI(temperature=0, openai_api_key=open_ai_api_key, model="gpt-4-0613")

# A single agent with one search tool; every agent config below is run through it.
agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events. It's a tool for understanding, navigating, and engaging with the digital world's vast knowledge.",
        ),
    ], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, max_iterations=2, early_stopping_method="generate", verbose=True)

async def arun_wrapper(agent_config, test_transcript):
    """Run the module-level agent for one config and label the result.

    Args:
        agent_config: Agent config dict; must contain "agent_name".
        test_transcript: Transcript text inserted into the agent prompt.

    Returns:
        A dict with the agent's name under "agent_name" and the value
        returned by `agent.arun` under "agent_insight".
    """
    name = agent_config["agent_name"]
    prompt = agent_prompt_maker(agent_config, test_transcript)
    insight = await agent.arun(prompt)
    return {"agent_name": name, "agent_insight": insight}
# Create one coroutine per agent config and await them all with asyncio.gather.
# The trailing bare `results` displays the list in the notebook.
tasks = [arun_wrapper(agent_config, test_transcript) for agent_config in agent_config_list]
results = await asyncio.gather(*tasks)
results

 the water, firehose, like vision. There's way more information in vision that we actually process. So brain is already good at identifying what matters. And that we can switch that from vision to some other wavelength or some other kind of modality. But I think that the same processing principles probably still apply. But also indeed this ability to have information, more accessible and more relevant, I think can enhance what we do. I mean, kids today at school, they learn about DNA. I mean, things that were discovered just a couple of years ago. And it's already common knowledge and we are building on it. And we don't see a problem where there's too much information that we can absorb and learn., Maybe people become a little bit more narrow in what they know, they are in one field. But this information that we have accumulated, it is passed on and people are picking up on it and they are building on it. So it's not like we have reached the point of saturation. We have still this proc

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)



Observation: [36;1m[1;3m<result0>
Title: Human brain - Wikipedia
Link: https://en.wikipedia.org/wiki/Human_brain
Snippet: It controls most of the activities of the body, processing, integrating, and coordinating the information it receives from the sense organs, and making ...
Summarized Page: <p>Contents Human brain 
 The human brain is the central organ of the human nervous system, and with the spinal cord makes up the central nervous system. Underneath the cerebral cortex are several important structures, including the thalamus, the epithalamus, the pineal gland, the hypothalamus, the pituitary gland, and the subthalamus; the limbic structures, including the amygdalae and the hippocampi, the claustrum, the various nuclei of the basal ganglia, the basal forebrain structures, and the three circumventricular organs. These two networks communicate via anastomosing (joining) veins.[60] The veins of the brain drain into larger cavities of the dural venous sinuses usually situated betwe

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)



Observation: [36;1m[1;3m<result0>
Title: Musk's Neuralink to start human trial of brain implant for paralysis patients | Reuters
Link: https://www.reuters.com/technology/musks-neuralink-start-human-trials-brain-implant-2023-09-19/
Snippet: The study will use a robot to surgically place a brain-computer interface (BCI) implant in a region of the brain that controls the intention to ...
Summarized Page: <p>Musk's Neuralink to start human trial of brain implant for paralysis patients Sept 19 (Reuters) - Billionaire entrepreneur Elon Musk's brain-chip startup Neuralink said on Tuesday it has received approval from an independent review board to begin recruitment for the first human trial of its brain implant for paralysis patients. Those with paralysis due to cervical spinal cord injury or amyotrophic lateral sclerosis may qualify for the study, it said, but did not reveal how many participants would be enrolled in the trial, which will take about six years to complete. Musk has grand a

[{'agent_name': 'Statistician',
  'agent_insight': "Insight: Brain's processing limit challenges full Wikipedia integration. Neuralink trials show promising BCI advancements."},
 {'agent_name': 'FactChecker', 'agent_insight': 'null'},
 {'agent_name': 'DevilsAdvocate',
  'agent_insight': 'Insight: Is more information always beneficial, or could it lead to cognitive overload?'}]

In [None]:
import os
# Read the OpenAI key from the environment. "OPENAI_API_KEY" is tried first; the
# quickstart at the top of this notebook uses "OPEN_AI_API_KEY", so that spelling
# is accepted as a fallback. The value is None when neither variable is set.
open_ai_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("OPEN_AI_API_KEY")

In [21]:
from langchain.chat_models import ChatOpenAI
from langchain.agents.tools import Tool
from langchain.agents import initialize_agent
from langchain.agents import AgentType
import asyncio

# Reuse the transcript already held in `test_transcript` (the line that draws a
# new one is commented out) and print it for reference.
agent_config_list = agent_list
# test_transcript = generate_test_input()
print(test_transcript)
print("--------------\n\n\n")

llm = ChatOpenAI(temperature=0, openai_api_key=open_ai_api_key, model="gpt-4-0613")

# A single agent with one search tool; every agent config below is run through it.
agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events. It's a tool for understanding, navigating, and engaging with the digital world's vast knowledge.",
        ),
    ], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, max_iterations=3, early_stopping_method="generate", verbose=True)

def run_wrapper(agent_config, test_transcript):
    """Run the module-level agent synchronously for one config.

    Args:
        agent_config: Agent config dict; must contain "agent_name".
        test_transcript: Transcript text inserted into the agent prompt.

    Returns:
        A dict with the agent's name under "agent_name" and the value
        returned by `agent.run` under "agent_insight".
    """
    name = agent_config["agent_name"]
    prompt = agent_prompt_maker(agent_config, test_transcript)
    insight = agent.run(prompt)
    return {"agent_name": name, "agent_insight": insight}
# Call run_wrapper once per agent config, in list order, then display the collected insights.
results = [run_wrapper(agent_config, test_transcript) for agent_config in agent_config_list]
results

 the water, firehose, like vision. There's way more information in vision that we actually process. So brain is already good at identifying what matters. And that we can switch that from vision to some other wavelength or some other kind of modality. But I think that the same processing principles probably still apply. But also indeed this ability to have information, more accessible and more relevant, I think can enhance what we do. I mean, kids today at school, they learn about DNA. I mean, things that were discovered just a couple of years ago. And it's already common knowledge and we are building on it. And we don't see a problem where there's too much information that we can absorb and learn., Maybe people become a little bit more narrow in what they know, they are in one field. But this information that we have accumulated, it is passed on and people are picking up on it and they are building on it. So it's not like we have reached the point of saturation. We have still this proc

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)



Observation: [36;1m[1;3m<result0>
Title: Human brain - Wikipedia
Link: https://en.wikipedia.org/wiki/Human_brain
Snippet: It controls most of the activities of the body, processing, integrating, and coordinating the information it receives from the sense organs, and making ...
Summarized Page: <p>Contents Human brain 
 The human brain is the central organ of the human nervous system, and with the spinal cord makes up the central nervous system. Underneath the cerebral cortex are several important structures, including the thalamus, the epithalamus, the pineal gland, the hypothalamus, the pituitary gland, and the subthalamus; the limbic structures, including the amygdalae and the hippocampi, the claustrum, the various nuclei of the basal ganglia, the basal forebrain structures, and the three circumventricular organs. These two networks communicate via anastomosing (joining) veins.[60] The veins of the brain drain into larger cavities of the dural venous sinuses usually situated betwe

[{'agent_name': 'Statistician',
  'agent_insight': 'Insight: Human brain can store around 2.5 million gigabytes of information.'},
 {'agent_name': 'FactChecker', 'agent_insight': 'null'},
 {'agent_name': 'DevilsAdvocate',
  'agent_insight': 'Insight: Could constant info influx hinder independent critical thinking?'}]

In [22]:
async def arun_wrapper(agent_config, test_transcript):
    """Run the agent asynchronously for one config and label its output.

    Builds a prompt from ``agent_config`` and ``test_transcript`` via
    ``agent_prompt_maker``, awaits ``agent.arun`` on it, and returns a dict
    with two keys: ``"agent_name"`` (copied from the config) and
    ``"agent_insight"`` (whatever ``agent.arun`` returned).

    ``agent`` and ``agent_prompt_maker`` are looked up from the enclosing
    notebook scope; they are not defined in this cell.
    """
    # Read the name first so a config without "agent_name" fails before any
    # prompt is built or any agent call is made.
    name = agent_config["agent_name"]
    prompt = agent_prompt_maker(agent_config, test_transcript)
    insight = await agent.arun(prompt)
    return {"agent_name": name, "agent_insight": insight}
tasks = [arun_wrapper(agent_config, test_transcript) for agent_config in agent_config_list]
results = await asyncio.gather(*tasks)
results



[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The main argument in this conversation is about the potential of integrating vast amounts of information, like Wikipedia, directly into the brain through technologies like Neuralink. The speakers seem to agree that this could enhance our understanding and interaction with the world. As a DevilsAdvocate, I could challenge this perspective by questioning the potential downsides of such a technology. I don't need to use the Search_Engine tool for this task.

Action:
```
{
  "action": "Final Answer",
  "action_input": "Insight: Could such information overload hinder our natural cognitive processes?"
}
```[0m

[1m> Finished chain.[0m
[32;1m[1;3mThought: The conversation revolves around the concept of information processing, the capacity of the human brain, and the potential of technology like Neuralink. A potential insight

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)



Observation: [36;1m[1;3m<result0>
Title: Brain information processing capacity modeling | Scientific Reports - Nature
Link: https://www.nature.com/articles/s41598-022-05870-z
Snippet: Neurophysiological measurements suggest that human information processing is evinced by neuronal activity.
Summarized Page: <p>You are using a browser version with limited support for CSS. The implication here is that—with an information conservation law as the cornerstone—our model is not limited to brain regions, but can be applied to any neuronal system that has the attributes of information processing and storage capability. In this case, we have and \(x(t) = \frac{{H_{p} (t)}}{p}\); it then follows that the neuronal activity of an individual brain region can be modeled as: This furnishes a first-order linear differential equation that connects the neuronal activity of a brain region with its information processing, storage capacities and the information arrival rate. Note that generally, the proce

[{'agent_name': 'Statistician',
  'agent_insight': "Insight: Brain's processing capacity is limited, challenging full Wikipedia integration."},
 {'agent_name': 'FactChecker', 'agent_insight': 'null'},
 {'agent_name': 'DevilsAdvocate',
  'agent_insight': 'Insight: Could such information overload hinder our natural cognitive processes?'}]