# Quickstart: Generating Insights using Langchain
Here is how you can set up your own insight generator.

Make sure `SERPER_API_KEY` and `OPEN_AI_API_KEY` are set in your environment.

In [16]:
from langchain.chat_models import ChatOpenAI
from langchain.agents.tools import Tool
from langchain.prompts.chat import SystemMessage
from langchain.utilities import GoogleSerperAPIWrapper
import os

# LangChain's stock Serper wrapper, keyed from the SERPER_API_KEY environment variable.
# NOTE(review): `search` is not referenced again in the visible cells (the agents use
# the custom search tool defined further down) — confirm it is still needed.
search = GoogleSerperAPIWrapper(serper_api_key=os.environ.get("SERPER_API_KEY"))
# Chat model (temperature 0, gpt-4-0613) that is passed to every agent created below.
llm = ChatOpenAI(temperature=0, openai_api_key=os.environ.get("OPEN_AI_API_KEY"), model="gpt-4-0613")

## Tools for our agent
We decided to give our agents the ability to:
- Search for a query using the web
- Scrape a page to find out more info

In [2]:
# Scraping tool
from bs4 import BeautifulSoup
import requests

# Hosts that are never fetched; matched as plain substrings of the URL.
banned_sites = ["calendar.google.com", "researchgate.net"]

def scrape_page(url: str, title: str):
    """Based on your observations from the Search_Engine, if you want more details from a snippet for a non-PDF page, pass this the page's URL and the page's title to scrape the full page and retrieve the full contents of the page."""
    # NOTE: the docstring above is worded as instructions to an LLM and is kept
    # verbatim (presumably it doubles as a tool description — confirm before editing).
    #
    # Returns None for a banned URL; otherwise a dict with 'url', 'description'
    # (text of all <p>/<h1>-<h6> elements, '|' removed, or None when the request
    # failed) and 'title' ('|' removed).
    print(f"Parsing: {url}")

    # Guard clause: bail out before any network traffic for deny-listed hosts.
    for blocked in banned_sites:
        if blocked in url:
            print(f"Skipping site: {url}")
            return None

    # Browser-like headers sent with the request.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }

    try:
        page = requests.get(url, headers=request_headers, timeout=30)
        page.raise_for_status()

        parsed = BeautifulSoup(page.content, 'html.parser')
        # Keep only paragraph and heading text, joined with single spaces.
        fragments = (
            node.get_text()
            for node in parsed.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
        )
        body_text = " ".join(fragments)
        return {
            'url': url,
            'description': body_text.replace('|', ''),
            'title': title.replace('|', ''),
        }
    except requests.RequestException as err:
        # Network/HTTP failures are reported and yield a result with no description.
        print(f"Failed to fetch {url}. Error: {err}")
        return {
            'url': url,
            'description': None,
            'title': title.replace('|', ''),
        }
        


In [3]:
# Custom search tool: we copied LangChain's Serper integration, but we prefer all the data to be displayed in one JSON message.

from typing import Any, List, Literal, Optional

import requests

# Module-level search defaults, read by the search helpers defined below.
# Result cap: sent to Serper as `num` and used to slice the parsed result list.
k: int = 5
# Sent to Serper as the `gl` query parameter (presumably the country code — confirm).
gl: str = "us"
# Sent to Serper as the `hl` query parameter (presumably the language code — confirm).
hl: str = "en"
# Sent as `tbs`; while it is None, serper_search drops it from the request.
tbs = None
# API key sent in the X-API-KEY header; an empty string is sent if the variable is unset.
serper_api_key=os.environ.get("SERPER_API_KEY")
# Selects the Serper endpoint path and which key of the response holds the result list.
search_type: Literal["news", "search", "places", "images"] = "search"

def serper_search(
        search_term: str, search_type: str = "search", **kwargs: Any
    ) -> dict:
    """Query the Serper API and return the decoded JSON response.

    Args:
        search_term: Query text, sent as the ``q`` parameter.
        search_type: Path segment appended to ``https://google.serper.dev/``
            to pick the endpoint.
        **kwargs: Extra query parameters. Entries whose value is ``None`` are
            dropped before the request is made.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not respond within 30 seconds.
    """
    headers = {
        # Reads the module-level key; an empty string is sent if it is unset.
        "X-API-KEY": serper_api_key or "",
        "Content-Type": "application/json",
    }
    params = {
        "q": search_term,
        **{key: value for key, value in kwargs.items() if value is not None},
    }
    # `params=` sends these as URL query parameters on the POST.
    # The timeout matches scrape_page so a stalled connection cannot hang the
    # agent indefinitely (requests waits forever when no timeout is given).
    response = requests.post(
        f"https://google.serper.dev/{search_type}",
        headers=headers,
        params=params,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

def parse_snippets(
    results: dict,
    result_type: Optional[str] = None,
    limit: Optional[int] = None,
) -> List[str]:
    """Flatten a Serper response into a list of text snippets.

    Snippets are collected in this order: the answer box (first available of
    ``answer``, ``snippet``, ``snippetHighlighted``), the knowledge graph
    (type, description, attributes), then up to ``limit`` result entries that
    contain a ``snippet`` key, each serialised whole with ``str()``.

    Args:
        results: Decoded Serper response.
        result_type: One of ``"news"``, ``"places"``, ``"images"``,
            ``"search"``; selects which key of ``results`` holds the result
            list. Defaults to the module-level ``search_type``.
        limit: Maximum number of result entries to inspect. Defaults to the
            module-level ``k``.

    Returns:
        The snippets as strings, or a single-element list with a
        "no result" message when nothing was found.

    Raises:
        KeyError: If ``result_type`` is not one of the supported types.
    """
    if result_type is None:
        result_type = search_type
    if limit is None:
        limit = k

    result_key_for_type = {
        "news": "news",
        "places": "places",
        "images": "images",
        "search": "organic",
    }
    snippets = []

    answer_box = results.get("answerBox") or {}
    if answer_box.get("answer"):
        snippets.append(str(answer_box["answer"]))
    elif answer_box.get("snippet"):
        snippets.append(str(answer_box["snippet"]).replace("\n", " "))
    elif answer_box.get("snippetHighlighted"):
        highlighted = answer_box["snippetHighlighted"]
        # This field can arrive as a list of strings; flatten it so every
        # snippet really is a str, as the return type promises.
        if isinstance(highlighted, (list, tuple)):
            highlighted = " ".join(str(item) for item in highlighted)
        snippets.append(str(highlighted))

    kg = results.get("knowledgeGraph") or {}
    if kg:
        title = kg.get("title")
        entity_type = kg.get("type")
        if entity_type:
            snippets.append(f"{title}: {entity_type}.")
        description = kg.get("description")
        if description:
            snippets.append(description)
        for attribute, value in (kg.get("attributes") or {}).items():
            snippets.append(f"{title} {attribute}: {value}.")

    # A response without the result list is treated as "no hits" instead of
    # raising KeyError, so the fallback message below stays reachable.
    hits = results.get(result_key_for_type[result_type]) or []
    for result in hits[:limit]:
        if "snippet" in result:
            # The whole entry is serialised so all of its fields reach the
            # agent in one message.
            snippets.append(str(result))

    if not snippets:
        return ["No good Google Search Result was found"]
    return snippets

def parse_results(results: dict) -> str:
    """Render the snippets extracted from *results* as numbered lines.

    Args:
        results: Decoded Serper response, forwarded to ``parse_snippets``.

    Returns:
        One ``"Result <index>: <snippet>"`` line per snippet (zero-based
        index), each terminated by a newline.
    """
    snippets = parse_snippets(results)
    # Interpolating (rather than concatenating with `+`) keeps a non-string
    # snippet from raising TypeError; join avoids rebuilding the string on
    # every iteration.
    return "".join(f"Result {idx}: {val}\n" for idx, val in enumerate(snippets))

def custom_search(query: str, **kwargs: Any):
    """Search Serper for *query* and return the results as one formatted string.

    The module-level defaults (``gl``, ``hl``, ``k`` as ``num``, ``tbs``,
    ``search_type``) are sent with every request; any extra keyword
    arguments are forwarded to ``serper_search`` unchanged.
    """
    default_options = {
        "gl": gl,
        "hl": hl,
        "num": k,
        "tbs": tbs,
        "search_type": search_type,
    }
    raw_results = serper_search(search_term=query, **default_options, **kwargs)
    return parse_results(raw_results)

### Preparing mock text input

In [6]:
# Way to generate a random test input using transcripts from Lex Fridman's podcast
# Make sure you have the transcripts downloaded in the folder lex_whisper_transcripts

import test_on_lex

# `transcripts` is indexed below as a mapping from a key to a sliceable sequence of chunks.
# NOTE(review): presumably this samples 10 random episodes and splits each into
# ~20-second chunks — inferred from the argument names; confirm in test_on_lex.
transcripts = test_on_lex.load_lex_transcripts(random_n=10, transcript_folder="./lex_whisper_transcripts/", chunk_time_seconds=20)

import random
def generate_test_input(source=None):
    """Build a random mock transcript snippet for exercising the agents.

    A transcript is picked at random, then a window of six consecutive chunks
    is taken from it: the first five are joined with commas as the "previous"
    context and the sixth is appended on its own line as the "current" chunk.

    Args:
        source: Mapping of key -> sequence of transcript chunks (assumed to be
            strings). Defaults to the module-level ``transcripts``.

    Returns:
        ``"<chunk1>,<chunk2>,...,<chunk5>\\n<chunk6>"``.

    Raises:
        IndexError: If ``source`` is empty.
        ValueError: If the chosen transcript has fewer than six chunks.
    """
    if source is None:
        source = transcripts

    # Pick among the keys that actually exist; the previous randint(0, 10) was
    # inclusive and could index one past the end of a 10-entry mapping.
    transcript = source[random.choice(list(source))]

    window = 6  # five context chunks plus the current one
    if len(transcript) < window:
        raise ValueError(
            f"transcript has {len(transcript)} chunks; at least {window} are required"
        )

    # Prefer to stay ten chunks away from both ends, as before.
    first, last = 10, len(transcript) - 10
    if first > last:
        # Too short for that margin: fall back to any window that fits.
        first, last = 0, len(transcript) - window
    start = random.randint(first, last)

    *prev_chunks, curr_transcripts = transcript[start:start + window]
    prev_transcripts = ",".join(prev_chunks)
    # Alternative tagged format, kept for reference:
    # return f"""<Old Transcripts>
    # {prev_transcripts}
    # <New Transcripts>
    # {curr_transcripts}"""
    return prev_transcripts + "\n" + curr_transcripts

# Show one sample input; as the last expression in the cell, the notebook displays the returned string.
generate_test_input()

Processing episode_105_large...
Processing episode_174_large...
Processing episode_111_large...
Processing episode_247_large...
Processing episode_098_large...
Processing episode_301_large...
Processing episode_254_large...
Processing episode_112_large...
Processing episode_024_large...
Processing episode_109_large...


" He would just maybe muse. It would be nice if something were to happen and then somebody picks it up and does it. Is there, can you steel man the case that, uh, Putin did not have direct or indirect involvement with this? Who, who, who would know, who would know? You know, just the, the international, the reputation perhaps, um, perhaps catalyzed, by Putin himself is that he is the kind of person that would directly or indirectly make those orders. Perhaps the case there is he's somebody to be feared and thereby you want that person out there. Uh, but the act itself, uh, the, the, the poisoning of, uh, Litvinenko and, uh, Oh,, and then the assassination of the Bulgarian, uh, Markov and with a, with the umbrella and, and they all directly traced back to Russian, uh, Soviet intelligence. Uh, and so that's enough to be feared, right? Um, my answer that I gave you is an educated guess, you know, I can't pretend to know this, for sure, but It's frustrating to me because there's a lot of p

### Initialize master and worker agents

In [18]:
def generate_master_prompt(x):
    """Return the master-agent prompt with transcript *x* embedded.

    The prompt describes the "Convoscope" master agent's objective and tools,
    asks for a plan ending in a 12-word ``Insight: ...`` line, and finishes
    with *x* between the ``<Transcript start>`` / ``<Transcript end>`` markers.
    """
    return f"""
You are the master agent of "Convoscope". "Convoscope" is a tool that listens to a user's live conversation and enhances their conversation by providing them with real time "Insights". The "Insights" you generate should aim to lead the user to deeper understanding, broader perspectives, new ideas, more accurate information, better replies, and enhanced conversations. 

[Your Objective]
"Convoscope" is a multi-agent system in which you are the master agent. You will be given direct access to a live stream of transcripts from the user's conversation. Your goal is to utilize your knowledge and tools to generate "Insights" for the user.

[Your Tools]
You have access to "Agents", which are like workers in your team that can help you do certain tasks. Imagine you are a human manager and your agents as human workers. You can assign tasks to your agents and they will help you complete the tasks. Speak to them like how you would speak to a human worker, give detailed context and instructions.

<Task start>
It's now time to generate an "Insight" for the following conversation transcript. The "Insight" should provide additional understanding beyond what is currently being said in the transcript, it shouldn't be plainly repeating what is being said in the transcripts. If a tool or agent fails to fulfill your request, don't run the same request on the same agent again. 

In your initial thought, you should first write down a plan to generate the "Insight". The plan should include
1. Read the incoming conversation transcript and identify the best "Insight" you could generate to enhance the user's conversation.  Come up with a general description of the "Insight" to generate.
2. What tool(s), agent(s), information you need to generate the "Insight".
3. A final step to almagamate your and your worker agent's work to generate the "Insight". The insight should be summarized within 12 words and be in the format `Insight: {{Insert your "Insight" here}}`
<Task end>

<Transcript start>{x}<Transcript end>
"""

In [19]:
from langchain.agents import initialize_agent
from langchain.agents import load_tools
from langchain.tools import StructuredTool
from langchain.agents import AgentType

# NOTE(review): `agents` is not referenced anywhere else in the visible cells —
# confirm whether it is still needed.
agents = [[]]

# Worker agent (type CHAT_ZERO_SHOT_REACT_DESCRIPTION) whose only tool is the
# custom Serper search, described to the LLM as a source of statistics and facts.
statistician_agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Use this tool to search for statistics and facts about a topic. Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events.",
        ),
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

def statistician_agent_wrapper(command):
    """Run the statistician agent on *command*, prefixed with its role line.

    Returns whatever ``statistician_agent.run`` returns.
    """
    role_line = "You are a statistician agent.\n"
    prompt = role_line + command
    return statistician_agent.run(prompt)

# Worker agent (type CHAT_ZERO_SHOT_REACT_DESCRIPTION) whose only tool is the
# custom Serper search, described to the LLM as a way to find facts that may
# contradict the user's conversation.
devils_advocate_agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Use this tool to search for facts that might contradict the user's current conversation. Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events.",
        ),
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

def devils_advocate_agent_wrapper(command):
    """Run the devil's advocate agent on *command*, prefixed with its role line.

    Returns whatever ``devils_advocate_agent.run`` returns.
    """
    # The role line used to be empty ("\n"); it now states the role, matching
    # the statistician and fact-checker wrappers.
    system_prompt = "You are a devil's advocate agent.\n"
    return devils_advocate_agent.run(system_prompt + command)

# Worker agent (type CHAT_ZERO_SHOT_REACT_DESCRIPTION) whose only tool is the
# custom Serper search; its tool description is identical to the statistician's.
fact_checker_agent = initialize_agent([
        Tool(
            name="Search_Engine",
            func=custom_search,
            description="Use this tool to search for statistics and facts about a topic. Pass this specific targeted queries and/or keywords to quickly search the WWW to retrieve vast amounts of information on virtually any topic, spanning from academic research and navigation to history, entertainment, and current events.",
        ),
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

def fact_checker_agent_wrapper(command):
    """Run the fact checker agent on *command*, prefixed with its role line.

    Returns whatever ``fact_checker_agent.run`` returns.
    """
    role_line = "You are a fact checker agent.\n"
    prompt = role_line + command
    return fact_checker_agent.run(prompt)
    
# Master agent: its "tools" are the three worker-agent wrappers above, so each
# tool call hands a free-text command to a worker and gets its answer back.
# NOTE(review): max_iterations=10 presumably caps the number of reasoning/tool
# steps per run — confirm against the LangChain agent executor documentation.
master_agent = initialize_agent([
        Tool(
            name="Statistician_Agent",
            func=statistician_agent_wrapper,
            description="""Call this agent when occurrences in a conversation where statistics and graphs would be useful to the user. It can help you do research for statistics and fetching data.""",
        ),
        Tool(
            name="Devils_Advocate_Agent",
            func=devils_advocate_agent_wrapper,
            description="""Call this agent when you detect a strong opinion in a sentence and think it would be useful for the user to see a devil's advocate opinion. It can help you do research for counter arguments.""",
        ),
        Tool(
            name="Fact_Checker_Agent",
            func=fact_checker_agent_wrapper,
            description="""Call this agent if a statement is made which you suspect might be false, and that statement is falsifiable with free and public knowledge. It can help you research for facts.""",
        )
    ], llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, max_iterations=10, verbose=True)

In [21]:
# Draw a fresh random transcript snippet; the bare name on the next line makes
# the notebook display it.
test_transcript = generate_test_input()
test_transcript

" that the vaccine was developed so quickly and surprisingly way more effective than it was hoped for. But there could have been other solutions and they completely distracted from us from that. In fact, it distracted us from looking into a bunch of things like the lab leak. And so it's not a pure victory., And there's a lot of people that criticize the overreach of government and all of this. That one of the things that makes the United States great is the individualism and the hesitancy to ideas of mandates. Even if the mandates on mass will have a positive, even strongly positive result,, many Americans will still say no. Because in the long arc of history, saying no in that moment will actually lead to a better country and a better world. So that's a messed up aspect of America, but it's also a beautiful part., We're skeptical even about good things. I agree and certainly we should all be cautious about government overreach, absolutely. And it happens in all kinds of scenarios with

In [22]:
# Embed the sampled transcript in the master prompt and run the master agent on it.
master_agent.run(generate_master_prompt(test_transcript))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The conversation revolves around the development of the COVID-19 vaccine, the potential for other solutions, the concept of government overreach, and the role of individualism in American society. The participants also touch on the balance between individual rights and collective safety, and the role of government in maintaining this balance. The mention of ivermectin suggests a discussion on alternative treatments for COVID-19. An insightful addition to this conversation could be a comparison of the effectiveness of vaccines versus alternative treatments like ivermectin, or a statistical analysis of the impact of individualism on public health measures. 

1. The "Insight" to generate: A comparison of the effectiveness of vaccines versus alternative treatments like ivermectin, and a statistical analysis of the impact of individualism on public health measures.
2. Tools/Agents needed: Statistician_Agent to fetch data 

"Insight: Vaccines show high effectiveness; ivermectin's effectiveness unclear. Individualism can limit public health measures' effectiveness."

Next steps
- Search engine
- Pushing agents to do realistic things
- Ideas
  - Generate realistic insights
  - Multiple insights ideas
  - Only ask for data that is easy to find
  - Make the plan less rigid, who should I ask for help and ideas?