# Definer Experiments
Previous notebook was getting quite full, here is a new notebooks for the Definer project.

In [1]:
# Put all imports here to be efficient
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from pydantic import BaseModel, Field, validator
from langchain.output_parsers import PydanticOutputParser
from langchain.schema import OutputParserException
from typing import List
import os

In [2]:
# Way to generate a random test input using transcripts from Lex Fridman's podcast
# Make sure you have the transcripts downloaded in the folder lex_whisper_transcripts

import test_on_lex

transcripts = test_on_lex.load_lex_transcripts(random_n=10, transcript_folder="./lex_whisper_transcripts/", chunk_time_seconds=15)

import random
def generate_test_input():
    idx = random.randint(0, 10)
    key = list(transcripts.keys())[idx]
    transcript = transcripts[key]
    trans_idx = random.randint(10, len(transcript)-10)
    latest = transcript[trans_idx:trans_idx+7]
    prev_transcripts, curr_transcripts = str.join(",", list(latest[0:5])), latest[5]
    return prev_transcripts + "\n" + curr_transcripts

generate_test_input()

Processing episode_205_large...
Processing episode_196_large...
Processing episode_141_large...
Processing episode_269_large...
Processing episode_035_large...
Processing episode_112_large...
Processing episode_017_large...
Processing episode_136_large...
Processing episode_254_large...
Processing episode_192_large...


" breaks you know stopping breaks you got to stay on top of nutrition that's the other big thing too i'm you know probably eating like anywhere between 10 to 15 000 calories a day which is you know i can probably count on my hand a couple of occasions where i've eaten that much in my life so now i got to do that for six plus weeks in a row and you don't want any having a stomach problem i'm, trying to try to minimize the amount of stomach problems so you would you estimate about 12 to 13 to 14 hours of running every day yeah that's probably like from from the first step to the last step it'll probably be somewhere around like say 14 hours 13 hours or something like that would be a pretty good estimate and then getting rest and so and then minimizing the risk of injury, which could be as small as like like literally uneven surfaces resulting to like stepping the wrong way i mean that's going to be a lot of steps yeah yeah uh huh so the probability of injury are you worried about that ki

In [3]:
def format_list_data(list_data: list):
    return "\n".join([f"{i+1}. {example}" for i, example in enumerate(list_data)])

In [4]:
proactive_rare_word_agent_prompt_blueprint = """
# Objective
Your role is to identify and define "Rare Entities" in a transcript. Types of "Rare Entities" include rare words, jargons, adages, concepts, people, places, organizations, events etc that are not well known to the average high schooler, in accordance to current trends. You can also intelligently detect entities that are described in the conversation but not explicitly mentioned.

# Criteria for Rare Entities in order of importance
1. Rarity: Select entities that are unlikely for an average high schooler to know. Well known entities are like Fortune 500 organizations, worldwide-known events, popular locations, and entities popularized by recent news or events such as "COVID-19", "Bitcoin", or "Generative AI".
2. Utility: Definition should help a user understand the conversation better and achieve their goals.
3. No Redundancy: Exclude definitions if already defined in the conversation.
4. Complexity: Choose phrases with non-obvious meanings, such that their meaning cannot be derived from simple words within the entity name, such as "Butterfly Effect" which has a totally different meaning from its base words, but not "Electric Car" nor "Lane Keeping System" as they're easily derived.
5. Definability: Must be clearly and succinctly definable in under 10 words.
6. Existance: Don't select entities if you have no knowledge of them

# Conversation Transcript:
<Transcript start>{conversation_context}<Transcript end>

# Output Guidelines:
Output an array (ONLY OUTPUT THIS) of the entities you identified using the following template: `[{{ name: string, definition: string, search_keyword: string }}]`

- name is the entity name shown to the user, if the name is mistranscribed, autocorrect it into the most well known form with proper spelling, capitalization and punctuation
- definition is concise (< 12 words)
- search_keyword as the best specific Internet search keywords to search for the entity, add entity type defined above for better searchability
- it's OK to output an empty array - most of the time, the array will be empty, only include items if the fit all the requirements

## Additional Guidelines:
- Only select nouns, not verbs or adjectives.
- Select entities iff they have an entry in an encyclopedia, wikipedia, dictionary, or other reference material.
- Do not define entities you yourself are unfamiliar with, you can try to piece together the implied entity only if you are 99% confident.
- For the search keyword, use complete, official and context relevant keyword(s) to search for that entity. You might need to autocorrect entity names or use their official names or add additional context keywords (like the type of entity) to help with searchability, especially if the entity is ambiguous or has multiple meanings. Additionally, for rare words, add "definition" to the search keyword.
- Definitions should use simple language to be easily understood.
- Multiple entities can be detected from one phrase, for example, "The Lugubrious Game" can be defined as a painting (iff the entire term "the lugubrious game" is mentioned), and the rare word "lugubrious" is also a candidate to define.
- Limit results to {number_of_definitions} entities, prioritize rarity and utility.
- Examples:
    - Completing incomplete name example: Conversation mentions "Balmer" and "Microsoft", the keyword is "Steve Balmer + person", and the name would be "Steve Balmer" because it is complete.
    - Replacing unofficial name example: Conversation mentions "Clay Institute", the keyword is "Clay Mathematics Institute + organization", using the official name.
    - Add context example: Conversation mentions "Theory of everything", the keyword needs context keywords such as "Theory of everything + concept", because there is a popular movie with the same name. 
    - Autocorrect transcript example: Conversation mentions "Coleman Sachs" in the context of finance, if you're confident it was supposed to be "Goldman Sachs", you autocorrect it and define "Goldman Sachs".

## Recent Definitions:
These have already been defined so don't define them again:
{definitions_history}

## Example Output:
entities: [{{ name: "80/20 Rule", definition: "Productivity concept; Majority of results come from few causes", search_keyword: "80/20 Rule + concept" }}]

{format_instructions} 
If no relevant entities are identified, output empty arrays.
"""

In [5]:
def run_proactive_rare_word_agent_and_definer(
    conversation_context: str, definitions_history: list = []
):
    # run proactive agent to find out which expert agents we should run
    proactive_rare_word_agent_response = run_proactive_rare_word_agent(
        conversation_context, definitions_history
    )

    # do nothing else if proactive meta agent didn't specify an agent to run
    if proactive_rare_word_agent_response == []:
        return []

    # pass words to define to definer agent
    print("proactive_rare_word_agent_response", proactive_rare_word_agent_response)
    
    return proactive_rare_word_agent_response

class ProactiveRareWordAgentQuery(BaseModel):
    """
    Proactive rare word agent that identifies rare entities in a conversation context
    """

    to_define_list: list = Field(
        description="the rare entities to define",
    )

class Entity(BaseModel):
    name: str = Field(
        description="entity name",
    )
    definition: str = Field(
        description="entity definition",
    )
    search_keyword: str = Field(
        description="keyword to search for entity on the Internet",
    )

class ConversationEntities(BaseModel):
    entities: List[Entity] = Field(
        description="list of entities and their definitions",
        default=[]
    )

proactive_rare_word_agent_query_parser = PydanticOutputParser(
    pydantic_object=ConversationEntities
)

def run_proactive_rare_word_agent(conversation_context: str, definitions_history: list):
    # start up GPT4 connection
    llm = ChatOpenAI(
        temperature=0,
        openai_api_key=os.environ.get("OPEN_AI_API_KEY"),
        model="gpt-4-1106-preview",
    )

    extract_proactive_rare_word_agent_query_prompt = PromptTemplate(
        template=proactive_rare_word_agent_prompt_blueprint,
        input_variables=[
            "conversation_context",
            "definitions_history",
        ],
        partial_variables={
            "format_instructions": proactive_rare_word_agent_query_parser.get_format_instructions(),
            "number_of_definitions": 3,
        },
    )

    if len(definitions_history) > 0:
        definitions_history = format_list_data(definitions_history)
    else:
        definitions_history = "None"

    proactive_rare_word_agent_query_prompt_string = (
        extract_proactive_rare_word_agent_query_prompt.format_prompt(
            conversation_context=conversation_context,
            definitions_history=definitions_history,
        ).to_string()
    )

    # print("Proactive meta agent query prompt string", proactive_rare_word_agent_query_prompt_string)

    response = llm(
        [HumanMessage(content=proactive_rare_word_agent_query_prompt_string)]
    )

    print(response.content)
    try:
        res = proactive_rare_word_agent_query_parser.parse(
            response.content
        )
        return res
    except OutputParserException as e:
        print("Error parsing output" , e)
        return None

In [6]:
# test_transcript = generate_test_input()
test_transcript = """
In the realm of artificial intelligence and big data, several key players stand out with their innovative contributions. Hugging Fase, a leader in machine learning models. Another major entity, OpenYI, has revolutionized language models. We now have the largest LLMs ever such as the Falcon LLM model"""
print(test_transcript)
res = run_proactive_rare_word_agent_and_definer(test_transcript, [])
res


In the realm of artificial intelligence and big data, several key players stand out with their innovative contributions. Hugging Fase, a leader in machine learning models. Another major entity, OpenYI, has revolutionized language models. We now have the largest LLMs ever such as the Falcon LLM model
```json
{
  "entities": [
    {
      "name": "Hugging Face",
      "definition": "AI company specializing in natural language processing",
      "search_keyword": "Hugging Face + organization"
    },
    {
      "name": "OpenAI",
      "definition": "AI research lab focusing on safe artificial general intelligence",
      "search_keyword": "OpenAI + organization"
    },
    {
      "name": "Falcon LLM model",
      "definition": "A large language model for AI applications",
      "search_keyword": "Falcon LLM model + artificial intelligence"
    }
  ]
}
```
proactive_rare_word_agent_response entities=[Entity(name='Hugging Face', definition='AI company specializing in natural language proc

ConversationEntities(entities=[Entity(name='Hugging Face', definition='AI company specializing in natural language processing', search_keyword='Hugging Face + organization'), Entity(name='OpenAI', definition='AI research lab focusing on safe artificial general intelligence', search_keyword='OpenAI + organization'), Entity(name='Falcon LLM model', definition='A large language model for AI applications', search_keyword='Falcon LLM model + artificial intelligence')])

### Search tool
EKG is unreliable

In [7]:
from typing import Any, List, Literal
import aiohttp
import asyncio
import os

k: int = 3
gl: str = "us"
hl: str = "en"
tbs = None
num_sentences = 7
serper_api_key=os.environ.get("SERPER_API_KEY")
search_type: Literal["news", "search", "places", "images"] = "images"
result_key_for_type = {
        "news": "news",
        "places": "places",
        "images": "images",
        "search": "organic",
    }

async def serper_search_async(
    search_term: str, search_type: str = "search", **kwargs: Any
) -> dict:
    headers = {
        "X-API-KEY": serper_api_key or "",
        "Content-Type": "application/json",
    }
    params = {
        "q": search_term,
        **{key: value for key, value in kwargs.items() if value is not None},
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(f"https://google.serper.dev/{search_type}", headers=headers, json=params) as response:
            response.raise_for_status()
            search_results = await response.json()
            return search_results


async def parse_snippets_async(results: dict, scrape_pages: bool = False, summarize_pages: bool = True, num_sentences: int = 3) -> List[str]:
    snippets = []
    if results.get("answerBox"):
        answer_box = results.get("answerBox", {})
        if answer_box.get("answer"):
            snippets.append(f"The answer is {answer_box.get('answer')}")
        elif answer_box.get("snippet"):
            snippets.append(f"The answer might be in the snippet: {answer_box.get('snippet')}")
        elif answer_box.get("snippetHighlighted"):
            snippets.append(f"The answer might be in the snippet: {answer_box.get('snippetHighlighted')}")

    if results.get("knowledgeGraph"):
        kg = results.get("knowledgeGraph", {})
        title = kg.get("title")
        entity_type = kg.get("type")
        if entity_type:
            snippets.append(f"Knowledge Graph Results: {title}: {entity_type}.")
        description = kg.get("description")
        if description:
            snippets.append(f"Knowledge Graph Results: {title}: {description}.")
        for attribute, value in kg.get("attributes", {}).items():
            snippets.append(f"Knowledge Graph Results: {title} {attribute}: {value}.")

    if scrape_pages:
        tasks = []
        for result in results[result_key_for_type[search_type]][:k]:
            task = asyncio.create_task(scrape_page_async(result["link"], summarize_page=summarize_pages, num_sentences=num_sentences))
            tasks.append(task)
        summarized_pages = await asyncio.gather(*tasks)
        for i, page in enumerate(summarized_pages):
            result = results[result_key_for_type[search_type]][i]
            if page:
                snippets.append(f"Title: {result.get('title', '')}\nSource:{result['link']}\nSnippet: {result.get('snippet', '')}\nSummarized Page: {page}")
            else:
                snippets.append(f"Title: {result.get('title', '')}\nSource:{result['link']}\nSnippet: {result.get('snippet', '')}\n")
    else:
        for result in results[result_key_for_type[search_type]][:k]:
            snippets.append(f"Title: {result.get('title', '')}\nSource:{result['link']}\nSnippet: {result.get('snippet', '')}\n")

    if len(snippets) == 0:
        return ["No good Google Search Result was found"]
    return snippets

import requests

def can_embed_url(url: str):
    response = requests.head(url)

    # Check the headers for 'X-Frame-Options' or 'Content-Security-Policy'
    x_frame_options = response.headers.get('X-Frame-Options')
    csp = response.headers.get('Content-Security-Policy')

    return not (x_frame_options or ('frame-ancestors' in csp if csp else False))

def extract_entity_url_and_image(search_results: dict, image_results: dict):
    # Only get the first top url and image_url
    res = {}
    if search_results.get("knowledgeGraph"):
        result = search_results.get("knowledgeGraph", {})
        if result.get("descriptionSource") == "Wikipedia":
            ref_url = result.get("descriptionLink")
            res["url"] = ref_url

    for result in search_results[result_key_for_type["search"]][:k]:
        if "url" not in res and result.get("link") and can_embed_url(result.get("link")):
            res["url"] = result.get("link")
            break

    if image_results is None:
        return res
    
    for result in image_results[result_key_for_type["images"]][:k]:
        if "image_url" not in res and result.get("imageUrl"):
            res["image_url"] = result.get("imageUrl")
            break

    return res

async def search_url_for_entity_async(query: str):
    async def inner_search(query:str): 
        search_task = asyncio.create_task(serper_search_async(
            search_term=query,
            gl=gl,
            hl=hl,
            num=k,
            tbs=tbs,
            search_type="search",
        ))

        image_search_task = None if "definition" in query else asyncio.create_task(serper_search_async(
            search_term=query,
            gl=gl,
            hl=hl,
            num=k,
            tbs=tbs,
            search_type="images",
        ))

        tasks = [search_task]
        if image_search_task:
            tasks.append(image_search_task)

        search_results, image_results = await asyncio.gather(*tasks)
        
        return extract_entity_url_and_image(search_results, image_results)
    
    res = await inner_search(query)
    print(res)
    if "url" not in res:
        res = await inner_search(query + " wiki") # fallback search using wiki
    return res

In [8]:
await search_url_for_entity_async("how is chatgpt doing medium")

ClientResponseError: 400, message='Bad Request', url=URL('https://google.serper.dev/images')

In [10]:
test_transcript = generate_test_input()
print(test_transcript)
res = run_proactive_rare_word_agent_and_definer(test_transcript, [])
print(res)
for entities in res.entities:
    res = await search_url_for_entity_async(entities.search_keyword)
    if "url" not in res:
        res = await search_url_for_entity_async(entities.search_keyword + " wiki")
    print(res)

 what has been so revolutionary in the last 10 years, I would 15 years and thinking about the internet, I would say things like, hopefully I'm not saying anything ridiculous, but everything from Wikipedia to Twitter., So like these kind of websites, not so much AI, but like I would expect to see some kind of big productivity increases from just the connectivity between people and the access to more information., Yeah, well, so that's another area I've done quite a bit of research on actually, is these free goods like Wikipedia, Facebook, Twitter, Zoom. We're actually doing this in person, but almost everything else I do these days is online. The interesting thing about all those is most of them have a price of zero., What do you pay for Wikipedia? Maybe like a little bit for the electrons to come to your house. Basically zero, right? Take a small pause and say, I donate to Wikipedia. Often you should too. It's good for you, yeah. So, but what does that do mean for GDP? GDP is based on 

ClientResponseError: 400, message='Bad Request', url=URL('https://google.serper.dev/images')

## Just go for the best intuitive way that works the best

Pipeline
1. Check if page can be embed
2. Check if url is accurate for definition

### Check if page can be embedded

In [117]:
import requests

# URL of the page you want to check
url = 'https://en.wikipedia.org/wiki/Borat_Sagdiyev'

# Send a request to the URL
response = requests.head(url)

# Check the headers for 'X-Frame-Options' or 'Content-Security-Policy'
x_frame_options = response.headers.get('X-Frame-Options')
csp = response.headers.get('Content-Security-Policy')

if x_frame_options or ('frame-ancestors' in csp if csp else False):
    print("The page cannot be embedded.")
else:
    print("The page can be embedded.")


The page can be embedded.


In [11]:
# test mispelling
test_transcript = """Touring machines epitomize the quintessence of computational esotericism, manipulating symbols on a tape according to a tableau of rules. These automata traverse the tape's aleph-null segments, effectuating state transitions within a discrete, preternatural milieu. Ineffably, they delineate the demarcation of decidability and recursively enumerable conundrums."""

print(test_transcript)
res = run_proactive_rare_word_agent_and_definer(test_transcript, [])
res

Touring machines epitomize the quintessence of computational esotericism, manipulating symbols on a tape according to a tableau of rules. These automata traverse the tape's aleph-null segments, effectuating state transitions within a discrete, preternatural milieu. Ineffably, they delineate the demarcation of decidability and recursively enumerable conundrums.
```json
{
  "entities": [
    {
      "name": "Turing machines",
      "definition": "Abstract machines that manipulate symbols on a tape.",
      "search_keyword": "Turing machines + concept"
    },
    {
      "name": "Aleph-null",
      "definition": "Smallest infinity, size of natural numbers set.",
      "search_keyword": "Aleph-null + concept"
    },
    {
      "name": "Recursively enumerable",
      "definition": "Set whose elements can be listed by an algorithm.",
      "search_keyword": "Recursively enumerable + concept"
    }
  ]
}
```
proactive_rare_word_agent_response entities=[Entity(name='Turing machines', definiti

ConversationEntities(entities=[Entity(name='Turing machines', definition='Abstract machines that manipulate symbols on a tape.', search_keyword='Turing machines + concept'), Entity(name='Aleph-null', definition='Smallest infinity, size of natural numbers set.', search_keyword='Aleph-null + concept'), Entity(name='Recursively enumerable', definition='Set whose elements can be listed by an algorithm.', search_keyword='Recursively enumerable + concept')])

In [12]:
# test rare terms
test_transcript = """Enceladus, an enigmatic spheroid ensconced within Saturn's magniloquent rings, exudes cryptic cryovolcanic plumes. These plumes are a melange of volatile compounds, festooning the E-ring with a diaphanous, icy effulgence. Amidst the celestial ballet, Enceladus pirouettes, a harbinger of astrobiological enigmas and cosmochemical perplexities."""

print(test_transcript)
res = run_proactive_rare_word_agent_and_definer(test_transcript, [])
res


Enceladus, an enigmatic spheroid ensconced within Saturn's magniloquent rings, exudes cryptic cryovolcanic plumes. These plumes are a melange of volatile compounds, festooning the E-ring with a diaphanous, icy effulgence. Amidst the celestial ballet, Enceladus pirouettes, a harbinger of astrobiological enigmas and cosmochemical perplexities.
```json
{
  "entities": [
    {
      "name": "Enceladus",
      "definition": "Moon of Saturn with geysers and possible subsurface ocean",
      "search_keyword": "Enceladus + moon"
    },
    {
      "name": "Cryovolcanism",
      "definition": "Volcanic activity in icy celestial bodies",
      "search_keyword": "Cryovolcanism + concept"
    },
    {
      "name": "E-ring",
      "definition": "Outermost ring of Saturn, made of ice particles",
      "search_keyword": "Saturn's E-ring + celestial feature"
    }
  ]
}
```
proactive_rare_word_agent_response entities=[Entity(name='Enceladus', definition='Moon of Saturn with geysers and possible subs

ConversationEntities(entities=[Entity(name='Enceladus', definition='Moon of Saturn with geysers and possible subsurface ocean', search_keyword='Enceladus + moon'), Entity(name='Cryovolcanism', definition='Volcanic activity in icy celestial bodies', search_keyword='Cryovolcanism + concept'), Entity(name='E-ring', definition='Outermost ring of Saturn, made of ice particles', search_keyword="Saturn's E-ring + celestial feature")])

In [13]:
# test unofficial names
test_transcript = """The Marble Caves of Patagonia, a rare and ethereal natural wonder, are often overshadowed by more renowned landmarks yet offer an otherworldly beauty to those few who navigate their secluded azure chambers."""

print(test_transcript)
res = run_proactive_rare_word_agent_and_definer(test_transcript, [])
res

The Marble Caves of Patagonia, a rare and ethereal natural wonder, are often overshadowed by more renowned landmarks yet offer an otherworldly beauty to those few who navigate their secluded azure chambers.
```json
{
  "entities": [
    {
      "name": "Marble Caves of Patagonia",
      "definition": "Stunning geological formations in Chile",
      "search_keyword": "Marble Caves of Patagonia + natural wonder"
    }
  ]
}
```
proactive_rare_word_agent_response entities=[Entity(name='Marble Caves of Patagonia', definition='Stunning geological formations in Chile', search_keyword='Marble Caves of Patagonia + natural wonder')]


ConversationEntities(entities=[Entity(name='Marble Caves of Patagonia', definition='Stunning geological formations in Chile', search_keyword='Marble Caves of Patagonia + natural wonder')])

In [14]:
# test transcript from chatgpt vtt
test_transcript = """Alex: Did you ever read about Hypatia in Alexandria? Her story is quite the example of schadenfreude among her peers. Jamie: True, I've always found it intriguing how people's limerence can lead to dramatic events. On a lighter note, have you heard the adage, "A bird in the hand is worth two in the bush"? Alex: Oh definitely, it's quite quotidian in its wisdom. Speaking of rare things, I was reading about Oymyakon. Can you imagine living in such cold? Jamie: Hard to even think about! Speaking of unique places, I'd love to visit Timbuktu someday. It's almost like a real-life cacophony of culture and history. Alex: Absolutely. Switching topics, are you going to the Dyngus Day celebration? I hear it's a blast. Jamie: I'd love to! It's always fun to learn about and participate in traditions from around the world. Just like heliolatry practices, they have such deep roots. Alex: Speaking of the world, the Bilderberg Meeting's recent convention had some interesting discussions. It's like a modern-day triumvirate in some ways."""

print(test_transcript)
res = run_proactive_rare_word_agent_and_definer(test_transcript, [])
res

Alex: Did you ever read about Hypatia in Alexandria? Her story is quite the example of schadenfreude among her peers. Jamie: True, I've always found it intriguing how people's limerence can lead to dramatic events. On a lighter note, have you heard the adage, "A bird in the hand is worth two in the bush"? Alex: Oh definitely, it's quite quotidian in its wisdom. Speaking of rare things, I was reading about Oymyakon. Can you imagine living in such cold? Jamie: Hard to even think about! Speaking of unique places, I'd love to visit Timbuktu someday. It's almost like a real-life cacophony of culture and history. Alex: Absolutely. Switching topics, are you going to the Dyngus Day celebration? I hear it's a blast. Jamie: I'd love to! It's always fun to learn about and participate in traditions from around the world. Just like heliolatry practices, they have such deep roots. Alex: Speaking of the world, the Bilderberg Meeting's recent convention had some interesting discussions. It's like a mo

ConversationEntities(entities=[Entity(name='Hypatia', definition='Female philosopher and mathematician in ancient Alexandria', search_keyword='Hypatia of Alexandria + person'), Entity(name='Schadenfreude', definition="Pleasure derived from another's misfortune", search_keyword='Schadenfreude + definition'), Entity(name='Limerence', definition='State of intense romantic infatuation', search_keyword='Limerence + definition')])

In [15]:
# noised up version, mispelling, etc
test_transcript = """Alex: Did you ever read about Hypatya in Alexandria? Her story is quite the example of shadenfrood among her peers. Jamie: True, I've always found it intriguing how people's lemerence can lead to dramatic events. On a lighter note, have you heard the adage, "A bird in the hand is worth too in the bush"? Alex: Oh definitely, it's quite quodidian in its wisdom. Speaking of rare things, I was reading about Oymiakon. Can you imagine living in such cold?
"""

print(test_transcript)
res = run_proactive_rare_word_agent_and_definer(test_transcript, [])
res

Alex: Did you ever read about Hypatya in Alexandria? Her story is quite the example of shadenfrood among her peers. Jamie: True, I've always found it intriguing how people's lemerence can lead to dramatic events. On a lighter note, have you heard the adage, "A bird in the hand is worth too in the bush"? Alex: Oh definitely, it's quite quodidian in its wisdom. Speaking of rare things, I was reading about Oymiakon. Can you imagine living in such cold?

```json
{
  "entities": [
    {
      "name": "Hypatia",
      "definition": "Female philosopher, mathematician in ancient Alexandria",
      "search_keyword": "Hypatia of Alexandria + person"
    },
    {
      "name": "Schadenfreude",
      "definition": "Pleasure derived from another's misfortune",
      "search_keyword": "Schadenfreude + definition"
    },
    {
      "name": "Limerence",
      "definition": "State of intense romantic infatuation",
      "search_keyword": "Limerence + definition"
    }
  ]
}
```
proactive_rare_word_age

ConversationEntities(entities=[Entity(name='Hypatia', definition='Female philosopher, mathematician in ancient Alexandria', search_keyword='Hypatia of Alexandria + person'), Entity(name='Schadenfreude', definition="Pleasure derived from another's misfortune", search_keyword='Schadenfreude + definition'), Entity(name='Limerence', definition='State of intense romantic infatuation', search_keyword='Limerence + definition')])