In [2]:
from typing import List, Dict, Any, Annotated
from typing_extensions import TypedDict
import datetime
import pprint
from langchain_core.messages import AIMessage, HumanMessage

pp = pprint.pprint

# class HumanMessage:
#     def __init__(self, content: str):
#         self.content = content

# class AIMessage:
#     def __init__(self, content: str):
#         self.content = content

print("Set up done")

Set up done


In [3]:
class State(TypedDict):
    messages: List[Any]
    user_info: Dict[str, Any]
    search_results: List[Dict[str, Any]]
    fetched_pages: List[Dict[str, Any]]
    summmaries: List[Dict[str, Any]]
    analysis: Dict[str, Any]

state: State = {
    "messages": [HumanMessage(content= "Analyze competitor AcmeCorp pricing and features")],
    "user_info": {},
    "search_results": [],
    "fetched_pages": [],
    "summaries": [],
    "analysis": {}
}

pp(state)

{'analysis': {},
 'fetched_pages': [],
 'messages': [HumanMessage(content='Analyze competitor AcmeCorp pricing and features', additional_kwargs={}, response_metadata={})],
 'search_results': [],
 'summaries': [],
 'user_info': {}}


In [4]:
def search_web(state: State) -> Dict[str, Any]:
    """
    Pretend to search the web for the user's query.
    Instead of calling a real API, we return a fixed list of 2 fake results
    """
    query = state['messages'][-1].content.strip()

    results = [
        {"url": "https://example.com/competitor-news", "title": "Competitor News"},
        {"url": "https://example.com/pricing", "title": "Competitor Pricing"},
    ]

    state['search_results'] = results

    ai_msg = AIMessage(content=f"Found {len(results)} for '{query}'")

    return {"messages": [ai_msg], **state}

out = search_web(state)
print(out['messages'][0].content)
pp(out['search_results'])

Analyze competitor AcmeCorp pricing and features
[{'title': 'Competitor News', 'url': 'https://example.com/competitor-news'},
 {'title': 'Competitor Pricing', 'url': 'https://example.com/pricing'}]


In [5]:
import time

FETCH_CACHE = {}

def fetch_pages(state: State) -> Dict[str, Any]:
    """
    Simulates fetching the content of each 'search_results' URL.
    Creates a tiny HTML snippet for each URL and marks whether it came from cache or simulated fetch.
    """
    fetched = []
    for r in state.get('search_results', []):
        url = r['url']
        title = r['title']

        if url in FETCH_CACHE:
            content = FETCH_CACHE[url]
            source = "cache"
        else:
            content = f"<html><body><h1>{title}</h1><p>Sample content from {url}</p></body></html>"
            FETCH_CACHE[url] = content
            source = "fetched"
            time.sleep(0.1)

        fetched.append({
            "url": url,
            "title": title,
            "html": content[:500],
            "source": source
        })
    state['fetched_pages'] = fetched
    ai_msg = AIMessage(content=f"Fetched {len(fetched)} pages (simulated).")
    return {"messages": [ai_msg], **state}

out2 = fetch_pages(out)
print(out2['messages'][0].content)
pp(out2['fetched_pages'])
    




Analyze competitor AcmeCorp pricing and features
[{'html': '<html><body><h1>Competitor News</h1><p>Sample content from '
          'https://example.com/competitor-news</p></body></html>',
  'source': 'fetched',
  'title': 'Competitor News',
  'url': 'https://example.com/competitor-news'},
 {'html': '<html><body><h1>Competitor Pricing</h1><p>Sample content from '
          'https://example.com/pricing</p></body></html>',
  'source': 'fetched',
  'title': 'Competitor Pricing',
  'url': 'https://example.com/pricing'}]


In [7]:
def summarize_pages(state: State) -> Dict[str, Any]:
    """
    Read state['fetched_pages'] and produce a tiny 1-line summary for each page.
    This keeps things simple and deterministic for learning in Jupyter.
    """
    fetched = state.get('fetched_pages', [])
    summaries = []

    for page in fetched:
        title = page.get('title', 'No title')
        url = page.get('url', '')

        summary_text = f"{title} - short summary (source: {url.split('//')[-1][:40]})"
        summaries.append({"url": url, "summary": summary_text})
    state['summaries'] = summaries

    ai_msg = AIMessage(content=f"created {len(summaries)} summaries (simulated).")
    return {"messages": [ai_msg], **state}
out3 = summarize_pages(out2)
print(out3['messages'][0].content)
from pprint import pprint; pprint(out3['summaries'])

Analyze competitor AcmeCorp pricing and features
[{'summary': 'Competitor News - short summary (source: '
             'example.com/competitor-news)',
  'url': 'https://example.com/competitor-news'},
 {'summary': 'Competitor Pricing - short summary (source: example.com/pricing)',
  'url': 'https://example.com/pricing'}]


In [None]:
def analyze_competitor(state: State) -> Dict[str, Any]:
    """
    Combine all summaries from state['summaries'] into a simple analysis text.
    For learning, this is a fake analysis: we just stitch summaries together.
    """
    summaries = state.get('summaries', [])
    if not summaries:
        analysis_text = "No summaries availble for analysis."
    else:
        analysis_text = "Competitor Analysis Report:\n"
        for s in summaries:
            analysis_text += f"- From "