# Email Research Assistant


## Imports

In [3]:
import requests
import json
import os
from pydantic import BaseModel
from typing import List, Dict, Any
from langchain_core.prompts import ChatPromptTemplate
import os
import pathlib
from bs4 import BeautifulSoup
import re
from bs4 import BeautifulSoup
import re
from langchain_openai import ChatOpenAI
from pydantic import Field
from typing import Literal
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain.schema import HumanMessage, AIMessage
from __future__ import print_function
import sib_api_v3_sdk
from sib_api_v3_sdk.rest import ApiException
# from IPython.display import Markdown, display, HTML
# from llm_factor import get_llm

In [6]:
# Load KEY=VALUE pairs from the project's .env file into the process environment.
with open("../.env", "r", encoding="utf-8") as f:
    for line in f:
        entry = line.strip()
        # Blank lines, "#" comments and lines without "=" hold no assignment;
        # trying to unpack them would raise ValueError.
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        # Split on the first "=" only so values that themselves contain "=" stay intact.
        key, value = entry.split("=", 1)
        os.environ[key] = value


In [None]:
# Uses defaults from .env
# llm = get_llm()
#
# # Or override if needed
# custom_llm = get_llm(provider="anthropic", model_name="claude-3-opus")

## Searching Google & Filtering Results

In [7]:
# Queries to run; each term is searched separately.
search_terms = [
    "Agentic AI",
    "OpenAI LinkedIn",
    "Agentic AI Enterprise",
]

In [8]:
# One entry of the relevance check's structured output.
# NOTE(review): documented with "#" comments rather than a docstring because this
# model is nested in the schema given to the LLM; presumably a docstring would
# change that schema — confirm before converting.
class ResultRelevance(BaseModel):
    explanation: str  # presumably the model's reason for selecting the result — confirm against the prompt
    id: str  # identifier of the selected result; the filtering loop compares it with str(result['id'])

# Structured output schema passed to with_structured_output in check_search_relevance.
class RelevanceCheckOutput(BaseModel):
    relevant_results: List[ResultRelevance]  # the results the model selected

def search_serper(search_query):
    """
    Query the Serper search endpoint and return its organic results.

    Args:
        search_query: Search string sent as the ``q`` field of the request.

    Returns:
        List of dictionaries with the keys ``title``, ``link``, ``snippet``,
        ``search_term`` and ``id``. ``id`` is the 1-based position of the result
        within this query's response, so IDs repeat across different queries.

    Raises:
        EnvironmentError: If ``SERPER_API_KEY`` is not set in the environment.
        requests.HTTPError: If the API answers with an error status code.
    """
    # The credential is read from the environment so it never lives in source control.
    api_key = os.environ.get("SERPER_API_KEY")
    if not api_key:
        raise EnvironmentError("SERPER_API_KEY not found in environment variables.")

    url = "https://google.serper.dev/search"

    payload = json.dumps({
        "q": search_query,
        "gl": "gb",
        "num": 30,
        "tbs": "qdr:d"
    })

    headers = {
        'X-API-KEY': api_key,
        'Content-Type': 'application/json'
    }

    # A timeout keeps a stalled connection from hanging the run indefinitely.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Surface HTTP errors here instead of silently treating them as "no results".
    response.raise_for_status()
    results_list = response.json().get('organic', [])

    return [
        {
            # title/snippet fall back to '' so one sparse result cannot abort the search.
            'title': result.get('title', ''),
            'link': result['link'],
            'snippet': result.get('snippet', ''),
            'search_term': search_query,
            'id': position,
        }
        for position, result in enumerate(results_list, 1)
    ]


# Read and parse .env file manually
with open('../.env', 'r', encoding='utf-8') as f:
    for line in f:
        entry = line.strip()
        # Skip blanks, "#" comments and lines without "=": none of them can be
        # unpacked into a key/value pair.
        if not entry or entry.startswith('#') or '=' not in entry:
            continue
        # Split once so "=" characters inside the value are preserved.
        key, value = entry.split('=', 1)
        os.environ[key] = value

def load_prompt(prompt_name):
    """
    Return the text of the prompt file ``../prompts/<prompt_name>.md``.

    Args:
        prompt_name: File name of the prompt, without the ``.md`` extension.

    Returns:
        The full contents of the prompt file as a string.

    Raises:
        FileNotFoundError: If the prompt file does not exist.
    """
    # Decode explicitly as UTF-8 so the prompt text does not depend on the
    # platform's default encoding (the files this notebook writes are UTF-8 too).
    with open(f"../prompts/{prompt_name}.md", "r", encoding="utf-8") as file:
        return file.read()


def check_search_relevance(search_results: Dict[str, Any]) -> RelevanceCheckOutput:
    """
    Ask the LLM which of the supplied search results are relevant.

    Args:
        search_results: Search results to assess; handed to the
            ``relevance_check`` prompt as ``input_search_results``.

    Returns:
        RelevanceCheckOutput produced by the structured-output model call.
    """
    system_prompt = load_prompt("relevance_check")
    template = ChatPromptTemplate.from_messages([("system", system_prompt)])

    # Binding the schema makes the model answer as a RelevanceCheckOutput instance.
    structured_model = ChatOpenAI(model="gpt-4.1-mini").with_structured_output(RelevanceCheckOutput)

    chain = template | structured_model
    chain_input = {'input_search_results': search_results}
    return chain.invoke(chain_input)



# Accumulate, across every search term, the results the LLM marked as relevant.
relevant_results = []
for search_term in search_terms:
    python_results = search_serper(search_term)
    results = check_search_relevance(python_results)

    # The model reports IDs as strings (ResultRelevance.id is a str).
    relevant_ids = [selected.id for selected in results.relevant_results]

    # Keep only the original results whose stringified ID was selected.
    filtered_results = [hit for hit in python_results if str(hit['id']) in relevant_ids]

    relevant_results += filtered_results
  


In [9]:
len(relevant_results)

15

## Scrape & Convert to Markdown

In [10]:
def convert_html_to_markdown(html_content):
    """
    Turn an HTML document into markdown-flavoured plain text.

    Headings, links, bold/italic spans and list items are replaced in the
    parsed tree by their markdown text (in that order); the remaining markup
    is then reduced to its text content.

    Args:
        html_content: HTML markup to convert.

    Returns:
        The converted text, stripped, with whitespace-only gaps between
        newlines collapsed to a single blank line.
    """
    document = BeautifulSoup(html_content, 'html.parser')

    # Headings: the digit in "hN" gives the number of leading "#" characters.
    for heading in document.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        depth = int(heading.name[1])
        heading.replace_with(f"{'#' * depth} {heading.get_text()}\n\n")

    # Links: rewritten only when both a target and a label are present.
    for anchor in document.find_all('a'):
        target = anchor.get('href', '')
        label = anchor.get_text()
        if not target or not label:
            continue
        anchor.replace_with(f'[{label}]({target})')

    # Emphasis: bold tags first, then italic tags, each wrapped in its marker.
    for tag_names, marker in ((['b', 'strong'], '**'), (['i', 'em'], '*')):
        for node in document.find_all(tag_names):
            node.replace_with(f'{marker}{node.get_text()}{marker}')

    # Unordered lists become "- item" lines.
    for bullet_list in document.find_all('ul'):
        for item in bullet_list.find_all('li'):
            item.replace_with(f'- {item.get_text()}\n')

    # Ordered lists become "N. item" lines, numbered from 1 within each list.
    for numbered_list in document.find_all('ol'):
        for position, item in enumerate(numbered_list.find_all('li'), 1):
            item.replace_with(f'{position}. {item.get_text()}\n')

    # Flatten what is left to text and tidy the blank lines.
    flattened = document.get_text()
    return re.sub(r'\n\s*\n', '\n\n', flattened).strip()

# Credential for the scraping service; stop immediately if it is absent or empty.
SCRAPING_API_KEY = os.getenv("SCRAPING_API_KEY")
if SCRAPING_API_KEY is None or SCRAPING_API_KEY == "":
    raise EnvironmentError("SCRAPING_API_KEY not found in environment variables.")

def scrape_and_save_markdown(relevant_results, *, timeout=120):
    """
    Scrapes HTML content from URLs in relevant_results and saves as markdown files.

    Args:
        relevant_results: List of dictionaries containing search results with URLs.
            Entries without a ``link`` key are skipped.
        timeout: Seconds to wait for the scraping service on each request.

    Returns:
        List of dictionaries containing markdown content and metadata
        (``url``, ``filepath``, ``markdown``, ``title``, ``id``), one per page
        that was fetched successfully.
    """
    import hashlib  # local import: only used for the filename fallback below

    # Create scraped_markdown directory if it doesn't exist
    pathlib.Path("../scraped_markdown").mkdir(parents=True, exist_ok=True)

    markdown_contents = []
    used_filenames = set()
    for result in relevant_results:
        if 'link' not in result:
            continue
        link = result['link']

        payload = {
            "api_key": SCRAPING_API_KEY,
            "url": link,
            "render_js": "true"
        }

        try:
            response = requests.get("https://scraping.narf.ai/api/v1/", params=payload, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable page must not abort the whole batch. Only the
            # exception type is printed: the message may embed the request URL,
            # which carries the API key as a query parameter.
            print(f"Failed to fetch {link}: {type(exc).__name__}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch {link}: Status code {response.status_code}")
            continue

        # Create filename from ID, or from a digest of the URL if no ID is available.
        # A digest is stable across runs, unlike the built-in hash() of a string.
        fallback_stem = hashlib.sha1(link.encode('utf-8')).hexdigest()[:16]
        stem = str(result.get('id', fallback_stem))

        # Result IDs restart at 1 for every search term, so merged result lists
        # repeat IDs; add a numeric suffix instead of overwriting an earlier page.
        filename = f"{stem}.md"
        duplicate = 1
        while filename in used_filenames:
            duplicate += 1
            filename = f"{stem}_{duplicate}.md"
        used_filenames.add(filename)
        filepath = os.path.join("../scraped_markdown", filename)

        # Convert HTML to markdown; undecodable bytes are replaced rather than
        # raising UnicodeDecodeError on pages that are not valid UTF-8.
        html = response.content.decode('utf-8', errors='replace')
        markdown_content = convert_html_to_markdown(html)

        # Save markdown content to file
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(markdown_content)

        markdown_contents.append({
            'url': link,
            'filepath': filepath,
            'markdown': markdown_content,
            'title': result.get('title', ''),
            'id': result.get('id', '')
        })

    print(f"Successfully downloaded and saved {len(markdown_contents)} pages as markdown to scraped_markdown/")
    return markdown_contents

# Scrape every relevant search result; the returned list is the input to generate_summaries.
markdown_contents = scrape_and_save_markdown(relevant_results)

Successfully downloaded and saved 15 pages as markdown to scraped_markdown/


In [12]:
markdown_contents

[{'url': 'https://www.reddit.com/r/dotnet/comments/1jytpnv/agentic_ai_coding_and_net_am_i_missing_something/',
  'filepath': '../scraped_markdown/5.md',
  'markdown': 'Agentic AI coding and .NET - am I missing something? : r/dotnet\n\n[\nSkip to main content\n\n](#main-content)\n\nAgentic AI coding and .NET - am I missing something? : r/dotnet\n\n### We value your privacy\n\n          Reddit and its partners use cookies and similar technologies to provide you with a better experience. By accepting all cookies, you agree to our use of cookies to deliver and maintain our services and site, improve the quality of Reddit, personalize Reddit content and advertising, and measure the effectiveness of advertising. By rejecting non-essential cookies, Reddit may still use certain cookies to ensure the proper functionality of our platform.\n          For more information, please see our\n        [Cookie Notice](https://reddit.com/en-us/policies/cookies)\n        and our\n        [Privacy Policy](

## Generate Summaries of Markdown

In [13]:
def generate_summaries(markdown_contents):
    """
    Generates summaries for markdown content using the gpt-4.1-mini chat model.

    Args:
        markdown_contents: List of dictionaries containing markdown content and
            metadata; the keys ``markdown``, ``id``, ``url`` and ``filepath``
            are read.

    Returns:
        List of dictionaries containing summaries and URLs
        (``markdown_summary``, ``url``), one per page that was summarised
        successfully. Pages that fail are reported and skipped.
    """
    # Create markdown_summaries directory if it doesn't exist
    pathlib.Path("../markdown_summaries").mkdir(parents=True, exist_ok=True)

    # Load the summary prompt
    summary_prompt = load_prompt("summarise_markdown_page")

    # Create prompt template
    summary_template = ChatPromptTemplate.from_messages([
        ("system", summary_prompt)
    ])

    # Initialize LLM
    llm = ChatOpenAI(model="gpt-4.1-mini")
    summary_chain = summary_template | llm

    # Generate and save summaries
    summaries = []
    used_filenames = set()
    for content in markdown_contents:
        try:
            # Generate summary, limiting to first 2000 words
            summary = summary_chain.invoke({
                'markdown_input': ' '.join(content['markdown'].split()[:2000])
            })

            # Create filename for summary. IDs can repeat across pages (they are
            # assigned per search term), so add a suffix rather than overwrite
            # a summary written earlier in this run.
            stem = f"summary_{content['id']}"
            summary_filename = f"{stem}.md"
            duplicate = 1
            while summary_filename in used_filenames:
                duplicate += 1
                summary_filename = f"{stem}_{duplicate}.md"
            used_filenames.add(summary_filename)
            summary_filepath = os.path.join("../markdown_summaries", summary_filename)

            # Save summary to file
            with open(summary_filepath, 'w', encoding='utf-8') as f:
                f.write(summary.content)

            # Add to summaries list
            summaries.append({
                'markdown_summary': summary.content,
                'url': content['url']
            })

        except Exception as e:
            # Best effort: one failing page is reported and the rest continue.
            print(f"Failed to summarize {content.get('filepath')}: {str(e)}")

    # Report the number actually produced, not the number of inputs.
    print(f"Successfully generated summaries for {len(summaries)} pages in markdown_summaries/")
    return summaries


# Summarise every scraped page; the result is passed to the graph as its "summaries" input.
summaries = generate_summaries(markdown_contents)

Successfully generated summaries for 15 pages in markdown_summaries/


In [14]:
summaries

[{'markdown_summary': "## Detailed Summary of the Article\n\nThe article on Reddit discusses experiences and opinions related to Agentic AI coding tools and their performance when used with .NET technologies. It is centered around a post on the r/dotnet subreddit, initiated by a user named hades200082, who has been experimenting with AI coding tools like GitHub Copilot's new agent mode, IDEs such as Cursor and Windsurf, and plugins like RooCode/Cline. The user reports that these tools perform better with interpreted languages like JavaScript, Python, and PHP compared to .NET, noting higher inaccuracies when generating .NET code.\n\n### Key Points\n\n1. **Static vs. Dynamic Typing**: Commenters in the thread, including BramFokke, discuss how problems in static languages like C# manifest at build time, whereas in dynamically typed languages like JavaScript and Python, these issues emerge at runtime. CompassionateSkeptic adds that the complexity in the core feedback loop of Agentic AI sys

## LangGraph Email Summary & Review

### Graph State

In [15]:
# State schema the StateGraph is built from; the summariser and reviewer nodes read and update it.
class State(TypedDict):
    messages: Annotated[list, add_messages]  # conversation history; annotated with add_messages so node updates are merged through it
    summaries: List[dict]  # page summaries supplied when the graph is invoked
    approved: bool  # written by the reviewer node, read by the conditional edge
    created_summaries: Annotated[List[dict], Field(description="The summaries that have been created by the summariser")]  # written by the summariser node

# Builder for the summariser/reviewer graph; nodes and edges are added to it before compile().
graph_builder = StateGraph(State)

### Summariser Node

In [17]:
# Initialize components
# This chat model is shared by the summariser and reviewer chains.
llm = ChatOpenAI(model="gpt-4.1-mini")

In [18]:
# Load templates and prompts
# Decode explicitly as UTF-8 so the template reads the same on every platform;
# the files this notebook writes are UTF-8 as well.
with open("email_template.md", "r", encoding="utf-8") as f:
    email_template = f.read()

# Structured output schema for the summariser chain.
# The Field descriptions are runtime strings carried in the schema given to the model.
class SummariserOutput(BaseModel):
    email_summary: str = Field(description="The summary email of the content")
    message: str = Field(description="A message to the reviewer, asking for feedback on the summary")

# Prompt for the summariser: the "summariser" system prompt followed by the
# running conversation, injected through the {messages} placeholder.
summariser_prompt = ChatPromptTemplate.from_messages([
    ("system", load_prompt("summariser")),
    ("placeholder", "{messages}"),
])

# Chain that renders the prompt and returns a SummariserOutput instance.
llm_summariser = summariser_prompt | llm.with_structured_output(SummariserOutput)

def summariser(state: State):
    """
    Graph node: draft the summary email from the collected page summaries.

    Args:
        state: Current graph state; ``messages`` and ``summaries`` are read.

    Returns:
        State update with two new AI messages (the email draft and the note
        that accompanies it) and ``created_summaries`` holding the draft.
    """
    chain_input = {
        "messages": state["messages"],
        "list_of_summaries": state["summaries"],
        "input_template": email_template,
    }
    draft = llm_summariser.invoke(chain_input)

    email_message = AIMessage(content=draft.email_summary)
    reviewer_note = AIMessage(content=draft.message)
    return {
        "messages": [email_message, reviewer_note],
        "created_summaries": [draft.email_summary],
    }

### Reviewer Node

In [19]:
# Structured output schema for the reviewer chain.
class ReviewerOutput(BaseModel):
    approved: bool = Field(description="Whether the summary is approved or not")
    # The description is part of the schema given to the reviewer model, so it must
    # describe the reviewer's own message rather than the summariser's request.
    message: str = Field(description="Feedback from the reviewer to the summariser on the summary")

# Prompt for the reviewer: the "reviewer" system prompt followed by the
# conversation, injected through the {messages} placeholder.
reviewer_prompt = ChatPromptTemplate.from_messages([
    ("system", load_prompt("reviewer")),
    ("placeholder", "{messages}"),
])


# Chain that renders the prompt and returns a ReviewerOutput instance.
llm_reviewer = reviewer_prompt | llm.with_structured_output(ReviewerOutput)


def reviewer(state: State):
    """
    Graph node: review the summariser's draft and decide whether to approve it.

    The conversation is shown to the reviewer with the roles swapped, so the
    summariser's AI messages reach the reviewer model as human messages and
    earlier reviewer feedback reaches it as AI messages.

    Args:
        state: Current graph state; only ``messages`` is read. It is not modified.

    Returns:
        State update with the reviewer's feedback as a new human message and
        the ``approved`` flag.
    """
    # Convert AIMessages to HumanMessages and vice versa
    converted_messages = []
    for msg in state["messages"]:
        if isinstance(msg, AIMessage):
            converted_messages.append(HumanMessage(content=msg.content))
        elif isinstance(msg, HumanMessage):
            converted_messages.append(AIMessage(content=msg.content))
        else:
            # Any other message type is forwarded unchanged.
            converted_messages.append(msg)

    # Hand the swapped view straight to the chain instead of writing it back into
    # the incoming state: the swap is only meant for this one model call.
    reviewer_output = llm_reviewer.invoke({"messages": converted_messages})
    new_messages = [HumanMessage(content=reviewer_output.message)]
    return {"messages": new_messages, "approved": reviewer_output.approved}


### Build the Graph

In [20]:
def conditional_edge(state: State) -> Literal["summariser", END]:
    """
    Choose the step that follows the reviewer.

    Args:
        state: Current graph state; only ``approved`` is read.

    Returns:
        ``END`` when the summary was approved, otherwise ``"summariser"`` so a
        new draft is produced.
    """
    return END if state["approved"] else "summariser"


# Create and configure the graph:
# START -> summariser -> reviewer, then conditional_edge picks either END or
# another pass through the summariser.
graph_builder.add_node("summariser", summariser)
graph_builder.add_node("reviewer", reviewer)
graph_builder.add_edge(START, "summariser")
graph_builder.add_edge("summariser", "reviewer")
graph_builder.add_conditional_edges('reviewer', conditional_edge)

# Compile the graph (it is invoked in a later cell)
graph = graph_builder.compile()

In [22]:
from IPython.display import Image, display

# Best-effort rendering of the graph diagram: any failure is printed rather than
# raised (the recorded run shows a read timeout against mermaid.ink).
try:
    display(Image(graph.get_graph().draw_mermaid_png()))
except Exception as e:
    print(e)

HTTPSConnectionPool(host='mermaid.ink', port=443): Read timed out. (read timeout=10)


### Execute the Graph!

In [23]:
# Run the summariser/reviewer graph; only the page summaries are supplied as initial state.
output = graph.invoke({"summaries": summaries})

In [24]:
# Markdown is not imported anywhere else in this notebook (the import near the top
# is commented out and the later IPython import only brings in Image and display),
# so import it here to avoid a NameError.
from IPython.display import Markdown, display

# The summariser stores its latest draft as the last element of created_summaries.
final_summary = output["created_summaries"][-1]

display(Markdown(final_summary))


<html>
  <body>
    <h1>Big Company Updates 🏢</h1>
    <h2>Top Stories</h2>
    <ul>
      <li><strong>OpenAI's A-SWE Adventure 🚀:</strong> Meet A-SWE, OpenAI's new AI agent that might just step into the shoes of software engineers. This bold move sparks both excitement and concerns over future job dynamics. <a href="https://www.pymnts.com/artificial-intelligence-2/2025/openai-developing-ai-agent-to-replace-software-engineers-cfo-says/">Explore the full tale</a></li>
      <li><strong>Salesforce's AgentExchange 🎉:</strong> Salesforce unveils the first enterprise marketplace for AI! AgentExchange is your new go-to hub for AI applications, streamlining and spicing up AI agent development. <a href="https://www.linkedin.com/posts/salesforce_the-first-enterprise-marketplace-for-agentic-activity-7317577648111570944-3wW7">Check it out</a></li>
      <li><strong>New Era of Secure API 🌐:</strong> OpenAI introduces mandatory ID verification as a smart move to keep AI access secure and responsible. <a href="https://www.linkedin.com/pulse/ai-access-changing-why-openai-wants-see-your-id-chandrakumar-r-pillai-7aome">See the scoop</a></li>
      <li><strong>Meet GPT-4.1 🎈:</strong> Say hello to the enhanced GPT-4.1 series featuring improved coding abilities and a bigger context window. Ready to transform the AI scene! <a href="https://cryptobriefing.com/unveiling-gpt-4-1-series-enhancements/">Dive deeper</a></li>
    </ul>
    <h2>In-Depth Insights</h2>
    <h3>1. OpenAI's Development of A-SWE 🌟</h3>
    <p>OpenAI is pioneering with A-SWE, a brilliant AI capable of handling software engineering roles including app development and bug testing. While this offers significant business prospects, it also raises questions on its impact on job markets. <a href="https://www.pymnts.com/artificial-intelligence-2/2025/openai-developing-ai-agent-to-replace-software-engineers-cfo-says/">Uncover more</a>.</p>
    <h3>2. Salesforce's Shiny New AgentExchange 🛠️</h3>
    <p>Step into the future with Salesforce's AgentExchange, designed to seamlessly integrate AI into your business operations. This platform stands ready to accelerate AI solutions through a host of prebuilt tools and templates. <a href="https://www.linkedin.com/posts/salesforce_the-first-enterprise-marketplace-for-agentic-activity-7317577648111570944-3wW7">Discover the platform</a>.</p>
    <h3>3. Securing the Future of API Access 🔒</h3>
    <p>OpenAI is rolling out the "Verified Organization Program" — a creative initiative to ensure secure AI access and prevent misuse, aligning closely with international compliance standards. <a href="https://www.linkedin.com/pulse/ai-access-changing-why-openai-wants-see-your-id-chandrakumar-r-pillai-7aome">Learn more</a>.</p>
    <h1>Industry Trends 🌍</h1>
    <h2>Emerging Innovations</h2>
    <ul>
      <li><strong>Agentic AI Systems 🧠:</strong> From task-based support to full-blown AI-driven workflows! Agentic AI is revolutionizing software development by autonomously managing processes and boosting innovation. <a href="https://about.gitlab.com/the-source/ai/emerging-agentic-ai-trends-reshaping-software-development/">Delve deeper</a></li>
      <li><strong>Evaluating AI Behaviors 🧪:</strong> Microsoft jumps in with new metrics to keep tabs on AI systems’ performance. A crucial step in making AI reliable and effective across applications. <a href="https://techcommunity.microsoft.com/blog/azure-ai-services-blog/evaluating-agentic-ai-systems-a-deep-dive-into-agentic-metrics/4403923">Find out more</a></li>
      <li><strong>Privacy in a Digital Age 🔍:</strong> OpenAI’s CEO highlights the potential privacy risks of AI navigating the internet autonomously, urging the industry to ramp up security measures. <a href="https://www.windowscentral.com/software-apps/sam-altman-predicts-agentic-ai-clicking-around-the-internet">Read the discussion</a></li>
    </ul>
    <h2>Detailed Analysis</h2>
    <h3>1. Rising Stars in AI — Agentic Systems ⭐</h3>
    <p>Agentic AI is pushing boundaries in software development, taking us beyond simple coding assistance to managing entire workflows—making way for greater creativity and focus on strategic tasks. Insights from GitLab underscore the need for smooth integration to boost development efficiency. <a href="https://about.gitlab.com/the-source/ai/emerging-agentic-ai-trends-reshaping-software-development/">Find the full story</a>.</p>
    <h3>2. Microsoft’s Innovative Metrics in AI 🏆</h3>
    <p>Microsoft introduces cutting-edge metrics for AI system evaluation—keeping AI performances accountable and transparent through dimensions like Task Adherence and Intent Resolution, pivotal for trustworthy AI implementations. <a href="https://techcommunity.microsoft.com/blog/azure-ai-services-blog/evaluating-agentic-ai-systems-a-deep-dive-into-agentic-metrics/4403923">Explore more</a>.</p>
  </body>
</html>

## Send the Email

In [27]:
def send_email(email_content: str):
    """Send email using Sendinblue API

    Args:
        email_content: HTML body of the email.

    Returns:
        The API response on success, or ``None`` when the API call raised an
        ``ApiException`` (the error is printed).

    Raises:
        EnvironmentError: If ``SENDINGBLUE_API_KEY`` is not set.
    """
    # Fail early with a clear message instead of sending a request with no key.
    api_key = os.getenv("SENDINGBLUE_API_KEY")
    if not api_key:
        raise EnvironmentError("SENDINGBLUE_API_KEY not found in environment variables.")

    configuration = sib_api_v3_sdk.Configuration()
    configuration.api_key['api-key'] = api_key
    api_instance = sib_api_v3_sdk.TransactionalEmailsApi(sib_api_v3_sdk.ApiClient(configuration))

    recipients = [
        {"email": "anubhab13rta@gmail.com", "name": "Anubhab"},
        {"email": "devoscientist214@gmail.com", "name": "Devo Scientist"},
    ]
    # DESTINATION_EMAIL used to be read and then ignored; honour it by adding it to
    # the recipient list when it is set and not already present.
    receiver = os.getenv("DESTINATION_EMAIL")
    if receiver and all(entry["email"] != receiver for entry in recipients):
        recipients.append({"email": receiver})

    email_params = {
        "subject": "Daily AI Research Summary",
        "sender": {"name": "Anubhab", "email": "apikey214@gmail.com"},
        "html_content": email_content,
        "to": recipients,
        "params": {"subject": "Daily AI Research Summary"}
    }

    send_smtp_email = sib_api_v3_sdk.SendSmtpEmail(**email_params)

    try:
        api_response = api_instance.send_transac_email(send_smtp_email)
    except ApiException as e:
        # Delivery is best effort: report the failure without raising.
        print(f"Exception when calling SMTPApi->send_transac_email: {e}\n")
        return None

    print(api_response)
    return api_response


# Email the final draft produced by the graph run.
send_email(final_summary)

{'message_id': '<202504150904.46993324445@smtp-relay.mailin.fr>',
 'message_ids': None}
