In [22]:
import os
from langchain_openai import AzureChatOpenAI
from langchain.chains import LLMMathChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.agents.agent_types import AgentType
from langchain.agents import Tool, initialize_agent
from dotenv import load_dotenv

load_dotenv()

True

In [23]:
# Chat model shared by the chains and agents below; the API key is read from the environment.
llm = AzureChatOpenAI(
    openai_api_type="azure",
    openai_api_version="2024-02-15-preview",
    openai_api_base="https://free-cdo.openai.azure.com/openai/deployments/cod-free-gpt4o/chat/completions?api-version=2024-02-15-preview",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.2,
)

In [24]:
# Wikipedia lookup, exposed to the agent as a single-input tool.
wikipedia = WikipediaAPIWrapper()

wikipedia_tool = Tool(
    name="Wikipedia",
    func=wikipedia.run,
    # Adjacent string literals are concatenated by the parser. A backslash
    # continuation *inside* one literal would instead embed the source
    # indentation in the description text the LLM reads.
    description=(
        "A useful tool for searching the Internet to find information on world events, "
        "issues, dates, years, etc. Worth using for general topics. Use precise questions."
    ),
)

In [25]:
# Chain that evaluates math expressions with the help of the LLM.
problem_chain = LLMMathChain.from_llm(llm=llm)

math_tool = Tool.from_function(
    name="Calculator",
    func=problem_chain.run,
    # Adjacent literals keep source indentation out of the description the
    # LLM reads; "input math" was previously run together as "inputmath".
    description=(
        "Useful for when you need to answer questions about math. "
        "This tool is only for math questions and nothing else. Only input math expressions."
    ),
)

In [26]:
# Prompt for the free-form reasoning tool; {question} is the only placeholder.
word_problem_template = """You are a reasoning agent tasked with solving 
the user's logic-based questions. Logically arrive at the solution, and be 
factual. In your answers, clearly detail the steps involved and give the 
final answer. Provide the response in bullet points. 
Question {question} Answer"""

math_assistant_prompt = PromptTemplate(
    template=word_problem_template,
    input_variables=["question"],
)

# Chain that pairs the prompt above with the shared chat model.
word_problem_chain = LLMChain(prompt=math_assistant_prompt, llm=llm)

# Expose the chain to the agent as a tool.
word_problem_tool = Tool.from_function(
    func=word_problem_chain.run,
    name="Reasoning Tool",
    description="Useful for when you need to answer logic-based/reasoning questions.",
)

  warn_deprecated(


In [27]:
# ReAct agent that chooses between the Wikipedia, calculator and reasoning tools.
agent = initialize_agent(
    tools=[wikipedia_tool, math_tool, word_problem_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,  # print the thought / action / observation trace
    handle_parsing_errors=True,
)

# The question is built from adjacent string literals so that the source
# indentation does not end up inside the prompt (a backslash continuation
# inside a single literal would embed it).
print(
    agent.invoke(
        {
            "input": (
                "I have 3 apples and 4 oranges. I give half of my oranges "
                "away and buy two dozen new ones, along with three packs of "
                "strawberries. Each pack of strawberry has 30 strawberries. "
                "How many total pieces of fruit do I have at the end?"
            )
        }
    )
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to determine the total number of fruits after performing the given actions. I'll break down the problem step by step.

1. Start with 3 apples and 4 oranges.
2. Give away half of the oranges.
3. Buy two dozen new oranges.
4. Buy three packs of strawberries, each containing 30 strawberries.

First, I'll calculate the number of oranges after giving away half and then adding two dozen new ones. Then, I'll calculate the total number of strawberries. Finally, I'll sum all the fruits together.

Action: Calculator
Action Input: 4 / 2[0m
Observation: [33;1m[1;3mAnswer: 2.0[0m
Thought:[32;1m[1;3mI now know that half of the oranges (4) is 2. So, after giving away half, I have 2 oranges left. Next, I need to add two dozen (24) new oranges to these 2 oranges.

Action: Calculator
Action Input: 2 + 24[0m
Observation: [33;1m[1;3mAnswer: 26[0m
Thought:[32;1m[1;3mI now know that after giving away half of the oranges and buyi

### Unrelated tests for web scraping...

In [4]:
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer

# Load HTML
# `html` holds whatever `load()` returns for the single URL; it is the input
# to the BeautifulSoup transformer in the next cell.
loader = AsyncChromiumLoader(["https://www.wsj.com"])
html = loader.load()

In [6]:
# Transform
# Run the loaded documents through BeautifulSoupTransformer, asking it to
# extract only <span> tags.
bs_transformer = BeautifulSoupTransformer()
docs_transformed = bs_transformer.transform_documents(html, tags_to_extract=["span"])

In [7]:
# Result
# Preview the first 500 characters of the first transformed document.
docs_transformed[0].page_content[0:500]

''

In [8]:
from langchain_community.document_loaders import AsyncHtmlLoader

# Pages to download with AsyncHtmlLoader.
urls = ["https://www.espn.com", "https://lilianweng.github.io/posts/2023-06-23-agent/"]

# `docs` is what `load()` returns for these URLs; a later cell converts it to text.
loader = AsyncHtmlLoader(urls)
docs = loader.load()

Fetching pages: 100%|##########| 2/2 [00:06<00:00,  3.42s/it]
  def _replace_charref(s):
  def _replace_charref(s):


In [10]:
from langchain_community.document_transformers import Html2TextTransformer

# Named `html2text_transformer` rather than `html2text` so it cannot shadow the
# `html2text` module that later cells import and call (`html2text.HTML2Text()`).
# With the old name, re-running this cell after that import broke those calls.
html2text_transformer = Html2TextTransformer()
docs_transformed = html2text_transformer.transform_documents(docs)

# Preview the first 500 characters of the first converted document.
docs_transformed[0].page_content[0:500]

'Skip to main content  Skip to navigation\n\n<\n\n>\n\nMenu\n\n## ESPN\n\n  *   *   *   * scores\n\nNEW! Find where to watch all of your favorite sports!\n\n  * NFL\n  * NBA\n  * MLB\n  * NCAAF\n  * NHL\n  * Soccer\n  * WNBA\n  * More Sports\n\n    * Boxing\n    * CFL\n    * NCAA\n    * Cricket\n    * F1\n    * Golf\n    * Horse\n    * LLWS\n    * MMA\n    * NASCAR\n    * NBA G League\n    * NBA Summer League\n    * NCAAM\n    * NCAAW\n    * NWSL\n    * Olympics\n    * PLL\n    * Professional Wrestling\n    * Racing\n    * RN BB\n    * RN'

In [11]:
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
import requests
from bs4 import BeautifulSoup

In [15]:
# Chat model used by the web-scraping agents below; the API key is read from the environment.
llm = AzureChatOpenAI(
    openai_api_type="azure",
    openai_api_version="2024-02-15-preview",
    openai_api_base="https://free-cdo.openai.azure.com/openai/deployments/cod-free-gpt4o/chat/completions?api-version=2024-02-15-preview",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.2,
)

In [16]:
class WebBrowserTool(BaseTool):
    """Agent tool that downloads a web page and returns its text content.

    The page is fetched with ``requests`` and parsed with BeautifulSoup; the
    extracted text is truncated before it is returned.
    """

    # Annotated so the overrides are declared as typed fields rather than
    # bare class attributes.
    name: str = "Web Browser"
    description: str = "A tool for browsing and extracting information from websites."

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return up to 5000 characters of its text.

        Args:
            url: Address of the page to download.

        Returns:
            The page text (whitespace-stripped, joined with single spaces),
            cut to 5000 characters. If the request fails, times out, or the
            server answers with an HTTP error status, an
            ``"An error occurred: ..."`` message is returned instead of
            raising, so the caller gets a string in every case.
        """
        try:
            # Without a timeout, requests waits indefinitely on a silent host.
            response = requests.get(url, timeout=10)
            # Turn 4xx/5xx answers into an exception handled just below.
            response.raise_for_status()
        except requests.RequestException as exc:
            return f"An error occurred: {str(exc)}"

        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract text content from the page
        text_content = soup.get_text(separator=' ', strip=True)

        # Limit the content to a reasonable length
        return text_content[:5000]  # Adjust as needed

    async def _arun(self, url: str) -> str:
        """Async entry point; not implemented for this tool."""
        raise NotImplementedError("WebBrowserTool does not support async")

In [19]:
from langchain.agents import load_tools

# Instantiate the custom browsing tool and append it to the tools that
# `load_tools` returns for "llm-math".
web_browser = WebBrowserTool()
tools = load_tools(["llm-math"], llm=llm) + [web_browser]

In [20]:
from langchain.agents import initialize_agent, AgentType

# Chat-style ReAct agent over the `tools` list built in the previous cell.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

  warn_deprecated(


In [21]:
# Use the agent to browse a website and extract information.
# `invoke` is used instead of the deprecated `run`; it returns a dict, and the
# final answer string is stored under the "output" key.
result = agent.invoke({"input": "Summarize the content of https://www.example.com"})["output"]
print(result)

  warn_deprecated(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: Summarize the content of https://www.example.com
Thought: I need to visit the website to gather information for the summary.
Action:
```
{
  "action": "Web Browser",
  "action_input": "https://www.example.com"
}
```[0m
Observation: [33;1m[1;3mExample Domain Example Domain This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission. More information...[0m
Thought:[32;1m[1;3mThe content of https://www.example.com is a placeholder website used for illustrative purposes in documents. It states that the domain is available for use in literature without needing prior coordination or permission.

Final Answer: The content of https://www.example.com is a placeholder website used for illustrative purposes in documents, and it can be used without prior coordination or permission.[0m

[1m> Finished chain.[0m
The content of 

In [31]:
# Agent with all four tools: Wikipedia, calculator, reasoning and the web browser.
agent = initialize_agent(
    tools=[wikipedia_tool, math_tool, word_problem_tool, web_browser],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
)

# Alternative prompt (the fruit-counting question), kept for reference:
# result = agent.invoke({"input": "I have 3 apples and 4 oranges. I give half of my oranges \
#                away and buy two dozen new ones, alongwith three packs of \
#                strawberries. Each pack of strawberry has 30 strawberries. \
#                How  many total pieces of fruit do I have at the end?"})
# print(result)

# `invoke` is used instead of the deprecated `run`; it returns a dict, and the
# final answer string is stored under the "output" key.
result = agent.invoke({"input": "Summarize the content of https://www.example.com"})["output"]
print(result)


In [35]:
import requests
from bs4 import BeautifulSoup
import html2text

def scrape_and_convert_to_markdown(url, timeout=10):
    """Download a web page and convert its HTML to Markdown.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so an unresponsive server cannot block the call
            forever.

    Returns:
        The page converted to Markdown (links and images kept) when the
        server answers with status 200; otherwise the string
        ``"Failed to retrieve the webpage. Status code: <code>"``.

    Raises:
        Whatever ``requests.get`` raises (e.g. on connection failure or
        timeout); those exceptions are not caught here.
    """
    # Send a GET request to the URL
    response = requests.get(url, timeout=timeout)

    # Anything other than a plain 200 is reported as a failure message.
    if response.status_code != 200:
        return f"Failed to retrieve the webpage. Status code: {response.status_code}"

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Remove <script> and <style> elements before conversion
    for element in soup(["script", "style"]):
        element.decompose()

    # Convert HTML to Markdown, keeping links and images
    converter = html2text.HTML2Text()
    converter.ignore_links = False
    converter.ignore_images = False
    return converter.handle(str(soup))

# Example usage
url = "https://www.vantage.sh/blog/azure-openai-vs-amazon-bedrock-cost"

# Scrape the article and print the whole Markdown conversion.
markdown_result = scrape_and_convert_to_markdown(url)
print(markdown_result)

[ NEW Vantage Launches Usage-Based Reporting -> ](/blog/vantage-launches-
usage-based-reporting)

[ ![](/images/nav-logo.svg) ](/) [We're Hiring!](/careers)

  * Features expand_more
  * Integrations expand_more
  * Resources expand_more
  * [ Partners ](/partners)
  * [ Pricing ](/pricing)
  * [ Docs ](https://docs.vantage.sh/)

    * Visibility
    * [ article Cost Reporting Easily build complex reports ](/features/cost-reports)
    * [ ![](/images/icons/icon-k8.svg) Kubernetes Monitoring and efficiency metrics ](/features/kubernetes)
    * [ sell Virtual Tagging Custom cost allocation tags ](/features/virtual-tagging)
    * [ swap_horiz Network Flow Reports Network cost visibility ](/features/network-flow-reports)
    * [ account_tree Cost Allocation Organizational cost hierarchies ](/features/segments)
    * [ dialpad Budgeting Budgeting and budget alerts ](/features/budgets)
    * [ category Resource Reports Discover active resources ](/features/resource-inventory)
    * [ data_us

In [36]:
# Optionally, save the markdown content to a file
# ("w" mode: output.md is created, or overwritten if it already exists).
with open("output.md", "w", encoding="utf-8") as f:
    f.write(markdown_result)

Wrap the scraping function above in a LangChain tool so that an agent can call it.

In [37]:
class WebScraperMarkdownTool(BaseTool):
    """Agent tool that downloads a web page and returns it as Markdown."""

    # Annotated so the overrides are declared as typed fields rather than
    # bare class attributes.
    name: str = "Web Scraper to Markdown"
    description: str = "Useful for scraping content from a website and converting it to markdown format. Input should be a valid URL."

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return its content converted to Markdown.

        Args:
            url: Address of the page to download.

        Returns:
            The Markdown text (links and images kept) when the server answers
            with status 200; a ``"Failed to retrieve the webpage. ..."``
            message for any other status; or ``"An error occurred: ..."`` if
            anything raises. The method always returns a string.
        """
        try:
            # Without a timeout, requests waits indefinitely on a silent host.
            response = requests.get(url, timeout=10)

            # Anything other than a plain 200 is reported as a failure message.
            if response.status_code != 200:
                return f"Failed to retrieve the webpage. Status code: {response.status_code}"

            # Parse the HTML content
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove <script> and <style> elements before conversion
            for element in soup(["script", "style"]):
                element.decompose()

            # Convert HTML to Markdown, keeping links and images
            converter = html2text.HTML2Text()
            converter.ignore_links = False
            converter.ignore_images = False
            return converter.handle(str(soup))
        except Exception as e:
            # Deliberately broad: every failure is handed back as text.
            return f"An error occurred: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Async entry point; not implemented for this tool."""
        raise NotImplementedError("WebScraperMarkdownTool does not support async")
    

In [41]:
from langchain.agents import initialize_agent, AgentType

# Create an instance of our custom tool
web_scraper_tool = WebScraperMarkdownTool()

# Initialize the agent with our custom tool
tools = [web_scraper_tool]
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)

# Use the agent. `invoke` is used instead of the deprecated `run`; it returns
# a dict, and the final answer string is stored under the "output" key.
result = agent.invoke(
    {"input": "Get the Claude Sonnet price for 1000 output token, see https://www.vantage.sh/blog/azure-openai-vs-amazon-bedrock-cost"}
)["output"]
print(result)

The price for 1000 output tokens for Claude Sonnet is $0.015.
