In [None]:
import os
from langchain_openai import AzureChatOpenAI
from langchain.chains import LLMMathChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.utilities import WikipediaAPIWrapper
from langchain.agents.agent_types import AgentType
from langchain.agents import Tool, initialize_agent
from dotenv import load_dotenv

# Populate os.environ via python-dotenv so that os.getenv("OPENAI_API_KEY")
# in the model setup below can find the API key.
load_dotenv()

In [None]:
# Azure OpenAI chat model shared by the chains and agents in this notebook.
# The resource endpoint and the deployment name are passed through the
# dedicated `azure_endpoint` / `azure_deployment` parameters instead of packing
# the complete ".../chat/completions?api-version=..." request URL into
# `openai_api_base`; the API version is supplied once via `openai_api_version`.
llm = AzureChatOpenAI(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    azure_endpoint="https://free-cdo.openai.azure.com",
    azure_deployment="cod-free-gpt4o",
    openai_api_version="2024-02-15-preview",
    openai_api_type="azure",
    temperature=0.2,
)

In [None]:
# Wikipedia lookup wrapped as an agent tool.
wikipedia = WikipediaAPIWrapper()

wikipedia_tool = Tool(
    name="Wikipedia",
    func=wikipedia.run,
    # Adjacent string literals are joined by the parser. A backslash
    # continuation *inside* a literal would instead copy the next line's
    # indentation into the description that is sent to the LLM.
    description=(
        "A useful tool for searching the Internet to find information on world events, "
        "issues, dates, years, etc. Worth using for general topics. Use precise questions."
    ),
)

In [None]:
# Chain that lets the LLM evaluate math expressions, exposed as a "Calculator" tool.
problem_chain = LLMMathChain.from_llm(llm=llm)

math_tool = Tool.from_function(
    name="Calculator",
    func=problem_chain.run,
    # Implicit literal concatenation keeps the description free of the
    # indentation whitespace a backslash continuation inside the string
    # would embed.
    description=(
        "Useful for when you need to answer questions about math. "
        "This tool is only for math questions and nothing else. Only input math expressions."
    ),
)

In [None]:
# Prompt for the reasoning tool. The line breaks (and the space that precedes
# each of them) are spelled out explicitly so the template text is unambiguous.
word_problem_template = (
    "You are a reasoning agent tasked with solving \n"
    "the user's logic-based questions. Logically arrive at the solution, and be \n"
    "factual. In your answers, clearly detail the steps involved and give the \n"
    "final answer. Provide the response in bullet points. \n"
    "Question {question} Answer"
)

# The template has a single placeholder: {question}.
math_assistant_prompt = PromptTemplate(
    template=word_problem_template,
    input_variables=["question"],
)

# Chain that fills the prompt with the question and sends it to the shared LLM.
word_problem_chain = LLMChain(prompt=math_assistant_prompt, llm=llm)

# Expose the chain to the agent as a tool for logic/reasoning questions.
word_problem_tool = Tool.from_function(
    func=word_problem_chain.run,
    name="Reasoning Tool",
    description="Useful for when you need to answer logic-based/reasoning questions.",
)

In [None]:
# Zero-shot ReAct agent that can pick between the Wikipedia, calculator and
# reasoning tools defined above.
agent = initialize_agent(
    tools=[wikipedia_tool, math_tool, word_problem_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
)

# The question is assembled from adjacent literals so that no source
# indentation ends up inside the prompt text.
print(
    agent.invoke(
        {
            "input": (
                "I have 3 apples and 4 oranges. I give half of my oranges "
                "away and buy two dozen new ones, along with three packs of "
                "strawberries. Each pack of strawberry has 30 strawberries. "
                "How many total pieces of fruit do I have at the end?"
            )
        }
    )
)

### Unrelated tests for web scraping...

In [None]:
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_community.document_transformers import BeautifulSoupTransformer

# Load HTML: fetch the page through AsyncChromiumLoader.
loader = AsyncChromiumLoader(urls=["https://www.wsj.com"])
html = loader.load()

In [None]:
# Transform: keep only the content of <span> tags from the loaded documents.
bs_transformer = BeautifulSoupTransformer()
docs_transformed = bs_transformer.transform_documents(
    html,
    tags_to_extract=["span"],
)

In [None]:
# Result: preview the first 500 characters of the first transformed document.
docs_transformed[0].page_content[:500]

In [None]:
from langchain_community.document_loaders import AsyncHtmlLoader

# Pages to download with AsyncHtmlLoader.
urls = [
    "https://www.espn.com",
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
]

loader = AsyncHtmlLoader(urls)
docs = loader.load()

In [None]:
from langchain_community.document_transformers import Html2TextTransformer

# Convert the downloaded HTML documents to plain text.
# The transformer is deliberately NOT bound to the name `html2text`: later
# cells import the `html2text` module and call `html2text.HTML2Text()`, so
# reusing that name here would break them whenever this cell is re-run after
# the import.
html2text_transformer = Html2TextTransformer()
docs_transformed = html2text_transformer.transform_documents(docs)
docs_transformed[0].page_content[0:500]

In [None]:
from langchain.tools import BaseTool
from langchain.agents import initialize_agent, AgentType
import requests
from bs4 import BeautifulSoup

In [None]:
# Rebinds `llm` for the web-browsing experiments; it targets the same Azure
# OpenAI deployment as the `llm` created earlier in the notebook.
# The resource endpoint and the deployment name are passed through the
# dedicated `azure_endpoint` / `azure_deployment` parameters instead of packing
# the complete ".../chat/completions?api-version=..." request URL into
# `openai_api_base`.
llm = AzureChatOpenAI(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    azure_endpoint="https://free-cdo.openai.azure.com",
    azure_deployment="cod-free-gpt4o",
    openai_api_version="2024-02-15-preview",
    openai_api_type="azure",
    temperature=0.2,
)

In [None]:
class WebBrowserTool(BaseTool):
    """Agent tool that downloads a web page and returns its visible text.

    The tool input is a URL. The page is fetched with ``requests``, parsed with
    BeautifulSoup, and the extracted text is truncated to ``max_chars``
    characters. Network failures and HTTP error statuses are reported as a
    message string rather than raised, so the calling agent receives an
    observation instead of an exception.
    """

    # BaseTool is a pydantic model; overridden fields carry explicit type
    # annotations, which pydantic v2 requires and pydantic v1 accepts.
    name: str = "Web Browser"
    description: str = "A tool for browsing and extracting information from websites."
    # Seconds to wait for the server before giving up (requests has no default
    # timeout and could otherwise block indefinitely).
    request_timeout: float = 30.0
    # Maximum number of characters of page text returned to the agent.
    max_chars: int = 5000

    def _run(self, url: str) -> str:
        """Fetch ``url`` and return at most ``max_chars`` characters of its text.

        Args:
            url: Address of the page to download; surrounding whitespace is
                stripped before the request is made.

        Returns:
            The page's text content (space-separated, stripped), truncated to
            ``max_chars`` characters, or a message starting with
            "Failed to retrieve the webpage" if the request failed or the
            server answered with an HTTP error status.
        """
        try:
            response = requests.get(url.strip(), timeout=self.request_timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            return f"Failed to retrieve the webpage: {exc}"

        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract text content from the page
        text_content = soup.get_text(separator=' ', strip=True)

        # Limit the content to a reasonable length
        return text_content[: self.max_chars]

    async def _arun(self, url: str) -> str:
        """Asynchronous execution is not supported by this tool.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("WebBrowserTool does not support async")

In [None]:
from langchain.agents import load_tools

# Tool set for the browsing agent: the built-in "llm-math" tool followed by
# the custom web browser tool.
web_browser = WebBrowserTool()
tools = [*load_tools(["llm-math"], llm=llm), web_browser]

In [None]:
from langchain.agents import initialize_agent, AgentType

# Chat-style zero-shot ReAct agent over the tool list built above.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [None]:
# Use the agent to browse a website and extract information.
# `invoke` is used instead of the deprecated `run`; it returns a dict whose
# "output" entry holds the agent's final answer, so `result` is still a string.
result = agent.invoke({"input": "Summarize the content of https://www.example.com"})["output"]
print(result)

In [None]:
# Zero-shot ReAct agent with all four tools: Wikipedia, calculator, reasoning
# and the custom web browser.
agent = initialize_agent(
    tools=[wikipedia_tool, math_tool, word_problem_tool, web_browser],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
)

# `invoke` is used instead of the deprecated `run`; the final answer is stored
# under the "output" key of the returned dict, so `result` is still a string.
result = agent.invoke({"input": "Summarize the content of https://www.example.com"})["output"]
print(result)


In [None]:
import requests
from bs4 import BeautifulSoup
import html2text

def scrape_and_convert_to_markdown(url: str, timeout: float = 30.0) -> str:
    """Download a web page and convert its HTML to Markdown.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout the request could block indefinitely.

    Returns:
        The page rendered as Markdown (links and images kept, ``<script>`` and
        ``<style>`` elements removed), or a
        "Failed to retrieve the webpage. Status code: ..." message when the
        response status is not 200.

    Raises:
        requests.RequestException: if the request itself fails (connection
            error, timeout, invalid URL, ...); these are not caught here.
    """
    # Send a GET request to the URL
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is reported as a message instead of being parsed.
    if response.status_code != 200:
        return f"Failed to retrieve the webpage. Status code: {response.status_code}"

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Drop elements whose content is not meant to be read as page text.
    for element in soup(["script", "style"]):
        element.decompose()

    # Convert HTML to Markdown, keeping links and images in the output.
    converter = html2text.HTML2Text()
    converter.ignore_links = False
    converter.ignore_images = False
    return converter.handle(str(soup))

# Example usage: convert one blog post to Markdown and print the result.
url = "https://www.vantage.sh/blog/azure-openai-vs-amazon-bedrock-cost"

markdown_result = scrape_and_convert_to_markdown(url=url)
print(markdown_result)

In [None]:
# Optionally, persist the Markdown produced above as UTF-8 text in output.md.
with open("output.md", mode="w", encoding="utf-8") as markdown_file:
    markdown_file.write(markdown_result)

Wrap the scraping logic above in a custom LangChain tool so that an agent can call it.

In [None]:
class WebScraperMarkdownTool(BaseTool):
    """Agent tool that downloads a web page and returns it as Markdown.

    The tool input is a URL. Every failure (non-200 status or any exception
    raised while fetching or converting) is returned as a message string, so
    the calling agent receives an observation instead of an exception.
    """

    # BaseTool is a pydantic model; overridden fields carry explicit type
    # annotations, which pydantic v2 requires and pydantic v1 accepts.
    name: str = "Web Scraper to Markdown"
    description: str = "Useful for scraping content from a website and converting it to markdown format. Input should be a valid URL."
    # Seconds to wait for the server before giving up (requests has no default
    # timeout and could otherwise block indefinitely).
    request_timeout: float = 30.0

    def _run(self, url: str) -> str:
        """Fetch ``url`` and convert the page's HTML to Markdown.

        Args:
            url: Address of the page to scrape; surrounding whitespace is
                stripped before the request is made.

        Returns:
            The Markdown text (links and images kept, ``<script>`` and
            ``<style>`` elements removed);
            "Failed to retrieve the webpage. Status code: ..." for a non-200
            response; or "An error occurred: ..." if anything raised.
        """
        try:
            # Send a GET request to the URL
            response = requests.get(url.strip(), timeout=self.request_timeout)

            # Anything other than 200 is reported instead of being parsed.
            if response.status_code != 200:
                return f"Failed to retrieve the webpage. Status code: {response.status_code}"

            # Parse the HTML content
            soup = BeautifulSoup(response.text, 'html.parser')

            # Drop elements whose content is not meant to be read as page text.
            for element in soup(["script", "style"]):
                element.decompose()

            # Convert HTML to Markdown, keeping links and images in the output.
            converter = html2text.HTML2Text()
            converter.ignore_links = False
            converter.ignore_images = False
            return converter.handle(str(soup))
        except Exception as e:
            # Deliberately broad: the error text becomes the tool's observation.
            return f"An error occurred: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Asynchronous execution is not supported by this tool.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("WebScraperMarkdownTool does not support async")
    

In [None]:
from langchain.agents import initialize_agent, AgentType

# Create an instance of our custom tool
web_scraper_tool = WebScraperMarkdownTool()

# Initialize the agent with our custom tool
tools = [web_scraper_tool]
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)

# Use the agent. `invoke` is used instead of the deprecated `run`; the final
# answer is stored under the "output" key, so `result` is still a string.
result = agent.invoke(
    {"input": "Get the Claude Sonnet price for 1000 output token, see https://www.vantage.sh/blog/azure-openai-vs-amazon-bedrock-cost"}
)["output"]
print(result)