# Importing the necessary libraries

In [1]:
import comet_ml
import os
import openai

# Store the three service credentials in the process environment.
# The "..." values are placeholders to be replaced with real keys.
os.environ.update(
    {
        "COMET_API_KEY": "...",
        "OPENAI_API_KEY": "...",
        "SERPAPI_API_KEY": "...",
    }
)

# Scraping the data 

In [2]:
import requests
from bs4 import BeautifulSoup

# URL of the Wikipedia page to scrape
url = 'https://en.wikipedia.org/wiki/MrBeast'

# Send a GET request to the URL. An explicit timeout keeps the cell from
# hanging indefinitely on a stalled connection (requests applies no timeout
# unless one is given).
response = requests.get(url, timeout=30)

# Fail loudly on an HTTP error status instead of silently saving and later
# embedding the body of an error page.
response.raise_for_status()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Extract all the text on the page
text = soup.get_text()

# Persist the scraped text to 'output.txt' so it can be inspected or reused
with open('output.txt', 'w', encoding='utf-8') as file:
    file.write(text)

# Load the document back from disk; `text` is what the following cells consume
with open('./output.txt', encoding='utf-8') as f:
    text = f.read()

# Data ingestion

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Splitter configuration: chunk_size=500 and chunk_overlap=100, both measured
# with the built-in len (i.e. in characters of the input string).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    length_function=len,
)

# Turn the single scraped string into a list of chunked documents
texts = text_splitter.create_documents(texts=[text])

# Embeddings model used to vectorise every chunk
embeddings = OpenAIEmbeddings()

# Build the Chroma vector store from the chunks and the embeddings model
db = Chroma.from_documents(documents=texts, embedding=embeddings)

# Tracking an agent with Comet ML

In [5]:
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.agents import initialize_agent, load_tools
from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler
from langchain.llms import OpenAI

# Comet callback that records this run under the
# "tracking-langchain-experiments" project with the "qa" tag.
comet_callback = CometCallbackHandler(
    complexity_metrics=True,
    project_name="tracking-langchain-experiments",
    stream_logs=True,
    tags=["qa"],
)

# Echo the run to stdout and log it to Comet at the same time
callbacks = [StdOutCallbackHandler(), comet_callback]

users_question = "Who is MrBeast? What is his age raised to the power of 0.54?"

# use our vector store to find similar text chunks.
# The LangChain vector store API takes the number of results as `k`;
# `n_results` is not a parameter of `similarity_search`, so the previous
# keyword did not request 5 chunks as intended.
results = db.similarity_search(
    query=users_question,
    k=5,
)

# define the prompt template
template = """

You are a chatbot who loves helping people! Given the context sections below, 
answer the question using only the context provided. If you're not sure 
and the answer not explicitly writting in the documentation,  
just say "Sorry, I don't know how to assist with this."

Context sections:
{context}

Question:
{users_question}

Answer:
"""

prompt = PromptTemplate(template=template, input_variables=["context", "users_question"])

# fill the prompt template.
# Pass only the text of each retrieved chunk: formatting the list of Document
# objects directly would embed their Python repr (class name, metadata,
# escaped newlines) in the prompt instead of readable context.
context_text = "\n\n".join(doc.page_content for doc in results)
prompt_text = prompt.format(context=context_text, users_question=users_question)

# ask the defined LLM
llm = OpenAI(temperature=1, callbacks=callbacks)

# Give the agent a web-search tool and a calculator tool
tools = load_tools(["serpapi", "llm-math"], llm=llm, callbacks=callbacks)
agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    callbacks=callbacks,
    verbose=True,
)
agent.run(prompt_text)

# Flush the collected data to Comet and finish the experiment
comet_callback.flush_tracker(agent, finish=True)

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in 'd:\\Videolar\\Tirendaz\\English\\LangChain\\Projects\\agent-with-tools' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/tirendaz-academy/tracking-langchain-experiments/ab9ed69730ef445980fea5a5c88e474e





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m MrBeast's age is likely mentioned somewhere in the context documents.
Action: Search
Action Input: "MrBeast age"[0m
Observation: [36;1m[1;3m25 years[0m
Thought:[32;1m[1;3m I can use the calculator to find the answer to the second part of the question.
Action: Calculator
Action Input: 25 ^ 0.54[0m
Observation: [33;1m[1;3mAnswer: 5.687057308780144[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: MrBeast is an American YouTuber and philanthropist born in May 7, 1998 and his age raised to the power of 0.54 is 5.687057308780144.[0m

[1m> Finished chain.[0m


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/tirendaz-academy/tracking-langchain-experiments/ab9ed69730ef445980fea5a5c88e474e
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     automated_readability_index_25% [4]   : (5.5, 61.7)
[1;38;5;39mCOMET INFO:[0m     automated_readability_index_50% [4]   : (5.5, 61.7)
[1;38;5;39mCOMET INFO:[0m     automated_readability_index_75% [4]   : (5.5, 61.7)
[1;38;5;39mCOMET INFO:[0m     automated_readability_index_count     : 1.0
[1;38;5;39mCOMET INFO:[0m     automated_readability_index_max [4]   : (

## Resources 

- [All You Need to Know to Build Your First LLM App](https://towardsdatascience.com/all-you-need-to-know-to-build-your-first-llm-app-eb982c78ffac)
- [Conversation QA Gradio](https://github.com/hwchase17/conversation-qa-gradio/tree/master)

Thanks for reading. Let's connect: [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [Twitter](http://twitter.com/tirendazacademy) | [LinkedIn](https://www.linkedin.com/in/tirendaz-academy) 😎