In [1]:
import os
import sys

# Location of the OkProfessor checkout that provides the `researcher` package.
# Set the OKPROFESSOR_ROOT environment variable to point somewhere else;
# the original hard-coded path remains the default so existing setups keep working.
PROJECT_ROOT = os.environ.get('OKPROFESSOR_ROOT', '/Users/cohlem/Projects/OkProfessor/')

# Guarded so that re-running this cell does not append the same entry twice.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [24]:
import asyncio
import time
from concurrent.futures import ThreadPoolExecutor
from researcher.config import Config
from researcher.search.duckduckgo import Duckduckgo
from researcher.utils.functions import * 
from researcher.retriever.langchain_hybrid_retriever import HybridRetriever
from researcher.scraping.scrape import Scraper
from researcher.context.chunking import Chunking


class Researcher:
    """Runs a web-research pipeline for one query and produces a report.

    For the original query and each generated sub-query the pipeline
    searches DuckDuckGo, scrapes result URLs that have not been visited yet,
    chunks the scraped text, and keeps the chunks a hybrid retriever ranks as
    most similar to the query. The collected context is then passed to
    ``generate_report``.

    Attributes:
        query: The research question supplied by the caller.
        cfg: A ``Config`` instance; this class reads ``llm``,
            ``max_search_results_per_query``, ``chunk_size``,
            ``chunk_overlap`` and ``max_chunks_per_query`` from it.
        agent: Agent name returned by ``choose_agent`` (``None`` until ``run``).
        role: Agent role prompt returned by ``choose_agent`` (``None`` until ``run``).
        visited_urls: URLs already accepted for scraping by this instance.
        context: One retrieved-context entry per processed query.
    """

    def __init__(self, query):
        self.query = query
        self.cfg = Config()
        self.agent = None
        self.role = None
        self.visited_urls = set()
        self.context = []

    async def run(self):
        """Run the full research pipeline and return the generated report.

        Returns:
            Whatever ``generate_report`` returns for the accumulated context.
        """
        # NOTE(review): cfg.search_engine is not consulted here; DuckDuckGo is
        # the only backend wired into get_content_using_query.
        print(f'📘 Starting research for query: {self.query}')
        self.agent, self.role = await choose_agent(self.query, self.cfg.llm)
        print(f'Running {self.agent} ...')

        # Query modification: research the generated sub-queries plus the original query.
        sub_queries = await get_sub_queries(self.query, self.role, self.cfg) + [self.query]

        # Queries are processed one after another; self.visited_urls is shared
        # between them so a page is scraped at most once per Researcher.
        for each_query in sub_queries:
            print(f'🔍 Searching web with query: {each_query}')
            content = await self.get_content_using_query(each_query)
            context = await self.get_similar_context(each_query, content)
            self.context.append(context)

        return await generate_report(self.context, self.query, self.role, self.cfg)

    async def get_content_using_query(self, query):
        """Search the web for ``query`` and scrape the not-yet-visited results.

        Args:
            query: Search string passed to DuckDuckGo.

        Returns:
            The result of ``Scraper(...).run()`` for the new URLs.
        """
        search_engine = Duckduckgo(query=query)
        search_results = search_engine.search(max_results=self.cfg.max_search_results_per_query)

        # Results are dict-like; 'href' may be absent, in which case .get() yields
        # None and get_unique_urls drops it.
        candidate_urls = [result.get('href') for result in search_results or []]

        # Filter out URLs that were already scraped for an earlier query.
        new_search_urls = await self.get_unique_urls(candidate_urls)

        return Scraper(new_search_urls).run()

    async def get_chunks(self, content):
        """Split scraped pages into chunks tagged with their source URL.

        Args:
            content: Iterable of mappings with ``'raw_content'`` and ``'url'`` keys.

        Returns:
            A flat list with the chunks of every page; ``[]`` when ``content``
            is empty or ``None``.
        """
        if not content:
            return []

        chunking = Chunking(self.cfg.chunk_size, self.cfg.chunk_overlap)

        chunks = []
        for each_content in content:
            chunks.extend(
                chunking.run(
                    content=each_content['raw_content'],
                    metadatas={'url': each_content['url']},
                )
            )
        return chunks

    async def get_unique_urls(self, urls):
        """Return the URLs not seen before, in order, and mark them visited.

        Falsy entries (``None`` or ``''``, e.g. a search result without an
        ``href``) are skipped so they never reach the scraper or
        ``visited_urls``.

        Args:
            urls: Candidate URLs, possibly containing duplicates or ``None``.

        Returns:
            List of newly accepted URLs.
        """
        new_urls = []
        for url in urls:
            if not url or url in self.visited_urls:
                continue

            print(f'✅ Adding url {url} to our research')

            new_urls.append(url)
            self.visited_urls.add(url)

        return new_urls

    async def get_similar_context(self, query, content):
        """Return the chunks of ``content`` most similar to ``query``.

        Args:
            query: The query used for similarity ranking.
            content: Scraped pages as accepted by ``get_chunks``.

        Returns:
            The result of ``HybridRetriever.get_context``; ``[]`` when there
            is nothing to rank.
        """
        chunks = await self.get_chunks(content)
        if not chunks:
            # Happens when every search result was already visited by an earlier
            # query. NOTE(review): skipping the retriever here assumes an empty
            # list is an acceptable context entry for generate_report — confirm.
            return []

        hybrid_retriever = HybridRetriever(chunks, max_results=self.cfg.max_chunks_per_query)
        return hybrid_retriever.get_context(query)

    

In [25]:
async def generate_report(context, question, agent_role, cfg):
    """Ask the chat model to write a report answering ``question``.

    Args:
        context: Retrieved context, embedded into the prompt by
            ``generate_report_prompt``.
        question: The research question the report must answer.
        agent_role: Text used as the system message.
        cfg: Configuration object; only ``cfg.llm`` is read, as the model.

    Returns:
        The value returned by ``create_chat_completion``.
    """
    # try and except block remaining
    system_message = {"role": "system", "content": f"{agent_role}"}
    user_message = {
        "role": "user",
        "content": f"task: {generate_report_prompt(question, context)}",
    }

    return await create_chat_completion(
        messages=[system_message, user_message],
        model=cfg.llm,
    )

In [41]:
# Preview the report prompt template with placeholder values
# (question='investing', context='context') to inspect the instructions sent to the model.
generate_report_prompt('investing', 'context')

'Information: """context"""\n\nUsing the above information, answer the following query or task: "investing" in a detailed report -- The report should focus on the answer to the query, should be well structured, informative, in depth and comprehensive, with facts and numbers if available and a minimum of 1000 words.\nYou should strive to write the report as long as you can using all relevant and necessary information provided.\nYou must write the report with markdown syntax.\n Use an unbiased and journalistic tone. \nYou MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.\nYou MUST write all used source urls at the end of the report as references, and make sure to not add duplicated sources, but only one reference for each.\nYou MUST write the report in apa format.\n Cite search results using inline notations. Only cite the most             relevant results that answer the query accurately. Place these c

In [26]:
parallel_r = Researcher('Investment strategies and buying rental properties')

In [27]:
# Hand-written list of queries. NOTE(review): `queries` is not referenced by any
# other cell visible in this notebook (Researcher.run generates its own sub-queries) —
# confirm whether it is still needed.
queries = ['Pros and cons of investing in rental properties', 'Best investment strategies for rental properties in 2023', 'Rental property market trends and analysis', 'Investment strategies and buying rental properties']

In [28]:

import psutil
import time



# Get the process ID (PID) of the current Python script
pid = psutil.Process().pid

# Start measuring CPU usage
psutil.cpu_percent(interval=None)

start_time = time.time()
parallel_ans = await parallel_r.run()
end_time = time.time()

print('time taken by the async parallel method is', end_time - start_time)


# Stop measuring CPU usage and get the average percentage during the task
cpu_usage = psutil.cpu_percent(interval=None)

print(f"Average CPU Usage during the CPU-intensive task: {cpu_usage}%")



📘 Starting research for query: Investment strategies and buying rental properties
Running 💰 Finance Agent ...
🔍 Searching web with query: Pros and cons of investing in rental properties
✅ Adding url https://learn.roofstock.com/blog/pros-and-cons-of-owning-rental-property to our research
✅ Adding url https://www.investopedia.com/articles/investing/051515/pros-cons-owning-rental-property.asp to our research
✅ Adding url https://listwithclever.com/real-estate-blog/pros-and-cons-of-investing-in-rental-properties/ to our research
✅ Adding url https://learn.roofstock.com/blog/is-rental-property-a-good-investment to our research
✅ Adding url https://www.forbes.com/sites/forbesbusinesscouncil/2023/12/04/rental-property-investing-in-2024-is-it-worth-it-for-smaller-investors-and-landlords/ to our research
🔍 Searching web with query: Best investment strategies for rental properties in 2023
✅ Adding url https://www.forbes.com/sites/forbesbusinesscouncil/2023/09/21/guide-to-rental-property-investin

In [29]:
parallel_ans

"# Report on Investment Strategies and Buying Rental Properties\n\n## Introduction\nInvesting in rental properties has been a popular strategy for individuals seeking to diversify their investment portfolios and generate passive income. This report aims to provide an in-depth analysis of investment strategies and considerations when buying rental properties. The report will cover various aspects such as the pros and cons of investing in rental properties, rental market trends, location considerations, financing options, and alternative investment strategies.\n\n## Pros and Cons of Investing in Rental Properties\nInvesting in rental properties offers several advantages, including the potential for regular rental income, property value appreciation, and tax benefits. Rental income can offset mortgage and managing expenses, leading to positive rental yield, which can be used for property renovation, portfolio diversification, or purchasing additional properties ([Investopedia](https://www

In [34]:
import markdown
from IPython.display import HTML, display

# Convert the markdown report text to HTML ...
html_output = markdown.markdown(parallel_ans)

# ... and render it inline in the notebook instead of showing the raw string.
display(HTML(html_output))

In [8]:
# NOTE(review): the recorded output below is a list of Document objects, while the
# `parallel_ans` cell above shows a report string. This cell was presumably executed
# (In [8]) when run() still returned the retrieved context — confirm or remove.
parallel_ans[0]

[Document(page_content='The Pros and Cons of Investing in Rental Properties\nLike any investment, it’s important that you understand the benefits and limitations of owning a rental property. When you recognize the pros and cons of rental property investment, you can calculate whether this investment option suits your personal circumstances and long-term financial goals.\nRental Income\nThe most immediate benefit of rental property investing is the income you derive from renters. Ideally, your rental income should offset the mortgage and/or managing expenses incurred by the rental property. If a rental property sustains positive rental yield, you can use the increased annual cash flow to renovate the property, purchase another property, or diversify your investment portfolio.\nProperty Value Growth', metadata={'url': 'https://listwithclever.com/real-estate-blog/pros-and-cons-of-investing-in-rental-properties/'}),
 Document(page_content='20 must-know pros and cons of owning rental proper

In [13]:
full_prompt = generate_report_prompt('investment strategies', parallel_ans)

In [36]:
import tiktoken


def num_tokens_from_string(string: str, model: str = "gpt-3.5-turbo") -> int:
    """Return the number of tokens in ``string`` for the given model.

    Args:
        string: Text to tokenize.
        model: Model name whose tokenizer should be used. Defaults to
            ``"gpt-3.5-turbo"``, the value that was previously hard-coded.

    Returns:
        Length of the token sequence produced by the model's encoding.
    """
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(string))

In [37]:
print(num_tokens_from_string(parallel_ans))

1025


In [39]:
0.0010*8 + 0.002

0.01

In [40]:
0.01*5

0.05

In [21]:
# NOTE(review): `agent_role` only exists as a parameter of generate_report, not as a
# notebook-level name, so this cell raises NameError (see the recorded output).
agent_role

NameError: name 'agent_role' is not defined

In [23]:
async def generate_report(context, question, agent_role, cfg=None):
    """Ask the chat model to write a report answering ``question``.

    This definition appears later in the notebook than the four-argument
    ``generate_report`` and therefore replaces it when cells run top to
    bottom. It accepts the same optional ``cfg`` so that
    ``Researcher.run`` (which passes ``self.cfg``) keeps working, while the
    original three-argument call form stays valid.

    Args:
        context: Retrieved context, embedded into the prompt by
            ``generate_report_prompt``.
        question: The research question the report must answer.
        agent_role: Text used as the system message.
        cfg: Optional configuration object; ``cfg.llm`` selects the model.
            When omitted a fresh ``Config()`` is used.

    Returns:
        The value returned by ``create_chat_completion``.
    """
    # The model used to come from an undefined module-level name `model`;
    # resolve it from configuration instead.
    if cfg is None:
        cfg = Config()

    # try and except block remaining
    response = await create_chat_completion(
        messages=[
            {"role": "system", "content": f"{agent_role}"},
            {"role": "user", "content": f"task: {generate_report_prompt(question, context)}"},
        ],
        model=cfg.llm,
    )

    return response

In [60]:
r = Researcher('Investment strategies and buying rental properties')

In [61]:
x = await r.run()

📘 Starting research for query: Investment strategies and buying rental properties
Running 💰 Finance Agent ...
🔍 Searching web with query: Best investment strategies for rental properties
✅ Adding url https://www.forbes.com/advisor/investing/rental-property-real-estate-investing/ to our research
✅ Adding url https://smartasset.com/investing/investing-in-rental-property-for-beginners to our research
✅ Adding url https://www.investopedia.com/articles/investing/090815/buying-your-first-investment-property-top-10-tips.asp to our research
✅ Adding url https://www.mashvisor.com/blog/rental-property-investment-strategy-definitive-guide/ to our research
✅ Adding url https://www.moneygeek.com/mortgage/resources/rental-property-investing/ to our research
🔍 Searching web with query: Pros and cons of buying rental properties as an investment
✅ Adding url https://learn.roofstock.com/blog/pros-and-cons-of-owning-rental-property to our research
✅ Adding url https://www.investopedia.com/articles/invest

In [38]:
x

[[Document(page_content="Rental Properties: Pros and Cons\nRental Properties: Pros and Cons\nPete Rathburn is a copy editor and fact-checker with expertise in economics and personal finance and over twenty years of experience in the classroom.\nOwning a rental property can be financially rewarding. If you're exploring this type of real estate as an investment, be aware of the risks and responsibilities.\nRental Properties: An Overview\nThe idea of buying a home or apartment to rent out for profit may sound alluring. But buying a rental property for income and long-term capital appreciation can have its ups and downs. For example, the housing market can fluctuate depending on location, supply and demand, and the economy.", metadata={'url': 'https://www.investopedia.com/articles/investing/051515/pros-cons-owning-rental-property.asp'}),
  Document(page_content='The Pros and Cons of Investing in Rental Properties\nLike any investment, it’s important that you understand the benefits and lim

In [37]:
# NOTE(review): the recorded IndexError means `x` had fewer than five elements when
# this ran; leftover probe of the per-query context list — fix the index or remove.
len(x[4])

IndexError: list index out of range

In [19]:
something = await r.get_content_using_query('Pros and cons of investing in rental properties')

✅ Adding url https://www.sofi.com/learn/content/the-pros-and-cons-of-owning-rental-property/ to our research


## Playing with asyncio

In [79]:
import asyncio

async def haha():
    """Print '1', sleep for two seconds, then print '2'."""
    print('1')
    await asyncio.sleep(2)
    print('2')


async def gg():
    """Show how a task created with ``create_task`` interleaves with its creator.

    ``haha`` is scheduled as a task first; it only starts running once this
    coroutine reaches its first ``await``. The recorded output order is
    A, 1, B, 2.
    """
    background = asyncio.create_task(haha())
    print('A')
    await asyncio.sleep(1)

    print('B')
    # Wait for the background task so its final print happens before gg() returns.
    await background

In [80]:
await gg()

A
1
B
2


In [81]:
import os
import psutil

# Get the 1-, 5- and 15-minute load averages; only the 15-minute value is used below.
load1, load5, load15 = psutil.getloadavg()

# Normalise the 15-minute load average by the number of CPUs and express it as a
# percentage. This is a load-average ratio, not an instantaneous CPU utilisation figure.
cpu_usage = (load15/os.cpu_count()) * 100

print("The CPU usage is : ", cpu_usage)


The CPU usage is :  18.609619140625


In [83]:
import psutil
import time

def cpu_intensive_task(iterations: int = 10**7) -> None:
    """Keep the CPU busy with a trivial computation repeated ``iterations`` times.

    Args:
        iterations: Number of loop iterations. Defaults to ``10**7``, the
            value that was previously hard-coded; pass a smaller number for
            quick experiments.
    """
    # Simulate a CPU-intensive task with a for loop
    for _ in range(iterations):
        _ = 2 ** 2  # Some simple computation

# Get the process ID (PID) of the current Python process.
# `pid` is not read again in this cell.
pid = psutil.Process().pid

# Start measuring CPU usage: this first call only primes psutil's counter.
# NOTE(review): psutil.cpu_percent is the module-level function, so the figure
# appears to be system-wide rather than specific to `pid` — confirm.
psutil.cpu_percent(interval=None)

# Perform CPU-intensive task
cpu_intensive_task()

# Stop measuring: with interval=None this reports utilisation since the priming call above
cpu_usage = psutil.cpu_percent(interval=None)

print(f"Average CPU Usage during the CPU-intensive task: {cpu_usage}%")


Average CPU Usage during the CPU-intensive task: 19.1%
