In [2]:
pwd

'/Users/cohlem/Projects/OkProfessor/researcher/notebooks'

In [5]:
import sys
from pathlib import Path

# Make the `researcher` package importable without hard-coding a
# machine-specific absolute path. The notebook lives in
# <project>/researcher/notebooks, so the project root is two levels above
# the working directory.
# NOTE(review): assumes the kernel's working directory is the notebook's own
# directory -- confirm if the kernel is launched from elsewhere.
PROJECT_ROOT = str(Path.cwd().resolve().parent.parent)
if PROJECT_ROOT not in sys.path:  # keep the cell idempotent on re-run
    sys.path.append(PROJECT_ROOT)

In [77]:
from researcher.config import Config
from researcher.scraping.scrape import Scraper
from researcher.search.duckduckgo import Duckduckgo
from researcher.utils.functions import *


class Researcher:
    """Collect web context for a research query.

    The researcher expands the query into sub-queries, searches each one,
    scrapes the result pages that have not been visited yet, and accumulates
    the context extracted for every sub-query in ``self.context``.

    Attributes:
        query: The research question passed to the constructor.
        cfg: A ``Config`` instance; ``llm`` and
            ``max_search_results_per_query`` are read from it.
        agent, role: Set by ``run`` from the result of ``choose_agent``.
        visited_urls: Set of URLs already handed to the scraper.
        context: List with one entry per processed (sub-)query.
    """

    def __init__(self, query):
        self.query = query
        self.cfg = Config()
        self.agent = None
        self.role = None
        self.visited_urls = set()
        self.context = []

    async def run(self):
        """Run the research pipeline for ``self.query``.

        Picks an agent/role, asks for sub-queries, then searches, scrapes
        and extracts context for each sub-query and for the original query.

        Note: ``cfg.search_engine`` is not consulted here; DuckDuckGo is the
        only engine used by ``get_content_using_query``.

        Returns:
            ``self.context`` -- the accumulated context, one entry per
            processed query.
        """
        print(f'Running research for query: {self.query}')
        self.agent, self.role = await choose_agent(self.query, self.cfg.llm)
        print(f'Running {self.agent} ...')

        # Query modification: research the generated sub-queries and then
        # the original query itself.
        sub_queries = await get_sub_queries(self.query, self.role, self.cfg) + [self.query]

        for each_query in sub_queries:
            print(f'Running research for query {each_query}')
            content = await self.get_content_using_query(each_query)
            context = await get_similar_context(each_query, content)
            self.context.append(context)

        return self.context

    async def get_content_using_query(self, query):
        """Search for ``query`` and scrape the result pages not seen before.

        Args:
            query: The search string.

        Returns:
            Whatever ``Scraper.run()`` returns for the new URLs.
        """
        search_engine = Duckduckgo(query=query)
        hits = search_engine.search(max_results=self.cfg.max_search_results_per_query)

        # Keep only hits that actually carry a link; `.get` yields None otherwise.
        search_urls = [hit.get('href') for hit in hits]
        search_urls = [url for url in search_urls if url]

        # Filter out URLs that were already scraped for an earlier query.
        new_search_urls = await self.get_unique_urls(search_urls)

        content_scraper = Scraper(new_search_urls)
        return content_scraper.run()

    async def get_unique_urls(self, urls):
        """Return the URLs in ``urls`` that have not been visited yet.

        Every returned URL is added to ``self.visited_urls``, so a URL is
        reported as new only once per ``Researcher`` instance. Input order
        is preserved.

        Args:
            urls: Iterable of URL strings.

        Returns:
            List of previously unseen URLs.
        """
        new_urls = []
        for url in urls:
            if url not in self.visited_urls:
                print(f'✅ Adding url {url} to our research')
                new_urls.append(url)
                self.visited_urls.add(url)
        return new_urls
                
        

In [78]:
 
r = Researcher('investment in real estate and buying properties')

In [79]:
ans = await r.get_unique_urls(['https://www.businessinsider.com/how-to-find-good-deal-property-buy-real-estate-investing-2023-11?op=1',
 'https://time.com/personal-finance/article/how-to-invest-in-real-estate/',
 'https://www.investopedia.com/mortgage/real-estate-investing-guide/',
 'https://smartasset.com/investing/how-to-buy-your-first-investment-property',
 'https://www.forbes.com/sites/forbesbusinesscouncil/2023/10/30/exploring-the-pros-and-cons-of-real-estate-investment/'])

✅ Adding url https://www.businessinsider.com/how-to-find-good-deal-property-buy-real-estate-investing-2023-11?op=1 to our research
✅ Adding url https://time.com/personal-finance/article/how-to-invest-in-real-estate/ to our research
✅ Adding url https://www.investopedia.com/mortgage/real-estate-investing-guide/ to our research
✅ Adding url https://smartasset.com/investing/how-to-buy-your-first-investment-property to our research
✅ Adding url https://www.forbes.com/sites/forbesbusinesscouncil/2023/10/30/exploring-the-pros-and-cons-of-real-estate-investment/ to our research


In [80]:
ans = await r.get_unique_urls(['https://www.businessinsider.com/how-to-find-good-deal-property-buy-real-estate-investing-2023-11?op=1',
 'https://new_url.com/personal-finance/article/how-to-invest-in-real-estate/',
 'https://www.gg_url.com/mortgage/real-estate-investing-guide/',
 'https://smartasset.com/investing/how-to-buy-your-first-investment-property',
 'https://www.forbes.com/sites/forbesbusinesscouncil/2023/10/30/exploring-the-pros-and-cons-of-real-estate-investment/'])

✅ Adding url https://new_url.com/personal-finance/article/how-to-invest-in-real-estate/ to our research
✅ Adding url https://www.gg_url.com/mortgage/real-estate-investing-guide/ to our research


In [81]:
ans

['https://new_url.com/personal-finance/article/how-to-invest-in-real-estate/',
 'https://www.gg_url.com/mortgage/real-estate-investing-guide/']

In [16]:
# Inspect the configured number of search results requested per query.
r.cfg.max_search_results_per_query

5

In [7]:
 
r = Researcher('investment in real estate and buying properties')

response = await r.run()

print(response)



Running research for query: investment in real estate and buying properties
Running 🏢 Real Estate Agent ...
['Real estate market trends November 2023', 'Property value forecast 2024', 'Rental income potential in [specific location]', 'investment in real estate and buying properties']


In [23]:
response

['Real estate market trends November 2023',
 'Property value forecast 2024',
 'Rental income potential in [specific location]',
 'investment in real estate and buying properties']

In [10]:
from researcher.search.duckduckgo import Duckduckgo

In [11]:
search_engine = Duckduckgo('investment in real estate and buying properties')


In [18]:
search_urls = list(search_engine.search(max_results=r.cfg.max_search_results_per_query))

In [20]:
search_urls = [url['href'] for url in search_urls]

In [21]:
search_urls

['https://www.businessinsider.com/how-to-find-good-deal-property-buy-real-estate-investing-2023-11?op=1',
 'https://time.com/personal-finance/article/how-to-invest-in-real-estate/',
 'https://www.investopedia.com/mortgage/real-estate-investing-guide/',
 'https://smartasset.com/investing/how-to-buy-your-first-investment-property',
 'https://www.forbes.com/sites/forbesbusinesscouncil/2023/10/30/exploring-the-pros-and-cons-of-real-estate-investment/']

In [27]:
response

['Real estate market trends November 2023',
 'Property value forecast 2024',
 'Rental income potential in [specific location]',
 'investment in real estate and buying properties']

In [36]:
new = Duckduckgo('investment in real estate and buying properties')



In [38]:
list(new.search())

[{'title': 'How To Invest In Real Estate (2023) | TIME Stamped',
  'href': 'https://time.com/personal-finance/article/how-to-invest-in-real-estate/',
  'body': 'Buying and leasing out a rental property to short- or long-term tenants is a classic way to invest in real estate. A huge perk of being a landlord is that you can deduct many of the costs...'},
 {'title': 'How to Buy Your First Investment Property | SmartAsset',
  'href': 'https://smartasset.com/investing/how-to-buy-your-first-investment-property',
  'body': 'The first step in the process of buying an investment property is figuring out what type of property you want to purchase. Single-family homes typically require less low maintenance and may have higher potential, while multi-family homes offer the advantage of multiple income streams.'},
 {'title': 'Real-Estate Investing: How to Find a Great Deal on a Property',
  'href': 'https://www.businessinsider.com/how-to-find-good-deal-property-buy-real-estate-investing-2023-11?op=1

In [56]:
from researcher.scraping.scrape import Scraper

In [None]:
def get_unique_urls(urls, visited_urls=None):
    """Return the URLs in ``urls`` that have not been seen before.

    Args:
        urls: Iterable of URL strings.
        visited_urls: Optional set of already-seen URLs. It is updated in
            place with every URL that is returned, so passing the same set
            across calls de-duplicates across calls. When omitted, a fresh
            set is used and only duplicates within ``urls`` are removed.

    Returns:
        List of unseen URLs, in their original order.
    """
    if visited_urls is None:
        visited_urls = set()

    new_urls = []
    for url in urls:
        if url not in visited_urls:
            visited_urls.add(url)
            new_urls.append(url)
    return new_urls

In [63]:
async def get_content_using_query(query):
    """Search for ``query`` and scrape the result pages that are new.

    Relies on notebook-level state: the global ``r`` (for
    ``r.cfg.max_search_results_per_query``) and a module-level
    ``get_unique_urls`` helper.
    NOTE(review): ``get_unique_urls`` is called synchronously here -- confirm
    that the helper in scope is not a coroutine function.

    Args:
        query: The search string.

    Returns:
        Whatever ``Scraper.run()`` returns for the de-duplicated URLs.
    """
    search_engine = Duckduckgo(query = query)
    search_urls = search_engine.search(max_results = r.cfg.max_search_results_per_query)

    search_urls = [url.get('href') for url in search_urls]

    new_search_urls = get_unique_urls(search_urls)
    print(search_urls)

    # Scrape only the de-duplicated URLs; previously the filtered list was
    # computed and then ignored.
    scraper = Scraper(new_search_urls)
    content = scraper.run()

    return content
    
    

In [64]:
ans = get_content_using_query('Property value forecast 2024')

['https://www.realtor.com/research/2024-national-housing-forecast/', 'https://www.noradarealestate.com/blog/housing-prices-will-fall-in-2024/', 'https://www.housingwire.com/articles/heres-what-you-can-expect-from-2024-housing-market/', 'https://smartasset.com/data-studies/projected-home-price-increases-2024', 'https://fortune.com/2023/10/21/housing-market-2024-home-price-forecast-outlook-by-zillow/']


In [72]:
ans[4]

{'url': 'https://fortune.com/2023/10/21/housing-market-2024-home-price-forecast-outlook-by-zillow/',
 'raw_content': 'Where the housing market has Morgan Stanley only seeing pain for homebuyers ahead, Zillow keeps cutting its forecast. Here’s why\nThe housing market hasn’t seen a climate like this in a long time—but when exactly is a big question. To go by existing home sales, it hasn’t been this bad since 2010, the depths of the Great Recession, as a 15% collapse in September sent transactions to a 13-year low, a “deep freeze” that Zillow had warned about back in the spring. Other economists, such as Mark Fleming, chief economist at Fortune 500 financial services firm First American, and Jeseo Park, from Bank of America Research, see conditions that recall the “housing recession” of the 1980s. Morgan Stanley took the temperature of the market earlier this week and saw just more pain for homebuyers ahead: an up to 5% rise in home prices nationwide, reversing its previous call for price

In [53]:


for each_query in response:
    
    content = get_content_using_query(each_query)
    
    relevant_context = 
    self.context.append() #all relevant context
    
    

In [41]:
len(results)

5

In [25]:
search_urls

{'https://smartasset.com/investing/how-to-buy-your-first-investment-property',
 'https://time.com/personal-finance/article/how-to-invest-in-real-estate/',
 'https://www.businessinsider.com/how-to-find-good-deal-property-buy-real-estate-investing-2023-11?op=1',
 'https://www.forbes.com/sites/forbesbusinesscouncil/2023/10/30/exploring-the-pros-and-cons-of-real-estate-investment/',
 'https://www.investopedia.com/mortgage/real-estate-investing-guide/'}

In [29]:
gg = {'a': 1, 'b': 2}

In [32]:
ans = gg.get('c')