In [1]:
%load_ext autoreload
%autoreload 2

In [22]:
from src.web_tasks.scraper import BusinessListingScraper
from contracts.business_listings.prompt_contract import ContentCleanPrompt, InformationExtractionPrompt
from src.genai_assistance.content_processor import GenAIBusinessListingProcessor
from webdriver_manager.chrome import ChromeDriverManager
import os
from dotenv import load_dotenv
from openai import AsyncOpenAI
from src.config import OPENAIVARS
import json

In [3]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

In [4]:
openai = AsyncOpenAI()
openai_model = OPENAIVARS.open_ai_model


### Define fixed variables

In [5]:
ROOT_DIR = os.path.dirname(os.path.abspath("__file__")) 
STORED_DATA_PATH = os.path.join(ROOT_DIR, "data", "listings_output.json") #PreScraped content

### Scrape Data

In [7]:
DRIVER_PATH = ChromeDriverManager().install()

scraper = BusinessListingScraper(search_page_url="https://uk.businessesforsale.com/uk/search/businesses-for-sale",
                                max_pages=1,
                                driver_path=DRIVER_PATH,
                                stored_page_content_path=STORED_DATA_PATH #this will bypass the scraping and pull from stored data
                                )

listings_output = scraper.extract_listings_and_page_content()

### Augment listings 
 - removing noise from content
 - extract information in a structured format
 - generate a brief analysis of business opportunity

In [18]:
listing_processor = GenAIBusinessListingProcessor(cleaning_prompts=[ContentCleanPrompt.systemp_prompt, 
                                                                    ContentCleanPrompt.user_prompt],
                                                  extraction_prompts=[InformationExtractionPrompt.system_prompt,
                                                                      InformationExtractionPrompt.user_prompt],
                                                  openai_instance=openai,
                                                  openai_model=openai_model)

async def run_listings():
    return await listing_processor.process_listings(listings_output)

genai_augmented_listings = await run_listings()

In [23]:
with open( os.path.join(ROOT_DIR, "data", "genai_extracted_info.json"), "w", encoding="utf-8") as f:
    json.dump(genai_augmented_listings, f, indent=4, ensure_ascii=False)  

In [43]:
with open( os.path.join(ROOT_DIR, "data", "genai_extracted_info.json"), "r") as f:
    genai_augmented_listings_ = json.load(f)  

In [63]:
shortlisted = []

for listing in genai_augmented_listings_:
    try:
        if listing["extracted_info"]["Recommendation"] == "Shortlist":
            shortlisted.append(listing)
    except KeyError:
        print(listing)
        pass

shortlisted

{'name': 'The Fat Pizza - Pizza Takeaway Franchise in Warwick', 'url': 'https://uk.businessesforsale.com/uk/franchises/opportunities/the-fat-pizza-franchise?location=Warwick', 'raw_content': 'The Fat Pizza - Pizza Takeaway Franchise available in Warwick, Pizza Takeaway franchises\nClose\nChoose your country\nArgentina\nAustralia\nBrazil\nBulgaria\nCanada\nChina\nCyprus\nFrance\nGermany\nGreece\nIndia\nIndonesia\nIreland\nIsrael\nMalaysia\nMexico\nNew Zealand\nPoland\nPortugal\nSingapore\nSouth Africa\nSpain\nSweden\nThailand\nUAE\nUK\nUSA\nOr view all businesses for sale\nWorldwide\nuk\nPromote Your Franchise\nBack to Search\nFranchise ID:\nFRN8382\nHome\nFood Franchises\nPizza Franchises\nThe Fat Pizza Franchise\nThe Fat Pizza - Pizza Takeaway Franchise\nJoin one of the UK’s fastest-growing and in-demand pizza & delivery operations!\nTerritory Information\nLocation:\nWarwick\nFranchise Fee:\n£12,000\nLiquid Capital:\n£30,000\nRequest Free Information\nOther Areas Available\nThroughout

[{'name': 'Hair Repair Club Franchise In United Kingdom',
  'url': 'https://uk.businessesforsale.com/uk/hair-repair-club-franchise-in-united-kingdom.aspx',
  'raw_content': "Buy a Hair Repair Club Franchise In United Kingdom\nClose\nChoose your country\nArgentina\nAustralia\nBrazil\nBulgaria\nCanada\nChina\nCyprus\nFrance\nGermany\nGreece\nIndia\nIndonesia\nIreland\nIsrael\nMalaysia\nMexico\nNew Zealand\nPoland\nPortugal\nSingapore\nSouth Africa\nSpain\nSweden\nThailand\nUAE\nUK\nUSA\nOr view all businesses for sale\nWorldwide\nuk\nSell Your Business\nAdvertisement\narrow_back\nSearch more Hair Salons For Sale in the UK\nListing ID:\n3778218\nHair Repair Club Franchise In United Kingdom For Sale\nUnited Kingdom\n,\nUK\nNew\nAsking Price:\n£30,000\nTurnover:\n£500K - £1M\nNet Profit:\n£100K - £250K\nzoom_in\nzoom_in\nzoom_in\n+ 1 images\nUnlock Unprecedented Profits with Gorilla Hair Repair Club Franchise\nWelcome to Gorilla Hair Repair Club, where exclusive partnerships fuel unparallel