In [1]:
from time import sleep
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import re
import pandas as pd
from bs4 import BeautifulSoup
from get_tenders import get_business_info

In [2]:
def extract_data(data, extra):
    # --- Expiry date ---
    expiry_match = re.search(r'Expire\s*:(\d{2}/\d{2}/\d{4})', extra)
    expiry_date = expiry_match.group(1) if expiry_match else None

    # --- Secteurs ---
    secteurs_match = re.search(r'Secteurs:(.*)', extra)
    secteurs = secteurs_match.group(1).strip() if secteurs_match else None

    # --- Publication date ---
    pub_match = re.search(r'Le\s*:\s*(\d{2}/\d{2}/\d{4})', extra)
    publication_date = pub_match.group(1) if pub_match else None

    # --- Country ---
    country_match = re.search(r'\|\s*([A-Za-zÀ-ÿ\s]+)\|', extra)
    country = country_match.group(1).strip() if country_match else None

    # Add back into dict
    data['country'] = country
    data['publication_date'] = publication_date
    data['expiry_date'] = expiry_date
    data['secteurs'] = secteurs

In [3]:
# Number of pages to scrape
page_number = 21
html_pages = []
offers = []
final_offer = []

print('\nStarting webdriver...')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))



Starting webdriver...


In [4]:
# Loop through pages
for n_page in range(1, page_number + 1):
    url = f'https://www.business-senegal.com/fre/opportunites/index/page:{n_page}'
    driver.get(url)

    print(f'\nCharging Page number: {n_page}...')

    # Wait until at least one offer box is present
    try:
        WebDriverWait(driver, 15).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "simple"))
        )
    except Exception as e:
        print(f"Timeout waiting for offers on page {n_page}: {e}")

    # Get rendered HTML
    html_page = driver.page_source
    soup_page = BeautifulSoup(html_page, "lxml")

    if ul_block := soup_page.find('ul', id='leadsCycleList2', class_='leadsCycleContainer'):
        html_pages.append(ul_block)
        print('\t--> Found <ul> leadCycleList...')

        for ul_block in html_pages:
            if offer_list := ul_block.find_all('li', class_='simple'):
                print('\t--> Found Offer <li>')
                for offer in offer_list:
                    offers.append(offer)
            else: print('\t--> Found no <li> offer...')
    else:
        print('\t--> Found no <ul> leadCycleList...')
    sleep(3)


Charging Page number: 1...
	--> Found <ul> leadCycleList...
	--> Found Offer <li>

Charging Page number: 2...
	--> Found <ul> leadCycleList...
	--> Found Offer <li>
	--> Found Offer <li>

Charging Page number: 3...
	--> Found <ul> leadCycleList...
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>

Charging Page number: 4...
	--> Found <ul> leadCycleList...
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>

Charging Page number: 5...
	--> Found <ul> leadCycleList...
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>

Charging Page number: 6...
	--> Found <ul> leadCycleList...
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>

Charging Page number: 7...
	--> Found <ul> leadCycleList...
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	--> Found Offer <li>
	

In [5]:
for offer in offers:
    data = {}
    titre_div = offer.find('div', class_='TitreOffre')
    if titre_div:
        type_span = titre_div.find('span', class_='type-offre')
        if type_span:
            data['offer_type'] = type_span.get_text(strip=True)
            spans = titre_div.find_all('span')
            if len(spans) > 1:
                data['description'] = spans[1].get_text(strip=True)

    bxInfo_block = offer.find('div', class_='bx-inf')
    if bxInfo_block:
        extra_info = bxInfo_block.get_text(strip=True)
        extract_data(data, extra_info)

        link = bxInfo_block.find('a', href=True)['href']
        url_base ="https://www.business-senegal.com"
        data['link'] = url_base+link
        get_business_info(data['link'], data)

    if data:
        final_offer.append(data)


Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profilsample/1/annonceur

Scraping: https://www.business-senegal.com/fre/Entreprises/profi

In [6]:
print(f'\nNumber of pages: {len(html_pages)}\n'
      f'Number of offers: {len(final_offer)}\n')


Number of pages: 21
Number of offers: 6693



In [7]:
driver.quit()

In [8]:
df = pd.DataFrame(final_offer)
print(f'Shape of the dataframe: {df.shape}')

Shape of the dataframe: (6693, 10)


In [9]:
df.head()

Unnamed: 0,offer_type,description,country,publication_date,expiry_date,secteurs,link,web_site,address,phones
0,Appel d’offre,TRAVAUX DE MAINTENANCE DE PERIMETRES IRRIGUES ...,Sénégal,10/01/2026,15/01/2026,"Maintenance et réparations,",https://www.business-senegal.com/fre/Entrepris...,/fre/Entreprises/profilent/_1,Liberté 6 extension\r\nLiberté 6 extension,"[2221776449596, +221 338243334, +221 ]"
1,Appel d’offre,SUPERVISION DES TRAVAUX DE CONSTRUCTION POUR L...,Sénégal,10/01/2026,15/01/2026,"Batiment,",https://www.business-senegal.com/fre/Entrepris...,/fre/Entreprises/profilent/_1,Liberté 6 extension\r\nLiberté 6 extension,"[2221776449596, +221 338243334, +221 ]"
2,Appel d’offre,TRAVAUX DE CONSTRUCTION DU CENTRE DE TRANSFORM...,Sénégal,10/01/2026,10/02/2026,"Batiment,Travaux publics,",https://www.business-senegal.com/fre/Entrepris...,/fre/Entreprises/profilent/_1,Liberté 6 extension\r\nLiberté 6 extension,"[2221776449596, +221 338243334, +221 ]"
3,Appel d’offre,ACQUISITION DE VEHICULES POUR LEXPLOITATION DE...,Sénégal,10/01/2026,29/01/2026,"Automobiles, roulants,",https://www.business-senegal.com/fre/Entrepris...,/fre/Entreprises/profilent/_1,Liberté 6 extension\r\nLiberté 6 extension,"[2221776449596, +221 338243334, +221 ]"
4,Appel d’offre,TRAVAUX DE MAINTENANCE DE PERIMETRES IRRIGUES ...,Sénégal,10/01/2026,15/01/2026,"Travaux publics,",https://www.business-senegal.com/fre/Entrepris...,/fre/Entreprises/profilent/_1,Liberté 6 extension\r\nLiberté 6 extension,"[2221776449596, +221 338243334, +221 ]"


In [10]:
print(f'List of countries: {df['country'].unique()}')
print(f'\nTypes of offers: {df["offer_type"].unique()}')

List of countries: ['Sénégal' 'Côte d Ivoire' 'Indonésie' 'France' 'Mali' 'Tunisie' 'Brésil'
 'Taïwan' 'Turquie' 'Maroc' 'Chine']

Types of offers: ['Appel d’offre' 'Appel manifestation / proposition'
 'Appel à consultation' 'Annonce Business' 'Offre de Services'
 'Passation de marché' 'Immobilier' 'Demande de Prix']


In [11]:
print(f'Data shape: {df[df["country"] == "Sénégal"].shape}')
df.to_excel('tenders.xlsx', index=False)

Data shape: (5781, 10)
