In [1]:
pip install requests beautifulsoup4 pandas openpyxl


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
pip install selenium pandas openpyxl


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://pharmatechexpo.com/ExhibitorsList"
resp = requests.get(url)
resp.raise_for_status()

soup = BeautifulSoup(resp.text, 'html.parser')
# Find list items under the exhibitor section
# They appear as "<li>Company Name</li>"
# Adjust selectors if nested within a specific container
exhibitor_items = soup.select("li")

names = []
for li in exhibitor_items:
    text = li.get_text(strip=True)
    # Simple filter: skip non-exhibitor list items
    if text and len(text) > 2 and not text.lower().startswith(('view','contact')):
        names.append(text)

df = pd.DataFrame({"Exhibitor": names})
df.to_excel("pharmatechexpo_exhibitors.xlsx", index=False)
print(f"Saved {len(names)} exhibitor names to Excel")


Saved 44 exhibitor names to Excel


In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd

# Setup headless Chrome
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)

# Visit main page
base_url = "https://pharmatechexpo.com/ExhibitorsList"
driver.get(base_url)
time.sleep(3)

# Collect company profile URLs
profile_links = []

# Find and loop through all filter buttons (A-Z, 0-9)
buttons = driver.find_elements(By.CSS_SELECTOR, "#myBtnContainer .btn-filter")
for button in buttons:
    letter = button.text.strip()
    print(f"🔤 Clicking: {letter}")
    
    try:
        driver.execute_script("arguments[0].click();", button)
        time.sleep(2)

        # Fetch all company cards
        companies = driver.find_elements(By.CSS_SELECTOR, "ul.list li a")
        for company in companies:
            name = company.text.strip()
            link = company.get_attribute("href")
            profile_links.append({
                "Company": name,
                "ProfileURL": link
            })
    except Exception as e:
        print(f"❌ Error on button {letter}: {e}")

print(f"✅ Total companies found: {len(profile_links)}")

# Extract data from each company profile
data = []

for item in profile_links:
    try:
        driver.get(item["ProfileURL"])
        time.sleep(1.5)

        contact_div = driver.find_element(By.CLASS_NAME, "col-md-4")
        lines = contact_div.text.strip().split("\n")

        contact = " | ".join([line for line in lines if "Mob" in line or "Tel" in line or "Email" in line])
        address = " ".join([line for line in lines if not any(x in line for x in ["Mob", "Tel", "Email"])])

        data.append({
            "Company": item["Company"],
            "Contact": contact,
            "Address": address,
            "ProfileURL": item["ProfileURL"]
        })

    except Exception as e:
        print(f"⚠️ Skipped {item['Company']}: {e}")
        data.append({
            "Company": item["Company"],
            "Contact": "",
            "Address": "",
            "ProfileURL": item["ProfileURL"]
        })

driver.quit()

# Save to Excel
df = pd.DataFrame(data)
df.to_excel("PharmaTech_Exhibitors_Full.xlsx", index=False)
print("✅ Done: Data saved to PharmaTech_Exhibitors_Full.xlsx")


🔤 Clicking: View all
🔤 Clicking: A
🔤 Clicking: B
🔤 Clicking: C
🔤 Clicking: D
🔤 Clicking: E
🔤 Clicking: F
🔤 Clicking: G
🔤 Clicking: H
🔤 Clicking: I
🔤 Clicking: J
🔤 Clicking: K
🔤 Clicking: L
🔤 Clicking: M
🔤 Clicking: N
🔤 Clicking: O
🔤 Clicking: P
🔤 Clicking: Q
🔤 Clicking: R
🔤 Clicking: S
🔤 Clicking: T
🔤 Clicking: U
🔤 Clicking: V
🔤 Clicking: W
🔤 Clicking: X
🔤 Clicking: Y
🔤 Clicking: Z
🔤 Clicking: 0-9
✅ Total companies found: 0
✅ Done: Data saved to PharmaTech_Exhibitors_Full.xlsx


In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.parse
import time

base_url = "https://pharmatechexpo.com"
list_url = f"{base_url}/ExhibitorsList"
headers = {'User-Agent': 'Mozilla/5.0'}

# Step 1: Get all company profile links
response = requests.get(list_url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')

company_links = []

# All <a> tags inside company cards
for a in soup.select('.portfolio-item a'):
    href = a.get('href')
    if href and "ExhibitorCompany" in href:
        full_url = urllib.parse.urljoin(base_url, href)
        company_links.append(full_url)

print(f"✅ Total companies found: {len(company_links)}")

# Step 2: Visit each link and extract data
data = []

for url in company_links:
    try:
        r = requests.get(url, headers=headers)
        s = BeautifulSoup(r.text, 'html.parser')

        name = s.find('h1').text.strip()
        address = s.find('h5', text="Address").find_next('p').text.strip()
        booth = s.find('h5', text="Booth Details").find_next('p').text.strip()
        website_tag = s.find('h5', text="Website")
        website = website_tag.find_next('p').text.strip() if website_tag else ""
        desc_tag = s.find('h5', text="Company Description")
        description = desc_tag.find_next('p').text.strip() if desc_tag else ""

        data.append({
            "Company Name": name,
            "Address": address,
            "Booth": booth,
            "Website": website,
            "Description": description,
            "URL": url
        })

        print(f"✅ Scraped: {name}")
        time.sleep(1)  # polite wait

    except Exception as e:
        print(f"❌ Failed on {url} | {e}")

# Step 3: Save to Excel
df = pd.DataFrame(data)
df.to_excel("PharmaTech_Exhibitors.xlsx", index=False)
print("✅ All data saved to 'PharmaTech_Exhibitors.xlsx'")


✅ Total companies found: 0
✅ All data saved to 'PharmaTech_Exhibitors.xlsx'
