In [14]:
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
import re
from urllib.parse import urljoin
import time

print("Libraries imported successfully!")

Libraries imported successfully!


In [15]:
def scrape_10times_events(url="https://10times.com/usa/medical-pharma", timeout=30):
    """
    Scrape event listings from a 10times.com listing page.

    Args:
        url: Listing page to fetch. Defaults to the USA medical-pharma
            listing, so calling the function with no arguments behaves
            as before.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        A list of dictionaries, one per event card for which
        ``extract_event_data`` returned data. An empty list is returned
        when the request fails or any unexpected error occurs; the error
        is printed rather than raised.
    """

    # Headers to mimic a real browser
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    print(f"Fetching data from: {url}")

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Non-2xx answers (e.g. the "403 Forbidden" the site has returned
        # for this request) become a RequestException handled below.
        response.raise_for_status()
        print(f"Successfully fetched page (Status: {response.status_code})")

        soup = BeautifulSoup(response.content, 'html.parser')

        # Event rows are <tr> elements carrying an 'event-card' CSS class
        event_cards = soup.find_all('tr', class_=lambda x: x and 'event-card' in x)
        print(f"Found {len(event_cards)} event cards")

        events_data = []

        for i, card in enumerate(event_cards, 1):
            # One malformed card must not abort the whole run
            try:
                event_data = extract_event_data(card)
                if event_data:
                    events_data.append(event_data)
                    print(f"Processed event {i}: {event_data.get('event_name', 'Unknown')[:50]}...")
            except Exception as e:
                print(f"Error processing event {i}: {str(e)}")
                continue

        return events_data

    except requests.RequestException as e:
        print(f"Error fetching the page: {str(e)}")
        return []
    except Exception as e:
        # Top-level boundary for the notebook: report and return no data
        print(f"Unexpected error: {str(e)}")
        return []

In [16]:
def extract_event_data(card):
    """
    Extract event data from a single event card.

    Args:
        card: BeautifulSoup tag for one event row (a ``<tr>`` with the
            ``event-card`` class).

    Returns:
        A dict with the keys ``event_date``, ``event_link``,
        ``event_name``, ``venue_city``, ``description``,
        ``categories_tags`` and ``interested_count``. Fields that cannot
        be found are set to ``'N/A'`` (``'0'`` for the interested count).
        Returns ``None`` if an unexpected error occurs while parsing; the
        error is printed.
    """
    # Local import: the file only imports ``urljoin`` from urllib.parse
    from urllib.parse import urlparse

    event_data = {}

    try:
        # Event date lives in the td carrying the 'text-dark' class
        date_td = card.find('td', class_='text-dark')
        event_data['event_date'] = date_td.get_text(strip=True) if date_td else 'N/A'

        # Event link comes from onclick="window.open('URL')"; the name is
        # derived from the URL slug.
        event_data['event_link'] = 'N/A'
        event_data['event_name'] = 'N/A'
        clickable_td = card.find('td', {'onclick': True})
        if clickable_td:
            onclick_content = clickable_td.get('onclick', '')
            url_match = re.search(r"window\.open\(['\"]([^'\"]+)['\"]", onclick_content)
            if url_match:
                link = url_match.group(1)
                event_data['event_link'] = link
                # Use the last path segment only, so a trailing slash or a
                # query string does not produce an empty or garbage name.
                slug = urlparse(link).path.rstrip('/').rsplit('/', 1)[-1]
                if slug:
                    event_data['event_name'] = slug.replace('-', ' ').title()

        # Venue/city: prefer the link text inside the venue div
        venue_div = card.find('div', class_='venue')
        if venue_div:
            venue_a = venue_div.find('a')
            event_data['venue_city'] = (venue_a or venue_div).get_text(strip=True)
        else:
            event_data['venue_city'] = 'N/A'

        # Description. A multi-word ``class_`` string only matches when it
        # equals the whole class attribute, which fails for markup such as
        # class="small text-wrap text-break"; a CSS selector matches any
        # element carrying both classes.
        description_div = card.select_one('div.text-wrap.text-break')
        if description_div:
            event_data['description'] = description_div.get_text(strip=True)
        else:
            event_data['description'] = 'N/A'

        # Categories/tags: spans carrying both 'bg-light' and 'rounded'
        # (same exact-string pitfall as above), plus rel="nofollow" links.
        categories = [
            span.get_text(strip=True)
            for span in card.select('span.bg-light.rounded')
        ]
        categories.extend(
            link.get_text(strip=True)
            for link in card.find_all('a', {'rel': 'nofollow'})
        )
        event_data['categories_tags'] = ', '.join(categories) if categories else 'N/A'

        # Interested count: text of the 'xn' link inside the footer td
        footer_td = card.find('td', class_='tb-foot')
        interested_link = footer_td.find('a', class_='xn') if footer_td else None
        if interested_link:
            event_data['interested_count'] = interested_link.get_text(strip=True)
        else:
            event_data['interested_count'] = '0'

        return event_data

    except Exception as e:
        # Best-effort parsing: report the problem and let the caller skip
        print(f"Error extracting event data: {str(e)}")
        return None

In [17]:
def save_to_csv(events_data, filename='10times_medical_pharma_events.csv'):
    """
    Write the scraped events to a CSV file and preview them.

    Args:
        events_data: Sequence of event dictionaries keyed by the column
            names listed below.
        filename: Destination path of the CSV file.

    Returns:
        A pandas DataFrame built from ``events_data`` on success, or
        ``None`` when there is nothing to save or when writing fails
        (the error is printed, not raised).
    """
    if not events_data:
        print("No data to save!")
        return None

    # Column order of the CSV file
    column_order = [
        'event_date',
        'event_name',
        'event_link',
        'venue_city',
        'description',
        'categories_tags',
        'interested_count',
    ]

    try:
        with open(filename, 'w', newline='', encoding='utf-8') as out_file:
            csv_writer = csv.DictWriter(out_file, fieldnames=column_order)
            csv_writer.writeheader()
            for record in events_data:
                csv_writer.writerow(record)

        row_count = len(events_data)
        print(f"Successfully saved {row_count} events to {filename}")

        # Quick look at the data as a DataFrame
        frame = pd.DataFrame(events_data)
        print("\nPreview of the data:")
        print(frame.head())
    except Exception as exc:
        print(f"Error saving to CSV: {str(exc)}")
        return None

    return frame

In [20]:
# Main execution: scrape the listing, save it to CSV and print a summary.
if __name__ == "__main__":
    print("Starting 10times.com Medical & Pharma Events Scraper")
    print("=" * 50)

    # Single source of truth for the output path used below
    output_file = '10times_medical_pharma_events.csv'

    # Scrape the events
    events = scrape_10times_events()

    if events:
        print(f"\nSuccessfully scraped {len(events)} events!")

        # Save to CSV
        df = save_to_csv(events, output_file)

        if df is not None:
            print("\nScraping completed successfully!")
            print(f"Data saved to: {output_file}")
            print(f"Total events scraped: {len(events)}")

            # Missing fields are stored as the string 'N/A', never NaN, so
            # presence has to be tested against that placeholder;
            # ``notna()`` would count every row.
            with_venue = (df['venue_city'] != 'N/A').sum()
            with_description = (df['description'] != 'N/A').sum()
            with_categories = (df['categories_tags'] != 'N/A').sum()

            # Computed outside the f-string: a backslash inside an f-string
            # expression is a SyntaxError before Python 3.12.
            interested_total = (
                df['interested_count']
                .astype(str)
                .str.extract(r'(\d+)', expand=False)
                .fillna('0')
                .astype(int)
                .sum()
            )

            # Display summary statistics
            print("\nSummary:")
            print(f"- Events with venue information: {with_venue}")
            print(f"- Events with descriptions: {with_description}")
            print(f"- Events with categories: {with_categories}")
            print(f"- Total interested count: {interested_total}")
        else:
            print("Failed to save data to CSV!")
    else:
        print("No events were scraped. Please check the website structure or your internet connection.")

    print("\n" + "=" * 50)
    print("Scraping process completed!")

Starting 10times.com Medical & Pharma Events Scraper
Fetching data from: https://10times.com/usa/medical-pharma
Error fetching the page: 403 Client Error: Forbidden for url: https://10times.com/usa/medical-pharma
No events were scraped. Please check the website structure or your internet connection.

Scraping process completed!
Error fetching the page: 403 Client Error: Forbidden for url: https://10times.com/usa/medical-pharma
No events were scraped. Please check the website structure or your internet connection.

Scraping process completed!


In [19]:
# Optional: Test with a small sample first (uncomment to use)
# def test_scraper():
#     """
#     Test function to debug the scraper with sample HTML
#     """
#     sample_html = '''
#     <tr class="row py-2 mb-3 bg-white deep-shadow event-card event_189360">
#       <td class="col-12 text-dark" data-localizers="ignore" data-start-date="2025/10/16"
#           data-status="active" data-date-format="default" style="line-height: 1.2;"
#           data-time-diff="-313">
#         Sun, 12 – Thu, 16 Oct 2025
#       </td>
#       <td class="col-12 c-ga cursor-pointer text-break show-related" data-id="189360"
#           onclick="window.open('https://10times.com/ecs-meetings-chicago')">
#         <div class="col-12 mb-2">
#           <div class="small fw-500 venue">
#             <a class="text-dark text-decoration-none" href="https://10times.com/chicago-us/medical-pharma">
#               Chicago
#             </a>
#           </div>
#         </div>
#         <div class="col-12 mt-3">
#           <div class="small text-wrap text-break" style="color:#5e5e5e; line-height:1.2;">
#             ECS Meetings bring together global scientists, engineers, and industry leaders...
#           </div>
#         </div>
#       </td>
#       <td class="col-12 small text-muted-new mb-2" style="line-height:1.2;">
#         <span class="d-inline-block small me-2 p-1 lh-1 bg-light rounded">Conference</span>
#         <span class="d-inline-block small me-2 p-1 lh-1 bg-light rounded">Medical & Pharma</span>
#       </td>
#       <td class="col-12 mt-3 mb-1 tb-foot">
#         <div class="d-flex justify-content-between align-items-center">
#           <a class="fw-500 text-decoration-none mx-2 xn" 
#              href="https://10times.com/ecs-meetings-chicago/visitors"
#              target="_blank" rel="noreferrer">
#             6
#           </a>
#         </div>
#       </td>
#     </tr>
#     '''
#     
#     soup = BeautifulSoup(sample_html, 'html.parser')
#     card = soup.find('tr', class_=lambda x: x and 'event-card' in x)
#     
#     if card:
#         result = extract_event_data(card)
#         print("Test result:", result)
#         return result
#     else:
#         print("No card found in test HTML")
#         return None

# Uncomment the line below to run the test
# test_scraper()