In [3]:
import requests
from bs4 import BeautifulSoup

import csv

# Event landing pages to scrape, visited in the order listed here
event_urls = [
    "https://www.fabricconf.com/#!/",
    "https://events.crowdstrike.com/crowdtournyc/",
    "https://www.blackhat.com/us-24/",
    "https://www.acs.org/events/all-events/acs-on-campus-at-university-of-groningen.html",
    "https://www.acs.org/events/all-events/acs-on-campus-at-universidad-andrs-bello.html",
]

# Function to scrape event data from a given URL
def _first_text(soup, default, *args, **kwargs):
    """Return the text of the first ``soup.find(...)`` match, or *default*."""
    node = soup.find(*args, **kwargs)
    return node.text if node is not None else default


def _all_texts(soup, css_class):
    """Return the text of every element whose ``class`` matches *css_class*."""
    return [node.text for node in soup.find_all(attrs={'class': css_class})]


def scrape_event(url, timeout=30):
    """Download an event page and pull a fixed set of fields out of its HTML.

    The selectors are generic guesses (a ``<title>`` tag, a ``<date>`` tag and
    a handful of CSS class names); any field whose selector matches nothing
    falls back to a default, so the returned dict always has the same keys.

    Args:
        url: Address of the event page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A dict with the keys 'Event Name', 'Event Date(s)', 'Location',
        'Website URL', 'Description', 'Key Speakers', 'Agenda/Schedule',
        'Registration Details', 'Pricing', 'Categories' and 'Audience type'.
        Single-valued fields are strings ('N/A' when absent, except
        'Audience type' which defaults to 'student'); 'Key Speakers',
        'Agenda/Schedule' and 'Categories' are lists of strings.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the timeout expires.
    """
    # Without a timeout a single unresponsive host would block the run forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Each node is looked up once; a missing node yields the default instead
    # of an AttributeError, so no partial dict can ever be returned.
    return {
        'Event Name': _first_text(soup, 'N/A', 'title'),
        'Event Date(s)': _first_text(soup, 'N/A', 'date'),
        'Location': _first_text(soup, 'N/A', attrs={'class': 'location'}),
        'Website URL': url,
        'Description': _first_text(soup, 'N/A', attrs={'class': 'description'}),
        'Key Speakers': _all_texts(soup, 'speaker-name'),
        'Agenda/Schedule': _all_texts(soup, 'agenda-item'),
        'Registration Details': _first_text(
            soup, 'N/A', attrs={'class': 'registration-details'}
        ),
        'Pricing': _first_text(soup, 'N/A', attrs={'class': 'pricing'}),
        'Categories': _all_texts(soup, 'category'),
        'Audience type': _first_text(
            soup, 'student', attrs={'class': 'audience-type'}
        ),
    }

# List to store all events data
events_data = []

# Scrape data for each event. This is the top-level boundary of the script,
# so any failure on one URL is reported and the remaining URLs still run.
for url in event_urls:
    try:
        event_data = scrape_event(url)
        events_data.append(event_data)
    except Exception as e:
        print(f"Error scraping {url}: {e}")

# Save data to CSV
if events_data:
    # Build the header from every row (first-seen order) rather than only the
    # first one, so a row with an extra key cannot make DictWriter raise.
    fieldnames = list(
        dict.fromkeys(key for event in events_data for key in event)
    )
    # Explicit UTF-8 keeps non-ASCII titles readable regardless of the
    # platform's default encoding (pandas.read_csv assumes UTF-8).
    with open('events_data.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(events_data)
else:
    # Nothing was scraped: indexing events_data[0] would raise IndexError and
    # leave an empty file behind, so report the situation instead.
    print("No events were scraped; events_data.csv was not written.")


In [2]:
# Read back the 'events_data.csv' file that the scraping cell writes and show
# it as a DataFrame (the bare trailing expression is what a notebook renders).
# NOTE(review): the table pasted below has an 'Unnamed: 0' column and 'Adult'
# audience values, which the scraping code in this file does not produce --
# presumably it comes from an earlier run; confirm before relying on it.
import pandas as pd
pd.read_csv('events_data.csv')

Unnamed: 0,Event Name,Event Date(s),Location,Website URL,Description,Key Speakers,Agenda/Schedule,Registration Details,Pricing,Categories,Audience type
0,,,,https://www.fabricconf.com/#!/,,[],[],,,[],Adult
1,crowdstrike.com,,,https://events.crowdstrike.com/crowdtournyc/,,[],[],,,[],Adult
2,Black Hat USA 2024,,,https://www.blackhat.com/us-24/,,[],[],,,[],Adult
3,ACS on Campus at University of Groningen - Ame...,,,https://www.acs.org/events/all-events/acs-on-c...,,[],[],,,[],Adult
4,ACS on Campus at Universidad Andrés Bello - Am...,,,https://www.acs.org/events/all-events/acs-on-c...,,[],[],,,[],Adult
