In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import ace_tools_open as tools  # For displaying in chat

# URL of the Barbican Events Page
url = "https://www.barbican.org.uk/whats-on"

# Fetch the page content.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops on a 4xx/5xx response instead of silently parsing
# an error page into two empty tables.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Find all event listings (one <div class="views-row"> per listing card)
event_cards = soup.find_all("div", class_="views-row")

# all_events: one record per card, including its date (destined for PostgreSQL).
# unique_events: first record seen per (title, description) pair, without the
# date (destined for the vector DB); a dict gives O(1) duplicate checks.
all_events = []
unique_events = {}

for event in event_cards:
    # Extract the event date from the card's data-day attribute
    event_date = event.get("data-day", "N/A")

    # Extract title
    title_tag = event.find("h2", class_="listing-title listing-title--event")
    title = title_tag.text.strip() if title_tag else "N/A"

    # Extract category
    category_tag = event.find("span", class_="tag__plain")
    category = category_tag.text.strip() if category_tag else "N/A"

    # Extract description (nested inline tags are joined with single spaces)
    description_tag = event.find("div", class_="search-listing__intro")
    description = description_tag.get_text(separator=" ").strip() if description_tag else "N/A"

    # Extract pricing
    price_tag = event.find("div", class_="search-listing__label search-listing__label--promoted")
    price = price_tag.get_text(strip=True) if price_tag else "N/A"

    # Extract event URL.
    # .get() is used so an anchor without an href falls back to "N/A" rather
    # than raising KeyError and aborting the whole cell.
    link_tag = event.find("a", class_="button button--branded button--inline")
    href = link_tag.get("href") if link_tag else None
    event_url = f"https://www.barbican.org.uk{href}" if href else "N/A"

    # Store ALL events (with dates) for PostgreSQL
    all_events.append({
        "Date": event_date,
        "Title": title,
        "Category": category,
        "Description": description,
        "Price": price,
        "URL": event_url
    })

    # Store UNIQUE events (without dates) for Vector DB
    unique_key = (title, description)
    if unique_key not in unique_events:
        unique_events[unique_key] = {
            "Title": title,
            "Category": category,
            "Description": description,
            "Price": price,
            "URL": event_url
        }

# Convert the collected records to DataFrames
all_events_df = pd.DataFrame(all_events)  # For PostgreSQL
unique_events_df = pd.DataFrame(unique_events.values())  # For Vectorization

# Display both tables
tools.display_dataframe_to_user(name="All Barbican Events (PostgreSQL)", dataframe=all_events_df)
tools.display_dataframe_to_user(name="Unique Events (Vectorization)", dataframe=unique_events_df)


All Barbican Events (PostgreSQL)


Date,Title,Category,Description,Price,URL
Loading ITables v2.2.5 from the internet... (need help?),,,,,


Unique Events (Vectorization)


Title,Category,Description,Price,URL
Loading ITables v2.2.5 from the internet... (need help?),,,,


In [1]:
import logging
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Configure logging.
# The file handler is built explicitly so the log is always written as UTF-8:
# the messages logged by this notebook contain non-ASCII symbols, which fail to
# encode (and are lost) when the platform default encoding is not UTF-8.
# delay=True defers opening the file until the first record is emitted, so
# re-running this cell does not open an extra file handle when the root logger
# is already configured and basicConfig() is a no-op.
logging.basicConfig(
    handlers=[logging.FileHandler("extract.log", encoding="utf-8", delay=True)],
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# URL of the Barbican Events Page
url = "https://www.barbican.org.uk/whats-on"

# Column layouts of the two returned frames. They are applied to empty results
# as well, so callers can rely on the same columns whether or not rows exist.
_ALL_EVENT_COLUMNS = ["Date", "Title", "Category", "Description", "Price", "URL"]
_UNIQUE_EVENT_COLUMNS = ["Title", "Category", "Description", "Price", "URL"]


def _empty_event_frames():
    """Return the (all events, unique events) pair with zero rows but the usual columns."""
    return (
        pd.DataFrame(columns=_ALL_EVENT_COLUMNS),
        pd.DataFrame(columns=_UNIQUE_EVENT_COLUMNS),
    )


def _parse_event_card(event, base_url):
    """Build one event record (dict keyed by _ALL_EVENT_COLUMNS) from a listing card.

    Any field whose tag is missing from the card is reported as "N/A".
    """
    # Extract the event date from the card's data-day attribute
    event_date = event.get("data-day", "N/A")

    # Extract title
    title_tag = event.find("h2", class_="listing-title listing-title--event")
    title = title_tag.text.strip() if title_tag else "N/A"

    # Extract category
    category_tag = event.find("span", class_="tag__plain")
    category = category_tag.text.strip() if category_tag else "N/A"

    # Extract description (nested inline tags are joined with single spaces)
    description_tag = event.find("div", class_="search-listing__intro")
    description = description_tag.get_text(separator=" ").strip() if description_tag else "N/A"

    # Extract pricing
    price_tag = event.find("div", class_="search-listing__label search-listing__label--promoted")
    price = price_tag.get_text(strip=True) if price_tag else "N/A"

    # Extract event URL.
    # .get() avoids a KeyError (and the loss of the whole event) when the anchor
    # has no href; urljoin() resolves relative hrefs against the page URL and
    # leaves already-absolute hrefs intact instead of gluing a host in front.
    link_tag = event.find("a", class_="button button--branded button--inline")
    href = link_tag.get("href") if link_tag else None
    event_url = urljoin(base_url, href) if href else "N/A"

    return {
        "Date": event_date,
        "Title": title,
        "Category": category,
        "Description": description,
        "Price": price,
        "URL": event_url
    }


def extract_events():
    """Scrape event details from the Barbican website with error handling & logging.

    Fetches the module-level ``url``, parses every ``div.views-row`` card and
    collects the results into two tables.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(all_events_df, unique_events_df)``.
        ``all_events_df`` has one row per card with columns Date, Title,
        Category, Description, Price, URL. ``unique_events_df`` keeps the first
        row seen for each (Title, Description) pair and omits Date.
        If the request fails or the page contains no cards, both frames are
        empty (with the same columns) and the reason is logged.

    A card that raises while being parsed is logged with its traceback and
    skipped; the remaining cards are still processed.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raises HTTPError for bad responses (4xx, 5xx)
        soup = BeautifulSoup(response.text, "html.parser")
    except requests.exceptions.RequestException as e:
        logging.error(f"❌ Failed to fetch page: {e}")
        return _empty_event_frames()

    event_cards = soup.find_all("div", class_="views-row")

    if not event_cards:
        logging.warning("⚠️ No events found on the page.")
        return _empty_event_frames()

    all_events = []
    unique_events = {}

    for event in event_cards:
        try:
            record = _parse_event_card(event, url)
        except Exception as e:
            # Best-effort scraping: one malformed card must not lose the rest.
            # logging.exception keeps the traceback so the failure can be diagnosed.
            logging.exception(f"❌ Error processing an event: {e}")
            continue

        # Store ALL events (with dates) for PostgreSQL
        all_events.append(record)

        # Store UNIQUE events (without dates) for Vector DB; first occurrence wins
        unique_key = (record["Title"], record["Description"])
        if unique_key not in unique_events:
            unique_events[unique_key] = {
                column: record[column] for column in _UNIQUE_EVENT_COLUMNS
            }

    # Convert the collected records to DataFrames with a fixed column layout
    all_events_df = pd.DataFrame(all_events, columns=_ALL_EVENT_COLUMNS)
    unique_events_df = pd.DataFrame(list(unique_events.values()), columns=_UNIQUE_EVENT_COLUMNS)

    logging.info(f"✅ Successfully extracted {len(all_events_df)} total events and {len(unique_events_df)} unique events.")

    return all_events_df, unique_events_df

# Run extraction. all_events_df has one row per listing card (with its date);
# unique_events_df has one row per distinct (title, description) pair.
# If the fetch fails or no cards are found, both are empty DataFrames and the
# reason is written to the log by extract_events().
all_events_df, unique_events_df = extract_events()


In [2]:
# Display the table of all scraped events (one row per listing card, with its date).
all_events_df

Unnamed: 0,Date,Title,Category,Description,Price,URL
0,Fri 7 Mar,Citra Sasmita: Into Eternal Land,Art & design,The Indonesian artist transforms The Curve in ...,Free,https://www.barbican.org.uk/whats-on/2025/even...
1,Fri 7 Mar,Noah Davis,Art & design,Celebrating the late artist’s expansive creati...,,https://www.barbican.org.uk/whats-on/2025/even...
2,Fri 7 Mar,School Screenings: Wolfwalkers,Cinema,"From Cartoon Saloon, the studio behind Song of...",,https://www.barbican.org.uk/whats-on/2025/even...
3,Fri 7 Mar,School Screenings: Rocks,Cinema,Join us at the Barbican Cinema for a free scho...,,https://www.barbican.org.uk/whats-on/2025/even...
4,Fri 7 Mar,Architecture Tours,Tours & public spaces,Step into the heart of London’s Brutalist legacy.,,https://www.barbican.org.uk/whats-on/2025/even...
5,Fri 7 Mar,Mickey 17,Cinema,Robert Pattinson is willing to die over and o...,,https://www.barbican.org.uk/whats-on/2025/even...
6,Fri 7 Mar,A Real Pain,Cinema,Two cousins embark on a tour of Poland in hono...,,https://www.barbican.org.uk/whats-on/2025/even...
7,Fri 7 Mar,Ernest Cole: Lost and Found,Cinema,Raoul Peck 's touching film-documentary of Ern...,,https://www.barbican.org.uk/whats-on/2025/even...
8,Fri 7 Mar,The Last Showgirl,Cinema,Shelly ( Pamela Anderson ) deals with the clos...,,https://www.barbican.org.uk/whats-on/2025/even...
9,Fri 7 Mar,Visit the Conservatory,Tours & public spaces,Visit the second biggest Conservatory in Londo...,Free,https://www.barbican.org.uk/whats-on/2025/even...
