# Tampa Bay tech events list builder

_The code is complete, but the documentation is a work in progress. More coming soon!_

## Imports

In [None]:
import base64
import json
import os
import re
from datetime import datetime, timedelta
from time import sleep
from urllib.parse import urlparse, urlunparse

import ipywidgets as widgets
import pyperclip
import requests
from dotenv import load_dotenv
from IPython.display import display
from openai import OpenAI
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

## Section 1: Create the blog post

The first phase of creating the weekly list of Tampa Bay tech events is to create a new post in _Global Nerdy_ to contain the list of events. This section contains the code to create that post.

### 1a: Blog post content generator functions
The post should contain the following information...

- **Blog post title.** Always has the same format, featuring the start and end dates of the week in question: *Tampa Bay tech, entrepreneur, and nerd events list (Monday, {month} {date} – Sunday, {month} {date})*.
- **A blank line for the hero image.** This is usually a picture of some place in the Tampa Bay area, with the title _Tampa Bay Tech, Entrepreneur, and Nerd Events_. I make the hero image in Canva, upload it to the blog, and then add it to the post. This is the last thing I do before publishing the post. I expect this will remain a manual process for some time.
- **Intro text.** This explains that the post is a list of events for Tampa Bay’s tech scene, and the text includes the dates of the start and end of the week.
- **The _This week’s events_ list.** This is the _This week’s events_ heading, followed by a bullet-point list of the dates of the days of the week. Each date in the list is linked to a corresponding anchor so that the reader can click on the _Tuesday_ link and be immediately taken to the post’s _Tuesday_ heading.
- **Date headings.** This is a set of &lt;h3&gt; headings, one for each day of the week. Each heading has a designated space where we’ll paste the table of events for that day.
- **Outro text.** This is the text that appears at the end of each of these lists. It explains how I put the list together and what I consider worthy of including in the list.

The code cell below contains the functions to generate this content.

In [None]:
def next_monday(today=None):
    """Return the datetime of the Monday that starts the upcoming week.

    The result is always strictly after ``today``: when ``today`` is itself a
    Monday, the Monday one week later is returned. The time-of-day part of
    ``today`` is carried over unchanged.

    Args:
        today: Reference datetime. Defaults to ``datetime.now()``.

    Returns:
        A ``datetime`` that falls on a Monday, 1 to 7 days after ``today``.
    """
    if today is None:
        today = datetime.now()
    # weekday() is 0 for Monday through 6 for Sunday, so this is always 1..7
    # and no further adjustment is needed.
    days_until_next_monday = 7 - today.weekday()
    return today + timedelta(days=days_until_next_monday)

def sunday_after_next_monday():
    """Return the datetime of the Sunday that closes the week begun by next_monday()."""
    week_start = next_monday()
    # weekday() counts Monday as 0, which makes Sunday 6.
    offset_to_sunday = timedelta(days=6 - week_start.weekday())
    return week_start + offset_to_sunday

def title():
    """Return the blog post title for the upcoming week's events list.

    The title embeds the week's first and last day, e.g.
    "... events list (Monday, January 13 - Sunday, January 19)".
    """
    def long_date(day):
        # Build the day number from ``day.day`` instead of the "%-d" strftime
        # directive, which is a platform extension and is rejected on Windows.
        return f"{day:%A, %B} {day.day}"

    monday_text = long_date(next_monday())
    sunday_text = long_date(sunday_after_next_monday())
    return f"Tampa Bay tech, entrepreneur, and nerd events list ({monday_text} - {sunday_text})"

def intro_text(path="./_text/intro.html"):
    """Return the post's intro HTML with the week's dates filled in.

    Reads the template at ``path`` and replaces the ``{{NEXT_MONDAY}}`` and
    ``{{SUNDAY_AFTER_NEXT_MONDAY}}`` placeholders with dates formatted like
    "Monday, January 13".

    Args:
        path: Location of the intro template. Defaults to the original
            hard-coded location.

    Returns:
        The template text with both placeholders substituted.
    """
    def long_date(day):
        # ``day.day`` replaces the "%-d" strftime directive, which is a
        # platform extension and is rejected on Windows.
        return f"{day:%A, %B} {day.day}"

    # Decode explicitly as UTF-8 so non-ASCII punctuation in the template is
    # not mangled by a different locale default.
    with open(path, encoding="utf-8") as intro_text_file:
        template = intro_text_file.read()

    return (
        template
        .replace("{{NEXT_MONDAY}}", long_date(next_monday()))
        .replace("{{SUNDAY_AFTER_NEXT_MONDAY}}", long_date(sunday_after_next_monday()))
    )

def week_list_and_date_headings(start_date=None):
    """Build the week's link list and the per-day heading placeholders.

    Produces a ``<ul>`` with one link per day (Monday through Sunday),
    followed by, for each day, a named anchor, an ``<h3>`` heading, and a
    "PASTE ...'S TABLE HERE" placeholder line.

    Args:
        start_date: First day of the week to render. Defaults to
            ``next_monday()``.

    Returns:
        The bullet list and the headings as a single HTML string.
    """
    if start_date is None:
        start_date = next_monday()

    bullet_items = []
    heading_blocks = []

    for i in range(7):
        current_date = start_date + timedelta(days=i)
        day_of_week = f"{current_date:%A}".upper()
        # ``.day`` gives the unpadded day number on every platform; the
        # "%-d" directive only works where the C library supports it.
        full_date_str = f"{current_date:%A, %B} {current_date.day}"
        abbr_date_str = f"{current_date:%a-%b}-{current_date.day}".lower()

        bullet_items.append(
            f"""<li><a href="#{abbr_date_str}">{full_date_str}</a></li>\n"""
        )
        # \u2019 is the right single quotation mark (typographic apostrophe).
        heading_blocks.append(
            f"""<a name="{abbr_date_str}"></a>\n<h3>{full_date_str}</h3>\n\n** PASTE {day_of_week}\u2019S TABLE HERE **\n\n"""
        )

    bullet_list = "<ul>\n" + "".join(bullet_items) + "</ul>"
    headings_list = "".join(heading_blocks)

    return f"{bullet_list}\n\n{headings_list}"

def outro_text(path="./_text/outro.html"):
    """Return the post's closing HTML, read verbatim from a file.

    Args:
        path: Location of the outro file. Defaults to the original
            hard-coded location.

    Returns:
        The full contents of the file as a string.
    """
    # Decode explicitly as UTF-8 so non-ASCII punctuation in the text is not
    # mangled by a different locale default.
    with open(path, encoding="utf-8") as outro_text_file:
        return outro_text_file.read()
    
def blog_post_text():
    """Assemble the full post body: intro, week list with day headings, then outro."""
    sections = (
        intro_text(),
        week_list_and_date_headings(),
        outro_text(),
    )
    # Sections are separated by one blank line.
    return "\n\n".join(sections)
    

### 1b: Blog post creation function
The function below, `create_post()`, takes two strings — a title for the post and its content — and creates a new post on _Global Nerdy_ for the upcoming week’s tech events list.

In [None]:
def create_post(title, content):
    """Create a draft post on Global Nerdy through the WordPress REST API.

    Credentials are read from the ``WORDPRESS_USERNAME`` and
    ``WORDPRESS_APP_PASSWORD`` environment variables (loaded from ``.env``)
    and sent as HTTP Basic auth.

    Args:
        title: Title of the post.
        content: Body of the post.

    Returns:
        The parsed JSON response when the server answers 201, otherwise
        ``None``. Failures are reported with ``print`` rather than raised.
    """
    load_dotenv()
    WORDPRESS_API_URL = "https://www.globalnerdy.com/wp-json/wp/v2/posts"
    WORDPRESS_USERNAME = os.getenv("WORDPRESS_USERNAME")
    WORDPRESS_APP_PASSWORD = os.getenv("WORDPRESS_APP_PASSWORD")

    # Without this guard the request would be sent with the literal
    # credentials "None:None" and fail with a confusing auth error.
    if not WORDPRESS_USERNAME or not WORDPRESS_APP_PASSWORD:
        print("Error: WORDPRESS_USERNAME and WORDPRESS_APP_PASSWORD must both be set.")
        return None

    credentials = f"{WORDPRESS_USERNAME}:{WORDPRESS_APP_PASSWORD}"
    token = base64.b64encode(credentials.encode()).decode()
    headers = {
        "Authorization": f"Basic {token}",
        "Content-Type": "application/json"
    }

    # WordPress expects the post's title and content to be objects
    # whose values are stored in a property named `"raw"`.
    post_data = {
        "title": {
            "raw": title
        },
        "content": {
            "raw": content
        },
        "status": "draft"
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.post(
            WORDPRESS_API_URL, headers=headers, json=post_data, timeout=30
        )
    except requests.RequestException as e:
        print(f"Exception: {e}")
        return None

    print(f"Status Code: {response.status_code}")
    print(f"Response: {response.text}")

    # response.json() raises a ValueError subclass when the body is not JSON.
    if response.status_code == 201:
        try:
            return response.json()
        except ValueError as e:
            print(f"Exception: {e}")
            return None

    print(f"Error: {response.status_code}")
    try:
        error_data = response.json()
        print(f"Error details: {error_data}")
    except ValueError:
        print(f"Raw error: {response.text}")
    return None

### 1c: Create the blog post
Once you’ve run the code cells above, run the code cell below to create a blog post for the upcoming week’s tech events list.

In [None]:
# Generate the title and body for the upcoming week and create the draft post.
# `result` holds what create_post() returns: the parsed JSON response on
# success (HTTP 201), or None on failure.
result = create_post(title(), blog_post_text())

## Section 2: Scrape Meetup.com and generate the tables for each day of the upcoming week

### Open a Selenium-controlled browser window

In [None]:
# Start a Selenium-controlled Chrome window; ChromeDriverManager().install()
# supplies the driver executable used by the Chrome service.
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
# Open Facebook's OAuth dialog. Its redirect_uri parameter points back to
# meetup.com, so a successful Facebook login ends up on Meetup.
driver.get('https://www.facebook.com/v11.0/dialog/oauth?client_id=2403839689&redirect_uri=https%3A%2F%2Fwww.meetup.com%2Fties2%2F&scope=email%20user_friends&response_type=token&state=returnUri%3Dhttps%253A%252F%252Fwww.meetup.com%252Fhome%26facebook%3Dtrue')

# Read the Facebook credentials and the DeepSeek API key from the environment
# (load_dotenv() loads them from a .env file first).
load_dotenv()
FACEBOOK_USERNAME = os.getenv("FACEBOOK_USERNAME")
FACEBOOK_PASSWORD = os.getenv("FACEBOOK_PASSWORD")
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")

# Initialize the client for DeepSeek (used later by event_relevance()).
client = OpenAI(
    api_key=DEEPSEEK_API_KEY,  # Taken from the DEEPSEEK_API_KEY environment variable
    base_url="https://api.deepseek.com"  # DeepSeek's API endpoint
)

# Fill in and submit the Facebook login form. The fixed sleeps pause between
# steps; the elements are looked up immediately, without explicit waits.
username_field = driver.find_element(By.ID, "email")
username_field.send_keys(FACEBOOK_USERNAME)
sleep(2)
password_field = driver.find_element(By.ID, "pass")
password_field.send_keys(FACEBOOK_PASSWORD)
sleep(2)
login_button = driver.find_element(By.ID, "loginbutton")
login_button.click()
sleep(8)
# Click the first element with class "x9f619" on the page shown after login.
# NOTE(review): presumably the "Continue" button of the OAuth consent screen;
# the class name looks auto-generated and may change — verify if this fails.
continue_button = driver.find_element(By.CLASS_NAME, "x9f619")
continue_button.click()

### Category page processing

In [None]:
# Patterns for lines of an event card's text that are metadata rather than
# the event title. extract_title_from_anchor() discards any line that
# matches one of these before choosing a title.
IGNORE_LINE_PATTERNS = [
    r"^\$",                            # prices like $10.00
    r"^\d+\s+seats?\s+left",           # "9 seats left"
    r"^(Every|Mon|Tue|Wed|Thu|Fri|Sat|Sun)\b",  # date lines
    # "<word> • ..." separator lines. The bullet is written as the escape
    # \u2022 so the pattern does not depend on how this file is encoded.
    r"^\w+\s\u2022\s",
    r"^\d+\s+attendees?$",             # "1 attendee", "25 attendees"
    r"^by\s",                          # "by Tampa ..."
    r"^\s*$",                          # empty/whitespace
]

# One case-insensitive regex that matches if any of the patterns above match.
ignore_re = re.compile("|".join(IGNORE_LINE_PATTERNS), re.IGNORECASE)

def extract_title_from_anchor(a):
    """
    Work out an event title for the anchor element ``a``.

    Three strategies are tried in order; the first one that yields a
    non-empty string wins:

    1. The text of a heading-like element inside the anchor.
    2. The longest line of the anchor's text that is not metadata
       (as judged by ``ignore_re``).
    3. The longest piece of the anchor's ``aria-label`` or ``title`` attribute.

    Returns an empty string when none of them produce anything.
    """
    heading_xpaths = (
        ".//h1",
        ".//h2",
        ".//h3",
        ".//*[@role='heading']",
        ".//span[contains(@data-testid,'event') and contains(@data-testid,'title')]",
    )

    # Strategy 1: first non-empty heading, checking the XPaths in order.
    try:
        for xpath in heading_xpaths:
            for node in a.find_elements(By.XPATH, xpath):
                candidate = (node.text or "").strip()
                if candidate:
                    return candidate
    except StaleElementReferenceException:
        # The element went stale mid-lookup; move on to the next strategy.
        pass

    # Strategy 2: longest line of visible text that isn't metadata.
    try:
        title_candidates = []
        for raw_line in (a.text or "").splitlines():
            line = (raw_line or "").strip()
            if not ignore_re.search(line):
                title_candidates.append(line)
        if title_candidates:
            return max(title_candidates, key=len)
    except StaleElementReferenceException:
        pass

    # Strategy 3: attributes. A label may carry extra parts separated by
    # " | " or newlines; keep the longest part.
    for attribute_name in ("aria-label", "title"):
        attribute_value = (a.get_attribute(attribute_name) or "").strip()
        if not attribute_value:
            continue
        pieces = []
        for piece in re.split(r"\s+\|\s+|\n", attribute_value):
            if piece.strip():
                pieces.append(piece.strip())
        if pieces:
            return max(pieces, key=len)

    # Nothing usable was found.
    return ""

def event_urls_from_category_or_keyword_page(URL):
    """Load a Meetup search page and return the event-page URLs it links to.

    The page is opened in the module-level Selenium ``driver``, scrolled
    until the number of event links stops growing, and then every anchor
    whose href contains "/events/" and for which a title can be extracted
    contributes its URL.

    Args:
        URL: Address of the category or keyword search page.

    Returns:
        A list of unique event URLs in page order, or an empty list if a
        Selenium ``TimeoutException`` is raised along the way (for example,
        no event link appears within 20 seconds).
    """
    # Anchors that link to an event page, excluding account links.
    event_link_xpath = "//a[contains(@href, '/events/') and not(contains(@href, '/account/'))]"

    def scroll_until_stable(max_rounds=8, pause=1.2):
        """Scroll to the bottom repeatedly until the event-link count stops growing."""
        last_count = 0
        stable_rounds = 0
        for _ in range(max_rounds):
            # Count first, then scroll; element references are not kept around.
            count = len(driver.find_elements(By.XPATH, event_link_xpath))
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            sleep(pause)
            # Two consecutive rounds without growth: assume everything is loaded.
            if count == last_count:
                stable_rounds += 1
                if stable_rounds >= 2:
                    break
            else:
                stable_rounds = 0
            last_count = count

    try:
        driver.get(URL)

        # Block until at least one event link is present in the DOM.
        WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.XPATH, event_link_xpath))
        )

        scroll_until_stable()

        # Re-query after scrolling so the references are fresh.
        anchors = driver.find_elements(By.XPATH, event_link_xpath)

        event_urls = []
        # De-duplicate on the URL itself. Several anchors (with different
        # titles) can point at the same event, and every returned URL costs
        # the caller a full page load.
        seen_urls = set()

        for a in anchors:
            # Up to three tries per anchor in case the element goes stale.
            for _ in range(3):
                try:
                    href = a.get_attribute("href") or ""
                    if (
                        "/events/" in href
                        and href not in seen_urls
                        and extract_title_from_anchor(a)
                    ):
                        seen_urls.add(href)
                        event_urls.append(href)
                    break
                except StaleElementReferenceException:
                    sleep(0.2)
                except Exception:
                    # Deliberately best-effort: one unreadable anchor must
                    # not abort the whole page.
                    break

        return event_urls

    except TimeoutException:
        return []

### Event page processing

In [None]:
def event_from_event_page(event_url):
    """
    Given the URL of a Meetup event page, return a dictionary containing
    the following data from that page:

    - event_url
    - event_name
    - group_name
    - group_url
    - location
    - display_time
    - datetime
    - description

    The page is loaded in the module-level Selenium ``driver``. Any field
    whose element cannot be found gets a placeholder value (or an empty
    string) instead of raising.
    """

    def text_at(elements, index, fallback):
        # Text of elements[index], or `fallback` when that element is
        # missing or its text is empty.
        if len(elements) > index and elements[index].text:
            return elements[index].text
        return fallback

    # \U0001F4C5 is the calendar emoji.
    print(f"\U0001F4C5 Reading event page: {event_url}")
    driver.get(event_url)
    sleep(5)

    # Get event name
    # --------------
    # As of October 2025, the event name on a Meetup event page is in the *second* h1 element.
    # The first h1 element is blank, possibly as an anti-scraping measure.
    h1_elements = driver.find_elements(By.TAG_NAME, "h1")
    event_name = text_at(h1_elements, 1, "[ UNKNOWN EVENT NAME ]")

    # Get group name
    # --------------
    # As of October 2025, the group name on a Meetup event page is in the *second* h3 element whose class is 'ds2-m16...'
    # The first h3 element is blank, possibly as an anti-scraping measure.
    h3_elements = driver.find_elements(By.CSS_SELECTOR, "h3[class='ds2-m16 line-clamp-2 overflow-hidden text-ds2-text-fill-primary-enabled lg:ds2-m18']")
    group_name = text_at(h3_elements, 1, "[ UNKNOWN GROUP NAME ]")

    # Get group URL
    # -------------
    # As of October 2025, the group URL on a Meetup event page is in the first a element whose class is 'block no-underline hover:no-underline'.
    a_elements = driver.find_elements(By.CSS_SELECTOR, "a[class='block no-underline hover:no-underline']")
    group_url = a_elements[0].get_attribute("href") if a_elements else "[ UNKNOWN GROUP URL ]"

    # Get location
    # ------------
    # As of October 2025, the location on a Meetup event page is in the *second* p element whose class is 'ds2-k16...'
    # The first p element is blank, possibly as an anti-scraping measure.
    p_elements = driver.find_elements(By.CSS_SELECTOR, "p[class='ds2-k16 text-ds2-text-fill-primary-enabled']")
    location = text_at(p_elements, 1, "")

    # Get display time and datetime
    # -----------------------------
    # As of October 2025, the display time and datetime on a Meetup event page are in the first time element.
    time_elements = driver.find_elements(By.TAG_NAME, "time")
    if time_elements:
        display_time_element = time_elements[0]
        # Fall back to "" (never None) so sorted_events() can always compare
        # the values as strings.
        event_datetime = display_time_element.get_attribute("datetime") or ""
        full_display_time = display_time_element.text
    else:
        event_datetime = ""
        full_display_time = ""

    # The displayed text has parts separated by a middle dot (\u00b7); the
    # time is the part after the first separator.
    time_parts = full_display_time.split(" \u00b7 ")
    display_time = time_parts[1] if len(time_parts) > 1 else "[ UNKNOWN DISPLAY TIME ]"

    # Get event description
    # ---------------------
    # As of December 2025, the event description on a Meetup event page is in the first div element whose class is
    # 'w-full break-words transition-all duration-300 line-clamp-[15]'.
    paragraphs = driver.find_elements(By.CSS_SELECTOR, """div[class="w-full break-words transition-all duration-300 line-clamp-[15]"]""")
    description = paragraphs[0].text if paragraphs else ""

    return {
        "event_url": event_url,
        "event_name": event_name,
        "group_name": group_name,
        "group_url": group_url,
        "location": location,
        "display_time": display_time,
        "datetime": event_datetime,
        "description": description,
    }

### Event list processing

In [None]:
def all_meetup_events(year, month, day):
    """Scrape every Meetup event found for one calendar day.

    For each enabled category, the matching Meetup "find" page (restricted
    to the given date, within a hundred miles of Tampa) is read for event
    URLs, and each of those event pages is then scraped.

    Args:
        year, month, day: The date to search, as integers.

    Returns:
        A list of event dictionaries as produced by event_from_event_page().
        Duplicates are not removed here.
    """
    BASE_URL = "https://www.meetup.com/find"
    # Search keywords; defined here but not referenced by the code below.
    KEYWORDS = {
        "programming": "programming",
        "data%20science": "data science",
        "project%20management": "project management",
        "security": "security",
        "cryptocurrency": "cryptocurrency",
        "cyber": "cyber",
        "agile": "agile",
        "entrepreneur": "entrepreneur",
        "startup": "startup",
        "artificial intelligence": "artificial intelligence",
    }
    # Meetup category IDs mapped to display names; only uncommented entries are scraped.
    CATEGORIES = {
        "546": "Technology",
        # "405": "Career & Business",
        # "604": "Community & Environment",
        # "535": "Games",
        # "571": "Hobbies & Passions",
        # "436": "Science & Education",
        # "652": "Social Activities",
        # "467": "Writing",
    }
    MAX_ATTEMPTS = 2

    day_stamp = f"{year}-{month:02d}-{day:02d}"
    # NOTE(review): the UTC offset is fixed at -05:00 in both bounds —
    # confirm this is intended when daylight saving time is in effect.
    query = "&".join([
        "source=EVENTS",
        f"customStartDate={day_stamp}T00%3A00-05%3A00",
        f"customEndDate={day_stamp}T23%3A59-05%3A00",
        "distance=hundredMiles",
        "location=us--fl--Tampa",
    ])

    collected = []

    for category_id, category_name in CATEGORIES.items():
        print(f"Reading {category_name} category page...")
        category_page_url = f"{BASE_URL}/?{query}&categoryId={category_id}"
        print(f"Category page URL: {category_page_url}")

        # Read the listing page, retrying once if it yields no URLs.
        for _ in range(MAX_ATTEMPTS):
            sleep(3)
            event_urls = event_urls_from_category_or_keyword_page(category_page_url)
            if event_urls is None:
                return []
            if event_urls:
                break

        collected.extend(event_from_event_page(url) for url in event_urls)

    return collected

def events_without_duplicates(events):
    """Return the events with repeated ``event_url`` values removed.

    The first event seen for each URL is kept and the original order is
    preserved. Events that have no ``"event_url"`` key are dropped.

    Args:
        events: Iterable of event dictionaries.

    Returns:
        A new list containing one event per distinct URL.
    """
    # A set makes each membership test O(1) instead of scanning a list.
    seen_urls = set()
    result_events = []

    for event in events:
        if "event_url" not in event:
            continue
        url = event["event_url"]
        if url in seen_urls:
            continue
        seen_urls.add(url)
        result_events.append(event)

    return result_events

def events_not_on_ignore_list(events, ignore_names_path="./ignore_names.txt"):
    """Drop events whose group name or event name contains an ignored term.

    The ignore file holds one term per line. Matching is a case-insensitive
    substring test against both the group name and the event name.

    Args:
        events: Iterable of event dictionaries with ``group_name`` and
            ``event_name`` keys.
        ignore_names_path: Location of the ignore file. Defaults to the
            original hard-coded location.

    Returns:
        A new list with the events that match none of the terms.
    """
    with open(ignore_names_path, encoding="utf-8") as ignore_names_file:
        stripped_names = [line.strip().lower() for line in ignore_names_file]

    # Skip blank lines: an empty string is a substring of every name, so a
    # single blank line in the file would otherwise filter out every event.
    names_to_ignore = [name for name in stripped_names if name]

    result_list = []
    for event in events:
        group_name = event['group_name'].lower()
        event_name = event['event_name'].lower()
        is_ignored = any(
            name in group_name or name in event_name
            for name in names_to_ignore
        )
        if not is_ignored:
            result_list.append(event)

    return result_list

def sorted_events(events):
    """
    Return a new list of the given event objects in ascending order of
    each event's 'datetime' value. The input is left untouched.
    """
    def by_datetime(event):
        return event["datetime"]

    ordered = list(events)
    ordered.sort(key=by_datetime)
    return ordered

def events_for_checklist(year, month, day):
    """Return the cleaned-up, chronologically sorted events for one day.

    Scrapes the day's events, removes duplicates, drops events that match
    the ignore list, and sorts what remains by datetime.

    Args:
        year, month, day: The date to gather events for, as integers.

    Returns:
        A list of event dictionaries ready to be turned into a checklist.
    """
    # The progress message names this function (it previously named a
    # function that does not exist here).
    print("events_for_checklist()")

    initial_events = all_meetup_events(year, month, day)
    print("Generated initial events")

    unique_events = events_without_duplicates(initial_events)
    filtered_events = events_not_on_ignore_list(unique_events)
    return sorted_events(filtered_events)



### Checklist processing

In [None]:
def _parse_json_reply(reply_text):
    """Parse a model reply as JSON, tolerating a Markdown code fence around it."""
    cleaned = (reply_text or "").strip()
    # Unwrap "```json ... ```" or "``` ... ```". A regex is used because
    # str.strip("```json") strips a *set of characters*, not a prefix.
    fenced = re.match(r"^```[A-Za-z]*\s*(.*?)\s*```$", cleaned, re.DOTALL)
    if fenced:
        cleaned = fenced.group(1)
    return json.loads(cleaned)


def event_relevance(description):
    """Ask the DeepSeek model how relevant an event is to the Tampa Bay tech scene.

    Sends the event description to the module-level ``client`` together with
    a system prompt describing what counts as relevant.

    Args:
        description: The event's description text.

    Returns:
        A dict with two keys: ``"relevance"``, the JSON object parsed from
        the model's reply (the prompt asks for ``relevance_score`` and
        ``explanation``), and ``"usage"``, the usage object from the API
        response.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    SYSTEM_PROMPT = """
    You are an editorial assistant whose task is to determine whether a given meetup event is relevant to people in the technology industry in
    Tampa Bay and surrounding areas based on its description.

    Relevant events include:
    - Events of interest to software developers, technology professionals, technology hobbyists, and technology enthusiasts.
    - Events aimed at a "nerd" audience, including board game enthusiasts, role-playing gamers, tech hobbyists, and similar groups.
    - Events for people who are new to technology or considering a career in technology, including students and career changers.
    - Non-technology, but 'nerd' events, including Toastmasters meetings, film clubs, and book clubs, even if they are not strictly tech-related.
    - Community or networking events that meet the above criteria.

    Irrelevant events include:
    - Events whose primary purpose is lead generation. Many of these events talk about passive income or have some Ponzi scheme aspect.
    - Events that appear to be sales pitches or seminars.

    Respond with a JSON object with these keys and corresponding values: 
    - "relevance_score": A score from 0 - 100, where 0 means "completely irrelevant" and 100 means "highly relevant.
    - "explanation": A brief explanation of why you gave the event that score.
    """

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"""Here is the description of the Meetup event: {description}"""}
        ],
        stream=False  # One complete response rather than a stream
    )

    relevance = _parse_json_reply(response.choices[0].message.content)

    return {
        "relevance": relevance,
        "usage": response.usage,
    }

def event_checkbox_description(event):
    """Return the checkbox label for an event: "<group>: <event name>", then its display time, each ending in a newline."""
    headline = f"{event['group_name']}: {event['event_name']}"
    time_line = f"{event['display_time']}"
    return f"{headline}\n{time_line}\n"

def build_checklist_from_events(events):
    """Create one checkbox widget per event and return a dict mapping each checkbox to its event."""
    def make_checkbox(event):
        # An 800px-wide checkbox labelled with the event's summary text.
        return widgets.Checkbox(
            description=event_checkbox_description(event),
            layout=widgets.Layout(width="800px"),
        )

    return {make_checkbox(event): event for event in events}


def display_checklist(checklist):
    """Render the checklist, one row per event, with an AI relevance rating.

    For every checkbox/event pair, the event's description is scored by
    event_relevance() and the checkbox is displayed next to an expandable
    rating (score, explanation, tokens used) and a link to the event page.
    The total number of tokens used is printed at the end.

    Args:
        checklist: Dict mapping checkbox widgets to event dictionaries.
    """
    from html import escape  # local import: only this function needs it

    total_tokens_used = 0
    for checkbox, event in checklist.items():
        relevance = event_relevance(event["description"])
        tokens_used = relevance["usage"].total_tokens
        total_tokens_used += tokens_used

        # The score and explanation come from a language model and the URL
        # from a scraped page; escape them before embedding them in HTML.
        url = escape(str(event["event_url"]), quote=True)
        relevance_score = escape(str(relevance["relevance"]["relevance_score"]))
        relevance_explanation = escape(str(relevance["relevance"]["explanation"]))

        # \u2022 is a bullet character, used as a visual separator.
        link = widgets.HTML(value=f"""Rating: <details><summary>{relevance_score}</summary>{relevance_explanation}<br />Tokens used: {tokens_used}</details> \u2022\u2022\u2022 <a href="{url}" target="_blank">link</a>""")

        display(widgets.HBox([checkbox, link]))
    print(f"Total tokens used for relevance scoring: {total_tokens_used}")

### Generate the checklist

In [None]:
# Scrape, de-duplicate, filter, and sort the Meetup events for a single day.
# NOTE: the date is hard-coded; change the (year, month, day) arguments to
# the day you want before running this cell.
events = events_for_checklist(2026, 1, 1)
# Wrap each event in a checkbox, then show the checkboxes with their ratings.
checklist = build_checklist_from_events(events)
display_checklist(checklist)

### The table generator: _Run after checking the checklist!_

In [None]:
def get_checked_items(checklist):
    """Return the events whose checkboxes are ticked, in checklist order."""
    return [event for checkbox, event in checklist.items() if checkbox.value]

def checked_items_to_html_table(checked_items):
    """Render the selected events as an HTML table and copy it to the clipboard.

    Each event becomes one row: linked event name with its location, linked
    group name, and display time. A final row links back to "#top".

    Args:
        checked_items: Iterable of event dictionaries.

    Returns:
        The table's HTML as a string (also placed on the clipboard via
        pyperclip).
    """
    from html import escape  # local import: only this function needs it

    def cell(value):
        # Scraped text may contain &, <, > or quotes; escape it so it cannot
        # break the table markup.
        return escape(str(value), quote=True)

    header = """<table><tr><th>Event name and location</th><th>Group</th><th width="20%">Time</th></tr>"""
    footer = """<tr><td colspan="3"><a href="#top">Return to the top of the list</a></td></tr></table>"""

    # The first cell previously ended with a stray </p> that had no opening
    # <p>; it is omitted here.
    rows = [
        f"""<tr><td><strong><a href="{cell(event['event_url'])}">{cell(event['event_name'])}</a></strong><br /><small>{cell(event['location'])}</small></td><td><a href="{cell(event['group_url'])}">{cell(event['group_name'])}</a></td><td><small>{cell(event['display_time'])}</small></td></tr>"""
        for event in checked_items
    ]

    event_html_table = header + "".join(rows) + footer

    pyperclip.copy(event_html_table)
    return event_html_table


# Build the HTML table from the events whose checkboxes are ticked; the
# function also copies the table to the clipboard.
table = checked_items_to_html_table(get_checked_items(checklist))
# A bare expression on the last line of a notebook cell displays its value.
table