In [None]:
from datetime import datetime, timedelta
import json
import markdown
import pandas as pd
from pathlib import Path
import re

In [None]:
# Input: RSS feed saved from https://www.meetup.com/hacksburgva/events/rss/
# (the feed is only available while signed in to Meetup)
file = 'rss.txt'

# Output goes next to the input, with the extension swapped to .json
out_file = Path(file).with_suffix('.json')

# Load every <item> element of the feed, then keep only the events that are
# not the recurring weekly meeting
data = pd.read_xml(file, xpath=".//item")
data = data.loc[data["title"] != "Weekly Meeting"]

In [None]:
# Regex for parsing the RSS description field (includes date, time, description,
# number of attendees, price, etc).
# Written as a raw string so that "\/" reaches the regex engine verbatim instead of
# being an invalid Python string escape (a SyntaxWarning on Python 3.12+); the
# compiled pattern is unchanged.
expression = re.compile(r'.*<p><p>(?P<description>.*)<\/p> <\/p> <p>(?P<location>.*?)<\/p> <p>(?P<date>.*?)<\/p> <p>(?P<attendees>.*?)<\/p>( <p>Price: (?P<price>[0-9]+).*?<\/p>)? <p>(?P<url>.*)<\/p>.*')

# Notes and limitations:
# - No subtitles are generated
# - Meetup does not include year in timestamp, Dates may be inaccurate near end of year
# - End time assumed to be 3 hours after start
# - By default assuming onsite location, not offered online, offered in person
# - Member price $5 less than meetup price (never below $0)
# - No image attached


def _format_clock(moment):
    """Return the time of `moment` as e.g. '6:30pm' (12-hour clock, no leading zero).

    Uses '%I' and strips the zero by hand because '%-I' is a platform-specific
    strftime extension that is not available everywhere (e.g. Windows).
    """
    return moment.strftime('%I:%M%p').lstrip('0').lower()


def _event_record(title, description, guid, year):
    """Build one output record (dict) from a single RSS item.

    Parameters:
        title: event title from the feed.
        description: raw HTML description field, matched against `expression`.
        guid: item guid, stored as the event's meetup link.
        year: year to attach to the parsed date (the feed omits it).

    Raises:
        ValueError: if the description does not match `expression`, or the
            date text cannot be parsed.
    """
    match = expression.match(description) if isinstance(description, str) else None
    if match is None:
        raise ValueError(f"Could not parse the description of event {title!r}")
    description_groups = match.groupdict()

    # Parse the year together with the rest of the date: without it strptime
    # assumes 1900, which rejects February 29 even in leap years.
    date_time = datetime.strptime(f"{description_groups['date']} {year}",
                                  '%A, %B %d at %I:%M %p %Y')

    # Price is optional in the feed; a missing price means a free event.
    non_member_price = int(description_groups['price']) if description_groups['price'] else 0
    # Members pay $5 less, but never a negative amount.
    member_price = max(non_member_price - 5, 0)

    return {"title": title,
            "subtitle": "",
            "description": markdown.markdown(description_groups['description']),
            "date": date_time.strftime('%Y-%m-%d'),
            "start_time": _format_clock(date_time),
            "end_time": _format_clock(date_time + timedelta(hours=3)),
            "offsite_location": None,
            "offered_online": False,
            "offered_in_person": True,
            "member_price": member_price,
            "non_member_price": non_member_price,
            "image": "",
            "meetup_link": guid}


# Collect one record per event and build the frame once, instead of growing it
# row by row with pd.concat.
current_year = datetime.today().year
records = [_event_record(row['title'], row['description'], row['guid'], current_year)
           for index, row in data.iterrows()]
output = pd.DataFrame(records)

In [None]:
# Serialize the collected events to the output file as a JSON array, one object per event.
# Forward slashes are escaped by default; this has no impact on the generated html.
output.to_json(path_or_buf=out_file, orient="records")

# Use Ctrl+Shift+I in VS Code to auto-indent