In [9]:
# import required libraries
import json
import re
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
%%capture
# loop over the list of workbooks
subregions = [
    "Vashon Island",
    "South Seattle",
    "South King County",
    "South East King County",
    "Snoqualmie Valley",
    "North Seattle",
    "North King County",
    "Family Phone Line",
    "East King County",
    "Downtown Seattle"
]

# loop over the list, creating an Excel workbook of results
# --------------
for region in subregions:
    # get the source export
    path = f"C://Users/ben_g/Downloads/{region}.xlsx"
    # read into a pandas dataframe
    workbook = pd.read_excel(io=path,
                            header=5,
                            index_col=0,
                            engine='openpyxl')
    
    # do a little cleaning
    workbook = workbook.drop(columns=["email",
                                    "Volunteer Emergency Contact Name (Last, First)",
                                    "Volunteer Emergency Contact Phone Number",
                                    "Volunteer Phone Number",
                                    "Waiver Completed?"])
    
    # transpose the dataframe
    workbook = workbook.T

    # reset the index
    workbook = workbook.reset_index(names="DateTime")

    # split 'DateTime' into two columns
    # ----------------------------------
    # look for a space preceded by 4 digits
    split_data = workbook['DateTime'].str.split(r'(?<=\d{4})\s', expand=True)

    workbook['Date'] = split_data[0]
    workbook['Time'] = split_data[1]

    # groupby columns to handle duplicate columns
    workbook = workbook.groupby(level=0,
                                axis=1).first()
    
    # replace all "Signed-up" with "Time"
    for col in workbook.columns:
        # print(col)
        workbook.loc[workbook[col] == "Signed-up", col] = workbook['Time']

    # drop newly extraneous columns
    workbook = workbook.drop(columns=["DateTime",
                                    "Time",
                                    "Total seats",
                                    "Participants signed-up"])

    # reset the index to be the "Date" col (ahead of transposing)
    workbook = workbook.set_index(keys="Date")

    # loop over the locations to create dataframes similar to the excel tracker
    # ------------------
    # get a list of unique locations
    locations = list(set(workbook["Session description"]))

    # create a dictionary to hold the indexed dataframes
    dfs = {}

    # loop over each location to create a separate, named df
    for loc in locations:
        # create dataframes as transposed copies
        # -------------------
        # filter for target location
        df = workbook[workbook["Session description"]==loc].copy()

        # drop the "Session description" from the newly created df
        df = df.drop(columns=["Session description"])

        # convert the "Date" column to datetime
        df.index = pd.to_datetime(df.index,
                                    errors="coerce")

        # transpose the dataframe and drop empty rows
        df = df.T.dropna(how='all')

        # group by column and take the 'first' non-null entries
        df = df.groupby(level=0,
                        axis=1).first()
        
        # order the dataframe columns chronologically
        df = df.sort_index(axis=1)

        # reset the index so we don't lose names
        df = df.reset_index()

        # limit the sheet name len to avoid errors
        if len(loc)>31:
            loc = str(loc[:31])

        # WRITE TO EXCEL WORKBOOK
        # -------------------
        # create the ExcelWriter object
        with pd.ExcelWriter('Volunteer Schedules - Doodle.xlsx',
                            engine='openpyxl',
                            mode='a',
                            if_sheet_exists='replace') as writer:
            # index=False prevents the 0, 1, 2... row numbers from being saved
            df.to_excel(writer, sheet_name=loc, index=False)

# Transform '_workbook_' for Dashboard

In [3]:
# reset the index so that we can manipulate 'Date'
workbook = workbook.reset_index(names="Date")

# convert 'Date' column to datetime obj
workbook["Date"] = pd.to_datetime(workbook["Date"], errors='coerce').dt.strftime('%m/%d/%Y')

In [4]:
# set 'Date' and 'Session description' as multiindex before grouping columns
workbook = workbook.set_index(keys=["Date", "Session description"])

In [5]:
# groupby columns to handle duplicate columns
grouped_wbk = workbook.stack().groupby(level=[0,1]).value_counts().reset_index(name="Count")

In [6]:
# create function to handle the time transformation
def parse_shift_times(df, column_name='level_2'):
    # clean the string: remove the ".1" duplicates and split by newline
    # This separates "10:00 AM" from "4 h"
    temp_split = df[column_name].str.replace(r'\.\d+$', '', regex=True).str.split('\n', expand=True)
    
    # convert the first part to a datetime (Shift Start)
    df['Shift Start'] = pd.to_datetime(temp_split[0], format='%I:%M %p').dt.time
    
    # convert the second part to a Timedelta (Duration)
    # clean the duration string to make it compatible with pandas (e.g., "4 h 30 min" -> "4h 30m")
    duration_str = temp_split[1].str.replace(' h', 'h').str.replace(' min', 'm')
    duration_td = pd.to_timedelta(duration_str)
    
    # calculate Shift End
    # combine a dummy date with the start time to do the math, then extract the time
    dummy_date = pd.to_datetime('2026-01-01 ' + temp_split[0])
    df['Shift End'] = (dummy_date + duration_td).dt.time

    # drop the source column
    df = df.drop(columns=["level_2"])
    
    return df

# apply the function to your 'grouped_wbk' dataframe
grouped_wbk = parse_shift_times(grouped_wbk)

# Create JSON Representation for Export

In [10]:
# create ISO 8601 strings for Start and End
# assumes 'Date' is already a string or datetime object
grouped_wbk['iso_start'] = pd.to_datetime(grouped_wbk['Date'] \
                                          .astype(str) + \
                                            ' ' + \
                                                grouped_wbk['Shift Start'] \
                                                    .astype(str)).dt \
                                                        .strftime('%Y-%m-%dT%H:%M:%S')
grouped_wbk['iso_end'] = pd.to_datetime(grouped_wbk['Date'] \
                                        .astype(str) + \
                                            ' ' + \
                                                grouped_wbk['Shift End'] \
                                                    .astype(str)).dt \
                                                        .strftime('%Y-%m-%dT%H:%M:%S')

# convert to a list of dictionaries
# we can include 'Count' in the title so users see how many volunteers are there
calendar_data = []
for _, row in grouped_wbk.iterrows():
    calendar_data.append({
        "title": f"{row['Count']} Volunteers",
        "start": row['iso_start'],
        "end": row['iso_end'],
        "location": row['Session description'], # use this for filtering
        "allDay": False
    })

# export to a JSON file for your website
with open('schedule.json', 'w') as f:
    json.dump(calendar_data, f)