1. Export calendar from google calendar gui at calendar.google.com -> go to the name of calendar to export on left side of screen -> calendar settings -> export
2. Ensure the student_holidays.csv is in working directory and up-to-date. 
3. Run the following

In [67]:
from csv_ical import Convert
import pandas as pd
import numpy as np
import re

last_completed_cohort = 'germain'


Create dataframe of student holidays

In [68]:
holidays_df = pd.read_csv('student_holidays.csv')
holidays_df['Date'] = pd.to_datetime(holidays_df.Date)
holidays_df.index = pd.to_datetime(holidays_df.Date)

Create dataframe of Cohorts with Name, Start and End Dates

In [69]:
https://docs.google.com/spreadsheets/d/1BZVS8D3Z0jkZxUeQNlGvwn88P9nH2ZM2j8WzdkEC4kc/edit?usp=sharing
    
labels = ['cohort', 'start_date', 'end_date']
easley = ['easley',  '12/07/2020', '06/11/2021']
florence = ['florence', '03/15/2021', '09/03/2021']
germain = ['germain', '06/14/2021', '12/07/2021']
hopper = ['hopper', '09/07/2021', '03/14/2022']
innis = ['innis', '12/13/2021', '06/16/2022']
jemison = ['jemison', '03/21/2022', '08/19/2022']
kalpana = ['kalpana', '06/06/2022', '11/04/2022']
leavitt = ['leavitt', '06/20/2022', '11/21/2022']
mirzakhani = ['mirzakhani', '08/29/2022', '02/14/2023']
noether = ['noether', '11/14/2022', '05/02/2023']
oneill = ['oneill', '12/12/2022', '05/24/2023']

cohorts_start_stop = pd.DataFrame([easley, florence, germain, hopper, innis])
cohorts_start_stop.columns = labels

In [70]:
cohorts_start_stop

Unnamed: 0,cohort,start_date,end_date
0,easley,12/07/2020,06/11/2021
1,florence,03/15/2021,09/03/2021
2,germain,06/14/2021,12/07/2021
3,hopper,09/07/2021,03/14/2022
4,innis,12/13/2021,06/16/2022


Read historical cohort calendars that have been exported to .ics files

In [71]:
def read_convert_save_calendar(cohort_ics, cohort_csv):
    convert = Convert()
    convert.CSV_FILE_LOCATION = cohort_csv
    convert.SAVE_LOCATION = cohort_ics

    convert.read_ical(convert.SAVE_LOCATION)
    convert.make_csv()
    convert.save_csv(convert.CSV_FILE_LOCATION)

In [72]:
# cohorts is a list of filenames in the current directory of exported calendars. They are saved as curie.ics, e.g.
cohorts = ['curie', 'darden', 'easley', 'easley2', 'florence', 'germain']
df = pd.DataFrame(columns=['event','start','end','cohort'])

for cohort in cohorts:
    read_convert_save_calendar(cohort + '.ics', cohort + '.csv')
    temp_df = pd.read_csv(cohort + '.csv', header = None, usecols = [0, 1, 2], names=['event', 'start', 'end'])
    temp_df['cohort'] = cohort
    df = pd.concat([df, temp_df], axis=0)

In [73]:
df.tail()

Unnamed: 0,event,start,end,cohort
393,[Cohort] Graduation,2021-12-07,2021-12-08,germain
394,[Zach] Morning Kick Off,2021-12-06 09:00:00-06:00,2021-12-06 09:15:00-06:00,germain
395,[Maggie] Morning Kick Off,2021-12-07 09:00:00-06:00,2021-12-07 09:15:00-06:00,germain
396,[Ryan] Morning Kick Off,2021-12-08 09:00:00-06:00,2021-12-08 09:15:00-06:00,germain
397,[Ravinder] Morning Kick Off,2021-12-09 09:00:00-06:00,2021-12-09 09:15:00-06:00,germain


In [74]:
# filter out all events with a time
df = df[~df.start.str.contains("\s([0-9]{2}:){2}.+$", case=True, flags=0, na=None, regex=True)]

  return func(self, *args, **kwargs)


In [75]:
# create a start date field
df['start'] = pd.to_datetime(df.start)
# get the name of the day
df['day_of_week'] = df.start.dt.day_name()
# drop end date as each event we are concerned with is one day only 
df.drop(columns=['end'], inplace=True)

In [76]:
df.tail()

Unnamed: 0,event,start,cohort,day_of_week
384,Capstone,2021-12-01,germain,Wednesday
387,Capstone,2021-12-02,germain,Thursday
389,Capstone,2021-12-03,germain,Friday
392,Capstone,2021-12-06,germain,Monday
393,[Cohort] Graduation,2021-12-07,germain,Tuesday


In [77]:
# filter out where 'OOO' is in the entry
df = df[~df.event.str.contains("(OOO|Out|Vacation|No\sClass|Staff\s*Day|(\
                               (Faith|Adam|John|Madeleine|Maggie|Ravinder|Ryan|Sam|Zach)\s*(in\s*[a-z]|Out)))", 
                               case=False, flags=0, na=None, regex=True)]

  return func(self, *args, **kwargs)


Filter down to most accurate template of lessons and the time they take. 

In [78]:
# filter down to germain which is currently our best template
last_cohort_lessons = df[df.cohort==last_completed_cohort]
last_cohort_lessons = last_cohort_lessons.sort_values(by=['start']).reset_index(drop=True)
last_cohort_lessons.head()

Unnamed: 0,event,start,cohort,day_of_week
0,Syllabus Review [Ryan],2021-06-14,germain,Monday
1,Welcome Day,2021-06-14,germain,Monday
2,Fundamentals L1 [Ryan],2021-06-15,germain,Tuesday
3,Fundamentals L2 Explore through Hyperdoc [Faith],2021-06-15,germain,Tuesday
4,Fundamentals L4 Project Kickoff [Faith],2021-06-16,germain,Wednesday


____________________________________

Get Dataframe of all dates and holidays between last completed cohort start and stop

In [79]:
last_cohort_dates = cohorts_start_stop[cohorts_start_stop.cohort==last_completed_cohort]
last_cohort_dates = pd.DataFrame({'date': [last_cohort_dates.start_date.iloc[0], 
                                           last_cohort_dates.end_date.iloc[0]]})

#convert to datetime
last_cohort_dates = pd.DataFrame(pd.to_datetime(last_cohort_dates.date)).set_index(last_cohort_dates.date).asfreq('D')

# join on holidays dataframe
last_cohort_dates = last_cohort_dates.join(holidays_df, how='left').drop(columns=['date', 'Date'])


# get weekday
last_cohort_dates['weekday'] = last_cohort_dates.index.day_name()

# label weekends
last_cohort_dates.loc[last_cohort_dates.weekday == 'Saturday', 'Student Holidays'] = 'Weekend'
last_cohort_dates.loc[last_cohort_dates.weekday == 'Sunday', 'Student Holidays'] = 'Weekend'

# new column names
last_cohort_dates.columns = ['student_holiday', 'weekday']

last_cohort_dates['is_classday'] = last_cohort_dates.student_holiday.isnull().astype(int)

In [80]:
hours = pd.DataFrame({'weekday': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
                    'is_classday': [1, 1, 1, 1, 1, 0, 0], 'class_hours': [6, 7, 3.5, 6.5, 7, 0, 0]})

last_cohort_dates_hours = last_cohort_dates.merge(hours, how='left', on=['weekday', 'is_classday'])
# give new dataframe the original date index. 
last_cohort_dates_hours.index = last_cohort_dates.index


# get classday number 
last_cohort_dates_hours['classday_num'] = np.arange(last_cohort_dates_hours.shape[0])
last_cohort_dates_hours = pd.concat([
    last_cohort_dates_hours[last_cohort_dates_hours.is_classday == 1].classday_num.rank().rename('classday_num2'),
    last_cohort_dates_hours
], axis=1).drop(columns='classday_num').rename(columns={'classday_num2': 'classday_num'})

last_cohort_dates_hours = last_cohort_dates_hours.reset_index()

In [81]:
last_cohort_dates_hours.columns = ['start', 'classday_num', 'student_holiday', 'weekday', 'is_classday', 'class_hours']

Merge the dataframe with all dates with the dataframe of lessons (left join)

In [82]:
last_cohort_lessons.columns

Index(['event', 'start', 'cohort', 'day_of_week'], dtype='object')

In [83]:
last_cohort_df = last_cohort_dates_hours.merge(last_cohort_lessons, how='left', on='start')
last_cohort_df.head(20)

Unnamed: 0,start,classday_num,student_holiday,weekday,is_classday,class_hours,event,cohort,day_of_week
0,2021-06-14,1.0,,Monday,1,6.0,Syllabus Review [Ryan],germain,Monday
1,2021-06-14,1.0,,Monday,1,6.0,Welcome Day,germain,Monday
2,2021-06-15,2.0,,Tuesday,1,7.0,Fundamentals L1 [Ryan],germain,Tuesday
3,2021-06-15,2.0,,Tuesday,1,7.0,Fundamentals L2 Explore through Hyperdoc [Faith],germain,Tuesday
4,2021-06-16,3.0,,Wednesday,1,3.5,Fundamentals L4 Project Kickoff [Faith],germain,Wednesday
5,2021-06-16,3.0,,Wednesday,1,3.5,Fundamentals L3 InterReview [Faith],germain,Wednesday
6,2021-06-17,4.0,,Thursday,1,6.5,Project Time for Skills in Demand,germain,Thursday
7,2021-06-18,5.0,,Friday,1,7.0,Fundamentals Assessment,germain,Friday
8,2021-06-18,5.0,,Friday,1,7.0,Demonstrate Example End-to-end project [Faith],germain,Friday
9,2021-06-19,,Weekend,Saturday,0,0.0,,,


In [84]:
# extract the lesson details
lesson_details_df = last_cohort_df.event.str.extract(r'(^\w*)\s*(L[0-9]+)((\s*\w*)*)(\s*)(\[[A-Za-z]+\])')
lesson_details_df[2] = lesson_details_df[2].str.lower()
lesson_details_df[2] = lesson_details_df[2].str.strip()
lesson_details_df[3] = lesson_details_df[2].str.contains(r'(review)') == True

  return func(self, *args, **kwargs)


In [85]:
last_cohort_df['module'] = lesson_details_df[0]
last_cohort_df['lesson_number'] = lesson_details_df[1]
last_cohort_df['lesson_name'] = lesson_details_df[2]
last_cohort_df['is_review'] = lesson_details_df[3]
last_cohort_df['instructor'] = last_cohort_df.event.str.extract(r'(\[[A-Za-z]+\])')
last_cohort_df.loc[last_cohort_df['instructor'].isnull() == True, 'lesson_name'] = last_cohort_df.event
last_cohort_df.loc[last_cohort_df['lesson_name'].isnull() == True, 'lesson_name'] = last_cohort_df.event.str.extract(r'(.*)\s\[')

In [86]:
last_cohort_df.to_csv("germain_parsed.csv")


In [42]:
last_cohort_df.lesson_number.value_counts()

L02    12
L03    11
L01    11
L04    10
L2      9
L3      9
L6      8
L00     7
L7      6
L4      6
L8      6
L1      6
L5      5
L9      5
L05     4
L10     4
L06     3
L0      2
L07     1
Name: lesson_number, dtype: int64

In [43]:
lesson_details_df = last_cohort_df.event.str.extract(r'(^\w*)\s*(L[0-9]+)((\s*\w*)*)(\[[A-Za-z]+\])')
lesson_details_df[2] = lesson_details_df[2].str.lower()
lesson_details_df[2] = lesson_details_df[2].str.strip()
# lesson_details_df[3] = lesson_details_df[2].str.extract(r'(review)')
lesson_details_df[3] = lesson_details_df[2].str.contains(r'(review)') == True
lesson_details_df[3].value_counts()

  return func(self, *args, **kwargs)


False    217
True      49
Name: 3, dtype: int64

In [98]:
new_cohort = 'innis'
new_cohort_start_end = cohorts_start_stop[cohorts_start_stop.cohort == new_cohort]
new_cohort_start_end

Unnamed: 0,cohort,start_date,end_date
4,innis,12/13/2021,06/16/2022


In [109]:
# df with start and end
new_cohort_days = pd.DataFrame({'date': [new_cohort_start_end.start_date.iloc[0], new_cohort_start_end.end_date.iloc[0]]})

#convert to datetime
new_cohort_days = pd.DataFrame(pd.to_datetime(new_cohort_days.date)).set_index(new_cohort_days.date).asfreq('D')

# join on holidays dataframe
new_cohort_days = new_cohort_days.join(holidays_df, how='left').drop(columns=['date', 'Date'])

# get weekday
new_cohort_days['weekday'] = new_cohort_days.index.day_name()

# label weekends
new_cohort_days.loc[new_cohort_days.weekday == 'Saturday', 'Student Holidays'] = 'Weekend'
new_cohort_days.loc[new_cohort_days.weekday == 'Sunday', 'Student Holidays'] = 'Weekend'

# new column names
new_cohort_days.columns = ['student_holiday', 'weekday']

new_cohort_days['is_classday'] = new_cohort_days.student_holiday.isnull().astype(int)

create hours dataframe that has default class hours by day of week. 

In [116]:
hours = pd.DataFrame({'weekday': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
                    'is_classday': [1, 1, 1, 1, 1, 0, 0], 'class_hours': [6, 7, 3.5, 6.5, 7, 0, 0]})

Merge cohort dataframe with the hours dataframe. 

In [124]:
new_cohort_daily_hours = new_cohort_days.merge(hours, how='left', on=['weekday', 'is_classday'])
# give new dataframe the original date index. 
new_cohort_daily_hours.index = new_cohort_days.index

In [125]:
new_cohort_daily_hours

Unnamed: 0,student_holiday,weekday,is_classday,class_hours
2021-12-13,,Monday,1,6.0
2021-12-14,,Tuesday,1,7.0
2021-12-15,,Wednesday,1,3.5
2021-12-16,,Thursday,1,6.5
2021-12-17,Planning Day,Friday,0,
...,...,...,...,...
2022-06-12,Weekend,Sunday,0,0.0
2022-06-13,,Monday,1,6.0
2022-06-14,,Tuesday,1,7.0
2022-06-15,,Wednesday,1,3.5


In [126]:
# get classday number 
new_cohort_daily_hours['classday_num'] = np.arange(new_cohort_daily_hours.shape[0])

new_cohort_daily_hours = pd.concat([
    new_cohort_daily_hours[new_cohort_daily_hours.is_classday == 1].classday_num.rank().rename('classday_num2'),
    new_cohort_daily_hours
], axis=1).drop(columns='classday_num').rename(columns={'classday_num2': 'classday_num'})

In [128]:
# read csv with generic lesson names by class day number taught
generic_lesson_template = pd.read_csv('generic_lessons_days.csv')

generic_lesson_template = generic_lesson_template[['classday_num', 'generic_event_name']]

In [131]:
new_cohort_daily_hours = new_cohort_daily_hours.reset_index()

In [133]:
classdays = new_cohort_daily_hours[new_cohort_daily_hours.is_classday==1].set_index('classday_num').drop(columns=['student_holiday', 'is_classday'])

lessons_df = generic_lesson_template[generic_lesson_template.classday_num.notnull()].set_index('classday_num')

new_cal = lessons_df.join(classdays, how='left')

new_cal['start'] = new_cal['index']

new_cal = new_cal[['generic_event_name', 'index']]
new_cal.columns = ['Subject', 'Start Date']

new_cal.to_csv('innis_cal.csv')