## Enrollment activities trends

This notebook calculates and plots weekly trends in enrollment activity (adds, drops, waitlists, and swaps).

### preprocess transaction data

In [None]:
import pandas as pd
from tqdm import tqdm

# Raw enrollment transaction log; TRANSACTION_DATA must already be defined.
df = pd.read_csv(TRANSACTION_DATA)

# Term codes -> readable labels, 2012 Fall (2128) through 2022 Fall (2228).
# The codes in this range follow 2000 + (two-digit year) * 10 + term digit,
# with Spring = 2, Summer = 5 and Fall = 8.
_TERM_DIGITS = {'Spring': 2, 'Summer': 5, 'Fall': 8}
_TERMS_BEFORE_RANGE = {(2012, 'Spring'), (2012, 'Summer')}
semester_dict = {
    2000 + (year % 100) * 10 + digit: f'{year} {term}'
    for year in range(2012, 2023)
    for term, digit in _TERM_DIGITS.items()
    if (year, term) not in _TERMS_BEFORE_RANGE
}

# Readable semester label; codes outside the table become NaN.
df['semester_clean'] = df['semester_year_term_cd'].map(semester_dict)
# Course key of the form "<subject> <number>".
df['course_clean'] = df['subject_desc'].map(str) + ' ' + df['course_number'].map(str)

# Keep only student-initiated actions that affect enrollment status.
affects_enrollment = df['action_affects_enrollment_status_flag'] == 'Y'
student_initiated = df['enrollment_intitiator_type'] == 'Student'
df_filtered = df[affects_enrollment & student_initiated]

### calculate course swaps

In [None]:
def get_swaps_result_df(df_filtered):
    '''
    Given a transaction dataframe, return a dataframe of course swap pairs.

    A swap row is one with enrollment_request_action_cd == 'S' and non-null
    class_number and change_to_class_nbr. For every SWAP_ENRL row, the first
    BASE row and the first SWAP_ENRL row sharing its enrollment_request_id
    are paired up: the BASE row supplies the original course, semester,
    timestamp, student and initiator; the SWAP_ENRL row supplies the swapped
    course.

    Parameters
    ----------
    df_filtered : pandas.DataFrame
        Must contain the columns enrollment_request_action_cd, class_number,
        change_to_class_nbr, row_type, enrollment_request_id, course_clean,
        semester_clean, enrollment_request_tmsp, student_id and
        enrollment_intitiator_type.

    Returns
    -------
    pandas.DataFrame
        Columns: original_course, swapped_course, semester_clean, timestamp,
        student_id, initiator. One row per SWAP_ENRL row, ordered from the
        last such row in the input to the first. Pairs whose original and
        swapped course are the same are removed (the index keeps the gaps).
        Requests with no BASE row among the swap rows are skipped.
    '''
    request_col = 'enrollment_request_id'
    result_columns = ['original_course', 'swapped_course', 'semester_clean',
                      'timestamp', 'student_id', 'initiator']

    is_complete_swap = (
        (df_filtered['enrollment_request_action_cd'] == 'S')
        & df_filtered['class_number'].notna()
        & df_filtered['change_to_class_nbr'].notna()
    )
    df_swaps = df_filtered[is_complete_swap]
    base_rows = df_swaps[df_swaps['row_type'] == 'BASE']
    swap_rows = df_swaps[df_swaps['row_type'] == 'SWAP_ENRL']

    # First BASE row of each request: everything except the swapped course.
    base_side = (
        base_rows.drop_duplicates(request_col)
        [[request_col, 'course_clean', 'semester_clean',
          'enrollment_request_tmsp', 'student_id', 'enrollment_intitiator_type']]
        .rename(columns={
            'course_clean': 'original_course',
            'enrollment_request_tmsp': 'timestamp',
            'enrollment_intitiator_type': 'initiator',
        })
    )
    # First SWAP_ENRL row of each request: the course swapped into.
    swap_side = (
        swap_rows.drop_duplicates(request_col)[[request_col, 'course_clean']]
        .rename(columns={'course_clean': 'swapped_course'})
    )

    # One output row per SWAP_ENRL row (duplicated request ids stay duplicated).
    # Inner joins keep the left-hand order and drop requests without a BASE row,
    # which previously raised IndexError.
    pairs = (
        swap_rows[[request_col]]
        .merge(base_side, on=request_col, how='inner')
        .merge(swap_side, on=request_col, how='inner')
    )

    # Most recent input row first with a fresh 0..n-1 index, matching the
    # ordering the previous row-by-row insertion produced.
    swap_result_df = pairs[result_columns].iloc[::-1].reset_index(drop=True)
    return swap_result_df[swap_result_df['original_course'] != swap_result_df['swapped_course']]

In [None]:
# Swap pairs derived from the student-initiated, enrollment-affecting transactions.
swap_result = get_swaps_result_df(df_filtered=df_filtered)

### calculate weekly average waitlists, enrolls, and drops

In [None]:
from collections import defaultdict
import tqdm
# Per-semester weekly means, keyed by semester label. Each value is a Series
# indexed by week_num holding the mean number of distinct courses per student.
wl_per_week = {}   # outcome 'W'
er_per_week = {}   # outcome 'E'
dp_per_week = {}   # outcome 'D'
# Semesters that could not be processed, with the exception that was raised.
failed_semesters = {}


def _mean_courses_per_student_week(sem_df, outcome_cd):
    """Mean number of distinct courses per student in each week for one outcome code.

    Rows of `sem_df` whose student_enrollment_status_outcome_cd equals
    `outcome_cd` are grouped by (student_id, week_num); the distinct
    course_clean values are counted per group and then averaged over the
    students within each week. Returns a Series indexed by week_num.
    """
    outcome_rows = sem_df[sem_df['student_enrollment_status_outcome_cd'] == outcome_cd]
    courses_per_student_week = (
        outcome_rows.groupby(['student_id', 'week_num'])['course_clean'].nunique()
    )
    return courses_per_student_week.groupby('week_num').mean()


## get mean enrollment action per week
for s in tqdm.tqdm(fa_sems):
    try:
        # Copy so the new columns land on an independent frame instead of a
        # slice of df_filtered (avoids SettingWithCopyWarning).
        sem_df = df_filtered[df_filtered['semester_clean'] == s].copy()
        # First 10 characters of the timestamp string (presumably YYYY-MM-DD -- confirm).
        sem_df['timestamp_date'] = sem_df['enrollment_request_tmsp'].str[:10]
        # output_week_based_on_date is defined outside this cell; it is called
        # with (semester label, date string) for every row.
        sem_df['week_num'] = [
            output_week_based_on_date(semester, day)
            for semester, day in zip(sem_df['semester_clean'], sem_df['timestamp_date'])
        ]
        weekly_means = {
            outcome_cd: _mean_courses_per_student_week(sem_df, outcome_cd)
            for outcome_cd in ('W', 'E', 'D')
        }
    except Exception as exc:
        # Still best-effort (the loop moves on), but no longer silent: a
        # skipped semester changes the cross-semester averages computed later.
        failed_semesters[s] = exc
        tqdm.tqdm.write(f'Skipping semester {s!r}: {exc!r}')
        continue

    # Store all three only after the whole semester was processed successfully.
    wl_per_week[s] = weekly_means['W']
    er_per_week[s] = weekly_means['E']
    dp_per_week[s] = weekly_means['D']
  


In [None]:
def _mean_across_semesters(per_semester):
    """Average per-semester weekly Series into one Series indexed by week number.

    `per_semester` maps a semester label to a Series indexed by week_num. The
    result keeps the week numbers as its (sorted) index, so plotting it puts
    every point at its real week rather than at its position in a list.
    """
    if not isinstance(per_semester, dict):
        # The cell was re-run: the name already holds the aggregated Series.
        return per_semester
    by_semester = pd.DataFrame.from_dict(per_semester, orient='index')
    # Columns appear in first-seen order, not numeric order, so sort explicitly.
    return by_semester.mean(axis=0).sort_index()


er_per_week = _mean_across_semesters(er_per_week)
wl_per_week = _mean_across_semesters(wl_per_week)
dp_per_week = _mean_across_semesters(dp_per_week)

# get_swaps_result_df does not return a week_num column, so derive it here
# the same way the weekly loop does: (semester label, first 10 characters of
# the timestamp) -> output_week_based_on_date.
if 'week_num' in swap_result.columns:
    swaps_weekly = swap_result
else:
    # NOTE(review): restricted to fa_sems so swaps cover the same semesters as
    # the other three series -- confirm this is the intended population.
    swaps_weekly = swap_result[swap_result['semester_clean'].isin(fa_sems)].copy()
    swaps_weekly['week_num'] = [
        output_week_based_on_date(semester, day)
        for semester, day in zip(swaps_weekly['semester_clean'],
                                 swaps_weekly['timestamp'].str[:10])
    ]

# Swaps per student per week, then the mean over students within each week.
intermed = swaps_weekly.groupby(['student_id', 'week_num']).agg({'timestamp': 'count'}).reset_index()
swaps_per_week = intermed.groupby('week_num').agg({'timestamp': 'mean'})

## graph activities trend

In [None]:
import matplotlib.pyplot as plt
# Milestones drawn as dashed vertical lines: (week, label height on the y axis, label).
# Week positions and label heights are hand-picked values; presumably they use the
# same week numbering as week_num -- confirm against output_week_based_on_date.
ENROLLMENT_MILESTONES = [
    (0, 1.1, 'Phase 1 begins'),
    (10, 1.8, 'Phase 1 begins (new student)'),
    (11, 2, 'Phase 2 begins'),
    (17, 2.15, 'Add/drop period begins'),
    (18, 2.3, 'Instruction begins'),
    (20, 2.5, 'Add/drop deadline'),
    (23, 2.7, 'Late add/drop deadline'),
]

fig, ax = plt.subplots()

# One line per activity type.
ax.plot(wl_per_week, label='waitlist')
ax.plot(er_per_week, label='enroll')
ax.plot(dp_per_week, label='drop')
ax.plot(swaps_per_week['timestamp'], label='swaps')

ax.set_title('Mean number of courses per week')
ax.set_xlabel('Week')
ax.set_ylabel('Mean number of courses')

for week, label_height, milestone in ENROLLMENT_MILESTONES:
    ax.axvline(x=week, color='silver', linestyle='--')
    # Text sits just right of the line; no arrow is drawn.
    ax.annotate(milestone, xy=(week, label_height), xytext=(week + 0.1, label_height))

# Legend outside the axes on the right so it does not cover the annotations.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.3));
