In [990]:
%reset -f

Import Data Sources, Make DataFrames

In [991]:
# Import packages
import os
import pandas as pd

# Set base path for reproducibility
# Save initial data to a folder "Capstone Project Data_Cleaned" in the current working directory
base_path = os.path.join(os.getcwd(), "Capstone Project Data_Cleaned")

# Create DFs - Undergrad Only Data
df_all_inperson = pd.read_excel(os.path.join(base_path, "df_all_inperson.xlsx"))
df_all_online = pd.read_excel(os.path.join(base_path, "df_all_online.xlsx"))
df_all = pd.read_excel(os.path.join(base_path, "df_all.xlsx"))

df_fall_all_inperson = pd.read_excel(os.path.join(base_path, "df_fall_all_inperson.xlsx"))
df_fall_all_online = pd.read_excel(os.path.join(base_path, "df_fall_all_online.xlsx"))
df_fall_all = pd.read_excel(os.path.join(base_path, "df_fall_all.xlsx"))

df_spring_all_inperson = pd.read_excel(os.path.join(base_path, "df_spring_all_inperson.xlsx"))
df_spring_all_online = pd.read_excel(os.path.join(base_path, "df_spring_all_online.xlsx"))
df_spring_all = pd.read_excel(os.path.join(base_path, "df_spring_all.xlsx"))

Create Dictionaries

In [992]:
# Create Dictionaries 

# All Dictionary
dfs_all = {
    "df_all": df_all,
    "df_fall_all": df_fall_all,
    "df_spring_all": df_spring_all
}

# In-Person Dictionary
dfs_inperson = {
    "df_all_inperson": df_all_inperson,
    "df_fall_all_inperson": df_fall_all_inperson,
    "df_spring_all_inperson": df_spring_all_inperson
}

# Online Dictionary
dfs_online = {
    "df_all_online": df_all_online,
    "df_fall_all_online": df_fall_all_online,
    "df_spring_all_online": df_spring_all_online
}

dfs_everything = {**dfs_all, **dfs_inperson, **dfs_online}


Explore DF size by Dictionary

In [993]:
print("DF sizes in dfs_all:")
for name, df in dfs_all.items():
    print(f"   {name}: {df.shape[0]} rows, {df.shape[1]} columns") #df.shape[] for #rows, #cols

print("\nDF sizes in dfs_inperson:")
for name, df in dfs_inperson.items():
    print(f"   {name}: {df.shape[0]} rows, {df.shape[1]} columns")

print("\nDF sizes in dfs_online:")
for name, df in dfs_online.items():
    print(f"   {name}: {df.shape[0]} rows, {df.shape[1]} columns")


DF sizes in dfs_all:
   df_all: 1559 rows, 27 columns
   df_fall_all: 766 rows, 27 columns
   df_spring_all: 793 rows, 27 columns

DF sizes in dfs_inperson:
   df_all_inperson: 339 rows, 27 columns
   df_fall_all_inperson: 165 rows, 27 columns
   df_spring_all_inperson: 174 rows, 27 columns

DF sizes in dfs_online:
   df_all_online: 1220 rows, 27 columns
   df_fall_all_online: 601 rows, 27 columns
   df_spring_all_online: 619 rows, 27 columns


<h1> Modality: In-Person vs Online </h1>

In-Person vs Online Totals

In [994]:
print("Online Classes Count:")
for name, df in dfs_online.items():
    print(f"   {name}: has {df.shape[0]} online classes")

print("\nIn-Person Classes Count:")
for name, df in dfs_inperson.items():
    print(f"   {name}: has {df.shape[0]} in-person classes")



Online Classes Count:
   df_all_online: has 1220 online classes
   df_fall_all_online: has 601 online classes
   df_spring_all_online: has 619 online classes

In-Person Classes Count:
   df_all_inperson: has 339 in-person classes
   df_fall_all_inperson: has 165 in-person classes
   df_spring_all_inperson: has 174 in-person classes


Online/In-Person Dictionary

In [995]:
dfs_all_inperson_online = {
    "df_all_inperson": df_all_inperson,
    "df_all_online": df_all_online
}

<h3> Function for sorting results in academic year order: semester_order_func </h3>
Fall 2021, Spring 2022, Fall 2022, Spring 2023, Fall 2023, Spring 2024, Fall 2024, Spring 2025

In [996]:
# Strategy: label Fall/Spring starting with the same year, to count them as the same academic year
# e.g. Fall 2021 & Spring 2022 will turn into 2021.0 and 2021.5, to group as the 2021 academic year

def semester_order_func(sem):
    season, year = sem.split()
    year = int(year)
    if season == "Fall":
        return year + 0.0           # Fall 2021 turns into 2021.0; Fall 2021 into 2022.0; etc
    elif season == "Spring":
        return (year - 1) + 0.5     # Spring 2022 - 1 + 0.5 = 2021.5 so that it'll follow Fall 2021.0


Online/In-Person by Semester

In [997]:
#Totals by Session
for df_semester, df in dfs_all_inperson_online.items():
    # Separate inperson/online within our new dictionary
    modality = "In Person Classes" if "inperson" in df_semester else "Online Classes"

    # Group by Term (semester) & count
    term_counts = df.groupby("Term").size()

    # Function: sort by semester order
    # Using key=lambda x: for each tuples (term + count), sort by applying function to x[0] term first
    sorted_term_counts = sorted(term_counts.items(), key=lambda x: semester_order_func(x[0]))

    print(f"\n{df_semester}: {modality}")
    for term, count in sorted_term_counts:
        print(f"   {term} has {count} classes")




df_all_inperson: In Person Classes
   Fall 2021 has 33 classes
   Spring 2022 has 31 classes
   Fall 2022 has 32 classes
   Spring 2023 has 34 classes
   Fall 2023 has 38 classes
   Spring 2024 has 40 classes
   Fall 2024 has 62 classes
   Spring 2025 has 69 classes

df_all_online: Online Classes
   Fall 2021 has 155 classes
   Spring 2022 has 143 classes
   Fall 2022 has 138 classes
   Spring 2023 has 149 classes
   Fall 2023 has 157 classes
   Spring 2024 has 166 classes
   Fall 2024 has 151 classes
   Spring 2025 has 161 classes


Online/Live Online by Semester

In [998]:
# Get unique Term values from DF df_all_online
terms = df_all_online["Term"].unique()

# For loop with our sorted function semester_order_func
for df_semester in sorted(terms, key=semester_order_func):
    df_term = df_all_online[df_all_online["Term"] == df_semester]
    
    total = df_term.shape[0]
    fully_online = (df_term["Facility"] == "Online").sum()
    live_online = (df_term["Facility"] == "Live Online").sum()
    
    print(f"{df_semester}: {total} total classes, {fully_online} Fully Online, and {live_online} Live Online")


Fall 2021: 155 total classes, 142 Fully Online, and 13 Live Online
Spring 2022: 143 total classes, 142 Fully Online, and 1 Live Online
Fall 2022: 138 total classes, 138 Fully Online, and 0 Live Online
Spring 2023: 149 total classes, 145 Fully Online, and 4 Live Online
Fall 2023: 157 total classes, 153 Fully Online, and 4 Live Online
Spring 2024: 166 total classes, 162 Fully Online, and 4 Live Online
Fall 2024: 151 total classes, 147 Fully Online, and 4 Live Online
Spring 2025: 161 total classes, 157 Fully Online, and 4 Live Online


<h3>In-Person/Online by Modalities </h3>

Online/In-Person: Session (15 week vs 7)

In [999]:
print("Online vs In-Person by Session Type (15/7): Totals")

for name, df in dfs_all_inperson_online.items():
    print(f"\n{name}: Total by Session")
    session_counts = df["Session"].value_counts(dropna=False)
    print(session_counts)


Online vs In-Person by Session Type (15/7): Totals

df_all_inperson: Total by Session
Session
Regular Academic Session    339
Name: count, dtype: int64

df_all_online: Total by Session
Session
Regular Academic Session    635
Seven Week - Second         298
Seven Week - First          287
Name: count, dtype: int64


In [1000]:
print("Online vs In-Person by Session Type (15/7) & Semester")

for df_semester, df in dfs_all_inperson_online.items():
    print(f"\n{df_semester}: Session by Semester (Term)")
   
    # Group by "Term" and "Session Grouped"; .size() count the rows; unstack() turn into a DF
    session_by_term = df.groupby(["Term", "Session"]).size().unstack(fill_value=0)
   
    # Order semesters with function semester_order_func; .loc[] to return a DF
    session_by_term = session_by_term.loc[sorted(session_by_term.index, key=semester_order_func)]
    print(session_by_term)

Online vs In-Person by Session Type (15/7) & Semester

df_all_inperson: Session by Semester (Term)
Session      Regular Academic Session
Term                                 
Fall 2021                          33
Spring 2022                        31
Fall 2022                          32
Spring 2023                        34
Fall 2023                          38
Spring 2024                        40
Fall 2024                          62
Spring 2025                        69

df_all_online: Session by Semester (Term)
Session      Regular Academic Session  Seven Week - First  Seven Week - Second
Term                                                                          
Fall 2021                          97                  32                   26
Spring 2022                        78                  27                   38
Fall 2022                          75                  30                   33
Spring 2023                        77                  34                   38
Fall

7-week: combine 7W1 and 7W2 into 1 term

In [1001]:
# New DF df_online_7week, to not confuse with df_all_online
df_online_7week = df_all_online.copy()

# Group "Seven Week - First" and "Seven Week - Second" into a single "Seven Week Combined"
df_online_7week["Session Grouped"] = df_online_7week["Session"].replace({
    "Seven Week - First": "Seven Week Combined",
    "Seven Week - Second": "Seven Week Combined"})

print("Online Classes by 15/7 Week, Totalling both 7-Week Sessions")

# Total counts by session type (15/7)
print("\nTotals in Dataset:")
session_counts = df_online_7week["Session Grouped"].value_counts()
print(session_counts)

# Totals by Session type & by Term
print("\nSession by Semester (Term):")
session_by_term = df_online_7week.groupby(["Term", "Session Grouped"]).size().unstack(fill_value=0)
session_by_term = session_by_term.loc[sorted(session_by_term.index, key=semester_order_func)] # Run semester_order_func to sort
print(session_by_term)


Online Classes by 15/7 Week, Totalling both 7-Week Sessions

Totals in Dataset:
Session Grouped
Regular Academic Session    635
Seven Week Combined         585
Name: count, dtype: int64

Session by Semester (Term):
Session Grouped  Regular Academic Session  Seven Week Combined
Term                                                          
Fall 2021                              97                   58
Spring 2022                            78                   65
Fall 2022                              75                   63
Spring 2023                            77                   72
Fall 2023                              82                   75
Spring 2024                            80                   86
Fall 2024                              71                   80
Spring 2025                            75                   86


Online/In-Person by Meeting Days

In [1002]:
print("Online vs In-Person by Meeting Days")

for name, df in dfs_all_inperson_online.items():
    # Print Online/In-Person Totals by Meeting Days
    print(f"\n{name}: Total by Meeting Days")
    meeting_days_total = df["Meeting Days"].value_counts(dropna=False)
    print(meeting_days_total)
    
    # Print Online/In-Person Totals by Meeting Days & by Semester (Term) 
    print(f"\n{name}: Meeting Days by Semester (Term)")
    # Group by Term & Meeting Days; .size() count the rows; unstack() turn into a DF
    meeting_days_by_term = df.groupby(["Term", "Meeting Days"]).size().unstack(fill_value=0)
    meeting_days_by_term = meeting_days_by_term.loc[sorted(meeting_days_by_term.index, key=semester_order_func)]
    print(meeting_days_by_term)


Online vs In-Person by Meeting Days

df_all_inperson: Total by Meeting Days
Meeting Days
TR    181
MW     75
M      34
F      33
W      10
-       5
T       1
Name: count, dtype: int64

df_all_inperson: Meeting Days by Semester (Term)
Meeting Days  -   F  M  MW  T  TR  W
Term                                
Fall 2021     0   0  0   8  0  25  0
Spring 2022   0   2  5   5  0  19  0
Fall 2022     0   3  3   3  0  23  0
Spring 2023   0   2  5   7  0  19  1
Fall 2023     0   2  4  10  0  21  1
Spring 2024   0   3  5  10  0  21  1
Fall 2024     2  11  5  17  0  24  3
Spring 2025   3  10  7  15  1  29  4

df_all_online: Total by Meeting Days
Meeting Days
-     1186
TR      24
MW       4
R        3
W        2
F        1
Name: count, dtype: int64

df_all_online: Meeting Days by Semester (Term)
Meeting Days    -  F  MW  R  TR  W
Term                              
Fall 2021     142  1   4  3   3  2
Spring 2022   142  0   0  0   1  0
Fall 2022     138  0   0  0   0  0
Spring 2023   145  0   0  0  

Online/Live Online by Days

Split df_all_online into Online/Live Online

In [1003]:
df_all_online_fully = df_all_online[df_all_online["Facility"] == "Online"].copy()
df_all_online_live = df_all_online[df_all_online["Facility"] == "Live Online"].copy()

dfs_online_vs_live = {
    "df_all_online_fully": df_all_online_fully,
    "df_all_online_live": df_all_online_live
}

In [1004]:
print("Online vs Live Online - Meeting Days")

for name, df in dfs_online_vs_live.items():
    # Online vs Live Online Totals by Meeting Days
    print(f"\n{name}: Total by Meeting Days")
    meeting_days_total = df["Meeting Days"].value_counts(dropna=False)
    print(meeting_days_total)

    # Online vs Live Online Totals by Meeting Days & by Semester (Term)
    print(f"\n{name}: Meeting Days by Semester (Term)")
    meeting_days_by_term = df.groupby(["Term", "Meeting Days"]).size().unstack(fill_value=0)
    meeting_days_by_term = meeting_days_by_term.loc[sorted(meeting_days_by_term.index, key=semester_order_func)]
    print(meeting_days_by_term)


Online vs Live Online - Meeting Days

df_all_online_fully: Total by Meeting Days
Meeting Days
-    1186
Name: count, dtype: int64

df_all_online_fully: Meeting Days by Semester (Term)
Meeting Days    -
Term             
Fall 2021     142
Spring 2022   142
Fall 2022     138
Spring 2023   145
Fall 2023     153
Spring 2024   162
Fall 2024     147
Spring 2025   157

df_all_online_live: Total by Meeting Days
Meeting Days
TR    24
MW     4
R      3
W      2
F      1
Name: count, dtype: int64

df_all_online_live: Meeting Days by Semester (Term)
Meeting Days  F  MW  R  TR  W
Term                         
Fall 2021     1   4  3   3  2
Spring 2022   0   0  0   1  0
Spring 2023   0   0  0   4  0
Fall 2023     0   0  0   4  0
Spring 2024   0   0  0   4  0
Fall 2024     0   0  0   4  0
Spring 2025   0   0  0   4  0


Online/In-Person by Component

In [1005]:
print("Online/In-Person by Component")

for name, df in dfs_all_inperson_online.items():
    # Print Online/In-Person Totals by Component
    print(f"\n{name}: Total by Component")
    component_total = df["Component"].value_counts(dropna=False)
    print(component_total)

    # Print Online/In-Person Totals by Component & by Semester (Term)
    print(f"\n{name}: Component by Semester (Term)")
    component_by_term = df.groupby(["Term", "Component"]).size().unstack(fill_value=0)
    component_by_term = component_by_term.loc[sorted(component_by_term.index, key=semester_order_func)]
    print(component_by_term)


Online/In-Person by Component

df_all_inperson: Total by Component
Component
Lecture       266
Discussion     67
Colloquium      6
Name: count, dtype: int64

df_all_inperson: Component by Semester (Term)
Component    Colloquium  Discussion  Lecture
Term                                        
Fall 2021             0           0       33
Spring 2022           1           6       24
Fall 2022             0           6       26
Spring 2023           2           6       26
Fall 2023             0           6       32
Spring 2024           1           6       33
Fall 2024             1          18       43
Spring 2025           1          19       49

df_all_online: Total by Component
Component
Lecture       1104
Discussion     115
Colloquium       1
Name: count, dtype: int64

df_all_online: Component by Semester (Term)
Component    Colloquium  Discussion  Lecture
Term                                        
Fall 2021             1          27      127
Spring 2022           0          15   

<h1> Modality: 15-week vs 7-week </h1>

15/7 totals

In [1006]:
print("Modality Breakdown by Session Type (15-week vs 7-week)")

for name, df in dfs_all.items():
    print(f"\n{name}: Courses by Session")
    # 15-week vs. 7-week Totals
    session_counts = df["Session"].value_counts(dropna=False)
    print(session_counts)


Modality Breakdown by Session Type (15-week vs 7-week)

df_all: Courses by Session
Session
Regular Academic Session    974
Seven Week - Second         298
Seven Week - First          287
Name: count, dtype: int64

df_fall_all: Courses by Session
Session
Regular Academic Session    490
Seven Week - First          142
Seven Week - Second         134
Name: count, dtype: int64

df_spring_all: Courses by Session
Session
Regular Academic Session    484
Seven Week - Second         164
Seven Week - First          145
Name: count, dtype: int64


15/7 by Term

In [1007]:
print("15/7 Course Counts by Term")

# Group by Session and Term; Sort
session_by_term = df_all.groupby(["Term", "Session"]).size().unstack(fill_value=0)
# Run semester_order_func to sort
session_by_term = session_by_term.loc[sorted(session_by_term.index, key=semester_order_func)]
print(session_by_term)


15/7 Course Counts by Term
Session      Regular Academic Session  Seven Week - First  Seven Week - Second
Term                                                                          
Fall 2021                         130                  32                   26
Spring 2022                       109                  27                   38
Fall 2022                         107                  30                   33
Spring 2023                       111                  34                   38
Fall 2023                         120                  38                   37
Spring 2024                       120                  43                   43
Fall 2024                         133                  42                   38
Spring 2025                       144                  41                   45


<h3> 15/7 by Modalities </h3>

15/7 by Days Scheduled

In [1008]:
print("15/7 by Days Scheduled, Totals:")

for name, df in dfs_all.items():
    print(f"\n{name}:")
    summary = df.groupby(["Session", "Meeting Days"]).size().unstack(fill_value=0)
    print(summary)


15/7 by Days Scheduled, Totals:

df_all:
Meeting Days                -   F   M  MW  R  T   TR   W
Session                                                 
Regular Academic Session  606  34  34  79  3  1  205  12
Seven Week - First        287   0   0   0  0  0    0   0
Seven Week - Second       298   0   0   0  0  0    0   0

df_fall_all:
Meeting Days                -   F   M  MW  R   TR  W
Session                                             
Regular Academic Session  306  17  12  42  3  104  6
Seven Week - First        142   0   0   0  0    0  0
Seven Week - Second       134   0   0   0  0    0  0

df_spring_all:
Meeting Days                -   F   M  MW  T   TR  W
Session                                             
Regular Academic Session  300  17  22  37  1  101  6
Seven Week - First        145   0   0   0  0    0  0
Seven Week - Second       164   0   0   0  0    0  0


In [1009]:
print("15/7 by Days Scheduled & Semester")

# Group by Term (semester), Session (15/7), and Meeting Days
meeting_days_summary = df_all.groupby(["Term", "Session", "Meeting Days"]).size()

# Turn Meeting Days (meeting_days_summary) into Columns for cleaner output; sort
session_term_days = meeting_days_summary.unstack(fill_value=0)

# Sort with semester_order_func: multi-index
# key = lambda x again because this one is multi-index
session_term_days = session_term_days.loc[sorted(session_term_days.index, key=lambda x: semester_order_func(x[0]))]
print(session_term_days)


15/7 by Days Scheduled & Semester
Meeting Days                           -   F  M  MW  R  T  TR  W
Term        Session                                             
Fall 2021   Regular Academic Session  84   1  0  12  3  0  28  2
            Seven Week - First        32   0  0   0  0  0   0  0
            Seven Week - Second       26   0  0   0  0  0   0  0
Spring 2022 Regular Academic Session  77   2  5   5  0  0  20  0
            Seven Week - First        27   0  0   0  0  0   0  0
            Seven Week - Second       38   0  0   0  0  0   0  0
Fall 2022   Regular Academic Session  75   3  3   3  0  0  23  0
            Seven Week - First        30   0  0   0  0  0   0  0
            Seven Week - Second       33   0  0   0  0  0   0  0
Spring 2023 Regular Academic Session  73   2  5   7  0  0  23  1
            Seven Week - First        34   0  0   0  0  0   0  0
            Seven Week - Second       38   0  0   0  0  0   0  0
Fall 2023   Regular Academic Session  78   2  4  10  0  

15/7 by Days Scheduled & Semester -- totaling the two Seven Weeks into 1

In [1010]:
print("15/7 by Days Scheduled & Semester - Combining 7 week sessions")

# Create a new dataframe to avoid directly modifying df_all 
# Last time we did this, it was building off the df_all_online DF, so we're doing it again on df_all
df_all_7week = df_all.copy()

# Group "Seven Week - First" and "Seven Week - Second" into a single "Seven Week Combined"
df_all_7week["Session Grouped"] = df_all_7week["Session"].replace({
    "Seven Week - First": "Seven Week Combined", 
    "Seven Week - Second": "Seven Week Combined"})

# Group by Term, Session Grouped (15/7), and Meeting Days
meeting_days_summary = df_all_7week.groupby(["Term", "Session Grouped", "Meeting Days"]).size()

# Turn Meeting Days into columns for cleaner output
session_term_days = meeting_days_summary.unstack(fill_value=0)

# Sort the semesters semester_order_func 
# MultiIndex: Term + Session - needs key = lambda x again
session_term_days = session_term_days.loc[sorted(session_term_days.index, key=lambda x: semester_order_func(x[0]))]

print(session_term_days)


15/7 by Days Scheduled & Semester - Combining 7 week sessions
Meeting Days                           -   F  M  MW  R  T  TR  W
Term        Session Grouped                                     
Fall 2021   Regular Academic Session  84   1  0  12  3  0  28  2
            Seven Week Combined       58   0  0   0  0  0   0  0
Spring 2022 Regular Academic Session  77   2  5   5  0  0  20  0
            Seven Week Combined       65   0  0   0  0  0   0  0
Fall 2022   Regular Academic Session  75   3  3   3  0  0  23  0
            Seven Week Combined       63   0  0   0  0  0   0  0
Spring 2023 Regular Academic Session  73   2  5   7  0  0  23  1
            Seven Week Combined       72   0  0   0  0  0   0  0
Fall 2023   Regular Academic Session  78   2  4  10  0  0  25  1
            Seven Week Combined       75   0  0   0  0  0   0  0
Spring 2024 Regular Academic Session  76   3  5  10  0  0  25  1
            Seven Week Combined       86   0  0   0  0  0   0  0
Fall 2024   Regular Academic

15/7 by Component

In [1011]:
print("15/7 by Component: Totals")

# Total counts: Session × Component
component_total = df_all.groupby(["Session", "Component"]).size().unstack(fill_value=0)
print(component_total)

print("\n15/7 by Component: Totals by Semester")

# By Term: Term × Session × Component
component_by_term = df_all.groupby(["Term", "Session", "Component"]).size().unstack(fill_value=0)

# Sort - multi-index again (needs key=lambda x)
component_by_term = component_by_term.loc[sorted(component_by_term.index, key=lambda x: semester_order_func(x[0]))]

print(component_by_term)


15/7 by Component: Totals
Component                 Colloquium  Discussion  Lecture
Session                                                  
Regular Academic Session           7         181      786
Seven Week - First                 0           1      286
Seven Week - Second                0           0      298

15/7 by Component: Totals by Semester
Component                             Colloquium  Discussion  Lecture
Term        Session                                                  
Fall 2021   Regular Academic Session           1          27      102
            Seven Week - First                 0           0       32
            Seven Week - Second                0           0       26
Spring 2022 Regular Academic Session           1          21       87
            Seven Week - First                 0           0       27
            Seven Week - Second                0           0       38
Fall 2022   Regular Academic Session           0          20       87
            Sev

<h1> Modality: Days Scheduled </h1>

Days Scheduled - Totals

In [1012]:
# Empty list to store results as DF
days_total_list = []

print("Modality: Days Scheduled Totals")
for name, df in dfs_all.items():
    # Print results
    print(f"\n{name}:")
    print(df["Meeting Days"].value_counts(dropna=False))

    # Store results to list
    counts = df["Meeting Days"].value_counts(dropna=False)
    for meeting_day, count in counts.items():
        days_total_list.append({"Modality": name, "Meeting Days": meeting_day, "Count": count})

# DF for our results
df_days_total = pd.DataFrame(days_total_list)


Modality: Days Scheduled Totals

df_all:
Meeting Days
-     1191
TR     205
MW      79
F       34
M       34
W       12
R        3
T        1
Name: count, dtype: int64

df_fall_all:
Meeting Days
-     582
TR    104
MW     42
F      17
M      12
W       6
R       3
Name: count, dtype: int64

df_spring_all:
Meeting Days
-     609
TR    101
MW     37
M      22
F      17
W       6
T       1
Name: count, dtype: int64


Days Scheduled - Totals by Semester

In [1013]:
#Days Scheduled - Totals by Semester

#Group by Term/Days & run semester_order_func
days_scheduled_semester = df_all.groupby(["Term", "Meeting Days"]).size().unstack(fill_value=0)
days_scheduled_semester = days_scheduled_semester.loc[sorted(days_scheduled_semester.index, key=semester_order_func)]

print("Days Scheduled by Semester")
print(days_scheduled_semester)


Days Scheduled by Semester
Meeting Days    -   F  M  MW  R  T  TR  W
Term                                     
Fall 2021     142   1  0  12  3  0  28  2
Spring 2022   142   2  5   5  0  0  20  0
Fall 2022     138   3  3   3  0  0  23  0
Spring 2023   145   2  5   7  0  0  23  1
Fall 2023     153   2  4  10  0  0  25  1
Spring 2024   162   3  5  10  0  0  25  1
Fall 2024     149  11  5  17  0  0  28  3
Spring 2025   160  10  7  15  0  1  33  4


Days Scheduled by Component

In [1014]:
# Days by Component - Totals

# Group by Component and Meeting Days
days_by_component_total = df_all.groupby(["Component", "Meeting Days"]).size().unstack(fill_value=0)
print("Days Scheduled by Component: Totals")
print(days_by_component_total)

# Group by Term, Component, and Meeting Days
days_by_component_term = df_all.groupby(["Term", "Component", "Meeting Days"]).size().unstack(fill_value=0)
# Sort - multi-index again (key=lambda x)
days_by_component_term = days_by_component_term.loc[sorted(days_by_component_term.index, key=lambda x: semester_order_func(x[0]))]
print("\nDays Scheduled by Component: Totals")
print(days_by_component_term)


Days Scheduled by Component: Totals
Meeting Days     -   F   M  MW  R  T   TR  W
Component                                   
Colloquium       0   0   5   1  0  0    1  0
Discussion     109  33  29   0  3  0    0  8
Lecture       1082   1   0  78  0  1  204  4

Days Scheduled by Component: Totals
Meeting Days              -   F  M  MW  R  T  TR  W
Term        Component                              
Fall 2021   Colloquium    0   0  0   0  0  0   1  0
            Discussion   21   1  0   0  3  0   0  2
            Lecture     121   0  0  12  0  0  27  0
Spring 2022 Colloquium    0   0  1   0  0  0   0  0
            Discussion   15   2  4   0  0  0   0  0
            Lecture     127   0  0   5  0  0  20  0
Fall 2022   Discussion   14   3  3   0  0  0   0  0
            Lecture     124   0  0   3  0  0  23  0
Spring 2023 Colloquium    0   0  1   1  0  0   0  0
            Discussion   15   2  4   0  0  0   0  0
            Lecture     130   0  0   6  0  0  23  1
Fall 2023   Discussion   1

<h1> Modality: Time of Day </h1>

Time of Day - Counts by Start Time

In [1015]:
# Meeting Time Start - Totals
start_time_counts = df_all["Meeting Time Start"].value_counts(dropna=False).sort_index()

print("Counts by Meeting Time Start")
print(start_time_counts)


Counts by Meeting Time Start
Meeting Time Start
00:00:00    1191
08:00:00       7
09:00:00       6
09:30:00      14
10:00:00      14
11:00:00      95
12:00:00       7
12:30:00      78
13:00:00      13
14:00:00      54
15:00:00      13
15:30:00      58
16:00:00       5
17:30:00       4
Name: count, dtype: int64


Time of Day - Counts by Semester

In [1016]:
# Meeting Time Start totals by Term; run semester_order_func
start_times_semester = df_all.groupby(["Term", "Meeting Time Start"]).size().unstack(fill_value=0)
start_times_semester = start_times_semester.loc[sorted(start_times_semester.index, key=semester_order_func)]

print("Meeting Start Time Counts by Semester")
print(start_times_semester)


Meeting Start Time Counts by Semester
Meeting Time Start  00:00:00  08:00:00  09:00:00  09:30:00  10:00:00  \
Term                                                                   
Fall 2021                142         0         0         7         0   
Spring 2022              142         1         0         0         2   
Fall 2022                138         1         1         0         1   
Spring 2023              145         1         0         0         2   
Fall 2023                153         1         0         0         2   
Spring 2024              162         1         0         0         3   
Fall 2024                149         1         2         3         2   
Spring 2025              160         1         3         4         2   

Meeting Time Start  11:00:00  12:00:00  12:30:00  13:00:00  14:00:00  \
Term                                                                   
Fall 2021                 10         1         9         1         8   
Spring 2022              

<h3>Time of Day Function #1: </h3> break up by morning, midday, afternoon, late afternoon

First - Convert "Meeting Time Start & "Meeting Time End" to datetime (HH:MM:SS) format

In [1017]:
# new DF
df_all_times = df_all.copy()

# Columns to convert
time_columns = ["Meeting Time Start", "Meeting Time End"]

import datetime
for col in time_columns:
    if col in df_all_times.columns:
        # Convert to dt.time in the format HH:MM:SS
        df_all_times[col] = pd.to_datetime(df_all_times[col], format="%H:%M:%S", errors="coerce").dt.time

# Check for conversion - still says "object" but at least it's a datetime.time object
#print(df_all_times.dtypes)


Time of Day function #1: assign_time_slot 

morning (through 10am), midday (past 10, through 1pm), afternoon (past 1pm, through 3:30pm), and late afternoon (after 3:30pm)

In [1018]:
# Define function for labeling time slots
def assign_time_slot(t):
    if pd.isnull(t) or t == datetime.time(0, 0):
        return "NoTime"
    elif t <= datetime.time(10, 0):
        return "Morning"
    elif t <= datetime.time(13, 0):
        return "Midday"
    elif t <= datetime.time(15, 30):
        return "Afternoon"
    else:
        return "Late Afternoon"

# Apply function to create new column
df_all_times["Time Slot"] = df_all_times["Meeting Time Start"].apply(assign_time_slot)


Time of Day: Total Counts & Total Counts by Semester

In [1019]:
# Print times in Order
ordered_time = ["Morning", "Midday", "Afternoon", "Late Afternoon", "NoTime"]

# Totals by Time Slot
# Get value counts, then reindex to your order (ordered_time)
time_slot_total = df_all_times["Time Slot"].value_counts(dropna=False).reindex(ordered_time, fill_value=0)

print("Totals by Time Slot")
print(time_slot_total)

# Time Slot Totals by Semester - .groupby(["Term", "Time Slot"])
time_slot_semester = df_all_times.groupby(["Term", "Time Slot"]).size().unstack(fill_value=0)
time_slot_semester = time_slot_semester[ordered_time] # Reorder time slots to ordered_time
time_slot_semester = time_slot_semester.loc[sorted(time_slot_semester.index, key=semester_order_func)] # Semesters in order

print("\nTotals by Time Slot & Semester")
print(time_slot_semester)

Totals by Time Slot
Time Slot
Morning             41
Midday             193
Afternoon          125
Late Afternoon       9
NoTime            1191
Name: count, dtype: int64

Totals by Time Slot & Semester
Time Slot    Morning  Midday  Afternoon  Late Afternoon  NoTime
Term                                                           
Fall 2021          7      21         16               2     142
Spring 2022        3      17         11               1     142
Fall 2022          3      17         11               1     138
Spring 2023        3      17         17               1     145
Fall 2023          3      22         14               3     153
Spring 2024        4      24         16               0     162
Fall 2024          8      36         19               1     149
Spring 2025       10      39         21               0     160


<h3>Time of Day Function #2: 3 times of day</h3>

In [1020]:
# Morning: before 11. Midday: 11 through before 2. Afternoon: 2 & after
def assign_time_slot_v2(t):
    if pd.isnull(t) or t == datetime.time(0, 0):
        return "NoTime"
    elif t < datetime.time(11, 0):
        return "Morning"
    elif t < datetime.time(14, 0):
        return "Midday"
    else:
        return "Afternoon"

# New DF for our v2 Time Function
df_all_times_v2 = df_all_times.copy()

# Apply function to create new column
df_all_times_v2["Time Slot"] = df_all_times_v2["Meeting Time Start"].apply(assign_time_slot_v2)

Time of Day Function #2: Total Counts & Total Counts by Semester

In [1021]:
# Print times in Order
ordered_time_v2 = ["Morning", "Midday", "Afternoon", "NoTime"]

# Totals by Time Slot
# Get value counts, then reindex to your order
time_slot_total_v2 = df_all_times_v2["Time Slot"].value_counts(dropna=False).reindex(ordered_time_v2, fill_value=0)

print("Totals by Time Slot (3 time slots)")
print(time_slot_total_v2)

# Time Slot Totals by Semester f
time_slot_semester_v2 = df_all_times_v2.groupby(["Term", "Time Slot"]).size().unstack(fill_value=0)
time_slot_semester_v2 = time_slot_semester_v2[ordered_time_v2]
time_slot_semester_v2 = time_slot_semester_v2.loc[sorted(time_slot_semester.index, key=semester_order_func)]

print("\nTotals by Time Slot & Semester (3 time slots)")
print(time_slot_semester_v2)

Totals by Time Slot (3 time slots)
Time Slot
Morning        41
Midday        193
Afternoon     134
NoTime       1191
Name: count, dtype: int64

Totals by Time Slot & Semester (3 time slots)
Time Slot    Morning  Midday  Afternoon  NoTime
Term                                           
Fall 2021          7      21         18     142
Spring 2022        3      17         12     142
Fall 2022          3      17         12     138
Spring 2023        3      17         18     145
Fall 2023          3      22         17     153
Spring 2024        4      24         16     162
Fall 2024          8      36         20     149
Spring 2025       10      39         21     160


<h3> Time of Day: By Modalities

Time of Day: In-Person/Online 

Run Time of Day function on In-Person/Online DFs: df_all_online & df_all_inperson

In [1022]:
# New DFs for Online/In-Person for Time Slot Analysis
df_times_online = df_all_online.copy()
df_times_inperson = df_all_inperson.copy()

# Convert the time columns to be datetime
for df in [df_times_online, df_times_inperson]:
    df["Meeting Time Start"] = pd.to_datetime(df["Meeting Time Start"], 
                                              format="%H:%M:%S", errors="coerce").dt.time

# Apply the Time Slot function
df_times_online["Time Slot"] = df_times_online["Meeting Time Start"].apply(assign_time_slot)
df_times_inperson["Time Slot"] = df_times_inperson["Meeting Time Start"].apply(assign_time_slot)

dfs_times_online_inperson = {
    "df_times_online": df_times_online,
    "df_times_inperson": df_times_inperson
}

Time of Day by In-Person/Online

In [1023]:
print("Time Slot Totals by Online vs In-Person\n")

for df_name, df in dfs_times_online_inperson.items():
    print(f"{df_name}:")
    
    time_modality_total = df["Time Slot"].value_counts(dropna=False)
    time_modality_total = time_modality_total.reindex(ordered_time, fill_value=0)
    
    print(time_modality_total)
    print("\n") 


Time Slot Totals by Online vs In-Person

df_times_online:
Time Slot
Morning              0
Midday              17
Afternoon           16
Late Afternoon       1
NoTime            1186
Name: count, dtype: int64


df_times_inperson:
Time Slot
Morning            41
Midday            176
Afternoon         109
Late Afternoon      8
NoTime              5
Name: count, dtype: int64




Time of Day by In-Person/Online & Semester

In [1024]:
print("Time Slot Totals by Online vs In-Person & Semester\n")

for df_name, df in dfs_times_online_inperson.items():
    # Group by Term and Time Slot
    time_slot_by_term = df.groupby(["Term", "Time Slot"]).size().unstack(fill_value=0)
    # Reorder time slots to ordered_time
    time_slot_by_term = time_slot_by_term[ordered_time] if set(ordered_time).issubset(time_slot_by_term.columns) else time_slot_by_term
    # Sort semesters with semester_order_func
    time_slot_by_term = time_slot_by_term.loc[sorted(time_slot_by_term.index, key=semester_order_func)]

    print(f"{df_name}:")
    print(time_slot_by_term)
    print("\n")  


Time Slot Totals by Online vs In-Person & Semester

df_times_online:
Time Slot    Afternoon  Late Afternoon  Midday  NoTime
Term                                                  
Fall 2021            6               1       6     142
Spring 2022          0               0       1     142
Fall 2022            0               0       0     138
Spring 2023          2               0       2     145
Fall 2023            2               0       2     153
Spring 2024          2               0       2     162
Fall 2024            2               0       2     147
Spring 2025          2               0       2     157


df_times_inperson:
Time Slot    Morning  Midday  Afternoon  Late Afternoon  NoTime
Term                                                           
Fall 2021          7      15         10               1       0
Spring 2022        3      16         11               1       0
Fall 2022          3      17         11               1       0
Spring 2023        3      15         15 

Time of Day: 15-week/7-week

Time of Slots: 15/7 totals

In [1025]:
time_session_totals = df_all_times.groupby(["Time Slot", "Session"]).size().unstack(fill_value=0) # group
time_session_totals = time_session_totals.reindex(ordered_time) # order time slots ordered_time

print("Time of Day by 15/7 Session Type")
print(time_session_totals)

Time of Day by 15/7 Session Type
Session         Regular Academic Session  Seven Week - First  \
Time Slot                                                      
Morning                               41                   0   
Midday                               193                   0   
Afternoon                            125                   0   
Late Afternoon                         9                   0   
NoTime                               606                 287   

Session         Seven Week - Second  
Time Slot                            
Morning                           0  
Midday                            0  
Afternoon                         0  
Late Afternoon                    0  
NoTime                          298  


Time Slots by 15/7 and Semester

In [1026]:
# .groupby(["Term", "Time Slot", "Session"])
time_session_term = df_all_times.groupby(["Term", "Time Slot", "Session"]).size().unstack(fill_value=0)

# Reorder: 
time_session_term = time_session_term.reorder_levels(["Time Slot", "Term"])     # Reorder: Time Slot first, then Term
time_session_term = time_session_term.reindex(ordered_time, level="Time Slot")  # Reorder: time slots by ordered_time order
# Reorder semesters within each Time Slot (multi-index) using semester_order_func on Term
time_session_term = time_session_term.loc[sorted(time_session_term.index,
                                                 key=lambda x: (ordered_time.index(x[0]), semester_order_func(x[1])))]

print("Time Slot Totals by 15/7 Week & Semester")
print(time_session_term)


Time Slot Totals by 15/7 Week & Semester
Session                     Regular Academic Session  Seven Week - First  \
Time Slot      Term                                                        
Morning        Fall 2021                           7                   0   
               Spring 2022                         3                   0   
               Fall 2022                           3                   0   
               Spring 2023                         3                   0   
               Fall 2023                           3                   0   
               Spring 2024                         4                   0   
               Fall 2024                           8                   0   
               Spring 2025                        10                   0   
Midday         Fall 2021                          21                   0   
               Spring 2022                        17                   0   
               Fall 2022                       

Time of Day: By Days Scheduled

In [1027]:
# Time Slots by Days
time_slot_days = df_all_times.groupby(["Time Slot", "Meeting Days"]).size().unstack(fill_value=0)
time_slot_days = time_slot_days.reindex(ordered_time)

print("Time Slot Totals by Days Scheduled")
print(time_slot_days)

Time Slot Totals by Days Scheduled
Meeting Days       -   F   M  MW  R  T   TR  W
Time Slot                                     
Morning            0   6  21   3  0  0   11  0
Midday             0  26   7  43  1  0  111  5
Afternoon          0   2   6  30  1  1   78  7
Late Afternoon     0   0   0   3  1  0    5  0
NoTime          1191   0   0   0  0  0    0  0


Time of Day by Days Scheduled & Semester

In [1028]:
# Group by Term (semester), Meeting Days, and Time Slot
time_day_semester = df_all_times.groupby(["Term", "Meeting Days", "Time Slot"]).size().unstack(fill_value=0)
# Reorder Time Slots by ordered_time
time_day_semester = time_day_semester[ordered_time] if set(ordered_time).issubset(time_day_semester.columns) else time_day_semester
# Reorder Semesters with func semester_order_func (multi-index)
time_day_semester = time_day_semester.loc[sorted(time_day_semester.index, key=lambda x: semester_order_func(x[0]))]

print("Time of Day by Days & Semester")
print(time_day_semester)

Time of Day by Days & Semester
Time Slot                 Morning  Midday  Afternoon  Late Afternoon  NoTime
Term        Meeting Days                                                    
Fall 2021   -                   0       0          0               0     142
            F                   0       1          0               0       0
            MW                  0       7          5               0       0
            R                   0       1          1               1       0
            TR                  7      12          8               1       0
            W                   0       0          2               0       0
Spring 2022 -                   0       0          0               0     142
            F                   0       2          0               0       0
            M                   3       1          1               0       0
            MW                  0       3          2               0       0
            TR                  0      11    

Time of Day by Component

In [1029]:
# Time of Day by Component - Totals
tod_component = df_all_times.groupby(["Time Slot", "Component"]).size().unstack(fill_value=0)
tod_component = tod_component.reindex(ordered_time)

print("Time of Day by Component")
print(tod_component)

# Time of Day by Component & Semester
tod_component_semester = df_all_times.groupby(["Term", "Time Slot", "Component"]).size().unstack(fill_value=0)
# Reorder: 1) print Time Slot first, 2) Time Slot by ordered_time, 3) Term by semester_order_func
tod_component_semester = tod_component_semester.reorder_levels(["Time Slot", "Term"]).sort_index(level="Time Slot")
tod_component_semester = tod_component_semester.reindex(ordered_time, level="Time Slot")
tod_component_semester = tod_component_semester.loc[sorted(tod_component_semester.index,
                                                           key=lambda x: (ordered_time.index(x[0]), semester_order_func(x[1])))]

print("\nTime of Day by Component & Semester")
print(tod_component_semester)


Time of Day by Component
Component       Colloquium  Discussion  Lecture
Time Slot                                      
Morning                  0          26       15
Midday                   0          39      154
Afternoon                7           7      111
Late Afternoon           0           1        8
NoTime                   0         109     1082

Time of Day by Component & Semester
Component                   Colloquium  Discussion  Lecture
Time Slot      Term                                        
Morning        Fall 2021             0           0        7
               Spring 2022           0           3        0
               Fall 2022             0           3        0
               Spring 2023           0           3        0
               Fall 2023             0           3        0
               Spring 2024           0           3        1
               Fall 2024             0           5        3
               Spring 2025           0           6        4
Mi

Save relevent DFs for Modality Trend Analysis

In [1031]:
Modality_Trend_path = os.path.join(os.getcwd(), "Modality Trends")
os.makedirs(Modality_Trend_path, exist_ok=True) 

# List of DFs to save
dfs_to_save = [
    (df_days_total, "df_days_total.xlsx"),
    (days_scheduled_semester, "days_scheduled_semester.xlsx"),

    (days_by_component_total, "days_by_component_total.xlsx"),
    (days_by_component_term, "days_by_component_term.xlsx"),

    (start_time_counts, "start_time_counts.xlsx"),
    (start_times_semester, "start_times_semester.xlsx"),

    (time_slot_total, "time_slot_total.xlsx"),
    (time_slot_semester, "time_slot_semester.xlsx"),

    (time_slot_total_v2, "time_slot_total_v2.xlsx"),
    (time_slot_semester_v2, "time_slot_semester_v2.xlsx"),

    (time_slot_days, "time_slot_days.xlsx"),
    (time_day_semester, "time_day_semester.xlsx"),

    (tod_component, "tod_component.xlsx"),
    (tod_component_semester, "tod_component_semester.xlsx")]

# Save each to Excel
for df, filename in dfs_to_save:
    file_path = os.path.join(Modality_Trend_path, filename)
    df.to_excel(file_path)

Save our final df_all for CBI analysis

In [None]:
# Save final df_all to our next CBI folder
CBI_folder_path = os.path.join(os.getcwd(), "CBI Analysis")
os.makedirs(CBI_folder_path, exist_ok=True) 
df_all.to_excel(os.path.join(CBI_folder_path, "df_all.xlsx"), index=False)