In [839]:
%reset -f

Import Data Sources, Make DataFrames

In [840]:
# Import packages
import os
import pandas as pd

# Set base path for reproducibility
# Save initial data to a folder "Capstone Project Data_Cleaned" in the current working directory
base_path = os.path.join(os.getcwd(), "Capstone Project Data_Cleaned")

# Create DFs - Undergrad Only Data
df_all_inperson = pd.read_excel(os.path.join(base_path, "df_all_inperson.xlsx"))
df_all_online = pd.read_excel(os.path.join(base_path, "df_all_online.xlsx"))
df_all = pd.read_excel(os.path.join(base_path, "df_all.xlsx"))

df_fall_all_inperson = pd.read_excel(os.path.join(base_path, "df_fall_all_inperson.xlsx"))
df_fall_all_online = pd.read_excel(os.path.join(base_path, "df_fall_all_online.xlsx"))
df_fall_all = pd.read_excel(os.path.join(base_path, "df_fall_all.xlsx"))

df_spring_all_inperson = pd.read_excel(os.path.join(base_path, "df_spring_all_inperson.xlsx"))
df_spring_all_online = pd.read_excel(os.path.join(base_path, "df_spring_all_online.xlsx"))
df_spring_all = pd.read_excel(os.path.join(base_path, "df_spring_all.xlsx"))

Create Dictionaries

In [841]:
# Create Dictionaries 

# All Dictionary
dfs_all = {
    "df_all": df_all,
    "df_fall_all": df_fall_all,
    "df_spring_all": df_spring_all
}

# In-Person Dictionary
dfs_inperson = {
    "df_all_inperson": df_all_inperson,
    "df_fall_all_inperson": df_fall_all_inperson,
    "df_spring_all_inperson": df_spring_all_inperson
}

# Online Dictionary
dfs_online = {
    "df_all_online": df_all_online,
    "df_fall_all_online": df_fall_all_online,
    "df_spring_all_online": df_spring_all_online
}

dfs_everything = {**dfs_all, **dfs_inperson, **dfs_online}


Explore DF size by Dictionary

In [842]:
print("DF sizes in dfs_all:")
for name, df in dfs_all.items():
    print(f"   {name}: {df.shape[0]} rows, {df.shape[1]} columns") #df.shape[] for #rows, #cols

print("\nDF sizes in dfs_inperson:")
for name, df in dfs_inperson.items():
    print(f"   {name}: {df.shape[0]} rows, {df.shape[1]} columns")

print("\nDF sizes in dfs_online:")
for name, df in dfs_online.items():
    print(f"   {name}: {df.shape[0]} rows, {df.shape[1]} columns")


DF sizes in dfs_all:
   df_all: 1559 rows, 27 columns
   df_fall_all: 766 rows, 27 columns
   df_spring_all: 793 rows, 27 columns

DF sizes in dfs_inperson:
   df_all_inperson: 339 rows, 27 columns
   df_fall_all_inperson: 165 rows, 27 columns
   df_spring_all_inperson: 174 rows, 27 columns

DF sizes in dfs_online:
   df_all_online: 1220 rows, 27 columns
   df_fall_all_online: 601 rows, 27 columns
   df_spring_all_online: 619 rows, 27 columns


<h1> Modality: In-Person vs Online </h1>

In-Person vs Online Totals

In [843]:
print("Online Classes Count:")
for name, df in dfs_online.items():
    print(f"   {name}: has {df.shape[0]} online classes")

print("\nIn-Person Classes Count:")
for name, df in dfs_inperson.items():
    print(f"   {name}: has {df.shape[0]} in-person classes")



Online Classes Count:
   df_all_online: has 1220 online classes
   df_fall_all_online: has 601 online classes
   df_spring_all_online: has 619 online classes

In-Person Classes Count:
   df_all_inperson: has 339 in-person classes
   df_fall_all_inperson: has 165 in-person classes
   df_spring_all_inperson: has 174 in-person classes


Online/In-Person Dictionary

In [844]:
dfs_all_inperson_online = {
    "df_all_inperson": df_all_inperson,
    "df_all_online": df_all_online
}

<h3> Function for sorting results in academic year order: semester_order_func </h3>
Fall 2021, Spring 2022, Fall 2022, Spring 2023, Fall 2023, Spring 2024, Fall 2024, Spring 2025

In [845]:
# Strategy: label Fall/Spring starting with the same year, to count them as the same academic year
# e.g. Fall 2021 & Spring 2022 will turn into 2021.0 and 2021.5, to group as the 2021 academic year

def semester_order_func(sem):
    season, year = sem.split()
    year = int(year)
    if season == "Fall":
        return year + 0.0           # Fall 2021 turns into 2021.0; Fall 2021 into 2022.0; etc
    elif season == "Spring":
        return (year - 1) + 0.5     # Spring 2022 - 1 + 0.5 = 2021.5 so that it'll follow Fall 2021.0


Online/In-Person by Semester

In [846]:
#Totals by Session
for df_semester, df in dfs_all_inperson_online.items():
    # Separate inperson/online within our new dictionary
    modality = "In Person Classes" if "inperson" in df_semester else "Online Classes"

    # Group by Term (semester) & count
    term_counts = df.groupby("Term").size()

    # Function: sort by semester order
    # Using key=lambda x: for each tuples (term + count), sort by applying function to x[0] term first
    sorted_term_counts = sorted(term_counts.items(), key=lambda x: semester_order_func(x[0]))

    print(f"\n{df_semester}: {modality}")
    for term, count in sorted_term_counts:
        print(f"   {term} has {count} classes")




df_all_inperson: In Person Classes
   Fall 2021 has 33 classes
   Spring 2022 has 31 classes
   Fall 2022 has 32 classes
   Spring 2023 has 34 classes
   Fall 2023 has 38 classes
   Spring 2024 has 40 classes
   Fall 2024 has 62 classes
   Spring 2025 has 69 classes

df_all_online: Online Classes
   Fall 2021 has 155 classes
   Spring 2022 has 143 classes
   Fall 2022 has 138 classes
   Spring 2023 has 149 classes
   Fall 2023 has 157 classes
   Spring 2024 has 166 classes
   Fall 2024 has 151 classes
   Spring 2025 has 161 classes


Online/Live Online by Semester

In [847]:
# Get unique Term values from DF df_all_online
terms = df_all_online["Term"].unique()

# For loop with our sorted function semester_order_func
for df_semester in sorted(terms, key=semester_order_func):
    df_term = df_all_online[df_all_online["Term"] == df_semester]
    
    total = df_term.shape[0]
    fully_online = (df_term["Facility"] == "Online").sum()
    live_online = (df_term["Facility"] == "Live Online").sum()
    
    print(f"{df_semester}: {total} total classes, {fully_online} Fully Online, and {live_online} Live Online")


Fall 2021: 155 total classes, 142 Fully Online, and 13 Live Online
Spring 2022: 143 total classes, 142 Fully Online, and 1 Live Online
Fall 2022: 138 total classes, 138 Fully Online, and 0 Live Online
Spring 2023: 149 total classes, 145 Fully Online, and 4 Live Online
Fall 2023: 157 total classes, 153 Fully Online, and 4 Live Online
Spring 2024: 166 total classes, 162 Fully Online, and 4 Live Online
Fall 2024: 151 total classes, 147 Fully Online, and 4 Live Online
Spring 2025: 161 total classes, 157 Fully Online, and 4 Live Online


<h3>In-Person/Online by Modalities </h3>

Online/In-Person: Session (15 week vs 7)

In [848]:
print("Online vs In-Person by Session Type (15/7): Totals")

for name, df in dfs_all_inperson_online.items():
    print(f"\n{name}: Total by Session")
    session_counts = df["Session"].value_counts(dropna=False)
    print(session_counts)


Online vs In-Person by Session Type (15/7): Totals

df_all_inperson: Total by Session
Session
Regular Academic Session    339
Name: count, dtype: int64

df_all_online: Total by Session
Session
Regular Academic Session    635
Seven Week - Second         298
Seven Week - First          287
Name: count, dtype: int64


In [849]:
print("Online vs In-Person by Session Type (15/7) & Semester")

for df_semester, df in dfs_all_inperson_online.items():
    print(f"\n{df_semester}: Session by Semester (Term)")
   
    # Group by "Term" and "Session Grouped"; .size() count the rows; unstack() turn into a DF
    session_by_term = df.groupby(["Term", "Session"]).size().unstack(fill_value=0)
   
    # Order semesters with function semester_order_func; .loc[] to return a DF
    session_by_term = session_by_term.loc[sorted(session_by_term.index, key=semester_order_func)]
    print(session_by_term)

Online vs In-Person by Session Type (15/7) & Semester

df_all_inperson: Session by Semester (Term)
Session      Regular Academic Session
Term                                 
Fall 2021                          33
Spring 2022                        31
Fall 2022                          32
Spring 2023                        34
Fall 2023                          38
Spring 2024                        40
Fall 2024                          62
Spring 2025                        69

df_all_online: Session by Semester (Term)
Session      Regular Academic Session  Seven Week - First  Seven Week - Second
Term                                                                          
Fall 2021                          97                  32                   26
Spring 2022                        78                  27                   38
Fall 2022                          75                  30                   33
Spring 2023                        77                  34                   38
Fall

7-week: combine 7W1 and 7W2 into 1 term

In [850]:
# New DF df_online_7week, to not confuse with df_all_online
df_online_7week = df_all_online.copy()

# Group "Seven Week - First" and "Seven Week - Second" into a single "Seven Week Combined"
df_online_7week["Session Grouped"] = df_online_7week["Session"].replace({
    "Seven Week - First": "Seven Week Combined",
    "Seven Week - Second": "Seven Week Combined"})

print("Online Classes by 15/7 Week, Totalling both 7-Week Sessions")

# Total counts by session type (15/7)
print("\nTotals in Dataset:")
session_counts = df_online_7week["Session Grouped"].value_counts()
print(session_counts)

# Totals by Session type & by Term
print("\nSession by Semester (Term):")
session_by_term = df_online_7week.groupby(["Term", "Session Grouped"]).size().unstack(fill_value=0)
session_by_term = session_by_term.loc[sorted(session_by_term.index, key=semester_order_func)] # Run semester_order_func to sort
print(session_by_term)


Online Classes by 15/7 Week, Totalling both 7-Week Sessions

Totals in Dataset:
Session Grouped
Regular Academic Session    635
Seven Week Combined         585
Name: count, dtype: int64

Session by Semester (Term):
Session Grouped  Regular Academic Session  Seven Week Combined
Term                                                          
Fall 2021                              97                   58
Spring 2022                            78                   65
Fall 2022                              75                   63
Spring 2023                            77                   72
Fall 2023                              82                   75
Spring 2024                            80                   86
Fall 2024                              71                   80
Spring 2025                            75                   86


Online/In-Person by Meeting Days

In [851]:
print("Online vs In-Person by Meeting Days")

for name, df in dfs_all_inperson_online.items():
    # Print Online/In-Person Totals by Meeting Days
    print(f"\n{name}: Total by Meeting Days")
    meeting_days_total = df["Meeting Days"].value_counts(dropna=False)
    print(meeting_days_total)
    
    # Print Online/In-Person Totals by Meeting Days & by Semester (Term) 
    print(f"\n{name}: Meeting Days by Semester (Term)")
    # Group by Term & Meeting Days; .size() count the rows; unstack() turn into a DF
    meeting_days_by_term = df.groupby(["Term", "Meeting Days"]).size().unstack(fill_value=0)
    meeting_days_by_term = meeting_days_by_term.loc[sorted(meeting_days_by_term.index, key=semester_order_func)]
    print(meeting_days_by_term)


Online vs In-Person by Meeting Days

df_all_inperson: Total by Meeting Days
Meeting Days
TR    181
MW     75
M      34
F      33
W      10
-       5
T       1
Name: count, dtype: int64

df_all_inperson: Meeting Days by Semester (Term)
Meeting Days  -   F  M  MW  T  TR  W
Term                                
Fall 2021     0   0  0   8  0  25  0
Spring 2022   0   2  5   5  0  19  0
Fall 2022     0   3  3   3  0  23  0
Spring 2023   0   2  5   7  0  19  1
Fall 2023     0   2  4  10  0  21  1
Spring 2024   0   3  5  10  0  21  1
Fall 2024     2  11  5  17  0  24  3
Spring 2025   3  10  7  15  1  29  4

df_all_online: Total by Meeting Days
Meeting Days
-     1186
TR      24
MW       4
R        3
W        2
F        1
Name: count, dtype: int64

df_all_online: Meeting Days by Semester (Term)
Meeting Days    -  F  MW  R  TR  W
Term                              
Fall 2021     142  1   4  3   3  2
Spring 2022   142  0   0  0   1  0
Fall 2022     138  0   0  0   0  0
Spring 2023   145  0   0  0  

Online/Live Online by Days

Split df_all_online into Online/Live Online

In [852]:
df_all_online_fully = df_all_online[df_all_online["Facility"] == "Online"].copy()
df_all_online_live = df_all_online[df_all_online["Facility"] == "Live Online"].copy()

dfs_online_vs_live = {
    "df_all_online_fully": df_all_online_fully,
    "df_all_online_live": df_all_online_live
}

In [853]:
print("Online vs Live Online - Meeting Days")

for name, df in dfs_online_vs_live.items():
    # Online vs Live Online Totals by Meeting Days
    print(f"\n{name}: Total by Meeting Days")
    meeting_days_total = df["Meeting Days"].value_counts(dropna=False)
    print(meeting_days_total)

    # Online vs Live Online Totals by Meeting Days & by Semester (Term)
    print(f"\n{name}: Meeting Days by Semester (Term)")
    meeting_days_by_term = df.groupby(["Term", "Meeting Days"]).size().unstack(fill_value=0)
    meeting_days_by_term = meeting_days_by_term.loc[sorted(meeting_days_by_term.index, key=semester_order_func)]
    print(meeting_days_by_term)


Online vs Live Online - Meeting Days

df_all_online_fully: Total by Meeting Days
Meeting Days
-    1186
Name: count, dtype: int64

df_all_online_fully: Meeting Days by Semester (Term)
Meeting Days    -
Term             
Fall 2021     142
Spring 2022   142
Fall 2022     138
Spring 2023   145
Fall 2023     153
Spring 2024   162
Fall 2024     147
Spring 2025   157

df_all_online_live: Total by Meeting Days
Meeting Days
TR    24
MW     4
R      3
W      2
F      1
Name: count, dtype: int64

df_all_online_live: Meeting Days by Semester (Term)
Meeting Days  F  MW  R  TR  W
Term                         
Fall 2021     1   4  3   3  2
Spring 2022   0   0  0   1  0
Spring 2023   0   0  0   4  0
Fall 2023     0   0  0   4  0
Spring 2024   0   0  0   4  0
Fall 2024     0   0  0   4  0
Spring 2025   0   0  0   4  0


Online/In-Person by Component

In [854]:
print("Online/In-Person by Component")

for name, df in dfs_all_inperson_online.items():
    # Print Online/In-Person Totals by Component
    print(f"\n{name}: Total by Component")
    component_total = df["Component"].value_counts(dropna=False)
    print(component_total)

    # Print Online/In-Person Totals by Component & by Semester (Term)
    print(f"\n{name}: Component by Semester (Term)")
    component_by_term = df.groupby(["Term", "Component"]).size().unstack(fill_value=0)
    component_by_term = component_by_term.loc[sorted(component_by_term.index, key=semester_order_func)]
    print(component_by_term)


Online/In-Person by Component

df_all_inperson: Total by Component
Component
Lecture       266
Discussion     67
Colloquium      6
Name: count, dtype: int64

df_all_inperson: Component by Semester (Term)
Component    Colloquium  Discussion  Lecture
Term                                        
Fall 2021             0           0       33
Spring 2022           1           6       24
Fall 2022             0           6       26
Spring 2023           2           6       26
Fall 2023             0           6       32
Spring 2024           1           6       33
Fall 2024             1          18       43
Spring 2025           1          19       49

df_all_online: Total by Component
Component
Lecture       1104
Discussion     115
Colloquium       1
Name: count, dtype: int64

df_all_online: Component by Semester (Term)
Component    Colloquium  Discussion  Lecture
Term                                        
Fall 2021             1          27      127
Spring 2022           0          15   

<h1> Modality: 15-week vs 7-week </h1>

15/7 totals

In [855]:
print("Modality Breakdown by Session Type (15-week vs 7-week)")

for name, df in dfs_all.items():
    print(f"\n{name}: Courses by Session")
    # 15-week vs. 7-week Totals
    session_counts = df["Session"].value_counts(dropna=False)
    print(session_counts)


Modality Breakdown by Session Type (15-week vs 7-week)

df_all: Courses by Session
Session
Regular Academic Session    974
Seven Week - Second         298
Seven Week - First          287
Name: count, dtype: int64

df_fall_all: Courses by Session
Session
Regular Academic Session    490
Seven Week - First          142
Seven Week - Second         134
Name: count, dtype: int64

df_spring_all: Courses by Session
Session
Regular Academic Session    484
Seven Week - Second         164
Seven Week - First          145
Name: count, dtype: int64


15/7 by Term

In [856]:
print("15/7 Course Counts by Term")

# Group by Session and Term; Sort
session_by_term = df_all.groupby(["Term", "Session"]).size().unstack(fill_value=0)
# Run semester_order_func to sort
session_by_term = session_by_term.loc[sorted(session_by_term.index, key=semester_order_func)]
print(session_by_term)


15/7 Course Counts by Term
Session      Regular Academic Session  Seven Week - First  Seven Week - Second
Term                                                                          
Fall 2021                         130                  32                   26
Spring 2022                       109                  27                   38
Fall 2022                         107                  30                   33
Spring 2023                       111                  34                   38
Fall 2023                         120                  38                   37
Spring 2024                       120                  43                   43
Fall 2024                         133                  42                   38
Spring 2025                       144                  41                   45


<h3> 15/7 by Modalities </h3>

15/7 by Days Scheduled

In [857]:
print("15/7 by Days Scheduled, Totals:")

for name, df in dfs_all.items():
    print(f"\n{name}:")
    summary = df.groupby(["Session", "Meeting Days"]).size().unstack(fill_value=0)
    print(summary)


15/7 by Days Scheduled, Totals:

df_all:
Meeting Days                -   F   M  MW  R  T   TR   W
Session                                                 
Regular Academic Session  606  34  34  79  3  1  205  12
Seven Week - First        287   0   0   0  0  0    0   0
Seven Week - Second       298   0   0   0  0  0    0   0

df_fall_all:
Meeting Days                -   F   M  MW  R   TR  W
Session                                             
Regular Academic Session  306  17  12  42  3  104  6
Seven Week - First        142   0   0   0  0    0  0
Seven Week - Second       134   0   0   0  0    0  0

df_spring_all:
Meeting Days                -   F   M  MW  T   TR  W
Session                                             
Regular Academic Session  300  17  22  37  1  101  6
Seven Week - First        145   0   0   0  0    0  0
Seven Week - Second       164   0   0   0  0    0  0


In [858]:
print("15/7 by Days Scheduled & Semester")

# Group by Term (semester), Session (15/7), and Meeting Days
meeting_days_summary = df_all.groupby(["Term", "Session", "Meeting Days"]).size()

# Turn Meeting Days (meeting_days_summary) into Columns for cleaner output; sort
session_term_days = meeting_days_summary.unstack(fill_value=0)

# Sort with semester_order_func: multi-index
# key = lambda x again because this one is multi-index
session_term_days = session_term_days.loc[sorted(session_term_days.index, key=lambda x: semester_order_func(x[0]))]
print(session_term_days)


15/7 by Days Scheduled & Semester
Meeting Days                           -   F  M  MW  R  T  TR  W
Term        Session                                             
Fall 2021   Regular Academic Session  84   1  0  12  3  0  28  2
            Seven Week - First        32   0  0   0  0  0   0  0
            Seven Week - Second       26   0  0   0  0  0   0  0
Spring 2022 Regular Academic Session  77   2  5   5  0  0  20  0
            Seven Week - First        27   0  0   0  0  0   0  0
            Seven Week - Second       38   0  0   0  0  0   0  0
Fall 2022   Regular Academic Session  75   3  3   3  0  0  23  0
            Seven Week - First        30   0  0   0  0  0   0  0
            Seven Week - Second       33   0  0   0  0  0   0  0
Spring 2023 Regular Academic Session  73   2  5   7  0  0  23  1
            Seven Week - First        34   0  0   0  0  0   0  0
            Seven Week - Second       38   0  0   0  0  0   0  0
Fall 2023   Regular Academic Session  78   2  4  10  0  

15/7 by Days Scheduled & Semester -- totaling the two Seven Weeks into 1

In [859]:
print("15/7 by Days Scheduled & Semester - Combining 7 week sessions")

# Create a new dataframe to avoid directly modifying df_all 
# Last time we did this, it was building off the df_all_online DF, so we're doing it again on df_all
df_all_7week = df_all.copy()

# Group "Seven Week - First" and "Seven Week - Second" into a single "Seven Week Combined"
df_all_7week["Session Grouped"] = df_all_7week["Session"].replace({
    "Seven Week - First": "Seven Week Combined", 
    "Seven Week - Second": "Seven Week Combined"})

# Group by Term, Session Grouped (15/7), and Meeting Days
meeting_days_summary = df_all_7week.groupby(["Term", "Session Grouped", "Meeting Days"]).size()

# Turn Meeting Days into columns for cleaner output
session_term_days = meeting_days_summary.unstack(fill_value=0)

# Sort the semesters semester_order_func 
# MultiIndex: Term + Session - needs key = lambda x again
session_term_days = session_term_days.loc[sorted(session_term_days.index, key=lambda x: semester_order_func(x[0]))]

print(session_term_days)


15/7 by Days Scheduled & Semester - Combining 7 week sessions
Meeting Days                           -   F  M  MW  R  T  TR  W
Term        Session Grouped                                     
Fall 2021   Regular Academic Session  84   1  0  12  3  0  28  2
            Seven Week Combined       58   0  0   0  0  0   0  0
Spring 2022 Regular Academic Session  77   2  5   5  0  0  20  0
            Seven Week Combined       65   0  0   0  0  0   0  0
Fall 2022   Regular Academic Session  75   3  3   3  0  0  23  0
            Seven Week Combined       63   0  0   0  0  0   0  0
Spring 2023 Regular Academic Session  73   2  5   7  0  0  23  1
            Seven Week Combined       72   0  0   0  0  0   0  0
Fall 2023   Regular Academic Session  78   2  4  10  0  0  25  1
            Seven Week Combined       75   0  0   0  0  0   0  0
Spring 2024 Regular Academic Session  76   3  5  10  0  0  25  1
            Seven Week Combined       86   0  0   0  0  0   0  0
Fall 2024   Regular Academic

15/7 by Component

In [860]:
print("15/7 by Component: Totals")

# Total counts: Session × Component
component_total = df_all.groupby(["Session", "Component"]).size().unstack(fill_value=0)
print(component_total)

print("\n15/7 by Component: Totals by Semester")

# By Term: Term × Session × Component
component_by_term = df_all.groupby(["Term", "Session", "Component"]).size().unstack(fill_value=0)

# Sort - multi-index again (needs key=lambda x)
component_by_term = component_by_term.loc[sorted(component_by_term.index, key=lambda x: semester_order_func(x[0]))]

print(component_by_term)


15/7 by Component: Totals
Component                 Colloquium  Discussion  Lecture
Session                                                  
Regular Academic Session           7         181      786
Seven Week - First                 0           1      286
Seven Week - Second                0           0      298

15/7 by Component: Totals by Semester
Component                             Colloquium  Discussion  Lecture
Term        Session                                                  
Fall 2021   Regular Academic Session           1          27      102
            Seven Week - First                 0           0       32
            Seven Week - Second                0           0       26
Spring 2022 Regular Academic Session           1          21       87
            Seven Week - First                 0           0       27
            Seven Week - Second                0           0       38
Fall 2022   Regular Academic Session           0          20       87
            Sev

<h1> Modality: Days Scheduled </h1>

Days Scheduled - Totals

In [861]:
print("Modality: Days Scheduled Totals")

for name, df in dfs_all.items():
    print(f"\n{name}:")
    print(df["Meeting Days"].value_counts(dropna=False))


Modality: Days Scheduled Totals

df_all:
Meeting Days
-     1191
TR     205
MW      79
F       34
M       34
W       12
R        3
T        1
Name: count, dtype: int64

df_fall_all:
Meeting Days
-     582
TR    104
MW     42
F      17
M      12
W       6
R       3
Name: count, dtype: int64

df_spring_all:
Meeting Days
-     609
TR    101
MW     37
M      22
F      17
W       6
T       1
Name: count, dtype: int64


Days Scheduled - Totals by Semester

In [862]:
#Days Scheduled - Totals by Semester

#Group by Term/Days & run semester_order_func
days_scheduled_semester = df_all.groupby(["Term", "Meeting Days"]).size().unstack(fill_value=0)
days_scheduled_semester = days_scheduled_semester.loc[sorted(days_scheduled_semester.index, key=semester_order_func)]

print("Days Scheduled by Semester")
print(days_scheduled_semester)


Days Scheduled by Semester
Meeting Days    -   F  M  MW  R  T  TR  W
Term                                     
Fall 2021     142   1  0  12  3  0  28  2
Spring 2022   142   2  5   5  0  0  20  0
Fall 2022     138   3  3   3  0  0  23  0
Spring 2023   145   2  5   7  0  0  23  1
Fall 2023     153   2  4  10  0  0  25  1
Spring 2024   162   3  5  10  0  0  25  1
Fall 2024     149  11  5  17  0  0  28  3
Spring 2025   160  10  7  15  0  1  33  4


Days Scheduled by Component

In [863]:
# Days by Component - Totals

# Group by Component and Meeting Days
days_by_component_total = df_all.groupby(["Component", "Meeting Days"]).size().unstack(fill_value=0)
print("Days Scheduled by Component: Totals")
print(days_by_component_total)

# Group by Term, Component, and Meeting Days
days_by_component_term = df_all.groupby(["Term", "Component", "Meeting Days"]).size().unstack(fill_value=0)
# Sort - multi-index again (key=lambda x)
days_by_component_term = days_by_component_term.loc[sorted(days_by_component_term.index, key=lambda x: semester_order_func(x[0]))]
print("\nDays Scheduled by Component: Totals")
print(days_by_component_term)


Days Scheduled by Component: Totals
Meeting Days     -   F   M  MW  R  T   TR  W
Component                                   
Colloquium       0   0   5   1  0  0    1  0
Discussion     109  33  29   0  3  0    0  8
Lecture       1082   1   0  78  0  1  204  4

Days Scheduled by Component: Totals
Meeting Days              -   F  M  MW  R  T  TR  W
Term        Component                              
Fall 2021   Colloquium    0   0  0   0  0  0   1  0
            Discussion   21   1  0   0  3  0   0  2
            Lecture     121   0  0  12  0  0  27  0
Spring 2022 Colloquium    0   0  1   0  0  0   0  0
            Discussion   15   2  4   0  0  0   0  0
            Lecture     127   0  0   5  0  0  20  0
Fall 2022   Discussion   14   3  3   0  0  0   0  0
            Lecture     124   0  0   3  0  0  23  0
Spring 2023 Colloquium    0   0  1   1  0  0   0  0
            Discussion   15   2  4   0  0  0   0  0
            Lecture     130   0  0   6  0  0  23  1
Fall 2023   Discussion   1

<h1> Modality: Time of Day </h1>

Time of Day - Counts by Start Time

In [864]:
# Meeting Time Start - Totals
start_time_counts = df_all["Meeting Time Start"].value_counts(dropna=False).sort_index()

print("Counts by Meeting Time Start")
print(start_time_counts)


Counts by Meeting Time Start
Meeting Time Start
00:00:00    1191
08:00:00       7
09:00:00       6
09:30:00      14
10:00:00      14
11:00:00      95
12:00:00       7
12:30:00      78
13:00:00      13
14:00:00      54
15:00:00      13
15:30:00      58
16:00:00       5
17:30:00       4
Name: count, dtype: int64


Time of Day - Counts by Semester

In [865]:
# Meeting Time Start totals by Term; run semester_order_func
start_times_semester = df_all.groupby(["Term", "Meeting Time Start"]).size().unstack(fill_value=0)
start_times_semester = start_times_semester.loc[sorted(start_times_semester.index, key=semester_order_func)]

print("Meeting Start Time Counts by Semester")
print(start_times_semester)


Meeting Start Time Counts by Semester
Meeting Time Start  00:00:00  08:00:00  09:00:00  09:30:00  10:00:00  \
Term                                                                   
Fall 2021                142         0         0         7         0   
Spring 2022              142         1         0         0         2   
Fall 2022                138         1         1         0         1   
Spring 2023              145         1         0         0         2   
Fall 2023                153         1         0         0         2   
Spring 2024              162         1         0         0         3   
Fall 2024                149         1         2         3         2   
Spring 2025              160         1         3         4         2   

Meeting Time Start  11:00:00  12:00:00  12:30:00  13:00:00  14:00:00  \
Term                                                                   
Fall 2021                 10         1         9         1         8   
Spring 2022              

<h3>Time of Day Function #1: </h3> break up by morning, midday, afternoon, late afternoon

First - Convert "Meeting Time Start & "Meeting Time End" to datetime (HH:MM:SS) format

In [866]:
# new DF
df_all_times = df_all.copy()

# Columns to convert
time_columns = ["Meeting Time Start", "Meeting Time End"]

import datetime
for col in time_columns:
    if col in df_all_times.columns:
        # Convert to dt.time in the format HH:MM:SS
        df_all_times[col] = pd.to_datetime(df_all_times[col], format="%H:%M:%S", errors="coerce").dt.time

# Check for conversion - still says "object" but at least it's a datetime.time object
#print(df_all_times.dtypes)


Time of Day function #1: assign_time_slot 

morning (through 10am), midday (past 10, through 1pm), afternoon (past 1pm, through 3:30pm), and late afternoon (after 3:30pm)

In [867]:
# Define function for labeling time slots
def assign_time_slot(t):
    if pd.isnull(t) or t == datetime.time(0, 0):
        return "NoTime"
    elif t <= datetime.time(10, 0):
        return "Morning"
    elif t <= datetime.time(13, 0):
        return "Midday"
    elif t <= datetime.time(15, 30):
        return "Afternoon"
    else:
        return "Late Afternoon"

# Apply function to create new column
df_all_times["Time Slot"] = df_all_times["Meeting Time Start"].apply(assign_time_slot)


Time of Day: Total Counts & Total Counts by Semester

In [868]:
# Print times in Order
ordered_time = ["Morning", "Midday", "Afternoon", "Late Afternoon", "NoTime"]

# Totals by Time Slot
# Get value counts, then reindex to your order (ordered_time)
time_slot_total = df_all_times["Time Slot"].value_counts(dropna=False).reindex(ordered_time, fill_value=0)

print("Totals by Time Slot")
print(time_slot_total)

# Time Slot Totals by Semester - .groupby(["Term", "Time Slot"])
time_slot_semester = df_all_times.groupby(["Term", "Time Slot"]).size().unstack(fill_value=0)
time_slot_semester = time_slot_semester[ordered_time] # Reorder time slots to ordered_time
time_slot_semester = time_slot_semester.loc[sorted(time_slot_semester.index, key=semester_order_func)] # Semesters in order

print("\nTotals by Time Slot & Semester")
print(time_slot_semester)

Totals by Time Slot
Time Slot
Morning             41
Midday             193
Afternoon          125
Late Afternoon       9
NoTime            1191
Name: count, dtype: int64

Totals by Time Slot & Semester
Time Slot    Morning  Midday  Afternoon  Late Afternoon  NoTime
Term                                                           
Fall 2021          7      21         16               2     142
Spring 2022        3      17         11               1     142
Fall 2022          3      17         11               1     138
Spring 2023        3      17         17               1     145
Fall 2023          3      22         14               3     153
Spring 2024        4      24         16               0     162
Fall 2024          8      36         19               1     149
Spring 2025       10      39         21               0     160


<h3>Time of Day Function #2: 3 times of day</h3>

In [869]:
# Morning: before 11. Midday: 11 through before 2. Afternoon: 2 & after
def assign_time_slot_v2(t):
    if pd.isnull(t) or t == datetime.time(0, 0):
        return "NoTime"
    elif t < datetime.time(11, 0):
        return "Morning"
    elif t < datetime.time(14, 0):
        return "Midday"
    else:
        return "Afternoon"

# New DF for our v2 Time Function
df_all_times_v2 = df_all_times.copy()

# Apply function to create new column
df_all_times_v2["Time Slot"] = df_all_times_v2["Meeting Time Start"].apply(assign_time_slot_v2)

Time of Day Function #2: Total Counts & Total Counts by Semester

In [870]:
# Print times in Order
ordered_time_v2 = ["Morning", "Midday", "Afternoon", "NoTime"]

# Totals by Time Slot
# Get value counts, then reindex to your order
time_slot_total_v2 = df_all_times_v2["Time Slot"].value_counts(dropna=False).reindex(ordered_time_v2, fill_value=0)

print("Totals by Time Slot (3 time slots)")
print(time_slot_total_v2)

# Time Slot Totals by Semester f
time_slot_semester_v2 = df_all_times_v2.groupby(["Term", "Time Slot"]).size().unstack(fill_value=0)
time_slot_semester_v2 = time_slot_semester_v2[ordered_time_v2]
time_slot_semester_v2 = time_slot_semester_v2.loc[sorted(time_slot_semester.index, key=semester_order_func)]

print("\nTotals by Time Slot & Semester (3 time slots)")
print(time_slot_semester_v2)

Totals by Time Slot (3 time slots)
Time Slot
Morning        41
Midday        193
Afternoon     134
NoTime       1191
Name: count, dtype: int64

Totals by Time Slot & Semester (3 time slots)
Time Slot    Morning  Midday  Afternoon  NoTime
Term                                           
Fall 2021          7      21         18     142
Spring 2022        3      17         12     142
Fall 2022          3      17         12     138
Spring 2023        3      17         18     145
Fall 2023          3      22         17     153
Spring 2024        4      24         16     162
Fall 2024          8      36         20     149
Spring 2025       10      39         21     160


<h3> Time of Day: By Modalities

Time of Day: In-Person/Online 

Run Time of Day function on In-Person/Online DFs: df_all_online & df_all_inperson

In [871]:
# New DFs for Online/In-Person for Time Slot Analysis
df_times_online = df_all_online.copy()
df_times_inperson = df_all_inperson.copy()

# Convert the time columns to be datetime
for df in [df_times_online, df_times_inperson]:
    df["Meeting Time Start"] = pd.to_datetime(df["Meeting Time Start"], 
                                              format="%H:%M:%S", errors="coerce").dt.time

# Apply the Time Slot function
df_times_online["Time Slot"] = df_times_online["Meeting Time Start"].apply(assign_time_slot)
df_times_inperson["Time Slot"] = df_times_inperson["Meeting Time Start"].apply(assign_time_slot)

dfs_times_online_inperson = {
    "df_times_online": df_times_online,
    "df_times_inperson": df_times_inperson
}

Time of Day by In-Person/Online

In [872]:
print("Time Slot Totals by Online vs In-Person\n")

for df_name, df in dfs_times_online_inperson.items():
    print(f"{df_name}:")
    
    time_modality_total = df["Time Slot"].value_counts(dropna=False)
    time_modality_total = time_modality_total.reindex(ordered_time, fill_value=0)
    
    print(time_modality_total)
    print("\n") 


Time Slot Totals by Online vs In-Person

df_times_online:
Time Slot
Morning              0
Midday              17
Afternoon           16
Late Afternoon       1
NoTime            1186
Name: count, dtype: int64


df_times_inperson:
Time Slot
Morning            41
Midday            176
Afternoon         109
Late Afternoon      8
NoTime              5
Name: count, dtype: int64




Time of Day by In-Person/Online & Semester

In [873]:
print("Time Slot Totals by Online vs In-Person & Semester\n")

for df_name, df in dfs_times_online_inperson.items():
    # Group by Term and Time Slot
    time_slot_by_term = df.groupby(["Term", "Time Slot"]).size().unstack(fill_value=0)
    # Reorder time slots to ordered_time
    time_slot_by_term = time_slot_by_term[ordered_time] if set(ordered_time).issubset(time_slot_by_term.columns) else time_slot_by_term
    # Sort semesters with semester_order_func
    time_slot_by_term = time_slot_by_term.loc[sorted(time_slot_by_term.index, key=semester_order_func)]

    print(f"{df_name}:")
    print(time_slot_by_term)
    print("\n")  


Time Slot Totals by Online vs In-Person & Semester

df_times_online:
Time Slot    Afternoon  Late Afternoon  Midday  NoTime
Term                                                  
Fall 2021            6               1       6     142
Spring 2022          0               0       1     142
Fall 2022            0               0       0     138
Spring 2023          2               0       2     145
Fall 2023            2               0       2     153
Spring 2024          2               0       2     162
Fall 2024            2               0       2     147
Spring 2025          2               0       2     157


df_times_inperson:
Time Slot    Morning  Midday  Afternoon  Late Afternoon  NoTime
Term                                                           
Fall 2021          7      15         10               1       0
Spring 2022        3      16         11               1       0
Fall 2022          3      17         11               1       0
Spring 2023        3      15         15 

Time of Day: 15-week/7-week

Time of Slots: 15/7 totals

In [874]:
time_session_totals = df_all_times.groupby(["Time Slot", "Session"]).size().unstack(fill_value=0) # group
time_session_totals = time_session_totals.reindex(ordered_time) # order time slots ordered_time

print("Time of Day by 15/7 Session Type")
print(time_session_totals)

Time of Day by 15/7 Session Type
Session         Regular Academic Session  Seven Week - First  \
Time Slot                                                      
Morning                               41                   0   
Midday                               193                   0   
Afternoon                            125                   0   
Late Afternoon                         9                   0   
NoTime                               606                 287   

Session         Seven Week - Second  
Time Slot                            
Morning                           0  
Midday                            0  
Afternoon                         0  
Late Afternoon                    0  
NoTime                          298  


Time Slots by 15/7 and Semester

In [875]:
# .groupby(["Term", "Time Slot", "Session"])
time_session_term = df_all_times.groupby(["Term", "Time Slot", "Session"]).size().unstack(fill_value=0)

# Reorder: 
time_session_term = time_session_term.reorder_levels(["Time Slot", "Term"])     # Reorder: Time Slot first, then Term
time_session_term = time_session_term.reindex(ordered_time, level="Time Slot")  # Reorder: time slots by ordered_time order
# Reorder semesters within each Time Slot (multi-index) using semester_order_func on Term
time_session_term = time_session_term.loc[sorted(time_session_term.index,
                                                 key=lambda x: (ordered_time.index(x[0]), semester_order_func(x[1])))]

print("Time Slot Totals by 15/7 Week & Semester")
print(time_session_term)


Time Slot Totals by 15/7 Week & Semester
Session                     Regular Academic Session  Seven Week - First  \
Time Slot      Term                                                        
Morning        Fall 2021                           7                   0   
               Spring 2022                         3                   0   
               Fall 2022                           3                   0   
               Spring 2023                         3                   0   
               Fall 2023                           3                   0   
               Spring 2024                         4                   0   
               Fall 2024                           8                   0   
               Spring 2025                        10                   0   
Midday         Fall 2021                          21                   0   
               Spring 2022                        17                   0   
               Fall 2022                       

Time of Day: By Days Scheduled

In [876]:
# Time Slots by Days
time_slot_days = df_all_times.groupby(["Time Slot", "Meeting Days"]).size().unstack(fill_value=0)
time_slot_days = time_slot_days.reindex(ordered_time)

print("Time Slot Totals by Days Scheduled")
print(time_slot_days)

Time Slot Totals by Days Scheduled
Meeting Days       -   F   M  MW  R  T   TR  W
Time Slot                                     
Morning            0   6  21   3  0  0   11  0
Midday             0  26   7  43  1  0  111  5
Afternoon          0   2   6  30  1  1   78  7
Late Afternoon     0   0   0   3  1  0    5  0
NoTime          1191   0   0   0  0  0    0  0


Time of Day by Days Scheduled & Semester

In [877]:
# Group by Term (semester), Meeting Days, and Time Slot
time_day_semester = df_all_times.groupby(["Term", "Meeting Days", "Time Slot"]).size().unstack(fill_value=0)
# Reorder Time Slots by ordered_time
time_day_semester = time_day_semester[ordered_time] if set(ordered_time).issubset(time_day_semester.columns) else time_day_semester
# Reorder Semesters with func semester_order_func (multi-index)
time_day_semester = time_day_semester.loc[sorted(time_day_semester.index, key=lambda x: semester_order_func(x[0]))]

print("Time of Day by Days & Semester")
print(time_day_semester)

Time of Day by Days & Semester
Time Slot                 Morning  Midday  Afternoon  Late Afternoon  NoTime
Term        Meeting Days                                                    
Fall 2021   -                   0       0          0               0     142
            F                   0       1          0               0       0
            MW                  0       7          5               0       0
            R                   0       1          1               1       0
            TR                  7      12          8               1       0
            W                   0       0          2               0       0
Spring 2022 -                   0       0          0               0     142
            F                   0       2          0               0       0
            M                   3       1          1               0       0
            MW                  0       3          2               0       0
            TR                  0      11    

Time of Day by Component

In [878]:
# Time of Day by Component - Totals
tod_component = df_all_times.groupby(["Time Slot", "Component"]).size().unstack(fill_value=0)
tod_component = tod_component.reindex(ordered_time)

print("Time of Day by Component")
print(tod_component)

# Time of Day by Component & Semester
tod_component_semester = df_all_times.groupby(["Term", "Time Slot", "Component"]).size().unstack(fill_value=0)
# Reorder: 1) print Time Slot first, 2) Time Slot by ordered_time, 3) Term by semester_order_func
tod_component_semester = tod_component_semester.reorder_levels(["Time Slot", "Term"]).sort_index(level="Time Slot")
tod_component_semester = tod_component_semester.reindex(ordered_time, level="Time Slot")
tod_component_semester = tod_component_semester.loc[sorted(tod_component_semester.index,
                                                           key=lambda x: (ordered_time.index(x[0]), semester_order_func(x[1])))]

print("\nTime of Day by Component & Semester")
print(tod_component_semester)


Time of Day by Component
Component       Colloquium  Discussion  Lecture
Time Slot                                      
Morning                  0          26       15
Midday                   0          39      154
Afternoon                7           7      111
Late Afternoon           0           1        8
NoTime                   0         109     1082

Time of Day by Component & Semester
Component                   Colloquium  Discussion  Lecture
Time Slot      Term                                        
Morning        Fall 2021             0           0        7
               Spring 2022           0           3        0
               Fall 2022             0           3        0
               Spring 2023           0           3        0
               Fall 2023             0           3        0
               Spring 2024           0           3        1
               Fall 2024             0           5        3
               Spring 2025           0           6        4
Mi

<h1> Course Bottleneck Index (CBI) KDD </h1>

<h3> Begin with Defining Terms: DR, OFS, IAS, PCS </h3>

<h2> Demand Ratio (DR)</h2>
DR = enrollment / capacity

Data Cleanse for DR, group Co-Convened Courses

In [929]:
df_DR_all = df_all.copy()

# Co-Convened? no: df_n_coconvened, yes: df_y_coconvened
df_n_coconvened = df_DR_all[df_DR_all["Combined Section"] == "-"] # Whether or not value = "-"
df_y_coconvened = df_DR_all[df_DR_all["Combined Section"] != "-"]

# Co-Convened Yes Modifications -->
# Feed to Aggregate Dictionary: 1) grouping columns; 2) sum columns; 3) all remaining columns, keep "first" value
cc_group_cols = ["Term", "Combined Section"]           # same "term" & "combined section" -> yes co-convened
cc_sum_cols = ["Total Enroll", "Enrollment Capacity"]  # sum Enroll & Capacity if co-convened
cc_first_cols = [col for col in df_y_coconvened.columns if col not in cc_group_cols + cc_sum_cols] # all other cols, keep first value

# Build the aggregate dictionary for Co-Convened Yes
cc_agg_dict = {col: "sum" for col in cc_sum_cols}
cc_agg_dict.update({col: "first" for col in cc_first_cols})

# Co-Convened Yes: Group and aggregate
df_y_coconvened = df_y_coconvened.groupby(cc_group_cols, as_index=False).agg(cc_agg_dict)

# Concatenate the y/n Co-Convened DFs together
df_DR_all = pd.concat([df_n_coconvened, df_y_coconvened], ignore_index=True)

print("df_DR_all had",df_all.shape[0], "rows before consolidating the co-convened classes")
print("\ndf_DR_all has",df_DR_all.shape[0], "rows after consolidating the co-convened classes")

df_DR_all had 1559 rows before consolidating the co-convened classes

df_DR_all has 1047 rows after consolidating the co-convened classes


DR: Every Class

In [922]:
#DR for Every Individual Class
df_DR_all["DR"] = (df_DR_all["Total Enroll"] / df_DR_all["Enrollment Capacity"]).round(2)

Finalize data cleansing for df_DR_all

In [None]:
df_DR_missing = df_DR_all[df_DR_all["DR"].isna()]
# All of them are classes with no enrollments & no capacity

# Drop the missing values
df_DR_all = df_DR_all.dropna(subset=["DR"])

# Drop the 2 (discussion) classes that had Enrollments but didn't have Enrollment Capacity (DR = "inf")
# Statistic Foundations Info Age & Dealing with Data
df_DR_all = df_DR_all[df_DR_all["DR"].astype(str) != "inf"]

# df_DR_all is ready to go!


DR: Total by Course

In [882]:
df_avg_DR = (df_DR_all.groupby("Course Description", as_index=False)["DR"].mean()).round(2)
print("Average DR by Course:")
print(df_avg_DR)

Average DR by Course:
               Course Description    DR
0       Advanced Game Development  0.30
1             Advanced Web Design  0.33
2            Algorithms for Games  0.68
3   Applied Cyberinfrastruct Conc  0.14
4      Applied Data Visualization  0.68
..                            ...   ...
73          Theories of New Media  0.49
74       User Interf+Website Dsgn  0.06
75                Virtual Reality  0.48
76        Visual Content Creation  0.52
77              eSport Industries  0.70

[78 rows x 2 columns]


DR Total - Summary Stats

In [None]:
# Overall stats
dr_mean = round(df_avg_DR["DR"].mean(), 2)
dr_median = round(df_avg_DR["DR"].median(), 2)
dr_std = round(df_avg_DR["DR"].std(), 2)

print("DR Summary Statistics (Overall):")
print(f"Mean DR: {dr_mean}")
print(f"Median DR: {dr_median}")
print(f"Standard Deviation: {dr_std}")


# DR min/max and course name
dr_min = round(df_avg_DR["DR"].min(), 2)
dr_max = round(df_avg_DR["DR"].max(), 2)
course_min = df_avg_DR.loc[df_avg_DR["DR"] == dr_min, "Course Description"].tolist() #Course name(s) as list
course_max = df_avg_DR.loc[df_avg_DR["DR"] == dr_max, "Course Description"].tolist()

print(f"Min DR: {dr_min} — Courses: {course_min}")
print(f"Max DR: {dr_max} — Courses: {course_max}")

# Top 10 highest and lowest DR courses
top_dr = df_avg_DR.nlargest(10, "DR")
bottom_dr = df_avg_DR.nsmallest(10, "DR")

print("\n10 Highest DR Courses:")
print(top_dr)
print("\n10 Lowest DR Courses:")
print(bottom_dr)


DR Summary Statistics (Overall):
Mean DR: 0.48
Median DR: 0.53
Standard Deviation: 0.24
Min DR: 0.02 — Courses: ['Computational Social Science', 'Data Ethics']
Max DR: 0.93 — Courses: ['Intro to Creative Coding']

Top 5 Highest DR Courses:
                Course Description    DR
46        Intro to Creative Coding  0.93
22    Digital Crime & Social Media  0.91
42             Installation Design  0.87
12          Computing and the Arts  0.85
11  Computational Thinking & Doing  0.84
52  Intro: Human Computer Interact  0.77
53     Introduction to Game Design  0.76
31       Ethics in a Digital World  0.75
67      Social Media and Ourselves  0.75
70  Statistic Foundations Info Age  0.72

Bottom 5 Lowest DR Courses:
               Course Description    DR
10   Computational Social Science  0.02
14                    Data Ethics  0.02
7   Bayesian Modeling & Inference  0.03
30  Ethical Issues in Information  0.03
64                     STEM Games  0.05
8            Business Information  0.06


DR: By Term & Course

In [None]:
df_term_avg_DR = (df_DR_all.groupby(["Term", "Course Description"], as_index=False)["DR"].mean()).round(2)

# Sort by Term
df_term_avg_DR = df_term_avg_DR.set_index(["Term", "Course Description"])
df_term_avg_DR = df_term_avg_DR.loc[sorted(df_term_avg_DR.index, key=lambda x: semester_order_func(x[0]))]
df_term_avg_DR = df_term_avg_DR.reset_index()

print("Average DR by Course & Term:")
print(df_term_avg_DR)

Average DR by Course & Term:
            Term             Course Description    DR
0      Fall 2021            Advanced Web Design  0.26
1      Fall 2021           Algorithms for Games  0.52
2      Fall 2021  Applied Cyberinfrastruct Conc  0.22
3      Fall 2021     Applied Data Visualization  0.72
4      Fall 2021  Bayesian Modeling & Inference  0.09
..           ...                            ...   ...
460  Spring 2025         The Past and New Media  0.34
461  Spring 2025          Theories of New Media  0.32
462  Spring 2025                Virtual Reality  0.46
463  Spring 2025        Visual Content Creation  0.52
464  Spring 2025              eSport Industries  0.50

[465 rows x 3 columns]


DR by Term - Summary Stats

In [945]:
# For Loop for our summary stats (mean, median, std, min/max) and top/bottom 5 courses
for df_semester, group in df_term_avg_DR.groupby("Term"):
    # Summary stats (mean, median, std)
    dr_mean = round(group["DR"].mean(), 2)
    dr_median = round(group["DR"].median(), 2)
    dr_std = round(group["DR"].std(), 2)
    print(f"\n🗓 DR Summary for {df_semester}") # emoji to make it easier to read
    print(f"Mean DR: {dr_mean}")
    print(f"Median DR: {dr_median}")
    print(f"Standard Deviation: {dr_std}")

    # Min/Max and course name(s)
    dr_min = round(group["DR"].min(), 2)
    dr_max = round(group["DR"].max(), 2)
    course_min = group.loc[group["DR"] == dr_min, "Course Description"].tolist()
    course_max = group.loc[group["DR"] == dr_max, "Course Description"].tolist()
    print(f"Min DR: {dr_min} on Courses: {course_min}")
    print(f"Max DR: {dr_max} on Courses: {course_max}")

    #Top/Bottom 5
    top_5 = group.nlargest(5, "DR")[["Course Description", "DR"]]
    bottom_5 = group.nsmallest(5, "DR")[["Course Description", "DR"]]
    print(f"\n{df_semester}: 5 Highest DR Courses:")
    print(top_5.to_string(index=False))
    print(f"\n {df_semester}: 5 Lowest DR Courses:")
    print(bottom_5.to_string(index=False))



🗓 DR Summary for Fall 2021
Mean DR: 0.59
Median DR: 0.61
Standard Deviation: 0.28
Min DR: 0.03 on Courses: ['Ethical Issues in Information']
Max DR: 1.02 on Courses: ['eSport Industries']

Fall 2021: 5 Highest DR Courses:
          Course Description   DR
           eSport Industries 1.02
 Great Ideas of the Info Age 1.01
Digital Crime & Social Media 0.96
  Social Media and Ourselves 0.96
    Intro to Creative Coding 0.95

 Fall 2021: 5 Lowest DR Courses:
           Course Description   DR
Ethical Issues in Information 0.03
Bayesian Modeling & Inference 0.09
     User Interf+Website Dsgn 0.09
       Government Information 0.12
               Special Topics 0.15

🗓 DR Summary for Fall 2022
Mean DR: 0.59
Median DR: 0.64
Standard Deviation: 0.27
Min DR: 0.04 on Courses: ['Ethical Issues in Information', 'Science Information']
Max DR: 0.98 on Courses: ['Diversity and Bias in Games', 'eSport Industries']

Fall 2022: 5 Highest DR Courses:
          Course Description   DR
 Diversity and Bia

<h2>Offering Frequency Score (OFS)</h2>
Captures how often a course if offered: 1: every semester, 2: once a year, 3: less than once a year

Total Courses by "Course Description"

In [884]:
# Total Courses by "Course Description"
course_totals = df_all.groupby("Course Description").size().sort_values(ascending=False)
print("Total Classes by Course Description:")
print(course_totals)

Total Classes by Course Description:
Course Description
Statistic Foundations Info Age    77
Digital Storytelling & Culture    74
Computational Thinking & Doing    72
Social Media and Ourselves        50
Dealing with Data                 49
                                  ..
Foundation of Info & Inference     2
Game AI                            2
Designing an Installation          2
Natural Language Processing        1
Visual Content Creation            1
Length: 81, dtype: int64


Total Courses by Term

In [885]:
course_total_by_semester = df_all.groupby(["Course Description", "Term"]).size().unstack(fill_value=0)
course_total_by_semester = course_total_by_semester[sorted(course_total_by_semester.columns, key=semester_order_func)]
course_total_by_semester = course_total_by_semester.reset_index()   #Include Course Description as a column, was being dropped

print("Total Classes by Semester:")
print(course_total_by_semester)

Total Classes by Semester:
Term             Course Description  Fall 2021  Spring 2022  Fall 2022  \
0         Advanced Game Development          0            1          1   
1               Advanced Web Design          2            2          0   
2              Algorithms for Games          1            1          1   
3     Applied Cyberinfrastruct Conc          6            0          0   
4        Applied Data Visualization          2            2          2   
..                              ...        ...          ...        ...   
76            Theories of New Media          8            6          4   
77         User Interf+Website Dsgn          2            2          0   
78                  Virtual Reality          1            1          1   
79          Visual Content Creation          0            0          0   
80                eSport Industries          2            4          2   

Term  Spring 2023  Fall 2023  Spring 2024  Fall 2024  Spring 2025  
0               

Function: Get OFS Score (get_OFS)

1: Every Semester. 2: Once a year. 3: Less than once a year.

In [886]:
df_OFS = course_total_by_semester.copy()

# Specify Col Names because was having issues with indexing 
semester_cols = ["Fall 2021", "Spring 2022", "Fall 2022", "Spring 2023",
                 "Fall 2023", "Spring 2024", "Fall 2024", "Spring 2025"]

# Add "Term Count" Column: count of Non-Zero values in semester_cols to double check our 1/2/3s
df_OFS["TermCount"] = df_OFS[semester_cols].ne(0).sum(axis=1)

# OFS Function: Assign 1, 2, and 3
def get_OFS(row):
    semester_vals = row[semester_cols].values  # only the 8 columns
    nonzero_count = (semester_vals != 0).sum()

    if nonzero_count >= 7:    # 1, Every semester: at least 7 of the last 8 semesters
        return 1
    elif nonzero_count <= 3:  # 3, Less than once a year: if offered 3 semesters or fewer
        return 3
    else:                     # 2, Every other semester: if offered 4-6 semesters
        return 2



Calculate & Print OFS Scores

In [887]:
# Run our get_OFS to get 1/2/3 OFS scores
df_OFS["OFS"] = df_OFS.apply(get_OFS, axis=1)

# Smaller DF for printing: keep just Course Description", "TermCount", & "OFS"
df_OFS_small = df_OFS[["Course Description", "TermCount", "OFS"]]

print("Course Descriptions & Their Offering Frequency Score (OFS):")
print(df_OFS_small)

Course Descriptions & Their Offering Frequency Score (OFS):
Term             Course Description  TermCount  OFS
0         Advanced Game Development          7    1
1               Advanced Web Design          3    3
2              Algorithms for Games          6    2
3     Applied Cyberinfrastruct Conc          2    3
4        Applied Data Visualization          8    1
..                              ...        ...  ...
76            Theories of New Media          8    1
77         User Interf+Website Dsgn          2    3
78                  Virtual Reality          8    1
79          Visual Content Creation          1    3
80                eSport Industries          8    1

[81 rows x 3 columns]


OFS Summary Stats

In [955]:
# Set up DFs for OFSxCounts and OFSxAverage-TermCount
ofs_counts = df_OFS_small["OFS"].value_counts().sort_index()                    # counting the OFS scores
ofs_termcount_avg = df_OFS_small.groupby("OFS")["TermCount"].mean().round(2)    # group by OFS to get average Term Count

print("Offering Frequency Score (OFS) Summary Stats:\n")
for ofs_score in sorted(ofs_counts.index):
    count = ofs_counts[ofs_score]
    avg_terms = ofs_termcount_avg[ofs_score]
    print(f"OFS Score {ofs_score}: {count} total courses with an Average Term Count of {avg_terms}")


Offering Frequency Score (OFS) Summary Stats:

OFS Score 1: 46 total courses with an Average Term Count of 7.91
OFS Score 2: 14 total courses with an Average Term Count of 5.0
OFS Score 3: 21 total courses with an Average Term Count of 2.29


<h2> Prerequisite Complexity Score (PCS) </h2>
0: No Prerequisites. 1: Yes Prerequisites.

Load Instructor Availability Score (IAS) & Prerequisite Complexity Score (PCS) raw data

In [888]:
df_IAS_raw = pd.read_excel(os.path.join(base_path, "2022_2025_Faculty Load Analysis - CLEANED.xlsx"))
df_PCS_raw = pd.read_excel(os.path.join(base_path, "Course List w. Pre-Reqs.xlsx"))

Clean PCS dataframe to just be UGrad

In [889]:
# Make a copy to avoid modifying the original
df_PCS_ugrad = df_PCS_raw.copy()

# Temporary Column "Catalog_First_Digit":
# Extract first digit from Catalog # and convert to float (<5 is undergrad)
df_PCS_ugrad["Catalog_First_Digit"] = df_PCS_ugrad["Catalog #"].astype(str).str[0].str.extract(r"(\d)").astype(float)
df_PCS_ugrad = df_PCS_ugrad[df_PCS_ugrad["Catalog_First_Digit"] < 5].copy()
df_PCS_ugrad.drop(columns="Catalog_First_Digit", inplace=True) # Remove temporary column

print(f"Undergrad PCS rows: {df_PCS_ugrad.shape[0]}")

# There are more UGrad class options loaded here than we did in the original cleaned dataset
# My df_all was more filtered than this df_PCS copy --> will just keep the values that match in my df_all

Undergrad PCS rows: 181


PCS: Fill my DF (df_PCS_all, from df_all) with PreReqs from our raw data df_PCS_ugrad

In [890]:
# New DF df_PCS_all from df_all to do our PCS analysis
df_PCS_all = df_all.copy()

# Create empty PCS column to store y/n from df_PCS_ugrad
df_PCS_all["PCS"] = None  # or np.nan if you want

# Create map for matching df_PCS_ugrad's Course Descriptions (raw data) into our working df_PCS_all
PCS_map = (df_PCS_ugrad[["Course Description", "Requirements"]]
    .drop_duplicates("Course Description")              # keep first match Course Description
    .set_index("Course Description")["Requirements"])   # set index to Course Description, get values from Requirements column

# Fill PCS column with matching Requirements
df_PCS_all["PCS"] = df_PCS_all["Course Description"].map(PCS_map)

# Convert Y/- to 1 (yes) & 0 (no)
df_PCS_all["PCS"] = (df_PCS_all["PCS"] == "Y").astype(int)


PCS Scores

In [891]:
df_PCS_scores = df_PCS_all.groupby("Course Description", as_index=False)["PCS"].max()

print(df_PCS_scores[["Course Description", "PCS"]])


               Course Description  PCS
0       Advanced Game Development    1
1             Advanced Web Design    1
2            Algorithms for Games    1
3   Applied Cyberinfrastruct Conc    1
4      Applied Data Visualization    1
..                            ...  ...
76          Theories of New Media    0
77       User Interf+Website Dsgn    0
78                Virtual Reality    1
79        Visual Content Creation    0
80              eSport Industries    0

[81 rows x 2 columns]


Check which classes dropped from DR (78 vs 81)

In [None]:
dropped_courses = set(df_PCS_scores["Course Description"]) - set(df_avg_DR["Course Description"])
print("Courses without a DR:")
print(dropped_courses)


Dropped courses:
{'Natural Language Processing', 'Simulation and Problem Solving', 'Special Topics in LIS'}


PCS Summary Stats

In [953]:
# Set up DF for PCSxCounts 
pcs_counts = df_PCS_scores["PCS"].value_counts().sort_index()

print("Prerequisite Complexity Score (PCS) Summary Stats:")
for pcs_score in sorted(pcs_counts.index):
    count = pcs_counts[pcs_score]
    print(f"PCS Score {pcs_score}: {count}")


Prerequisite Complexity Score (PCS) Summary Stats:
PCS Score 0: 50
PCS Score 1: 31


<h2> Instructor Availability Score (IAS)</h2>
IAS = 1/[number of instructors]

Data Cleanse df_IAS_raw a bit

In [893]:
df_IAS_raw_ugrad = df_IAS_raw.copy()

# Keep specific sessions
df_IAS_raw_ugrad = df_IAS_raw_ugrad[df_IAS_raw_ugrad["Session"].isin(["Regular Academic Session", 
    "Seven Week - First", "Seven Week - Second"])]

# Keep specific campuses
df_IAS_raw_ugrad = df_IAS_raw_ugrad[df_IAS_raw_ugrad["Class Campus"].isin(["University of Arizona - Main", "Arizona Online"])]

# Filter for ugrad: first digit <5 is ugrad
df_IAS_raw_ugrad["Catalog_First_Digit"] = df_IAS_raw_ugrad["Catalog Number"].astype(str).str[0].str.extract(r"(\d)").astype(float)
df_IAS_raw_ugrad = df_IAS_raw_ugrad[df_IAS_raw_ugrad["Catalog_First_Digit"] < 5].copy()
df_IAS_raw_ugrad.drop(columns="Catalog_First_Digit", inplace=True) # Remove temporary column

Merge the df_IAS_raw_ugrad with what I'll use

In [894]:
df_IAS_all = df_all.copy()

# If these 4 cols match, we can assume it's the same class in both DFs
merge_cols = ["Term", "Session", "Subject Code", "Catalog Number"]

# Merge Keys: Create Unique Identifier from the 4 columns above
df_IAS_all["merge_key"] = df_IAS_all[merge_cols].astype(str).agg("_".join, axis=1)
df_IAS_raw_ugrad["merge_key"] = df_IAS_raw_ugrad[merge_cols].astype(str).agg("_".join, axis=1)

pseudo_lookup = (df_IAS_raw_ugrad[["merge_key", "Psuedonymn"]]
    .drop_duplicates("merge_key")  # keep first match
    .set_index("merge_key")["Psuedonymn"])
load_lookup = (df_IAS_raw_ugrad[["merge_key", "Instructor Load Factor"]]
    .drop_duplicates("merge_key")
    .set_index("merge_key")["Instructor Load Factor"])

role_lookup = (df_IAS_raw_ugrad[["merge_key", "Instructor Role"]]
    .drop_duplicates("merge_key")
    .set_index("merge_key")["Instructor Role"])

df_IAS_all["Instructor Load Factor"] = df_IAS_all["merge_key"].map(load_lookup)
df_IAS_all["Instructor Role"] = df_IAS_all["merge_key"].map(role_lookup)


df_IAS_all["Pseudonym"] = df_IAS_all["merge_key"].map(pseudo_lookup)
df_IAS_all["Instructor Load Factor"] = df_IAS_all["merge_key"].map(load_lookup)
df_IAS_all["Instructor Role"] = df_IAS_all["merge_key"].map(role_lookup)

Explore missing data
?? need Course ID

In [895]:
df_IAS_missing = df_IAS_all.copy()

df_IAS_missing = df_IAS_all[df_IAS_all[["Instructor Load Factor", "Instructor Role", "Pseudonym"]].isna().any(axis=1)].copy()

# 204 rows / 13% of our data is missing "Instructor Load Factor", "Instructor Role", "Pseudonym"
# Error Source was not my data cleansing - same 204 count either way

?? temporarily removing missing values to continue with code

In [896]:
df_IAS_all = df_IAS_all.dropna(subset=["Instructor Load Factor", "Instructor Role", "Pseudonym"])

IAS by Total:

In [897]:
df_IAS_tot = df_IAS_all.copy()

# Group by Course Description; count unique Pseudonyms
instructor_counts = df_IAS_tot.groupby("Course Description")["Pseudonym"].nunique()

# Create new DataFrame with Course Description, Instructor Count, and IAS
df_IAS_tot = instructor_counts.reset_index()
df_IAS_tot.rename(columns={"Pseudonym": "Instructor Count"}, inplace=True)
df_IAS_tot["IAS"] = (1 / df_IAS_tot["Instructor Count"]).round(2)


In [898]:
df_IAS_tot = df_IAS_all.copy()

# Empty column for Instructor Count
df_IAS_tot["Instructor Count"] = None

# Group by Course Description; count unique Pseudonyms
instructor_counts = df_IAS_tot.groupby("Course Description")["Pseudonym"].nunique()

# Fill Instructor Count with counts of unique Pseudonyms by course
df_IAS_tot["Instructor Count"] = df_IAS_tot["Course Description"].map(instructor_counts)

# Reorganize: Drop unecessary columns, consolidate to unique Course Descriptions
df_IAS_tot.drop(columns=["merge_key", "Instructor Load Factor", "Instructor Role", "Pseudonym"], inplace=True) # drop these - no longer necessary
df_IAS_tot = df_IAS_tot.groupby("Course Description", as_index=False).first()

# Add IAS column & scores; IAS = 1 / Instructor Count
df_IAS_tot["IAS"] = (1 / df_IAS_tot["Instructor Count"]).round(2) 

In [899]:
# Smaller DF for printing
df_IAS_tot_small = df_IAS_tot[["Course Description", "Instructor Count", "IAS"]]

print("Course Descriptions & Their Instructor Availability Score (IAS):")
print(df_IAS_tot_small)

Course Descriptions & Their Instructor Availability Score (IAS):
               Course Description  Instructor Count   IAS
0       Advanced Game Development                 2  0.50
1             Advanced Web Design                 2  0.50
2            Algorithms for Games                 1  1.00
3   Applied Cyberinfrastruct Conc                 1  1.00
4      Applied Data Visualization                 4  0.25
..                            ...               ...   ...
75          Theories of New Media                 3  0.33
76       User Interf+Website Dsgn                 1  1.00
77                Virtual Reality                 2  0.50
78        Visual Content Creation                 1  1.00
79              eSport Industries                 1  1.00

[80 rows x 3 columns]


IAS Totals - Summary Statistics

In [961]:
# Counts by IAS Score
ias_counts = df_IAS_tot_small["IAS"].value_counts().sort_index()

print("Instructor Availability Score (IAS) Counts by Score:")
for ias_score in sorted(ias_counts.index):
    count = ias_counts[ias_score]
    print(f"PCS Score {ias_score}: {count}")

# Overall stats (same code as DR Total)
ias_mean = round(df_IAS_tot_small["IAS"].mean(), 2)
ias_median = round(df_IAS_tot_small["IAS"].median(), 2)
ias_std = round(df_IAS_tot_small["IAS"].std(), 2)
print("\nInstructor Availability Score (IAS) Summary Stats (Overall):")
print(f"Mean IAS: {ias_mean}")
print(f"Median IAS: {ias_median}")
print(f"Standard Deviation: {ias_std}")

# IAS min/max and course name
ias_min = round(df_IAS_tot_small["IAS"].min(), 2)
ias_max = round(df_IAS_tot_small["IAS"].max(), 2)
ias_course_min = df_IAS_tot_small.loc[df_IAS_tot_small["IAS"] == ias_min, "Course Description"].tolist() #Course name(s) as list
ias_course_max = df_IAS_tot_small.loc[df_IAS_tot_small["IAS"] == ias_max, "Course Description"].tolist()

print(f"Min IAS: {ias_min} — Courses: {ias_course_min}")
print(f"Max IAS: {ias_max} — Courses: {ias_course_max}")

# Top 5 highest and lowest IAS courses
top_ias = df_IAS_tot_small.nlargest(5, "IAS")
bottom_ias = df_IAS_tot_small.nsmallest(5, "IAS")

print("\n5 Highest IAS Courses:")
print(top_ias)
print("\n5 Lowest IAS Courses:")
print(bottom_ias)


Instructor Availability Score (IAS) Counts by Score:
PCS Score 0.14: 1
PCS Score 0.25: 6
PCS Score 0.33: 7
PCS Score 0.5: 28
PCS Score 1.0: 38

Instructor Availability Score (IAS) Summary Stats (Overall):
Mean IAS: 0.7
Median IAS: 0.5
Standard Deviation: 0.3
Min IAS: 0.14 — Courses: ['Intro to Machine Learning']
Max IAS: 1.0 — Courses: ['Algorithms for Games', 'Applied Cyberinfrastruct Conc', 'Applied NLP', 'Bayesian Modeling & Inference', 'Collaborating: Online Commun', 'Computational Social Science', 'Data Ethics', 'Database Dev and Mgmt', 'Designing an Installation', 'Dig Games and Society', 'Digital Commerce', 'Digital Crime & Social Media', 'Disruptive Technologies', 'Esports Casting', 'Foundation of Info & Inference', 'Game AI', 'Game Development', 'Gamification in Society', 'Government Information', 'Great Ideas of the Info Age', 'Hacking & Open Source Culture', 'Information Security', 'Instructional Technologies', 'Intro to Data Science', 'Intro to Info Tech', 'Monetizing Indep

IAS by Term

In [900]:
df_IAS_term = df_IAS_all.copy()

# Empty column for Instructor Count
df_IAS_term["Instructor Count"] = None

# Group by Course Description; count unique Pseudonyms
instructor_counts2 = df_IAS_term.groupby(["Term", "Course Description"])["Pseudonym"].nunique()

# Fill Instructor Count with counts of unique Pseudonyms by course
df_IAS_term["Instructor Count"] = df_IAS_term.set_index(["Term", "Course Description"]).index.map(instructor_counts2)

# Reorganize: Drop unecessary columns, consolidate to unique Course Descriptions
df_IAS_term.drop(columns=["merge_key", "Instructor Load Factor", "Instructor Role", "Pseudonym"], inplace=True) # drop these - no longer necessary
df_IAS_term = df_IAS_term.groupby(["Term", "Course Description"], as_index=False).first()

# Add IAS column & scores; IAS = 1 / Instructor Count
df_IAS_term["IAS"] = (1 / df_IAS_term["Instructor Count"]).round(2) 

In [968]:
# Sort by Term with semester_order_func
df_IAS_term = df_IAS_term[df_IAS_term["Term"].notna()]
df_IAS_term = df_IAS_term.set_index(["Term", "Course Description"])
df_IAS_term = df_IAS_term.loc[sorted(df_IAS_term.index, key=lambda x: semester_order_func(x[0]))]
df_IAS_term = df_IAS_term.reset_index()

print(df_IAS_term)

            Term          Course Description                   Session  \
0    Spring 2022   Advanced Game Development       Seven Week - Second   
1    Spring 2022         Advanced Web Design  Regular Academic Session   
2    Spring 2022        Algorithms for Games  Regular Academic Session   
3    Spring 2022  Applied Data Visualization  Regular Academic Session   
4    Spring 2022     Artificial Intelligence  Regular Academic Session   
..           ...                         ...                       ...   
413  Spring 2025      The Past and New Media        Seven Week - First   
414  Spring 2025       Theories of New Media  Regular Academic Session   
415  Spring 2025             Virtual Reality        Seven Week - First   
416  Spring 2025     Visual Content Creation  Regular Academic Session   
417  Spring 2025           eSport Industries        Seven Week - First   

    Session Code                        Campus                  Facility  \
0            7W2  University of Ari

In [969]:
# Smaller DF for printing; sort semesters with my function
df_IAS_term_small = df_IAS_term[["Term", "Course Description", "Instructor Count", "IAS"]]

print("IAS by Course Descriptions & Term:")
print(df_IAS_term_small)

IAS by Course Descriptions & Term:
            Term          Course Description  Instructor Count  IAS
0    Spring 2022   Advanced Game Development                 1  1.0
1    Spring 2022         Advanced Web Design                 1  1.0
2    Spring 2022        Algorithms for Games                 1  1.0
3    Spring 2022  Applied Data Visualization                 1  1.0
4    Spring 2022     Artificial Intelligence                 1  1.0
..           ...                         ...               ...  ...
413  Spring 2025      The Past and New Media                 1  1.0
414  Spring 2025       Theories of New Media                 2  0.5
415  Spring 2025             Virtual Reality                 1  1.0
416  Spring 2025     Visual Content Creation                 1  1.0
417  Spring 2025           eSport Industries                 1  1.0

[418 rows x 4 columns]


IAS by Term - Summary Statistics

In [976]:
# For Loop for our summary stats (mean, median, std, min/max)  
for df_semester, group in df_IAS_term_small.groupby("Term"):

    print(f"\nIAS Summary Stats for {df_semester}:") 

    # Summary stats (mean, median, std)
    ias_mean = round(group["IAS"].mean(), 2)
    ias_median = round(group["IAS"].median(), 2)
    ias_std = round(group["IAS"].std(), 2)
    print(f"Mean IAS: {ias_mean}")
    print(f"Median IAS: {ias_median}")
    print(f"Standard Deviation: {ias_std}")

    # Counts by IAS score within this term
    ias_counts = group["IAS"].value_counts().sort_index()
    print("IAS Score Counts:")
    for ias_score in sorted(ias_counts.index):
        count = ias_counts[ias_score]
        print(f"  IAS {ias_score}: {count} courses")


IAS Summary Stats for Fall 2022:
Mean IAS: 0.95
Median IAS: 1.0
Standard Deviation: 0.16
IAS Score Counts:
  IAS 0.5: 6 courses
  IAS 1.0: 49 courses

IAS Summary Stats for Fall 2023:
Mean IAS: 0.93
Median IAS: 1.0
Standard Deviation: 0.17
IAS Score Counts:
  IAS 0.5: 8 courses
  IAS 1.0: 52 courses

IAS Summary Stats for Fall 2024:
Mean IAS: 0.91
Median IAS: 1.0
Standard Deviation: 0.2
IAS Score Counts:
  IAS 0.33: 1 courses
  IAS 0.5: 9 courses
  IAS 1.0: 50 courses

IAS Summary Stats for Spring 2022:
Mean IAS: 0.92
Median IAS: 1.0
Standard Deviation: 0.18
IAS Score Counts:
  IAS 0.5: 9 courses
  IAS 1.0: 49 courses

IAS Summary Stats for Spring 2023:
Mean IAS: 0.95
Median IAS: 1.0
Standard Deviation: 0.15
IAS Score Counts:
  IAS 0.5: 6 courses
  IAS 1.0: 56 courses

IAS Summary Stats for Spring 2024:
Mean IAS: 0.93
Median IAS: 1.0
Standard Deviation: 0.18
IAS Score Counts:
  IAS 0.33: 1 courses
  IAS 0.5: 7 courses
  IAS 1.0: 52 courses

IAS Summary Stats for Spring 2025:
Mean IAS:

<h2>Composite Bottleneck Index (CBI) Data Frames</h2>

Make CBI Data Frame - Total

In [None]:
# CBI Total Data Frame - Merge on "Course Description"
df_CBI = df_avg_DR.merge(df_OFS_small, on= "Course Description", how= "outer")
df_CBI = df_CBI.merge(df_IAS_tot_small, on="Course Description", how= "outer")
df_CBI = df_CBI.merge(df_PCS_scores, on= "Course Description", how="outer")

df_CBI.drop(columns=["TermCount", "Instructor Count"], inplace=True, errors="ignore")

print("CBI Data Frame by Totals:")
print(df_CBI)


               Course Description    DR  OFS   IAS  PCS
0       Advanced Game Development  0.30    1  0.50    1
1             Advanced Web Design  0.33    3  0.50    1
2            Algorithms for Games  0.68    2  1.00    1
3   Applied Cyberinfrastruct Conc  0.14    3  1.00    1
4      Applied Data Visualization  0.68    1  0.25    1
..                            ...   ...  ...   ...  ...
76          Theories of New Media  0.49    1  0.33    0
77       User Interf+Website Dsgn  0.06    3  1.00    0
78                Virtual Reality  0.48    1  0.50    1
79        Visual Content Creation  0.52    3  1.00    0
80              eSport Industries  0.70    1  1.00    0

[81 rows x 5 columns]


Make CBI Data Frame - By Term

In [None]:
# CBI By Term Data Frame - Merge on "Course Description" and/or ["Term", "Course Description"]
df_CBI_term = df_term_avg_DR.merge(df_OFS_small, on= "Course Description", how= "outer")
df_CBI_term = df_CBI_term.merge(df_IAS_term_small, on=["Term", "Course Description"], how= "outer")
df_CBI_term = df_CBI_term.merge(df_PCS_scores, on= "Course Description", how="outer")

# Drop unnecessary columns
df_CBI_term.drop(columns=["TermCount", "Instructor Count"], inplace=True, errors="ignore")   # Drop exta columns

# print("Testing CBI Data Frame by Term:")
# print(df_CBI_term) # 482 rows

            Term         Course Description    DR  OFS  IAS  PCS
0      Fall 2022  Advanced Game Development  0.29  1.0  1.0    1
1      Fall 2023  Advanced Game Development   NaN  NaN  1.0    1
2      Fall 2024  Advanced Game Development  0.18  1.0  1.0    1
3    Spring 2022  Advanced Game Development  0.50  1.0  1.0    1
4    Spring 2023  Advanced Game Development  0.32  1.0  1.0    1
..           ...                        ...   ...  ...  ...  ...
477    Fall 2024          eSport Industries  0.74  1.0  1.0    0
478  Spring 2022          eSport Industries  0.97  1.0  1.0    0
479  Spring 2023          eSport Industries  0.49  1.0  1.0    0
480  Spring 2024          eSport Industries  0.50  1.0  1.0    0
481  Spring 2025          eSport Industries  0.50  1.0  1.0    0

[482 rows x 6 columns]


In [None]:
# Sort Terms with my semester_order_func function
df_CBI_term = df_CBI_term[df_CBI_term["Term"].notna()]              # Some of the Terms are loading in as Missing (dropped 2 rows)
df_CBI_term = df_CBI_term.set_index(["Term", "Course Description"]) # Set index to group by
df_CBI_term = df_CBI_term.loc[sorted(df_CBI_term.index, key=lambda x: semester_order_func(x[0]))] # Multi-Index sort function
df_CBI_term = df_CBI_term.reset_index()

print("CBI Data Frame by Term:")
print(df_CBI_term)

            Term             Course Description    DR  OFS  IAS  PCS
0      Fall 2021            Advanced Web Design  0.26  3.0  NaN    1
1      Fall 2021           Algorithms for Games  0.52  2.0  NaN    1
2      Fall 2021  Applied Cyberinfrastruct Conc  0.22  3.0  NaN    1
3      Fall 2021     Applied Data Visualization  0.72  1.0  NaN    1
4      Fall 2021  Bayesian Modeling & Inference  0.09  3.0  NaN    1
..           ...                            ...   ...  ...  ...  ...
474  Spring 2025         The Past and New Media  0.34  1.0  1.0    0
475  Spring 2025          Theories of New Media  0.32  1.0  0.5    0
476  Spring 2025                Virtual Reality  0.46  1.0  1.0    1
477  Spring 2025        Visual Content Creation  0.52  3.0  1.0    0
478  Spring 2025              eSport Industries  0.50  1.0  1.0    0

[479 rows x 6 columns]


CBI Total - Weights

In [912]:
weights_tot = {"DR": 1, "OFS": 1,
               "IAS": 1, "PCS": 1}

CBI by Term - Weights

In [911]:
weights_term = {"DR": 1, "OFS": 1,
               "IAS": 1, "PCS": 1}

<h2>Composite Bottleneck Index (CBI) Analysis </h2>
CBI = (DR * w1) + (OFS * w2) + (IAS * w3) + (PCS * w4)

CBI - Total

In [917]:
df_CBI_analysis = df_CBI.copy()

# Add CBI column
# CBI = (DR * w1) + (OFS * w2) + (IAS * w3) + (PCS * w4)
df_CBI_analysis["CBI"] = ((df_CBI_analysis["DR"]  * weights_tot["DR"] +
                          df_CBI_analysis["OFS"] * weights_tot["OFS"] +
                          df_CBI_analysis["IAS"] * weights_tot["IAS"] +
                          df_CBI_analysis["PCS"] * weights_tot["PCS"]).round(2))

print("CBI Analysis by Totals, All Scores")
print(df_CBI_analysis)

print("\nCBI Analysis by Totals")
print(df_CBI_analysis[["Course Description", "CBI"]])

CBI Analysis by Totals, All Scores
               Course Description    DR  OFS   IAS  PCS   CBI
0       Advanced Game Development  0.30    1  0.50    1  2.80
1             Advanced Web Design  0.33    3  0.50    1  4.83
2            Algorithms for Games  0.68    2  1.00    1  4.68
3   Applied Cyberinfrastruct Conc  0.14    3  1.00    1  5.14
4      Applied Data Visualization  0.68    1  0.25    1  2.93
..                            ...   ...  ...   ...  ...   ...
76          Theories of New Media  0.49    1  0.33    0  1.82
77       User Interf+Website Dsgn  0.06    3  1.00    0  4.06
78                Virtual Reality  0.48    1  0.50    1  2.98
79        Visual Content Creation  0.52    3  1.00    0  4.52
80              eSport Industries  0.70    1  1.00    0  2.70

[81 rows x 6 columns]

CBI Analysis by Totals
               Course Description   CBI
0       Advanced Game Development  2.80
1             Advanced Web Design  4.83
2            Algorithms for Games  4.68
3   Applied Cy

CBI Total - Summary Stats

In [983]:
# Overall stats cbi_tot
cbi_tot_mean = round(df_CBI_analysis["CBI"].mean(), 2)
cbi_tot_median = round(df_CBI_analysis["CBI"].median(), 2)
cbi_tot_std = round(df_CBI_analysis["CBI"].std(), 2)

print("CBI Summary Stats (Overall):")
print(f"Mean CBI: {cbi_tot_mean}")
print(f"Median CBI: {cbi_tot_median}")
print(f"Standard Deviation: {cbi_tot_std}")


# DR min/max and course name
cbi_tot_min = round(df_CBI_analysis["CBI"].min(), 2)
cbi_tot_max = round(df_CBI_analysis["CBI"].max(), 2)
cbi_tot_course_min = df_CBI_analysis.loc[df_CBI_analysis["CBI"] == cbi_tot_min, "Course Description"].tolist() #Course name(s) as list
cbi_tot_course_max = df_CBI_analysis.loc[df_CBI_analysis["CBI"] == cbi_tot_max, "Course Description"].tolist()

print(f"Min CBI: {cbi_tot_min} — Courses: {cbi_tot_course_min}")
print(f"Max CBI: {cbi_tot_max} — Courses: {cbi_tot_course_max}")

# Top highest and lowest DR courses
high_low_count = 10
top_cbi_tot = df_CBI_analysis.nlargest(high_low_count, "CBI")
bottom_cbi_tot = df_CBI_analysis.nsmallest(high_low_count, "CBI")

print(f"\n{high_low_count} Highest CBI Courses:")
#print(top_cbi_tot)
print(top_cbi_tot[["Course Description", "CBI"]])
print(f"\n{high_low_count} Lowest CBI Courses:")
#print(bottom_cbi_tot)
print(bottom_cbi_tot[["Course Description", "CBI"]])

CBI Summary Stats (Overall):
Mean CBI: 3.19
Median CBI: 3.01
Standard Deviation: 1.04
Min CBI: 1.66 — Courses: ['Intellectual Property/Copyrigh']
Max CBI: 5.63 — Courses: ['Foundation of Info & Inference']

10 Highest CBI Courses:
                Course Description   CBI
32  Foundation of Info & Inference  5.63
46        Intro to Creative Coding  5.43
33                         Game AI  5.41
3    Applied Cyberinfrastruct Conc  5.14
65                      STEM Games  5.05
7    Bayesian Modeling & Inference  5.03
10    Computational Social Science  5.02
1              Advanced Web Design  4.83
2             Algorithms for Games  4.68
55   Monetizing Independent Gaming  4.60

10 Lowest CBI Courses:
                Course Description   CBI
44  Intellectual Property/Copyrigh  1.66
23                Digital Dilemmas  1.74
26  Digital Storytelling & Culture  1.82
76           Theories of New Media  1.82
62   Publishing:Papyrus to E-Books  1.91
63   Qualitative Internet Research  1.95
31     

In [920]:
df_CBI_term_analysis = df_CBI_term.copy()

df_CBI_term_analysis["CBI"] = (df_CBI_term_analysis["DR"]  * weights_tot["DR"] +
                               df_CBI_term_analysis["OFS"] * weights_tot["OFS"] +
                               df_CBI_term_analysis["IAS"] * weights_tot["IAS"] +
                               df_CBI_term_analysis["PCS"] * weights_tot["PCS"])

print("CBI Analysis by Term, All Scores")
print(df_CBI_term_analysis)

print("\nCBI Analysis by Term")
print(df_CBI_term_analysis[["Term", "Course Description", "CBI"]])

CBI Analysis by Term, All Scores
            Term             Course Description    DR  OFS  IAS  PCS   CBI
0      Fall 2021            Advanced Web Design  0.26  3.0  NaN    1   NaN
1      Fall 2021           Algorithms for Games  0.52  2.0  NaN    1   NaN
2      Fall 2021  Applied Cyberinfrastruct Conc  0.22  3.0  NaN    1   NaN
3      Fall 2021     Applied Data Visualization  0.72  1.0  NaN    1   NaN
4      Fall 2021  Bayesian Modeling & Inference  0.09  3.0  NaN    1   NaN
..           ...                            ...   ...  ...  ...  ...   ...
474  Spring 2025         The Past and New Media  0.34  1.0  1.0    0  2.34
475  Spring 2025          Theories of New Media  0.32  1.0  0.5    0  1.82
476  Spring 2025                Virtual Reality  0.46  1.0  1.0    1  3.46
477  Spring 2025        Visual Content Creation  0.52  3.0  1.0    0  4.52
478  Spring 2025              eSport Industries  0.50  1.0  1.0    0  2.50

[479 rows x 7 columns]

CBI Analysis by Term
            Term     

CBI by Term - Summary Statistics

In [None]:
# For Loop for our summary stats (mean, median, std, min/max) and top/bottom courses
for df_semester, group in df_CBI_term_analysis.groupby("Term"):
    # Summary stats (mean, median, std)
    cbi_term_mean = round(group["CBI"].mean(), 2)
    cbi_term_median = round(group["CBI"].median(), 2)
    cbi_term_std = round(group["CBI"].std(), 2)
    print(f"\n🗓 CBI Summary for {df_semester}") # emoji to make it easier to read
    print(f"Mean CBI: {cbi_term_mean}")
    print(f"Median CBI: {cbi_term_median}")
    print(f"Standard Deviation: {cbi_term_std}")

    # Min/Max with course name(s)
    cbi_term_min = round(group["CBI"].min(), 2)
    cbi_term_max = round(group["CBI"].max(), 2)
    cbi_term_course_min = group.loc[group["CBI"] == cbi_term_min, "Course Description"].tolist()
    cbi_term_course_max = group.loc[group["CBI"] == cbi_term_max, "Course Description"].tolist()
    print(f"Min CBI: {dr_min} on Courses: {cbi_term_course_min}")
    print(f"Max CBI: {dr_max} on Courses: {cbi_term_course_max}")

    #Top/Bottom 
    high_low_count = 10
    top_cbi_term = group.nlargest(high_low_count, "CBI")[["Course Description", "CBI"]]
    bottom_cbi_term = group.nsmallest(high_low_count, "CBI")[["Course Description", "CBI"]]
    print(f"\n{df_semester}: {high_low_count} Highest CBI Courses:")
    print(top_cbi_term.to_string(index=False))
    print(f"\n {df_semester}: {high_low_count} Lowest CBI Courses:")
    print(bottom_cbi_term.to_string(index=False))

# note Fall 2021 isn't calculating because of missing data ??


🗓 CBI Summary for Fall 2021
Mean CBI: nan
Median CBI: nan
Standard Deviation: nan
Min CBI: 0.0 on Courses: []
Max CBI: 0.98 on Courses: []

Fall 2021: 10 Highest CBI Courses:
            Course Description  CBI
           Advanced Web Design  NaN
          Algorithms for Games  NaN
 Applied Cyberinfrastruct Conc  NaN
    Applied Data Visualization  NaN
 Bayesian Modeling & Inference  NaN
  Collaborating: Online Commun  NaN
Computational Thinking & Doing  NaN
        Computing and the Arts  NaN
              Data Engineering  NaN
     Data Mining and Discovery  NaN

 Fall 2021: 10 Lowest CBI Courses:
            Course Description  CBI
           Advanced Web Design  NaN
          Algorithms for Games  NaN
 Applied Cyberinfrastruct Conc  NaN
    Applied Data Visualization  NaN
 Bayesian Modeling & Inference  NaN
  Collaborating: Online Commun  NaN
Computational Thinking & Doing  NaN
        Computing and the Arts  NaN
              Data Engineering  NaN
     Data Mining and Discovery  