In [12]:
%reset -f

In [13]:
# Import packages
import os
import pandas as pd

# Import df_all and raw data for IAS & PCS
base_path = os.path.join(os.getcwd(), "CBI Analysis")
df_all = pd.read_excel(os.path.join(base_path, "df_all.xlsx"))
df_IAS_raw = pd.read_excel(os.path.join(base_path, "2022_2025_Faculty Load Analysis - CLEANED.xlsx"))
df_PCS_raw = pd.read_excel(os.path.join(base_path, "Course List w. Pre-Reqs.xlsx"))


Import the Semester Sort Function

In [14]:
# Strategy: label Fall/Spring starting with the same year, to count them as the same academic year
# e.g. Fall 2021 & Spring 2022 will turn into 2021.0 and 2021.5, to group as the 2021 academic year

def semester_order_func(sem):
    season, year = sem.split()
    year = int(year)
    if season == "Fall":
        return year + 0.0           # Fall 2021 turns into 2021.0; Fall 2021 into 2022.0; etc
    elif season == "Spring":
        return (year - 1) + 0.5     # Spring 2022 - 1 + 0.5 = 2021.5 so that it'll follow Fall 2021.0

<h1> Course Bottleneck Index (CBI) KDD </h1>

<h3> Begin with Defining Terms: DR, OFS, IAS, PCS </h3>

<h2> Demand Ratio (DR)</h2>
DR = enrollment / capacity

Data Cleanse for DR, group Co-Convened Courses

In [15]:
df_DR_all = df_all.copy()

# Co-Convened? no: df_n_coconvened, yes: df_y_coconvened
df_n_coconvened = df_DR_all[df_DR_all["Combined Section"] == "-"] # Whether or not value = "-"
df_y_coconvened = df_DR_all[df_DR_all["Combined Section"] != "-"]

# Co-Convened Yes Modifications -->
# Feed to Aggregate Dictionary: 1) grouping columns; 2) sum columns; 3) all remaining columns, keep "first" value
cc_group_cols = ["Term", "Combined Section"]           # same "term" & "combined section" -> yes co-convened
cc_sum_cols = ["Total Enroll", "Enrollment Capacity"]  # sum Enroll & Capacity if co-convened
cc_first_cols = [col for col in df_y_coconvened.columns if col not in cc_group_cols + cc_sum_cols] # all other cols, keep first value

# Build the aggregate dictionary for Co-Convened Yes
cc_agg_dict = {col: "sum" for col in cc_sum_cols}
cc_agg_dict.update({col: "first" for col in cc_first_cols})

# Co-Convened Yes: Group and aggregate
df_y_coconvened = df_y_coconvened.groupby(cc_group_cols, as_index=False).agg(cc_agg_dict)

# Concatenate the y/n Co-Convened DFs together
df_DR_all = pd.concat([df_n_coconvened, df_y_coconvened], ignore_index=True)

print("df_DR_all had",df_all.shape[0], "rows before consolidating the co-convened classes")
print("\ndf_DR_all has",df_DR_all.shape[0], "rows after consolidating the co-convened classes")

df_DR_all had 1559 rows before consolidating the co-convened classes

df_DR_all has 1047 rows after consolidating the co-convened classes


DR for every class

In [16]:
#DR for Every Individual Class
df_DR_all["DR"] = (df_DR_all["Total Enroll"] / df_DR_all["Enrollment Capacity"]).round(2)

Finalize data cleansing for df_DR_all

In [17]:
df_DR_missing = df_DR_all[df_DR_all["DR"].isna()]
# All of them are classes with no enrollments & no capacity

# Drop the missing values
df_DR_all = df_DR_all.dropna(subset=["DR"])

# Drop the 2 (discussion) classes that had Enrollments but didn't have Enrollment Capacity (DR = "inf")
# Statistic Foundations Info Age & Dealing with Data
df_DR_all = df_DR_all[df_DR_all["DR"].astype(str) != "inf"]

# df_DR_all is ready to go!


DR: Total by Course

In [18]:
df_avg_DR = (df_DR_all.groupby("Course Description", as_index=False)["DR"].mean()).round(2)
print("Average DR by Course:")
print(df_avg_DR)

Average DR by Course:
               Course Description    DR
0       Advanced Game Development  0.30
1             Advanced Web Design  0.33
2            Algorithms for Games  0.68
3   Applied Cyberinfrastruct Conc  0.14
4      Applied Data Visualization  0.68
..                            ...   ...
73          Theories of New Media  0.49
74       User Interf+Website Dsgn  0.06
75                Virtual Reality  0.48
76        Visual Content Creation  0.52
77              eSport Industries  0.70

[78 rows x 2 columns]


DR Total - Summary Stats

In [19]:
# Overall stats
dr_mean = round(df_avg_DR["DR"].mean(), 2)
dr_median = round(df_avg_DR["DR"].median(), 2)
dr_std = round(df_avg_DR["DR"].std(), 2)

print("DR Summary Statistics (Overall):")
print(f"Mean DR: {dr_mean}")
print(f"Median DR: {dr_median}")
print(f"Standard Deviation: {dr_std}")


# DR min/max and course name
dr_min = round(df_avg_DR["DR"].min(), 2)
dr_max = round(df_avg_DR["DR"].max(), 2)
course_min = df_avg_DR.loc[df_avg_DR["DR"] == dr_min, "Course Description"].tolist() #Course name(s) as list
course_max = df_avg_DR.loc[df_avg_DR["DR"] == dr_max, "Course Description"].tolist()

print(f"Min DR: {dr_min} — Courses: {course_min}")
print(f"Max DR: {dr_max} — Courses: {course_max}")

# Top 10 highest and lowest DR courses
top_dr = df_avg_DR.nlargest(10, "DR")
bottom_dr = df_avg_DR.nsmallest(10, "DR")

print("\n10 Highest DR Courses:")
print(top_dr)
print("\n10 Lowest DR Courses:")
print(bottom_dr)


DR Summary Statistics (Overall):
Mean DR: 0.48
Median DR: 0.53
Standard Deviation: 0.24
Min DR: 0.02 — Courses: ['Computational Social Science', 'Data Ethics']
Max DR: 0.93 — Courses: ['Intro to Creative Coding']

10 Highest DR Courses:
                Course Description    DR
46        Intro to Creative Coding  0.93
22    Digital Crime & Social Media  0.91
42             Installation Design  0.87
12          Computing and the Arts  0.85
11  Computational Thinking & Doing  0.84
52  Intro: Human Computer Interact  0.77
53     Introduction to Game Design  0.76
31       Ethics in a Digital World  0.75
67      Social Media and Ourselves  0.75
70  Statistic Foundations Info Age  0.72

10 Lowest DR Courses:
               Course Description    DR
10   Computational Social Science  0.02
14                    Data Ethics  0.02
7   Bayesian Modeling & Inference  0.03
30  Ethical Issues in Information  0.03
64                     STEM Games  0.05
8            Business Information  0.06
74       

DR: By Term & Course

In [20]:
df_term_avg_DR = (df_DR_all.groupby(["Term", "Course Description"], as_index=False)["DR"].mean()).round(2)

# Sort by Term
df_term_avg_DR = df_term_avg_DR.set_index(["Term", "Course Description"])
df_term_avg_DR = df_term_avg_DR.loc[sorted(df_term_avg_DR.index, key=lambda x: semester_order_func(x[0]))]
df_term_avg_DR = df_term_avg_DR.reset_index()

print("Average DR by Course & Term:")
print(df_term_avg_DR)

Average DR by Course & Term:
            Term             Course Description    DR
0      Fall 2021            Advanced Web Design  0.26
1      Fall 2021           Algorithms for Games  0.52
2      Fall 2021  Applied Cyberinfrastruct Conc  0.22
3      Fall 2021     Applied Data Visualization  0.72
4      Fall 2021  Bayesian Modeling & Inference  0.09
..           ...                            ...   ...
460  Spring 2025         The Past and New Media  0.34
461  Spring 2025          Theories of New Media  0.32
462  Spring 2025                Virtual Reality  0.46
463  Spring 2025        Visual Content Creation  0.52
464  Spring 2025              eSport Industries  0.50

[465 rows x 3 columns]


DR by Term - Summary Stats

In [21]:
# For Loop for our summary stats (mean, median, std, min/max) and top/bottom 5 courses
for df_semester, group in df_term_avg_DR.groupby("Term"):
    # Summary stats (mean, median, std)
    dr_mean = round(group["DR"].mean(), 2)
    dr_median = round(group["DR"].median(), 2)
    dr_std = round(group["DR"].std(), 2)
    print(f"\n🗓 DR Summary for {df_semester}") # emoji to make it easier to read
    print(f"Mean DR: {dr_mean}")
    print(f"Median DR: {dr_median}")
    print(f"Standard Deviation: {dr_std}")

    # Min/Max and course name(s)
    dr_min = round(group["DR"].min(), 2)
    dr_max = round(group["DR"].max(), 2)
    course_min = group.loc[group["DR"] == dr_min, "Course Description"].tolist()
    course_max = group.loc[group["DR"] == dr_max, "Course Description"].tolist()
    print(f"Min DR: {dr_min} on Courses: {course_min}")
    print(f"Max DR: {dr_max} on Courses: {course_max}")

    #Top/Bottom 5
    top_5 = group.nlargest(5, "DR")[["Course Description", "DR"]]
    bottom_5 = group.nsmallest(5, "DR")[["Course Description", "DR"]]
    print(f"\n{df_semester}: 5 Highest DR Courses:")
    print(top_5.to_string(index=False))
    print(f"\n {df_semester}: 5 Lowest DR Courses:")
    print(bottom_5.to_string(index=False))



🗓 DR Summary for Fall 2021
Mean DR: 0.59
Median DR: 0.61
Standard Deviation: 0.28
Min DR: 0.03 on Courses: ['Ethical Issues in Information']
Max DR: 1.02 on Courses: ['eSport Industries']

Fall 2021: 5 Highest DR Courses:
          Course Description   DR
           eSport Industries 1.02
 Great Ideas of the Info Age 1.01
Digital Crime & Social Media 0.96
  Social Media and Ourselves 0.96
    Intro to Creative Coding 0.95

 Fall 2021: 5 Lowest DR Courses:
           Course Description   DR
Ethical Issues in Information 0.03
Bayesian Modeling & Inference 0.09
     User Interf+Website Dsgn 0.09
       Government Information 0.12
               Special Topics 0.15

🗓 DR Summary for Fall 2022
Mean DR: 0.59
Median DR: 0.64
Standard Deviation: 0.27
Min DR: 0.04 on Courses: ['Ethical Issues in Information', 'Science Information']
Max DR: 0.98 on Courses: ['Diversity and Bias in Games', 'eSport Industries']

Fall 2022: 5 Highest DR Courses:
          Course Description   DR
 Diversity and Bia

<h2>Offering Frequency Score (OFS)</h2>
Captures how often a course if offered: 1: every semester, 2: once a year, 3: less than once a year

Total Courses by "Course Description"

In [22]:
# Total Courses by "Course Description"
course_totals = df_all.groupby("Course Description").size().sort_values(ascending=False)
print("Total Classes by Course Description:")
print(course_totals)

Total Classes by Course Description:
Course Description
Statistic Foundations Info Age    77
Digital Storytelling & Culture    74
Computational Thinking & Doing    72
Social Media and Ourselves        50
Dealing with Data                 49
                                  ..
Foundation of Info & Inference     2
Game AI                            2
Designing an Installation          2
Natural Language Processing        1
Visual Content Creation            1
Length: 81, dtype: int64


Total Courses by Term

In [23]:
course_total_by_semester = df_all.groupby(["Course Description", "Term"]).size().unstack(fill_value=0)
course_total_by_semester = course_total_by_semester[sorted(course_total_by_semester.columns, key=semester_order_func)]
course_total_by_semester = course_total_by_semester.reset_index()   #Include Course Description as a column, was being dropped

print("Total Classes by Semester:")
print(course_total_by_semester)

Total Classes by Semester:
Term             Course Description  Fall 2021  Spring 2022  Fall 2022  \
0         Advanced Game Development          0            1          1   
1               Advanced Web Design          2            2          0   
2              Algorithms for Games          1            1          1   
3     Applied Cyberinfrastruct Conc          6            0          0   
4        Applied Data Visualization          2            2          2   
..                              ...        ...          ...        ...   
76            Theories of New Media          8            6          4   
77         User Interf+Website Dsgn          2            2          0   
78                  Virtual Reality          1            1          1   
79          Visual Content Creation          0            0          0   
80                eSport Industries          2            4          2   

Term  Spring 2023  Fall 2023  Spring 2024  Fall 2024  Spring 2025  
0               

Function: Get OFS Score (get_OFS)

1: Every Semester. 2: Once a year. 3: Less than once a year.

In [24]:
df_OFS = course_total_by_semester.copy()

# Specify Col Names because was having issues with indexing 
semester_cols = ["Fall 2021", "Spring 2022", "Fall 2022", "Spring 2023",
                 "Fall 2023", "Spring 2024", "Fall 2024", "Spring 2025"]

# Add "Term Count" Column: count of Non-Zero values in semester_cols to double check our 1/2/3s
df_OFS["TermCount"] = df_OFS[semester_cols].ne(0).sum(axis=1)

# OFS Function: Assign 1, 2, and 3
def get_OFS(row):
    semester_vals = row[semester_cols].values  # only the 8 columns
    nonzero_count = (semester_vals != 0).sum()

    if nonzero_count >= 7:    # 1, Every semester: at least 7 of the last 8 semesters
        return 1
    elif nonzero_count <= 3:  # 3, Less than once a year: if offered 3 semesters or fewer
        return 3
    else:                     # 2, Every other semester: if offered 4-6 semesters
        return 2



Calculate & Print OFS Scores

In [25]:
# Run our get_OFS to get 1/2/3 OFS scores
df_OFS["OFS"] = df_OFS.apply(get_OFS, axis=1)

# Smaller DF for printing: keep just Course Description", "TermCount", & "OFS"
df_OFS_small = df_OFS[["Course Description", "TermCount", "OFS"]]

print("Course Descriptions & Their Offering Frequency Score (OFS):")
print(df_OFS_small)

Course Descriptions & Their Offering Frequency Score (OFS):
Term             Course Description  TermCount  OFS
0         Advanced Game Development          7    1
1               Advanced Web Design          3    3
2              Algorithms for Games          6    2
3     Applied Cyberinfrastruct Conc          2    3
4        Applied Data Visualization          8    1
..                              ...        ...  ...
76            Theories of New Media          8    1
77         User Interf+Website Dsgn          2    3
78                  Virtual Reality          8    1
79          Visual Content Creation          1    3
80                eSport Industries          8    1

[81 rows x 3 columns]


OFS Summary Stats

In [26]:
# Set up DFs for OFSxCounts and OFSxAverage-TermCount
ofs_counts = df_OFS_small["OFS"].value_counts().sort_index()                    # counting the OFS scores
ofs_termcount_avg = df_OFS_small.groupby("OFS")["TermCount"].mean().round(2)    # group by OFS to get average Term Count

print("Offering Frequency Score (OFS) Summary Stats:\n")
for ofs_score in sorted(ofs_counts.index):
    count = ofs_counts[ofs_score]
    avg_terms = ofs_termcount_avg[ofs_score]
    print(f"OFS Score {ofs_score}: {count} total courses with an Average Term Count of {avg_terms}")


Offering Frequency Score (OFS) Summary Stats:

OFS Score 1: 46 total courses with an Average Term Count of 7.91
OFS Score 2: 14 total courses with an Average Term Count of 5.0
OFS Score 3: 21 total courses with an Average Term Count of 2.29


<h2> Prerequisite Complexity Score (PCS) </h2>
0: No Prerequisites. 1: Yes Prerequisites.

Clean PCS dataframe to just be UGrad

In [27]:
# Make a copy to avoid modifying the original
df_PCS_ugrad = df_PCS_raw.copy()

# Temporary Column "Catalog_First_Digit":
# Extract first digit from Catalog # and convert to float (<5 is undergrad)
df_PCS_ugrad["Catalog_First_Digit"] = df_PCS_ugrad["Catalog #"].astype(str).str[0].str.extract(r"(\d)").astype(float)
df_PCS_ugrad = df_PCS_ugrad[df_PCS_ugrad["Catalog_First_Digit"] < 5].copy()
df_PCS_ugrad.drop(columns="Catalog_First_Digit", inplace=True) # Remove temporary column

print(f"Undergrad PCS rows: {df_PCS_ugrad.shape[0]}")

# There are more UGrad class options loaded here than we did in the original cleaned dataset
# My df_all was more filtered than this df_PCS copy --> will just keep the values that match in my df_all

Undergrad PCS rows: 181


PCS: Fill my DF (df_PCS_all, from df_all) with PreReqs from our raw data df_PCS_ugrad

In [28]:
# New DF df_PCS_all from df_all to do our PCS analysis
df_PCS_all = df_all.copy()

# Create empty PCS column to store y/n from df_PCS_ugrad
df_PCS_all["PCS"] = None  # or np.nan if you want

# Create map for matching df_PCS_ugrad's Course Descriptions (raw data) into our working df_PCS_all
PCS_map = (df_PCS_ugrad[["Course Description", "Requirements"]]
    .drop_duplicates("Course Description")              # keep first match Course Description
    .set_index("Course Description")["Requirements"])   # set index to Course Description, get values from Requirements column

# Fill PCS column with matching Requirements
df_PCS_all["PCS"] = df_PCS_all["Course Description"].map(PCS_map)

# Convert Y/- to 1 (yes) & 0 (no)
df_PCS_all["PCS"] = (df_PCS_all["PCS"] == "Y").astype(int)


PCS scores

In [29]:
df_PCS_scores = df_PCS_all.groupby("Course Description", as_index=False)["PCS"].max()

print(df_PCS_scores[["Course Description", "PCS"]])


               Course Description  PCS
0       Advanced Game Development    1
1             Advanced Web Design    1
2            Algorithms for Games    1
3   Applied Cyberinfrastruct Conc    1
4      Applied Data Visualization    1
..                            ...  ...
76          Theories of New Media    0
77       User Interf+Website Dsgn    0
78                Virtual Reality    1
79        Visual Content Creation    0
80              eSport Industries    0

[81 rows x 2 columns]


Check which classes dropped from DR (78 vs 81)

In [30]:
dropped_courses = set(df_PCS_scores["Course Description"]) - set(df_avg_DR["Course Description"])
print("Courses without a DR:")
print(dropped_courses)


Courses without a DR:
{'Simulation and Problem Solving', 'Special Topics in LIS', 'Natural Language Processing'}


PCS summary stats

In [31]:
# Set up DF for PCSxCounts 
pcs_counts = df_PCS_scores["PCS"].value_counts().sort_index()

print("Prerequisite Complexity Score (PCS) Summary Stats:")
for pcs_score in sorted(pcs_counts.index):
    count = pcs_counts[pcs_score]
    print(f"PCS Score {pcs_score}: {count}")


Prerequisite Complexity Score (PCS) Summary Stats:
PCS Score 0: 50
PCS Score 1: 31


<h2> Instructor Availability Score (IAS)</h2>
IAS = 1/[number of instructors]

Data Cleanse df_IAS_raw a bit

In [32]:
df_IAS_raw_ugrad = df_IAS_raw.copy()

# Keep specific sessions
df_IAS_raw_ugrad = df_IAS_raw_ugrad[df_IAS_raw_ugrad["Session"].isin(["Regular Academic Session", 
    "Seven Week - First", "Seven Week - Second"])]

# Keep specific campuses
df_IAS_raw_ugrad = df_IAS_raw_ugrad[df_IAS_raw_ugrad["Class Campus"].isin(["University of Arizona - Main", "Arizona Online"])]

# Filter for ugrad: first digit <5 is ugrad
df_IAS_raw_ugrad["Catalog_First_Digit"] = df_IAS_raw_ugrad["Catalog Number"].astype(str).str[0].str.extract(r"(\d)").astype(float)
df_IAS_raw_ugrad = df_IAS_raw_ugrad[df_IAS_raw_ugrad["Catalog_First_Digit"] < 5].copy()
df_IAS_raw_ugrad.drop(columns="Catalog_First_Digit", inplace=True) # Remove temporary column

Merge the df_IAS_raw_ugrad with what I'll use

In [33]:
df_IAS_all = df_all.copy()

# If these 4 cols match, we can assume it's the same class in both DFs
merge_cols = ["Term", "Session", "Subject Code", "Catalog Number"]

# Merge Keys: Create Unique Identifier from the 4 columns above
df_IAS_all["merge_key"] = df_IAS_all[merge_cols].astype(str).agg("_".join, axis=1)
df_IAS_raw_ugrad["merge_key"] = df_IAS_raw_ugrad[merge_cols].astype(str).agg("_".join, axis=1)

pseudo_lookup = (df_IAS_raw_ugrad[["merge_key", "Psuedonymn"]]
    .drop_duplicates("merge_key")  # keep first match
    .set_index("merge_key")["Psuedonymn"])
load_lookup = (df_IAS_raw_ugrad[["merge_key", "Instructor Load Factor"]]
    .drop_duplicates("merge_key")
    .set_index("merge_key")["Instructor Load Factor"])

role_lookup = (df_IAS_raw_ugrad[["merge_key", "Instructor Role"]]
    .drop_duplicates("merge_key")
    .set_index("merge_key")["Instructor Role"])

df_IAS_all["Instructor Load Factor"] = df_IAS_all["merge_key"].map(load_lookup)
df_IAS_all["Instructor Role"] = df_IAS_all["merge_key"].map(role_lookup)


df_IAS_all["Pseudonym"] = df_IAS_all["merge_key"].map(pseudo_lookup)
df_IAS_all["Instructor Load Factor"] = df_IAS_all["merge_key"].map(load_lookup)
df_IAS_all["Instructor Role"] = df_IAS_all["merge_key"].map(role_lookup)

Explore missing data
?? need Course ID

In [34]:
df_IAS_missing = df_IAS_all.copy()

df_IAS_missing = df_IAS_all[df_IAS_all[["Instructor Load Factor", "Instructor Role", "Pseudonym"]].isna().any(axis=1)].copy()

# 204 rows / 13% of our data is missing "Instructor Load Factor", "Instructor Role", "Pseudonym"
# Error Source was not my data cleansing - same 204 count either way

?? temporarily removing missing values to continue with code

In [35]:
df_IAS_all = df_IAS_all.dropna(subset=["Instructor Load Factor", "Instructor Role", "Pseudonym"])

In [36]:
df_IAS_tot = df_IAS_all.copy()

# Group by Course Description; count unique Pseudonyms
instructor_counts = df_IAS_tot.groupby("Course Description")["Pseudonym"].nunique()

# Create new DataFrame with Course Description, Instructor Count, and IAS
df_IAS_tot = instructor_counts.reset_index()
df_IAS_tot.rename(columns={"Pseudonym": "Instructor Count"}, inplace=True)
df_IAS_tot["IAS"] = (1 / df_IAS_tot["Instructor Count"]).round(2)


IAS by Total:

In [37]:
df_IAS_tot = df_IAS_all.copy()

# Empty column for Instructor Count
df_IAS_tot["Instructor Count"] = None

# Group by Course Description; count unique Pseudonyms
instructor_counts = df_IAS_tot.groupby("Course Description")["Pseudonym"].nunique()

# Fill Instructor Count with counts of unique Pseudonyms by course
df_IAS_tot["Instructor Count"] = df_IAS_tot["Course Description"].map(instructor_counts)

# Reorganize: Drop unecessary columns, consolidate to unique Course Descriptions
df_IAS_tot.drop(columns=["merge_key", "Instructor Load Factor", "Instructor Role", "Pseudonym"], inplace=True) # drop these - no longer necessary
df_IAS_tot = df_IAS_tot.groupby("Course Description", as_index=False).first()

# Add IAS column & scores; IAS = 1 / Instructor Count
df_IAS_tot["IAS"] = (1 / df_IAS_tot["Instructor Count"]).round(2) 

In [38]:
# Smaller DF for printing
df_IAS_tot_small = df_IAS_tot[["Course Description", "Instructor Count", "IAS"]]

print("Course Descriptions & Their Instructor Availability Score (IAS):")
print(df_IAS_tot_small)

Course Descriptions & Their Instructor Availability Score (IAS):
               Course Description  Instructor Count   IAS
0       Advanced Game Development                 2  0.50
1             Advanced Web Design                 2  0.50
2            Algorithms for Games                 1  1.00
3   Applied Cyberinfrastruct Conc                 1  1.00
4      Applied Data Visualization                 4  0.25
..                            ...               ...   ...
75          Theories of New Media                 3  0.33
76       User Interf+Website Dsgn                 1  1.00
77                Virtual Reality                 2  0.50
78        Visual Content Creation                 1  1.00
79              eSport Industries                 1  1.00

[80 rows x 3 columns]


IAS Totals - Summary Statistics

In [39]:
# Counts by IAS Score
ias_counts = df_IAS_tot_small["IAS"].value_counts().sort_index()

print("Instructor Availability Score (IAS) Counts by Score:")
for ias_score in sorted(ias_counts.index):
    count = ias_counts[ias_score]
    print(f"PCS Score {ias_score}: {count}")

# Overall stats (same code as DR Total)
ias_mean = round(df_IAS_tot_small["IAS"].mean(), 2)
ias_median = round(df_IAS_tot_small["IAS"].median(), 2)
ias_std = round(df_IAS_tot_small["IAS"].std(), 2)
print("\nInstructor Availability Score (IAS) Summary Stats (Overall):")
print(f"Mean IAS: {ias_mean}")
print(f"Median IAS: {ias_median}")
print(f"Standard Deviation: {ias_std}")

# IAS min/max and course name
ias_min = round(df_IAS_tot_small["IAS"].min(), 2)
ias_max = round(df_IAS_tot_small["IAS"].max(), 2)
ias_course_min = df_IAS_tot_small.loc[df_IAS_tot_small["IAS"] == ias_min, "Course Description"].tolist() #Course name(s) as list
ias_course_max = df_IAS_tot_small.loc[df_IAS_tot_small["IAS"] == ias_max, "Course Description"].tolist()

print(f"Min IAS: {ias_min} — Courses: {ias_course_min}")
print(f"Max IAS: {ias_max} — Courses: {ias_course_max}")

# Top 5 highest and lowest IAS courses
top_ias = df_IAS_tot_small.nlargest(5, "IAS")
bottom_ias = df_IAS_tot_small.nsmallest(5, "IAS")

print("\n5 Highest IAS Courses:")
print(top_ias)
print("\n5 Lowest IAS Courses:")
print(bottom_ias)


Instructor Availability Score (IAS) Counts by Score:
PCS Score 0.14: 1
PCS Score 0.25: 6
PCS Score 0.33: 7
PCS Score 0.5: 28
PCS Score 1.0: 38

Instructor Availability Score (IAS) Summary Stats (Overall):
Mean IAS: 0.7
Median IAS: 0.5
Standard Deviation: 0.3
Min IAS: 0.14 — Courses: ['Intro to Machine Learning']
Max IAS: 1.0 — Courses: ['Algorithms for Games', 'Applied Cyberinfrastruct Conc', 'Applied NLP', 'Bayesian Modeling & Inference', 'Collaborating: Online Commun', 'Computational Social Science', 'Data Ethics', 'Database Dev and Mgmt', 'Designing an Installation', 'Dig Games and Society', 'Digital Commerce', 'Digital Crime & Social Media', 'Disruptive Technologies', 'Esports Casting', 'Foundation of Info & Inference', 'Game AI', 'Game Development', 'Gamification in Society', 'Government Information', 'Great Ideas of the Info Age', 'Hacking & Open Source Culture', 'Information Security', 'Instructional Technologies', 'Intro to Data Science', 'Intro to Info Tech', 'Monetizing Indep

IAS by Term

In [40]:
df_IAS_term = df_IAS_all.copy()

# Empty column for Instructor Count
df_IAS_term["Instructor Count"] = None

# Group by Course Description; count unique Pseudonyms
instructor_counts2 = df_IAS_term.groupby(["Term", "Course Description"])["Pseudonym"].nunique()

# Fill Instructor Count with counts of unique Pseudonyms by course
df_IAS_term["Instructor Count"] = df_IAS_term.set_index(["Term", "Course Description"]).index.map(instructor_counts2)

# Reorganize: Drop unecessary columns, consolidate to unique Course Descriptions
df_IAS_term.drop(columns=["merge_key", "Instructor Load Factor", "Instructor Role", "Pseudonym"], inplace=True) # drop these - no longer necessary
df_IAS_term = df_IAS_term.groupby(["Term", "Course Description"], as_index=False).first()

# Add IAS column & scores; IAS = 1 / Instructor Count
df_IAS_term["IAS"] = (1 / df_IAS_term["Instructor Count"]).round(2) 

In [41]:
# Sort by Term with semester_order_func
df_IAS_term = df_IAS_term[df_IAS_term["Term"].notna()]
df_IAS_term = df_IAS_term.set_index(["Term", "Course Description"])
df_IAS_term = df_IAS_term.loc[sorted(df_IAS_term.index, key=lambda x: semester_order_func(x[0]))]
df_IAS_term = df_IAS_term.reset_index()

print(df_IAS_term)

            Term          Course Description                   Session  \
0    Spring 2022   Advanced Game Development       Seven Week - Second   
1    Spring 2022         Advanced Web Design  Regular Academic Session   
2    Spring 2022        Algorithms for Games  Regular Academic Session   
3    Spring 2022  Applied Data Visualization  Regular Academic Session   
4    Spring 2022     Artificial Intelligence  Regular Academic Session   
..           ...                         ...                       ...   
413  Spring 2025      The Past and New Media        Seven Week - First   
414  Spring 2025       Theories of New Media  Regular Academic Session   
415  Spring 2025             Virtual Reality        Seven Week - First   
416  Spring 2025     Visual Content Creation  Regular Academic Session   
417  Spring 2025           eSport Industries        Seven Week - First   

    Session Code                        Campus                  Facility  \
0            7W2  University of Ari

In [42]:
# Smaller DF for printing; sort semesters with my function
df_IAS_term_small = df_IAS_term[["Term", "Course Description", "Instructor Count", "IAS"]]

print("IAS by Course Descriptions & Term:")
print(df_IAS_term_small)

IAS by Course Descriptions & Term:
            Term          Course Description  Instructor Count  IAS
0    Spring 2022   Advanced Game Development                 1  1.0
1    Spring 2022         Advanced Web Design                 1  1.0
2    Spring 2022        Algorithms for Games                 1  1.0
3    Spring 2022  Applied Data Visualization                 1  1.0
4    Spring 2022     Artificial Intelligence                 1  1.0
..           ...                         ...               ...  ...
413  Spring 2025      The Past and New Media                 1  1.0
414  Spring 2025       Theories of New Media                 2  0.5
415  Spring 2025             Virtual Reality                 1  1.0
416  Spring 2025     Visual Content Creation                 1  1.0
417  Spring 2025           eSport Industries                 1  1.0

[418 rows x 4 columns]


IAS by Term - Summary Statistics

In [43]:
# For Loop for our summary stats (mean, median, std, min/max)  
for df_semester, group in df_IAS_term_small.groupby("Term"):

    print(f"\nIAS Summary Stats for {df_semester}:") 

    # Summary stats (mean, median, std)
    ias_mean = round(group["IAS"].mean(), 2)
    ias_median = round(group["IAS"].median(), 2)
    ias_std = round(group["IAS"].std(), 2)
    print(f"Mean IAS: {ias_mean}")
    print(f"Median IAS: {ias_median}")
    print(f"Standard Deviation: {ias_std}")

    # Counts by IAS score within this term
    ias_counts = group["IAS"].value_counts().sort_index()
    print("IAS Score Counts:")
    for ias_score in sorted(ias_counts.index):
        count = ias_counts[ias_score]
        print(f"  IAS {ias_score}: {count} courses")


IAS Summary Stats for Fall 2022:
Mean IAS: 0.95
Median IAS: 1.0
Standard Deviation: 0.16
IAS Score Counts:
  IAS 0.5: 6 courses
  IAS 1.0: 49 courses

IAS Summary Stats for Fall 2023:
Mean IAS: 0.93
Median IAS: 1.0
Standard Deviation: 0.17
IAS Score Counts:
  IAS 0.5: 8 courses
  IAS 1.0: 52 courses

IAS Summary Stats for Fall 2024:
Mean IAS: 0.91
Median IAS: 1.0
Standard Deviation: 0.2
IAS Score Counts:
  IAS 0.33: 1 courses
  IAS 0.5: 9 courses
  IAS 1.0: 50 courses

IAS Summary Stats for Spring 2022:
Mean IAS: 0.92
Median IAS: 1.0
Standard Deviation: 0.18
IAS Score Counts:
  IAS 0.5: 9 courses
  IAS 1.0: 49 courses

IAS Summary Stats for Spring 2023:
Mean IAS: 0.95
Median IAS: 1.0
Standard Deviation: 0.15
IAS Score Counts:
  IAS 0.5: 6 courses
  IAS 1.0: 56 courses

IAS Summary Stats for Spring 2024:
Mean IAS: 0.93
Median IAS: 1.0
Standard Deviation: 0.18
IAS Score Counts:
  IAS 0.33: 1 courses
  IAS 0.5: 7 courses
  IAS 1.0: 52 courses

IAS Summary Stats for Spring 2025:
Mean IAS:

<h2>Composite Bottleneck Index (CBI) Data Frames</h2>

Make CBI Data Frame - Total

In [44]:
# CBI Total Data Frame - Merge on "Course Description"
df_CBI = df_avg_DR.merge(df_OFS_small, on= "Course Description", how= "outer")
df_CBI = df_CBI.merge(df_IAS_tot_small, on="Course Description", how= "outer")
df_CBI = df_CBI.merge(df_PCS_scores, on= "Course Description", how="outer")

df_CBI.drop(columns=["TermCount", "Instructor Count"], inplace=True, errors="ignore")

print("CBI Data Frame by Totals:")
print(df_CBI)


CBI Data Frame by Totals:
               Course Description    DR  OFS   IAS  PCS
0       Advanced Game Development  0.30    1  0.50    1
1             Advanced Web Design  0.33    3  0.50    1
2            Algorithms for Games  0.68    2  1.00    1
3   Applied Cyberinfrastruct Conc  0.14    3  1.00    1
4      Applied Data Visualization  0.68    1  0.25    1
..                            ...   ...  ...   ...  ...
76          Theories of New Media  0.49    1  0.33    0
77       User Interf+Website Dsgn  0.06    3  1.00    0
78                Virtual Reality  0.48    1  0.50    1
79        Visual Content Creation  0.52    3  1.00    0
80              eSport Industries  0.70    1  1.00    0

[81 rows x 5 columns]


Make CBI Data Frame - By Term

In [45]:
# CBI By Term Data Frame - Merge on "Course Description" and/or ["Term", "Course Description"]
df_CBI_term = df_term_avg_DR.merge(df_OFS_small, on= "Course Description", how= "outer")
df_CBI_term = df_CBI_term.merge(df_IAS_term_small, on=["Term", "Course Description"], how= "outer")
df_CBI_term = df_CBI_term.merge(df_PCS_scores, on= "Course Description", how="outer")

# Drop unnecessary columns
df_CBI_term.drop(columns=["TermCount", "Instructor Count"], inplace=True, errors="ignore")   # Drop exta columns

# print("Testing CBI Data Frame by Term:")
# print(df_CBI_term) # 482 rows

In [46]:
# Sort Terms with my semester_order_func function
df_CBI_term = df_CBI_term[df_CBI_term["Term"].notna()]              # Some of the Terms are loading in as Missing (dropped 2 rows)
df_CBI_term = df_CBI_term.set_index(["Term", "Course Description"]) # Set index to group by
df_CBI_term = df_CBI_term.loc[sorted(df_CBI_term.index, key=lambda x: semester_order_func(x[0]))] # Multi-Index sort function
df_CBI_term = df_CBI_term.reset_index()

print("CBI Data Frame by Term:")
print(df_CBI_term)

CBI Data Frame by Term:
            Term             Course Description    DR  OFS  IAS  PCS
0      Fall 2021            Advanced Web Design  0.26  3.0  NaN    1
1      Fall 2021           Algorithms for Games  0.52  2.0  NaN    1
2      Fall 2021  Applied Cyberinfrastruct Conc  0.22  3.0  NaN    1
3      Fall 2021     Applied Data Visualization  0.72  1.0  NaN    1
4      Fall 2021  Bayesian Modeling & Inference  0.09  3.0  NaN    1
..           ...                            ...   ...  ...  ...  ...
474  Spring 2025         The Past and New Media  0.34  1.0  1.0    0
475  Spring 2025          Theories of New Media  0.32  1.0  0.5    0
476  Spring 2025                Virtual Reality  0.46  1.0  1.0    1
477  Spring 2025        Visual Content Creation  0.52  3.0  1.0    0
478  Spring 2025              eSport Industries  0.50  1.0  1.0    0

[479 rows x 6 columns]


CBI Total - Weights

In [47]:
weights_tot = {"DR": 1, "OFS": 1,
               "IAS": 1, "PCS": 1}

CBI by Term - Weights

In [48]:
weights_term = {"DR": 1, "OFS": 1,
               "IAS": 1, "PCS": 1}

<h2>Composite Bottleneck Index (CBI) Analysis </h2>
CBI = (DR * w1) + (OFS * w2) + (IAS * w3) + (PCS * w4)

CBI - Total

In [49]:
df_CBI_analysis = df_CBI.copy()

# Add CBI column
# CBI = (DR * w1) + (OFS * w2) + (IAS * w3) + (PCS * w4)
df_CBI_analysis["CBI"] = ((df_CBI_analysis["DR"]  * weights_tot["DR"] +
                          df_CBI_analysis["OFS"] * weights_tot["OFS"] +
                          df_CBI_analysis["IAS"] * weights_tot["IAS"] +
                          df_CBI_analysis["PCS"] * weights_tot["PCS"]).round(2))

print("CBI Analysis by Totals, All Scores")
print(df_CBI_analysis)

print("\nCBI Analysis by Totals")
print(df_CBI_analysis[["Course Description", "CBI"]])

CBI Analysis by Totals, All Scores
               Course Description    DR  OFS   IAS  PCS   CBI
0       Advanced Game Development  0.30    1  0.50    1  2.80
1             Advanced Web Design  0.33    3  0.50    1  4.83
2            Algorithms for Games  0.68    2  1.00    1  4.68
3   Applied Cyberinfrastruct Conc  0.14    3  1.00    1  5.14
4      Applied Data Visualization  0.68    1  0.25    1  2.93
..                            ...   ...  ...   ...  ...   ...
76          Theories of New Media  0.49    1  0.33    0  1.82
77       User Interf+Website Dsgn  0.06    3  1.00    0  4.06
78                Virtual Reality  0.48    1  0.50    1  2.98
79        Visual Content Creation  0.52    3  1.00    0  4.52
80              eSport Industries  0.70    1  1.00    0  2.70

[81 rows x 6 columns]

CBI Analysis by Totals
               Course Description   CBI
0       Advanced Game Development  2.80
1             Advanced Web Design  4.83
2            Algorithms for Games  4.68
3   Applied Cy

CBI Total - Summary Stats

In [50]:
# Overall stats cbi_tot
cbi_tot_mean = round(df_CBI_analysis["CBI"].mean(), 2)
cbi_tot_median = round(df_CBI_analysis["CBI"].median(), 2)
cbi_tot_std = round(df_CBI_analysis["CBI"].std(), 2)

print("CBI Summary Stats (Overall):")
print(f"Mean CBI: {cbi_tot_mean}")
print(f"Median CBI: {cbi_tot_median}")
print(f"Standard Deviation: {cbi_tot_std}")


# DR min/max and course name
cbi_tot_min = round(df_CBI_analysis["CBI"].min(), 2)
cbi_tot_max = round(df_CBI_analysis["CBI"].max(), 2)
cbi_tot_course_min = df_CBI_analysis.loc[df_CBI_analysis["CBI"] == cbi_tot_min, "Course Description"].tolist() #Course name(s) as list
cbi_tot_course_max = df_CBI_analysis.loc[df_CBI_analysis["CBI"] == cbi_tot_max, "Course Description"].tolist()

print(f"Min CBI: {cbi_tot_min} — Courses: {cbi_tot_course_min}")
print(f"Max CBI: {cbi_tot_max} — Courses: {cbi_tot_course_max}")

# Top highest and lowest DR courses
high_low_count = 10
top_cbi_tot = df_CBI_analysis.nlargest(high_low_count, "CBI")
bottom_cbi_tot = df_CBI_analysis.nsmallest(high_low_count, "CBI")

print(f"\n{high_low_count} Highest CBI Courses:")
#print(top_cbi_tot)
print(top_cbi_tot[["Course Description", "CBI"]])
print(f"\n{high_low_count} Lowest CBI Courses:")
#print(bottom_cbi_tot)
print(bottom_cbi_tot[["Course Description", "CBI"]])

CBI Summary Stats (Overall):
Mean CBI: 3.19
Median CBI: 3.01
Standard Deviation: 1.04
Min CBI: 1.66 — Courses: ['Intellectual Property/Copyrigh']
Max CBI: 5.63 — Courses: ['Foundation of Info & Inference']

10 Highest CBI Courses:
                Course Description   CBI
32  Foundation of Info & Inference  5.63
46        Intro to Creative Coding  5.43
33                         Game AI  5.41
3    Applied Cyberinfrastruct Conc  5.14
65                      STEM Games  5.05
7    Bayesian Modeling & Inference  5.03
10    Computational Social Science  5.02
1              Advanced Web Design  4.83
2             Algorithms for Games  4.68
55   Monetizing Independent Gaming  4.60

10 Lowest CBI Courses:
                Course Description   CBI
44  Intellectual Property/Copyrigh  1.66
23                Digital Dilemmas  1.74
26  Digital Storytelling & Culture  1.82
76           Theories of New Media  1.82
62   Publishing:Papyrus to E-Books  1.91
63   Qualitative Internet Research  1.95
31     

CBI Term - Analysis

In [51]:
df_CBI_term_analysis = df_CBI_term.copy()

df_CBI_term_analysis["CBI"] = (df_CBI_term_analysis["DR"]  * weights_tot["DR"] +
                               df_CBI_term_analysis["OFS"] * weights_tot["OFS"] +
                               df_CBI_term_analysis["IAS"] * weights_tot["IAS"] +
                               df_CBI_term_analysis["PCS"] * weights_tot["PCS"])

print("CBI Analysis by Term, All Scores")
print(df_CBI_term_analysis)

print("\nCBI Analysis by Term")
print(df_CBI_term_analysis[["Term", "Course Description", "CBI"]])

CBI Analysis by Term, All Scores
            Term             Course Description    DR  OFS  IAS  PCS   CBI
0      Fall 2021            Advanced Web Design  0.26  3.0  NaN    1   NaN
1      Fall 2021           Algorithms for Games  0.52  2.0  NaN    1   NaN
2      Fall 2021  Applied Cyberinfrastruct Conc  0.22  3.0  NaN    1   NaN
3      Fall 2021     Applied Data Visualization  0.72  1.0  NaN    1   NaN
4      Fall 2021  Bayesian Modeling & Inference  0.09  3.0  NaN    1   NaN
..           ...                            ...   ...  ...  ...  ...   ...
474  Spring 2025         The Past and New Media  0.34  1.0  1.0    0  2.34
475  Spring 2025          Theories of New Media  0.32  1.0  0.5    0  1.82
476  Spring 2025                Virtual Reality  0.46  1.0  1.0    1  3.46
477  Spring 2025        Visual Content Creation  0.52  3.0  1.0    0  4.52
478  Spring 2025              eSport Industries  0.50  1.0  1.0    0  2.50

[479 rows x 7 columns]

CBI Analysis by Term
            Term     

CBI Term - Summary Stats

In [52]:
# For Loop for our summary stats (mean, median, std, min/max) and top/bottom courses
for df_semester, group in df_CBI_term_analysis.groupby("Term"):
    # Summary stats (mean, median, std)
    cbi_term_mean = round(group["CBI"].mean(), 2)
    cbi_term_median = round(group["CBI"].median(), 2)
    cbi_term_std = round(group["CBI"].std(), 2)
    print(f"\n🗓 CBI Summary for {df_semester}") # emoji to make it easier to read
    print(f"Mean CBI: {cbi_term_mean}")
    print(f"Median CBI: {cbi_term_median}")
    print(f"Standard Deviation: {cbi_term_std}")

    # Min/Max with course name(s)
    cbi_term_min = round(group["CBI"].min(), 2)
    cbi_term_max = round(group["CBI"].max(), 2)
    cbi_term_course_min = group.loc[group["CBI"] == cbi_term_min, "Course Description"].tolist()
    cbi_term_course_max = group.loc[group["CBI"] == cbi_term_max, "Course Description"].tolist()
    print(f"Min CBI: {dr_min} on Courses: {cbi_term_course_min}")
    print(f"Max CBI: {dr_max} on Courses: {cbi_term_course_max}")

    #Top/Bottom 
    high_low_count = 10
    top_cbi_term = group.nlargest(high_low_count, "CBI")[["Course Description", "CBI"]]
    bottom_cbi_term = group.nsmallest(high_low_count, "CBI")[["Course Description", "CBI"]]
    print(f"\n{df_semester}: {high_low_count} Highest CBI Courses:")
    print(top_cbi_term.to_string(index=False))
    print(f"\n {df_semester}: {high_low_count} Lowest CBI Courses:")
    print(bottom_cbi_term.to_string(index=False))

# note Fall 2021 isn't calculating because of missing data ??


🗓 CBI Summary for Fall 2021
Mean CBI: nan
Median CBI: nan
Standard Deviation: nan
Min CBI: 0.0 on Courses: []
Max CBI: 0.98 on Courses: []

Fall 2021: 10 Highest CBI Courses:
            Course Description  CBI
           Advanced Web Design  NaN
          Algorithms for Games  NaN
 Applied Cyberinfrastruct Conc  NaN
    Applied Data Visualization  NaN
 Bayesian Modeling & Inference  NaN
  Collaborating: Online Commun  NaN
Computational Thinking & Doing  NaN
        Computing and the Arts  NaN
              Data Engineering  NaN
     Data Mining and Discovery  NaN

 Fall 2021: 10 Lowest CBI Courses:
            Course Description  CBI
           Advanced Web Design  NaN
          Algorithms for Games  NaN
 Applied Cyberinfrastruct Conc  NaN
    Applied Data Visualization  NaN
 Bayesian Modeling & Inference  NaN
  Collaborating: Online Commun  NaN
Computational Thinking & Doing  NaN
        Computing and the Arts  NaN
              Data Engineering  NaN
     Data Mining and Discovery  