In [None]:
%pip install numpy pandas openpyxl networkx

Create an array of every CS class a student can take in a given semester

In [2]:
import pandas as pd
import numpy as np

# Load the Excel file
df = pd.read_excel('./CSCurriculumRevertedData.xlsx')

# Get all the "Pass" semester columns.
semester_cols = [col for col in df.columns if 'Pass' in col]

classes = np.array([])

for col in semester_cols:
    classes = np.append(classes, [str(col).split("_")[0]])

print("Classes: ")
print(np.array_str(classes))

['CSCI101' 'CSCI128' 'CSCI200' 'CSCI210' 'CSCI220' 'CSCI261' 'CSCI262'
 'CSCI274' 'CSCI306' 'CSCI341' 'CSCI358' 'CSCI370' 'CSCI400' 'CSCI406'
 'CSCI442' 'MATH111' 'MATH112' 'MATH113' 'MATH122' 'MATH213' 'MATH214'
 'MATH223' 'MATH224' 'MATH225' 'MATH235' 'MATH300' 'MATH307' 'MATH332'
 'MATH342' 'PHGN100' 'PHGN200' 'CSCI404' 'CSCI410' 'CSCI422' 'CSCI423'
 'CSCI425' 'CSCI432' 'CSCI436' 'CSCI437' 'CSCI440' 'CSCI441' 'CSCI443'
 'CSCI444' 'CSCI445' 'CSCI446' 'CSCI448' 'CSCI455' 'CSCI470' 'CSCI471'
 'CSCI473' 'CSCI474' 'CSCI475' 'CSCI477' 'CSCI478']


Create directed graph representing the CS flowchart. For the purposes of reading this data into the stochastic matrix, we will want to include cumulative prereqs, so while 403 doesn't require 101 directly, we will still list it as a requirement.

In [3]:
import networkx as nx 

requirements = nx.DiGraph()

requirements.add_nodes_from(classes)

# For now, just add one edge representing that 101 is required for 200 and 220
# Explicit Prerequisites
requirements.add_edge('CSCI101', 'CSCI200')

# Implicit Prerequisites (delete these to get the flowchart instead)
requirements.add_edge('CSCI101', 'CSCI220')

Create the stochastic matrix. 

Note that right now, the stochastic matrix only handles the requirements for a class and the cummulative prerequisite courses for a class. In the future, we will want to edit the stochastic matrix using data from previous semesters to determine what classes students are actually likely to take after completing some class.

In [11]:
num_classes = classes.size

# Fill the matrix with 1's
stochastic = np.empty(shape=(num_classes, num_classes))
stochastic.fill(1)

# If you pass a class, you won't take that class next semester, so set the diagonal to 0
np.fill_diagonal(stochastic, 0)

# Look at the prereqs, if a class is a prereq for another class, that spot in the matrix should be a 0
for required in classes:

    # Get the outgoing edges in the associated requirements network
    classes_requiring = requirements.adj[required]
    
    for requiring_class in classes_requiring:
        stochastic[np.where(classes == required), np.where(classes == requiring_class)] = 0


# Make the matrix stochastic
stochastic = stochastic/stochastic.sum(axis=1)[:,None]

print("Stochastic Matrix: ")
print(stochastic)

Stochastic Matrix: 
[[0.         0.01960784 0.         ... 0.01960784 0.01960784 0.01960784]
 [0.01886792 0.         0.01886792 ... 0.01886792 0.01886792 0.01886792]
 [0.01886792 0.01886792 0.         ... 0.01886792 0.01886792 0.01886792]
 ...
 [0.01886792 0.01886792 0.01886792 ... 0.         0.01886792 0.01886792]
 [0.01886792 0.01886792 0.01886792 ... 0.01886792 0.         0.01886792]
 [0.01886792 0.01886792 0.01886792 ... 0.01886792 0.01886792 0.        ]]


For the sake of demonstration, create a student who is only taking CS200. Estimate what their next semester will look like.

In [15]:
# Create a demo student
demo_student = np.zeros(num_classes)
demo_student[np.where(classes == "CSCI200")] = 1

print("Initial Student Array:")
print(demo_student)

# Use matrix multiplication to guess what their next semester would look like
demo_prediction = np.matmul(stochastic, demo_student)

print("Predicted Schedule: ")
print(demo_prediction)

Initial Student Array:
[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
Predicted Schedule: 
[0.         0.01886792 0.         0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792
 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792 0.01886792]


Above, since the student just took CSCI 200, we know that next semester they will not take CSCI 101 since that is a prereq, and they also won't take CSCI 200 again since they just passed the class