In [5]:
import sqlite3
import pandas as pd
import os
import copy
import traceback
import random
import regex as re
import statistics
pd.options.mode.chained_assignment = None

In [6]:
process_path = os.path.join(os.sep+"home"+os.sep+"jupyter"+os.sep+"Team-Prophecy","Data","02_processed","intermediate.db")
print(process_path)

/home/jupyter/Team-Prophecy/Data/02_processed/intermediate.db


In [7]:
output_path = os.path.join(os.sep+"home"+os.sep+"jupyter"+os.sep+"Team-Prophecy","Data","03_output_for_tableau")
print(output_path)

/home/jupyter/Team-Prophecy/Data/03_output_for_tableau


In [8]:
process_connection = sqlite3.connect(process_path)

For our integrated database, we have a list of inputs that we can manipulate in order to handle the total number of students coming (ExpN_eat) and whether or not they are from required classes.

In [9]:
inputs = {    
    'prog_desc'     : "All",
    'courses'       : "All",
    'mod'           : "F2F", #F2F
    'visa'          : "combined", #F1 Visa
    'all_sections'  : 2, #Option 0 = Don't activate, Option 1 = Ensure that each course has at least 5 sections, Option 2 = Ensure each course has 5 sections with randomized registration size
    'required_only' : False,
    'ExpN_eat'      : 3500
}

From there, we feed them into our model parameters which will determine whether we run a WHERE query associated with it.

In [10]:
# Building the model parameters
model_params = {
    'prog_desc'         : inputs['prog_desc'],
    'courses'           : inputs['courses'],
    'mod'               : inputs['mod'],
    'visa'              : inputs['visa'],
    'ExpN_eat'          : inputs['ExpN_eat'],
    'enr_hist'          : 4 if inputs['visa'] == 'F1 Visa' else 8,
    'filters'           : {
        'prog_filter'   : inputs['prog_desc'] != 'All',
        'course_filter' : inputs['courses'] != 'All',
        'visa_filter'   : inputs['visa'] != 'combined',
        'required_only' : inputs['required_only']
    },
    'groupby'           : {
        'mod_groupby'   : inputs['mod'] != 'combined'
    }
}

This is specifically for including any courses that we want to explicitly focus on.

In [11]:
def list_to_str(l):
    """
    Create a string in the format of "('a', 'b', 'c', 'd')" from list ['a', 'b', 'c', 'd']
    """
    return str(tuple(l))

We create the queries themselves and feed the model parameters associated with them

In [12]:
student_stat_query = """
    SELECT rs.reg_term_code, rs.reg_stu_id, crs, rs.sect_id, rs.reg_new_ret_stu, rs.reg_final_status
    FROM registration_status rs
    WHERE 1=1 
"""
student_details_query = """
    SELECT stu_admit_term_code, stu_college, stu_deg_level, stu_dept, stu_id, stu_res, stu_prog, stu_visa, stu_bam
    FROM student_details 
    WHERE 1=1 
"""

program_course_offerings = """
    SELECT * FROM PROGRAM_COURSE_OFFERINGS 
"""

# Program Filter
if model_params['filters']['prog_filter']:
    student_details_query += f"AND stu_prog = '{model_params['prog_desc']}' "

# Course Filter
if model_params['filters']['course_filter']:
    student_stat_query += f"AND crs IN {list_to_str(model_params['courses'])} "

# Visa Filter
if model_params['filters']['visa_filter']:
    student_details_query += f"AND stu_visa = '{model_params['visa']}' "

Then, we merge all of the student registration data and all of the student detail data together. We ensure that **no student is returning**

In [13]:
student_stat = pd.DataFrame(process_connection.execute(student_stat_query).fetchall(),columns=["reg_term_code", "student_id", "crs", "sect_id", "returning_student", "reg_status"])

#WHERE reg_final_status IN ('W','R')
#GROUP BY reg_term_code, crs, sect_id, reg_final_status

student_details = pd.DataFrame(process_connection.execute(student_details_query).fetchall(),columns=["reg_term_code","stu_college","stu_deg_level","stu_dept","student_id",
                                                                                                     "stu_res","stu_prog","stu_visa","stu_bam"])

pco_df = pd.DataFrame(process_connection.execute(program_course_offerings),columns=["stu_prog","crs","required"])

total_stat = student_stat.merge(student_details, on=["reg_term_code","student_id"], how="inner").fillna(0)
total_stat = total_stat.merge(pco_df, on=["stu_prog","crs"])
total_stat = total_stat.loc[total_stat["returning_student"]=='N',:]

**Prior to continuining, we want to identify the total number of students who are returning and who are new**
- _total_stat_ is used instead of student_stat because we want to only merge students that we know for sure were last included.
- _student_stat_ definitely has more returning students, but this doesn't help us because we want to identify students who were introduced each semester.
- We exclude all returning students and assume all students are new students for _total_stat_, but we keep the ratio of returning students under _student_stat_.
- The returning students of the first semester will be ignored because we want to build our history off of the new students of the current semester, rather than going back to the end of time.
- All students from the prior semester are considered "returning students", and will be included based on the respective semester and program data provided.
    + This means the incoming student population will be considered "returning" based on the proportions listed for _student_stat_ for a given semester

The last step is to identify visa students vs non-visa students.

In [14]:
if model_params['filters']['visa_filter'] is False:
    total_stat["stu_visa"] = total_stat["stu_visa"].replace(regex=r'^Not Relevent', value='0').replace(regex=r'^F1 Visa', value='1')

This dictionary handles all classes that are under a certain student program as being required or not

In [15]:
required_dictionary = pco_df.drop_duplicates().set_index(["stu_prog","crs"])["required"].to_dict()
required_dictionary = {f"({k[0]},{k[1]})":v for k,v in required_dictionary.items()}

This is our total student population per course.

In [16]:
total_student_population_per_crs = total_stat[["reg_term_code","crs"]] #.groupby(["crs"]).count()
total_student_population_per_crs["total"] = 1
total_student_population_per_crs = list(total_student_population_per_crs.groupby(["reg_term_code","crs"]).sum("total").reset_index().itertuples(index=False, name=None))
total_student_population_per_crs = { f"{i[0]}-{i[1]}" : i[2] for i in total_student_population_per_crs }

If we're only concerned about required courses, we enable this to be true.

In [17]:
if model_params['filters']['required_only']:
    total_stat = total_stat.loc[total_stat["required"] == 1,:]

Here, we get the percentage of people who are going to be going on a visa and those who are not. We also do the same with returning tudents.

In [18]:
#This is for explicitly handling domestic and visa students
reg_term_code = total_stat["reg_term_code"].drop_duplicates().tolist()
total_visa_dictionary = {}
total_returning_dictionary = {}
total_stat_grp = total_stat.groupby("reg_term_code")
for term in reg_term_code:
    total_visa_dictionary[term] = {}
    total_returning_dictionary[term] = {}
    prog_stat_grp = total_stat_grp.get_group(term)
    reg_av_programs = prog_stat_grp["stu_prog"].drop_duplicates().tolist()
    prog_stat_grp = prog_stat_grp.groupby("stu_prog")
    for prog in reg_av_programs:
        total_visa_dictionary[term][prog] = {}
        total_returning_dictionary[term][prog] = {}
        up_total_stat_dict = prog_stat_grp.get_group(prog).groupby("stu_visa")["stu_visa"].count().to_dict()
        returning_student = prog_stat_grp.get_group(prog).groupby("returning_student")["returning_student"].count().to_dict()
        total_returning_dictionary[term][prog].update(returning_student)
        total_visa_dictionary[term][prog].update(up_total_stat_dict)

This grabs our population count by total stats

In [19]:
reg_term_values = dict(total_stat.groupby(["reg_term_code"])["reg_term_code"].count())

Here, we scrubb any and all dropped students

In [20]:
reg_courses_context = total_stat.loc[total_stat["reg_status"] != "D",["reg_term_code","stu_prog","crs","sect_id","reg_status"]].groupby(["reg_term_code","stu_prog","crs","sect_id"]).count().reset_index()
#nonregistered_courses = total_courses.loc[total_courses["reg_status"] != "R",["reg_term_code","stu_prog","crs","sect_id","reg_status"]].groupby(["reg_term_code","stu_prog","crs","sect_id"]).count().reset_index()

Keep in mind: Our registered values will be dependent on what's in our subset.

In [21]:
#reg_courses_context = registered_courses.merge(ref_course[["reg_term_code", "crs","sect_id", "cum_total_enrollment"]], on=["reg_term_code", "crs","sect_id"], how="inner").drop_duplicates()
#reg_courses_context.loc[reg_courses_context["cum_total_enrollment"] < reg_courses_context["reg_status"],["cum_total_enrollment"]]  = reg_courses_context["reg_status"]
#reg_courses_context.columns

This specifically relates back to the parameters that we created. This makes sure that dependent on how we selected our sections, that we can get different values for experimentation purposes.

For example: Let's say I want to see adding more sections across more courses helps student enrollment. I can resolve this by including more sections using this statement. Option 1 for _all_sections_ makes 5 new sections available. Option 2 does the same as Option 1, but puts a random number of registrations in for proportionality reasons.

In [22]:
if inputs['all_sections'] >= 1:
    left_sect = reg_courses_context[["sect_id"]].drop_duplicates()[:5]
    left_sect["temp"] = 0
    reg_courses_context["temp"] = 0
    reg_courses_context = reg_courses_context.merge(left_sect,on=["temp"],how="left")
    reg_courses_context.loc[~reg_courses_context["sect_id_x"].isin(left_sect["sect_id"].tolist()), "sect_id_y"] = reg_courses_context["sect_id_x"]
    if inputs['all_sections'] == 2:
        reg_courses_context["reg_status"] = reg_courses_context.loc[reg_courses_context["sect_id_x"] != reg_courses_context["sect_id_y"], "reg_status"].apply(lambda x: random.randint(1, 7))
    else:
        reg_courses_context.loc[reg_courses_context["sect_id_x"] != reg_courses_context["sect_id_y"], "reg_status"] = 0
    reg_courses_context = reg_courses_context.rename(columns={"sect_id_x":"remove","sect_id_y":"sect_id"})
    reg_courses_context = reg_courses_context.drop(["remove","temp"],axis=1)
    reg_courses_context = reg_courses_context.fillna(0)

In [23]:
reg_term_courses_context = reg_courses_context.groupby(["reg_term_code"])

Proportionality is really important because the total number of students coming in is not going to evenly distribute across all programs. In order to appropriately assess our total program size, we need to identify trouble areas. We can do this by applying a ratio across all students.

In [24]:
from ortools.linear_solver import pywraplp
from itertools import chain

This is where the actual code is about to get started. Here, we grab the ratios between all programs, courses, and section ids before contiuing.

In [25]:
all_semesters = list(reg_term_values.keys())
#program_limit_values <- included because we need a way to constraint students
all_pcs_list = list(reg_courses_context[["stu_prog","crs","sect_id"]].drop_duplicates().itertuples(index=False,name=None))
all_pcs_dict = {}
#References what program contains which class
pc_dict = {}
cs_dict = {}
for t_pcs in all_pcs_list:
    if t_pcs[0] not in all_pcs_dict:
        pc_dict[t_pcs[0]] = []
        all_pcs_dict[t_pcs[0]] = {}
    pc_dict[t_pcs[0]].append(t_pcs[1])
    if t_pcs[1] not in all_pcs_dict[t_pcs[0]]:
        cs_dict[t_pcs[1]] = []
        all_pcs_dict[t_pcs[0]][t_pcs[1]] = []
    cs_dict[t_pcs[1]].append(t_pcs[2])
    all_pcs_dict[t_pcs[0]][t_pcs[1]].append(t_pcs[2])

An example of how this will work:

Fall 2020
150 -> 80 Program_1, 40 Program_2, 30 Program_3
1 student -> 15 credits = 3-4 classes

This defines our tables that we're going to use to get everything that we need for our results.

In [26]:
try:
    process_connection.execute("DROP TABLE prior_class_table")
    process_connection.commit()
except:
    print()

try:
    process_connection.execute("DROP TABLE student_results_table")
    process_connection.commit()
except:
    print()
    
try:
    process_connection.execute("DROP TABLE program_results_table")
    process_connection.commit()
except:
    print()

try:
    process_connection.execute("DROP TABLE results_table")
    process_connection.commit()
except:
    print()




In [27]:
process_connection.execute("""
                    CREATE TABLE prior_class_table(
                       student_id INTEGER NOT NULL DEFAULT 0, 
                       course_code TEXT NOT NULL,
                       PRIMARY KEY(student_id, course_code)
                    );
                    """)
process_connection.commit()

In [28]:
process_connection.execute("""
                    CREATE TABLE program_results_table(
                       semester TEXT NOT NULL,
                       program TEXT NOT NULL,
                       course_code TEXT NOT NULL,
                       sect_id TEXT,
                       waitlisted TEXT NOT NULL DEFAULT 'F',
                       number_of_students TEXT NOT NULL,
                       percentage_visa_students TEXT NOT NULL
                    );
                       """)
process_connection.commit()

In [29]:
process_connection.execute("""
                    CREATE TABLE results_table(
                       rec_id INTEGER PRIMARY KEY AUTOINCREMENT DEFAULT 0, 
                       semester TEXT NOT NULL,
                       program TEXT NOT NULL,
                       min_waitlisted_students INTEGER NOT NULL
                    );
                       """)
process_connection.commit()

This creates a way to determine our average of students per class section

In [30]:
req_mean = reg_courses_context.groupby(["crs","sect_id"]).mean("reg_status").reset_index()
req_med = reg_courses_context.groupby(["crs","sect_id"]).median("reg_status").reset_index()
req_mean = req_mean.rename(columns={"reg_status": "mean"})
req_med = req_med.rename(columns={"reg_status": "median"})
req_avg = req_mean.merge(req_med,on=["crs","sect_id"],how="inner")
req_avg["avg"] = round(req_avg[["mean","median"]].max(axis=1))
req_avg = req_avg[["crs","sect_id","avg"]]

In [31]:
course_sect_avg_dict = req_avg.set_index(["crs","sect_id"])["avg"].to_dict()

This creates a way to determine our average of students per class

In [32]:
req_mean = reg_courses_context.groupby(["crs"]).mean("reg_status").reset_index()
req_med = reg_courses_context.groupby(["crs"]).median("reg_status").reset_index()
req_mean = req_mean.rename(columns={"reg_status": "mean"})
req_med = req_med.rename(columns={"reg_status": "median"})
req_avg = req_mean.merge(req_med,on=["crs"],how="inner")
req_avg["avg"] = round(req_avg[["mean","median"]].max(axis=1))
req_avg = req_avg[["crs","avg"]]

In [33]:
course_avg_dict = req_avg.set_index(["crs"])["avg"].to_dict()

In [34]:
student_total = student_stat.loc[student_stat["reg_status"] != "D",:].groupby(["reg_term_code","returning_student"])["returning_student"].count().to_dict()

This creates helps determine the percentage of students that are returning/are not returning.

In [35]:
student_ret_ratio = {}
for a in all_semesters:
    student_total_sem = student_total[(f'{a}', 'N')]+student_total[(f'{a}', 'R')]
    student_ret_ratio[a] = round(student_total[(f'{a}', 'R')]/student_total_sem,2)

### ASSUMPTIONS I AM MAKING FOR THE TIME BEING ###

- 30-36 as a max for students, 5 sections max
- Historical Data for training for the data must be ingested.
- Carry-over is necessary
- 180 students max for any given class

In [36]:
def modelSolve(INCSTU_sp, a_sem, program, cxt_grp, visa_students_required, prior_csize = None):
    #WE HAVE RATIOS HERE IN CASE WE NEED TO DO MORE ANALYSIS OR MATCH IT TO 1
    #
    #These will be our constraints
    #

    MAXIMUM_STUDENTS = 180
    
    cxt_grp = cxt_grp.loc[cxt_grp["stu_prog"] == program,:]
    cxt_grp["reg_status"] = cxt_grp["reg_status"]/reg_term_values[a_sem] #(cxt_grp["reg_status"]-cxt_grp["reg_status"].min())/(1+cxt_grp["reg_status"].max()-cxt_grp["reg_status"].min())
    #total_students_list = cxt_grp["cum_total_enrollment"]/reg_term_values[a_sem]
    
    #Here, we get the weighting ratio associated with the model so that way we can appropropriately assess each course.
    pcs_weighting_ratio = cxt_grp[["reg_term_code","stu_prog","crs","reg_status"]].set_index(["crs"])["reg_status"].to_dict()
    pcs_weighting_ratio = {k : v for k,v in pcs_weighting_ratio.items()} #"("+",".join(k)+")"
    #Now that we have the ratios, we can begin with the objective function.
    #
    course_ref = {}      #This allows for us to control students within a given course
    course_sect_ref = {} #This allows for us to control students within a given section
    
    course_w_ref = {}
    course_sect_w_ref = {}
    
    course_student_ref = {}
    course_student_w_ref = {}
    
    course_visa_student_ref = {}
    course_visa_student_w_ref = {}
    
    if prior_csize is None:
        prior_csize = {}
    
    #Total Decision and Constant Variables Used
    decision_vars = {}
    const_vars = {}
    
    #Student ID Decision Variables
    x_decision_vars = {} #This controls the number of students enrolled
    w_decision_vars = {} #This controls the number of students waitlisted
    e_decision_vars = {} #This controls students who didn't take that class (can be as high as it needs to be)
    
    solver = pywraplp.Solver.CreateSolver('SCIP')
    if not solver:
        print("Cannot get solver")
        return
        
    cs_l = cxt_grp["crs"].drop_duplicates().tolist()

    #
    s_inf = solver.infinity()
    
    all_cs = cxt_grp["crs"].drop_duplicates().tolist()
    
    sect_lookup = cxt_grp[["crs","sect_id"]].drop_duplicates().groupby("crs")["sect_id"].apply(list).to_dict()
    
    
    
    total_classes_available = len(all_cs)
    for cs in all_cs:
        if cs not in course_ref.keys():
            course_ref[cs] = []
            course_w_ref[cs] = []
            course_student_ref[cs] = {}
            course_student_w_ref[cs] = {}
        
        if cs not in course_sect_ref.keys():
            course_sect_ref[cs] = {}
            course_sect_w_ref[cs] = {}
        
        for sect_id in sect_lookup[cs]:        
            if sect_id not in course_sect_ref[cs].keys():
                course_sect_ref[cs][sect_id] = []
                course_sect_w_ref[cs][sect_id] = []

            visa_status_iterator = 0
            for student_id in range(1,INCSTU_sp):
                #######################################################
                #DEFINES DECISION VARS
                # -> Establishes all student id based on the INCSTU_sp provided
                #######################################################
                #We'll have two primary decision variables: Waitlist and Student
                #This will be a minimization function for our purposes, as we want all students
                # to be included.
                
                if student_id not in course_student_ref[cs]:
                    course_student_ref[cs][student_id] = []
                    course_student_w_ref[cs][student_id] = []

                if visa_status_iterator < visa_students_required:
                    x_name = f"x({cs},{sect_id},{student_id},1)" #This means the student is registered
                    w_name = f"w({cs},{sect_id},{student_id},1)" #This means the student applied but is waitlisted
                    e_name = f"e({cs},{sect_id},{student_id},1)"
                else:
                    x_name = f"x({cs},{sect_id},{student_id},0)" #This means the student is registered
                    w_name = f"w({cs},{sect_id},{student_id},0)" #This means the student applied but is waitlisted
                    e_name = f"e({cs},{sect_id},{student_id},0)"
                    

                # Total class sizes (for later)
                #r_name = f"r({cs},{student_id})"
                #wait_name = f"wait({cs},{student_id})"

                decision_vars[x_name] = solver.IntVar(0,1,x_name)
                decision_vars[w_name] = solver.IntVar(0,1,w_name)
                decision_vars[e_name] = solver.IntVar(0,1,w_name)

                if student_id not in x_decision_vars:
                    x_decision_vars[student_id] = []
                if student_id not in w_decision_vars:
                    w_decision_vars[student_id] = []
                if student_id not in e_decision_vars:
                    e_decision_vars[student_id] = []
                
                
                #We have different groupings so that the solver can reference them when need be.
                x_decision_vars[student_id].append(decision_vars[x_name])
                w_decision_vars[student_id].append(decision_vars[w_name])
                e_decision_vars[student_id].append(decision_vars[e_name])

                course_ref[cs].append(decision_vars[x_name])
                course_w_ref[cs].append(decision_vars[w_name])
                
                course_sect_ref[cs][sect_id].append(decision_vars[x_name])
                course_sect_w_ref[cs][sect_id].append(decision_vars[w_name])
                
                course_student_ref[cs][student_id].append(decision_vars[x_name])
                course_student_w_ref[cs][student_id].append(decision_vars[w_name])
                
                if visa_status_iterator < visa_students_required and f"({program},{cs})" in required_dictionary:
                    if student_id not in course_visa_student_ref:
                        course_visa_student_ref[student_id] = []
                        course_visa_student_w_ref[student_id] = []
                    course_visa_student_ref[student_id].append(decision_vars[x_name])
                    course_visa_student_w_ref[student_id].append(decision_vars[w_name])
                
                visa_status_iterator += 1
    
    for cs in all_cs:
        # This addresses all values associated with courses. Sections will come later, but for now let's establish a baseline.
        #
        tsppcrs = total_student_population_per_crs[f"{a_sem}-{cs}"]
        pcsw = pcs_weighting_ratio[cs]
        
        if cs not in prior_csize.keys():
            prior_csize[cs] = {}
        #print(f"pcsw = {pcsw}")
        #print(f"Total Student Population on {a_sem} for course {cs}: {tsppcrs}")
        
        # We are assuming 180 students total per class. This is taking into account the largest a class section can be on average (35) * the assumed maximum sections allowed (5)
        # CONSTRAINT 1: ALL STUDENTS WITHIN A COURSE MUST NOT EXCEED THE TOTAL NUMBER OF STUDENTS ALLOWED
        const_vars[f"01_total_class_size_allowed"] = solver.Add(solver.Sum(course_ref[cs]) <= MAXIMUM_STUDENTS, name=f"01_total_class_size_allowed")
        
        for student_id in range(1,INCSTU_sp):
            #CONSTRAINT 2 AND 3: A GIVEN STUDENT IS ONLY ALLOWED TO APPLY TO ONE SECTION PER COURSE
            const_vars[f"02_section_constraint"] = solver.Add(solver.Sum(course_student_ref[cs][student_id]) <= 1,name=f"02_section_constraint")
            const_vars[f"03_section_waitlist_constraint"] = solver.Add(solver.Sum(course_student_w_ref[cs][student_id]) <= 1,name=f"03_section_waitlist_constraint")
            
        #This grabs an estimate as to what should be the total size of the seciton based on contextual information
        total_section_size = (INCSTU_sp-1)*(1-pcsw)/(len(sect_lookup[cs])+1)
        #If our section size goes past 36, we MUST stop it.
        total_section_size = 36 if total_section_size > 36 or total_section_size < 10 else total_section_size
        
        for sect_id in sect_lookup[cs]:
            # We then the section size if available.
            if sect_id in prior_csize[cs]:
                total_section_size = prior_csize[cs][sect_id]
            else:
                prior_csize[cs][sect_id] = total_section_size
            #CONSTRAINT 4: THIS FLOORS THE MAXIMMUM OF THE TOTAL SECTION SIZE
            const_vars[f"04_section_max_size_constraint"] = solver.Add(solver.Sum(course_sect_ref[cs][sect_id]) <= total_section_size, name=f"04_section_max_size_constraint")
            
            #CONSTRAINT 5: THIS ASSUMES THAT THE MINIMUM SECTION SIZE IS 10 (REMOVE BECAUSE OF COMPUTATIONAL PROBLEMS)
            #const_vars[f"05_section_min_size_constraint"] = solver.Add(solver.Sum(course_sect_ref[cs][sect_id]) >= 10, name=f"05_section_min_size_constraint")
            visa_status_iterator = 0
            for student_id in range(1,INCSTU_sp):
                #CONSTRAINT 6: STUDENTS CAN EITHER BE REGISTERED, WAITLISTED, OR EMPTY
                if visa_status_iterator < visa_students_required:
                    const_vars[f"06_student_withvisa_ctrl_constraint"] = solver.Add(decision_vars[f"x({cs},{sect_id},{student_id},1)"] + decision_vars[f"w({cs},{sect_id},{student_id},1)"] + decision_vars[f"e({cs},{sect_id},{student_id},1)"] == 1, name=f"06_student_withvisa_ctrl_constraint")
                else:
                    const_vars[f"06_student_ctrl_constraint"] = solver.Add(decision_vars[f"x({cs},{sect_id},{student_id},0)"] + decision_vars[f"w({cs},{sect_id},{student_id},0)"] + decision_vars[f"e({cs},{sect_id},{student_id},0)"] == 1, name=f"06_student_ctrl_constraint")
                visa_status_iterator += 1
    visa_status_iterator = 0
    for student_id in range(1,INCSTU_sp):
        #if student_id in course_visa_student_ref:
            #CONSTRAINT 7 (FOR VISA): ALL VISA STUDENTS MUST APPLY OR BE WAITLISTED TO ONE CLASS
        #    const_vars[f"07_student_withvisa_enlist_constraint"] = solver.Add(solver.Sum(course_visa_student_ref[student_id]) + solver.Sum(course_visa_student_w_ref[student_id]) >= 3, name=f"07_student_withvisa_enlist_constraint")
        #    continue
        #CONSTRAINT 7: ALL STUDENTS MUST HAVE AT LEAST ONE CLASS UNDER THEIR BELT
        const_vars[f"07_student_enlist_min_constraint"] = solver.Add(solver.Sum(x_decision_vars[student_id]) + solver.Sum(w_decision_vars[student_id]) >= 1, name=f"07_student_enlist_min_constraint")
        #CONSTRAINT 8: ALL STUDENTS CAN ONLY HAVE AT MAXIMUM 5 CLASSES
        const_vars[f"08_student_enlist_max_constraint"] = solver.Add(solver.Sum(x_decision_vars[student_id]) <= 5, name=f"08_student_enlist_max_constraint")
    
    # Minimization function 
    # -> For later * a_decision_vars[student_id]
    
    solver.Minimize(solver.Sum([solver.Sum(w_decision_vars[student_id])
                                for student_id in range(1,INCSTU_sp)]))
    status = solver.Solve()
    
    if status == solver.FEASIBLE or status == solver.OPTIMAL:
        return solver, x_decision_vars, w_decision_vars, prior_csize
    
    return None, x_decision_vars, w_decision_vars, prior_csize

### This is where we begin our solver function ###
- Be sure that the total number of students is equal to or larger than 5000 students

In [37]:
#reg_courses_context.loc[reg_courses_context["reg_term_code"] != "201770"].groupby(["reg_term_code","stu_prog","crs"]).count()

In [38]:
reg_list = reg_term_courses_context["reg_status"].sum().tolist()

In [39]:
avg_list = sum(reg_list)/len(reg_list)
std_list = statistics.stdev(reg_list)

Here, we then formally run the Linear Optimization values to extract their coefficients

In [40]:
passed_student_info = {}
prior_course_size = {}        #Running tally of prior course sizes
diff_course_size = {}              #Consistently changes
results_table = []
#############################################
# INGRESS OF STUDENTS
INCSTU_s = model_params['ExpN_eat']+1 #Rename to INCSTU_s
#############################################
for a in range(0,len(all_semesters)):
    a_sem = all_semesters[a]
    cxt_grp = reg_term_courses_context.get_group(a_sem)
    prg_list = cxt_grp["stu_prog"].drop_duplicates().tolist()
    prior_course_size[a_sem] = {}
    diff_course_size[a_sem] = {}
    program_results_table = []
    proportionality = cxt_grp.groupby("stu_prog")["reg_status"].sum().to_dict()
    total_students_throughout_semester = cxt_grp["reg_status"].sum()
    #THIS CODE LETS US KNOW IF WE ARE GREATER THAN OR EQUAL TO THE SEMESTER DATA PROVIDED
    #print("==================================")
    print(f"semester: {a_sem}, INCSTU_s: {INCSTU_s}, total_students_for_semester: {total_students_throughout_semester}")
    #if INCSTU_s <= total_students_throughout_semester:
    #    print("Cannot include students within the semester as it's smaller than number of students within current semester")
    #    continue
    for p in prg_list:
        pcs_val = None
        
        if a > 0 and p in prior_course_size[all_semesters[a-1]]:
            pcs_val = prior_course_size[all_semesters[a-1]][p]
        
        visa_pop = total_visa_dictionary[a_sem][p]['1'] if p in total_visa_dictionary[a_sem] and '1' in total_visa_dictionary[a_sem][p] else 0
        nonvisa_pop = total_visa_dictionary[a_sem][p]['0'] if p in total_visa_dictionary[a_sem] and '0' in total_visa_dictionary[a_sem][p] else 0
        if visa_pop + nonvisa_pop == 0:
            nonvisa_pop = 1

        prog_prop = proportionality[p]/total_students_throughout_semester
        INCSTU_sp = round(INCSTU_s*prog_prop)
        #THIS CODE LETS US KNOW IF WE ARE GREATER THAN OR EQUAL TO THE PROPORTIONAL DATA PROVIDED
        print(f"program: {p}, INCSTU_sp: {INCSTU_sp}, total_students_for_semester: {proportionality[p]}")
        #if INCSTU_s <= proportionality[p]:
            #print("Cannot include students semester program as it's smaller than students within current semester program")
            #break
        total_pop = (INCSTU_sp+1)*(visa_pop/(visa_pop+nonvisa_pop))
        solver, x_decision_vars, w_decision_vars, course_size = modelSolve(INCSTU_sp,a_sem,p,cxt_grp,total_pop,pcs_val)
        if solver == None:
            print(f"Could not find optimal value for program {p}...")
            continue
        #print('Objective value =', solver.Objective().Value())
        #print(f"Registered Students under the Classes taken for Program {p} on {a_sem}:")
        waitlist_course = {}
        reg_course = {}
        international_check = {}
        
        #print("enroll values")
        for li in list(x_decision_vars.values()):
            for i in li:
                course_res = re.findall(r"\(([A-Z0-9]+),",i.name())[0]
                try:
                    sect_res = re.findall(r"\,([A-Za-z0-9]+),",i.name())[0]
                except:
                    sect_res = ""
                #print(sect_res)
                international_res = re.findall(r"\,([0-1])\)",i.name())[0]
                solVal = i.SolutionValue()
                #print(f"{i.name()} -> {solVal}")
                if course_res not in reg_course:
                    reg_course[course_res] = {}
                if sect_res not in reg_course[course_res]:
                    reg_course[course_res][sect_res] = 0
                
                if course_res not in international_check:
                    international_check[course_res] = {}
                if sect_res not in international_check[course_res]:
                    international_check[course_res][sect_res] = [0,0]
                international_check[course_res][sect_res][0] += int(international_res)
                international_check[course_res][sect_res][1] += 1
                reg_course[course_res][sect_res] += solVal
        
        for crs in reg_course.keys():
            sections = reg_course[crs]
            for sect in sections:
                program_results_table.append((a_sem,p,crs,sect,"F",reg_course[crs][sect],international_check[course_res][sect_res][0]/international_check[course_res][sect_res][1]))
                course_size[crs][sect] = reg_course[crs][sect] if reg_course[crs][sect] <= course_size[crs][sect] else course_size[crs][sect]
        
        international_w_check = {}
        #print(f"Waitlisted Students under the Classes taken for Program {p} on {a_sem}:")
        #print("waitlist values")
        for li in list(w_decision_vars.values()):
            for i in li:
                course_res = re.findall(r"\(([A-Z0-9]+),",i.name())[0]
                try:
                    sect_res = re.findall(r"\,([A-Za-z0-9]+),",i.name())[0]
                except:
                    sect_res = ""
                #print(sect_res)
                international_res = re.findall(r"\,([0-1])\)",i.name())[0]
                solVal = i.SolutionValue()
                #print(f"{i.name()} -> {solVal}")
                if course_res not in waitlist_course:
                    waitlist_course[course_res] = {}
                if sect_res not in waitlist_course[course_res]:
                    waitlist_course[course_res][sect_res] = 0
                    
                if course_res not in international_w_check:
                    international_w_check[course_res] = {}
                if sect_res not in international_w_check[course_res]:
                    international_w_check[course_res][sect_res] = [0,0]
                international_w_check[course_res][sect_res][0] += int(international_res)
                international_w_check[course_res][sect_res][1] += 1
                waitlist_course[course_res][sect_res] += solVal
        
        for crs in waitlist_course.keys():
            sections = waitlist_course[crs]
            for sect in sections:
                program_results_table.append((a_sem,p,crs,sect,"T",waitlist_course[crs][sect],international_w_check[course_res][sect_res][0]/international_w_check[course_res][sect_res][1]))
                course_size[crs][sect] += waitlist_course[crs][sect]
                #We want to change course_size here because we are hoping to reduce issues with waitlisting in the future by modifying the total size
        
        prior_course_size[a_sem][p] = course_size
        
        results_table.append((a_sem,p,solver.Objective().Value()))

    #process_connection.executemany("INSERT INTO prior_class_table(student_id, course_code)")
    #process_connection.commit()
    
    #print([print(i) for i in program_results_table])
    #print()
    
    process_connection.executemany("INSERT INTO results_table(semester, program, min_waitlisted_students) VALUES(?,?,?)",
                                  results_table)
    process_connection.commit()
    
    process_connection.executemany("INSERT INTO program_results_table(semester, program, course_code, sect_id, waitlisted, number_of_students,percentage_visa_students) VALUES(?,?,?,?,?,?,?)",
                                  program_results_table)
    process_connection.commit()
    
    print("==================================")
    print()

#

semester: 201770, INCSTU_s: 3501, total_students_for_semester: 275.0
program: EC-MS-CEIE, INCSTU_sp: 395, total_students_for_semester: 31.0
program: EC-MS-CPE, INCSTU_sp: 293, total_students_for_semester: 23.0
program: EC-MS-CS, INCSTU_sp: 1311, total_students_for_semester: 103.0
program: EC-MS-DAEN, INCSTU_sp: 598, total_students_for_semester: 47.0
program: EC-MS-ELEN, INCSTU_sp: 153, total_students_for_semester: 12.0
program: EC-MS-SWE, INCSTU_sp: 522, total_students_for_semester: 41.0
program: EC-MS-SYST, INCSTU_sp: 229, total_students_for_semester: 18.0

semester: 201810, INCSTU_s: 3501, total_students_for_semester: 265.0
program: EC-MS-AIT, INCSTU_sp: 1717, total_students_for_semester: 130.0
program: EC-MS-CPE, INCSTU_sp: 172, total_students_for_semester: 13.0
program: EC-MS-CS, INCSTU_sp: 951, total_students_for_semester: 72.0
program: EC-MS-DAEN, INCSTU_sp: 423, total_students_for_semester: 32.0
program: EC-MS-OPRS, INCSTU_sp: 238, total_students_for_semester: 18.0

semester: 20

Then, we push it from our process connection into a results and programs table, getting how students were left over.

In [41]:
#student_results_table = pd.DataFrame(process_connection.execute("SELECT * FROM student_results_table").fetchall(),columns=["rec_id","semester","program","course_code","student_id","international","waitlisted","core_course"])
program_results_table = pd.DataFrame(process_connection.execute("SELECT * FROM program_results_table").fetchall(),columns=["semester","program","course_code","sect_id","waitlisted","number_of_students","percentage_visa_students"])
results_table = pd.DataFrame(process_connection.execute("SELECT * FROM results_table").fetchall(),columns=["rec_id","semester","program","min_waitlisted_students"])

In [42]:
model_params['ExpN_eat']

3500

Viewing Registered and Waitlisted students is useful in this manner because we can determine how much "flooding" there is for a given class assuming all students can take at least one course.

There is a caveat:
- International students do have a requirement of at least 3 classes, but prior data has shown that optimal and feasible solutions could not be found because the total number of students is too small.

In [43]:
program_results_table.head()

Unnamed: 0,semester,program,course_code,sect_id,waitlisted,number_of_students,percentage_visa_students
0,201770,EC-MS-CEIE,CEIE605,001,F,36.0,0.0
1,201770,EC-MS-CEIE,CEIE605,003,F,36.0,0.0
2,201770,EC-MS-CEIE,CEIE605,002,F,36.0,0.0
3,201770,EC-MS-CEIE,CEIE605,DL2,F,36.0,0.0
4,201770,EC-MS-CEIE,CEIE605,DL1,F,36.0,0.0


In [44]:
program_results_table.loc[program_results_table["course_code"] == "SWE621",:]

Unnamed: 0,semester,program,course_code,sect_id,waitlisted,number_of_students,percentage_visa_students
140,201770,EC-MS-SWE,SWE621,001,F,36.0,0.0
141,201770,EC-MS-SWE,SWE621,003,F,35.0,0.0
142,201770,EC-MS-SWE,SWE621,002,F,34.0,0.0
143,201770,EC-MS-SWE,SWE621,DL2,F,36.0,0.0
144,201770,EC-MS-SWE,SWE621,DL1,F,34.0,0.0
...,...,...,...,...,...,...,...
12643,202310,EC-MS-SWE,SWE621,001,T,0.0,0.793233082706767
12644,202310,EC-MS-SWE,SWE621,003,T,0.0,0.793233082706767
12645,202310,EC-MS-SWE,SWE621,002,T,0.0,0.793233082706767
12646,202310,EC-MS-SWE,SWE621,DL2,T,0.0,0.793233082706767


In [45]:
program_results_table.loc[program_results_table["program"] == "EC-MS-ISA",:].sort_values(["semester"],ascending=True)

Unnamed: 0,semester,program,course_code,sect_id,waitlisted,number_of_students,percentage_visa_students
674,201870,EC-MS-ISA,CS555,001,F,15.0,0.0
705,201870,EC-MS-ISA,ISA652,DL1,T,0.0,0.0
704,201870,EC-MS-ISA,ISA652,DL2,T,0.0,0.0
703,201870,EC-MS-ISA,ISA652,002,T,0.0,0.0
702,201870,EC-MS-ISA,ISA652,003,T,0.0,0.0
...,...,...,...,...,...,...,...
12312,202310,EC-MS-ISA,INFS612,001,F,2.0,0.806666666666667
12311,202310,EC-MS-ISA,DFOR761,DL1,F,2.0,0.806666666666667
12310,202310,EC-MS-ISA,DFOR761,DL2,F,0.0,0.806666666666667
12326,202310,EC-MS-ISA,ISA681,DL2,F,2.0,0.806666666666667


In [46]:
program_results_table.loc[program_results_table["semester"] == "202310",:]

Unnamed: 0,semester,program,course_code,sect_id,waitlisted,number_of_students,percentage_visa_students
11260,202310,EC-MS-AIT,AIT512,001,F,14.0,0.349673202614379
11261,202310,EC-MS-AIT,AIT512,003,F,8.0,0.349673202614379
11262,202310,EC-MS-AIT,AIT512,002,F,8.0,0.349673202614379
11263,202310,EC-MS-AIT,AIT512,DL2,F,5.0,0.349673202614379
11264,202310,EC-MS-AIT,AIT512,DL1,F,4.0,0.349673202614379
...,...,...,...,...,...,...,...
12723,202310,EC-MS-TCOM,TCOM616,001,T,2.0,0.370786516853933
12724,202310,EC-MS-TCOM,TCOM616,003,T,0.0,0.370786516853933
12725,202310,EC-MS-TCOM,TCOM616,002,T,1.0,0.370786516853933
12726,202310,EC-MS-TCOM,TCOM616,DL2,T,2.0,0.370786516853933


In [47]:
output_path

'/home/jupyter/Team-Prophecy/Data/03_output_for_tableau'

Finally, we send this over for output to tableau

In [48]:
#student_results_table.drop("rec_id",axis=1).to_csv(output_path+os.sep+"student_results.csv")
program_results_table.to_csv(output_path+os.sep+"program_results.csv")
results_table.to_csv(output_path+os.sep+"overall_results.csv")

## WORKING BUT SAVED FOR LATER CODE ##

This code handles the most basic attributes associated with matching to determining how required students can be assigned to the class.
It does not, however, take into account:
- Electives
- Visa students (only focuses on Visa students)
- Prior History in the form of varying class sizes

In [31]:
def modelSolve(a_sem,cxt_grp, prior_csize = None, prior_student_info = {}):
    #WE HAVE RATIOS HERE IN CASE WE NEED TO DO MORE ANALYSIS OR MATCH IT TO 1
    #
    #These will be our constraints
    #
    #############################################
    # INGRESS OF STUDENTS
    CAP_s = model_params['ExpN_eat']+1 #Rename to INCSTU_s
    #############################################
    
    cxt_grp["reg_status"] = cxt_grp["reg_status"]/reg_term_values[a_sem] #(cxt_grp["reg_status"]-cxt_grp["reg_status"].min())/(1+cxt_grp["reg_status"].max()-cxt_grp["reg_status"].min())
    #total_students_list = cxt_grp["cum_total_enrollment"]/reg_term_values[a_sem]
    pcs_weighting_ratio = cxt_grp[["reg_term_code","stu_prog","crs","reg_status"]].set_index(["crs"])["reg_status"].to_dict()
    pcs_weighting_ratio = {k : v for k,v in pcs_weighting_ratio.items()} #"("+",".join(k)+")"
    #Now that we have the ratios, we can begin with the objective function.
    #
    course_ref = {}      #This allows for us to control students within a given course
    course_sect_ref = {} #This allows for us to control students within a given section
    
    course_w_ref = {}
    course_sect_w_ref = {}
    
    course_student_ref = {}
    course_student_w_ref = {}
    
    if prior_csize is None:
        prior_csize = {}
    
    #Total Decision and Constant Variables Used
    decision_vars = {}
    const_vars = {}
    
    #Student ID Decision Variables
    x_decision_vars = {}
    w_decision_vars = {}
    e_decision_vars = {}
    
    solver = pywraplp.Solver.CreateSolver('SCIP')
    if not solver:
        print("Cannot get solver")
        return
        
    cs_l = cxt_grp["crs"].drop_duplicates().tolist()

    #
    s_inf = solver.infinity()
    
    all_cs = cxt_grp["crs"].drop_duplicates().tolist()
    
    sect_lookup = cxt_grp[["crs","sect_id"]].drop_duplicates().groupby("crs")["sect_id"].apply(list).to_dict()
    
    
    
    total_classes_available = len(all_cs)
    for cs in all_cs:
        if cs not in course_ref.keys():
            course_ref[cs] = []
            course_w_ref[cs] = []
            course_student_ref[cs] = {}
            course_student_w_ref[cs] = {}
        
        if cs not in course_sect_ref.keys():
            course_sect_ref[cs] = {}
            course_sect_w_ref[cs] = {}
            
        for sect_id in sect_lookup[cs]:        
            if sect_id not in course_sect_ref[cs].keys():
                course_sect_ref[cs][sect_id] = []
                course_sect_w_ref[cs][sect_id] = []
            for student_id in range(1,CAP_s):
                #######################################################
                #DEFINES DECISION VARS
                # -> Establishes all student id based on the CAP_s provided
                #######################################################
                #We'll have two primary decision variables: Waitlist and Student
                #This will be a minimization function for our purposes, as we want all students
                # to be included.
                
                if student_id not in course_student_ref[cs]:
                    course_student_ref[cs][student_id] = []
                    course_student_w_ref[cs][student_id] = []

                x_name = f"x({cs},{sect_id},{student_id})" #This means the student is registered
                w_name = f"w({cs},{sect_id},{student_id})" #This means the student applied but is waitlisted
                e_name = f"e({cs},{sect_id},{student_id})"

                # Total class sizes (for later)
                #r_name = f"r({cs},{student_id})"
                #wait_name = f"wait({cs},{student_id})"

                decision_vars[x_name] = solver.IntVar(0,1,x_name)
                decision_vars[w_name] = solver.IntVar(0,1,w_name)
                decision_vars[e_name] = solver.IntVar(0,1,w_name)

                if student_id not in x_decision_vars:
                    x_decision_vars[student_id] = []
                if student_id not in w_decision_vars:
                    w_decision_vars[student_id] = []
                if student_id not in e_decision_vars:
                    e_decision_vars[student_id] = []

                x_decision_vars[student_id].append(decision_vars[x_name])
                w_decision_vars[student_id].append(decision_vars[w_name])
                e_decision_vars[student_id].append(decision_vars[e_name])

                course_ref[cs].append(decision_vars[x_name])
                course_w_ref[cs].append(decision_vars[w_name])
                
                course_sect_ref[cs][sect_id].append(decision_vars[x_name])
                course_sect_w_ref[cs][sect_id].append(decision_vars[w_name])
                
                course_student_ref[cs][student_id].append(decision_vars[x_name])
                course_student_w_ref[cs][student_id].append(decision_vars[w_name])
    
    for cs in all_cs:
        # This addresses all values associated with courses. Sections will come later, but for now let's establish a baseline.
        #
        tsppcrs = total_student_population_per_crs[f"{a_sem}-{cs}"]
        pcsw = pcs_weighting_ratio[cs]
        
        if cs not in prior_csize.keys():
            prior_csize[cs] = {}
            prior_csize[cs] = {}
        #print(f"pcsw = {pcsw}")
        #print(f"Total Student Population on {a_sem} for course {cs}: {tsppcrs}")
        const_vars[f"01_total_class_size_allowed"] = solver.Add(solver.Sum(course_ref[cs]) <= CAP_s, name=f"01_total_class_size_allowed")
        
        for student_id in range(1,CAP_s):
            const_vars[f"02_section_constraint"] = solver.Add(solver.Sum(course_student_ref[cs][student_id]) <= 1,name=f"02_section_constraint")
            const_vars[f"03_section_waitlist_constraint"] = solver.Add(solver.Sum(course_student_w_ref[cs][student_id]) <= 1,name=f"03_section_waitlist_constraint")
            
        total_section_size = (CAP_s-1)*(1-pcsw)/(len(sect_lookup[cs])+1)
        for sect_id in sect_lookup[cs]:
            if sect_id in prior_csize[cs]:
                total_section_size = prior_csize[cs][sect_id]
            else:
                prior_csize[cs][sect_id] = total_section_size
            const_vars[f"04_section_max_size_constraint"] = solver.Add(solver.Sum(course_sect_ref[cs][sect_id]) <= total_section_size, name=f"04_section_max_size_constraint")
            #const_vars[f"05_section_min_size_constraint"] = solver.Add(solver.Sum(course_sect_ref[cs][sect_id]) >= 10, name=f"05_section_min_size_constraint")
            for student_id in range(1,CAP_s):
                const_vars[f"06_student_ctrl_constraint"] = solver.Add(decision_vars[f"x({cs},{sect_id},{student_id})"] + decision_vars[f"w({cs},{sect_id},{student_id})"] + decision_vars[f"e({cs},{sect_id},{student_id})"] == 1, name=f"06_student_ctrl_constraint")
    for student_id in range(1,CAP_s):
        #const_vars[f"03_student_cap_{student_id}"] = solver.Add(solver.Sum(x_decision_vars[student_id]) <= 3, name=f"02_student_cap_{student_id}")
        #const_vars[f"03_student_w_cap_{student_id}"] = solver.Add(solver.Sum(w_decision_vars[student_id]) <= 3, name=f"02_student_w_cap_{student_id}")
        
        const_vars[f"07_student_enlist_constraint"] = solver.Add(solver.Sum(x_decision_vars[student_id]) + solver.Sum(w_decision_vars[student_id]) >= 1, name=f"07_student_enlist_constraint")
        #const_vars[f"04_student_w_cap_{student_id}"] = solver.Add(solver.Sum(w_decision_vars[student_id]) >= 3, name=f"02_student_w_cap_{student_id}")
    
    # Minimization function 
    # -> For later * a_decision_vars[student_id]
    
    solver.Minimize(solver.Sum([solver.Sum(w_decision_vars[student_id])
                                for student_id in range(1,CAP_s)]))
    solver.Solve()
    
    return solver, x_decision_vars, w_decision_vars, prior_csize

In [32]:
passed_student_info = {}
prior_course_size = {}        #Running tally of prior course sizes
diff_course_size = {}              #Consistently changes
results_table = []
for a in range(0,len(all_semesters)):
    a_sem = all_semesters[a]
    cxt_grp = reg_term_courses_context.get_group(a_sem)
    prg_list = cxt_grp["stu_prog"].drop_duplicates().tolist()
    prior_course_size[a_sem] = {}
    diff_course_size[a_sem] = {}
    program_results_table = []
    for p in prg_list:
        pcs_val = None
        if a > 0 and p in prior_course_size[all_semesters[a-1]]:
            pcs_val = prior_course_size[all_semesters[a-1]][p]
        solver, x_decision_vars, w_decision_vars, course_size = modelSolve(a_sem,cxt_grp.loc[cxt_grp["stu_prog"] == p,:],passed_student_info,pcs_val)
        #print('Objective value =', solver.Objective().Value())
        #print(f"Registered Students under the Classes taken for Program {p} on {a_sem}:")
        waitlist_course = {}
        reg_course = {}
        #print("enroll values")
        for li in list(x_decision_vars.values()):
            for i in li:
                course_res = re.findall(r"\(([A-Z0-9]+),",i.name())[0]
                try:
                    sect_res = re.findall(r"\,([0-9]+),",i.name())[0]
                except:
                    sect_res = ""
                solVal = i.SolutionValue()
                #print(f"{i.name()} -> {solVal}")
                if course_res not in reg_course:
                    reg_course[course_res] = {}
                if sect_res not in reg_course[course_res]:
                    reg_course[course_res][sect_res] = 0
                reg_course[course_res][sect_res] += solVal
        
        for crs in reg_course.keys():
            sections = reg_course[crs]
            for sect in sections:
                program_results_table.append((a_sem,p,crs,sect,"F",reg_course[crs][sect]))
        
        #print(f"Waitlisted Students under the Classes taken for Program {p} on {a_sem}:")
        #print("waitlist values")
        for li in list(w_decision_vars.values()):
            for i in li:
                course_res = re.findall(r"\(([A-Z0-9]+),",i.name())[0]
                try:
                    sect_res = re.findall(r"\,([0-9]+),",i.name())[0]
                except:
                    sect_res = ""
                solVal = i.SolutionValue()
                #print(f"{i.name()} -> {solVal}")
                if course_res not in waitlist_course:
                    waitlist_course[course_res] = {}
                if sect_res not in waitlist_course[course_res]:
                    waitlist_course[course_res][sect_res] = 0
                waitlist_course[course_res][sect_res] += solVal
        
        for crs in waitlist_course.keys():
            sections = waitlist_course[crs]
            for sect in sections:
                program_results_table.append((a_sem,p,crs,sect,"T",waitlist_course[crs][sect]))
        
        prior_course_size[a_sem][p] = course_size
        
        results_table.append((a_sem,p,solver.Objective().Value()))
        break

    #process_connection.executemany("INSERT INTO prior_class_table(student_id, course_code)")
    #process_connection.commit()
    
    #print([print(i) for i in program_results_table])
    #print()
    
    process_connection.executemany("INSERT INTO results_table(semester, program, min_waitlisted_students) VALUES(?,?,?)",
                                  results_table)
    process_connection.commit()
    
    process_connection.executemany("INSERT INTO program_results_table(semester, program, course_code, sect_id, waitlisted, number_of_students) VALUES(?,?,?,?,?,?)",
                                  program_results_table)
    process_connection.commit()

    
#

## DEPRECATED CODE ##

In [37]:
process_connection.execute("""
                    CREATE TABLE student_results_table(
                       rec_id INTEGER PRIMARY KEY AUTOINCREMENT DEFAULT 0, 
                       semester TEXT NOT NULL,
                       program TEXT NOT NULL,
                       course_code TEXT NOT NULL,
                       student_id INTEGER NOT NULL,
                       international TEXT NOT NULL DEFAULT 'F',
                       waitlisted TEXT NOT NULL DEFAULT 'F',
                       core_course TEXT NOT NULL DEFAULT 'F' 
                    );
                       """)
process_connection.commit()

In [None]:
def modelSolve(a_sem,cxt_grp,prior_student_info):
    #WE HAVE RATIOS HERE IN CASE WE NEED TO DO MORE ANALYSIS OR MATCH IT TO 1
    #
    #These will be our constraints
    #
    #############################################
    # INGRESS OF STUDENTS
    CAP_s = model_params['ExpN_eat'] #Rename to INCSTU_s
    #############################################
    
    cxt_grp["reg_status"] = (cxt_grp["reg_status"]-cxt_grp["reg_status"].min())/(cxt_grp["reg_status"].max()-cxt_grp["reg_status"].min()) #cxt_grp["reg_status"]/reg_term_values[a_sem]
    #total_students_list = cxt_grp["cum_total_enrollment"]/reg_term_values[a_sem]
    pcs_weighting_ratio = cxt_grp[["reg_term_code","stu_prog","crs","sect_id","reg_status"]].set_index(["crs"])["reg_status"].to_dict()
    pcs_weighting_ratio = {k : v for k,v in pcs_weighting_ratio.items()} #"("+",".join(k)+")"
    #Now that we have the ratios, we can begin with the objective function.
    #
    program_ref = {}
    course_ref = {}
    
    program_w_ref = {}
    course_w_ref = {}
    
    course_student_ref = {}
    course_student_w_ref = {}
    
    #Total Decision and Constant Variables Used
    decision_vars = {}
    const_vars = {}
    
    #Student ID Decision Variables
    x_decision_vars = {}
    w_decision_vars = {}
    a_decision_vars = {}
    b_decision_vars = {}
    
    solver = pywraplp.Solver.CreateSolver('SCIP')
    if not solver:
        print("Cannot get solver")
        return
        
    cs_l = cxt_grp["crs"].drop_duplicates().tolist()

    #
    s_inf = solver.infinity()
    
    all_cs = dict(cxt_grp[["crs","sect_id"]].drop_duplicates().itertuples(index=False,name=None))
    
    for cs in all_cs.keys():
        if cs not in course_ref.keys():
            course_ref[cs] = []
            course_w_ref[cs] = []
            
        if cs not in course_student_ref.keys():
            course_student_ref[cs] = {}
            course_student_w_ref[cs] = {}
        
        for student_id in range(1,CAP_s):
            if student_id not in course_student_ref[cs]:
                course_student_ref[cs][student_id] = []
            
            if student_id not in course_student_w_ref[cs]:
                course_student_w_ref[cs][student_id] = []
            #######################################################
            #DEFINES DECISION VARS
            # -> Establishes all student id based on the CAP_s provided
            #######################################################
            for sect in all_cs[cs]:
                #We'll have two primary decision variables: Waitlist and Student
                #This will be a minimization function for our purposes, as we want all students
                # to be included.

                x_name = f"x({cs},{sect},{student_id})"
                w_name = f"w({cs},{sect},{student_id})"

                decision_vars[x_name] = solver.IntVar(0,1,x_name)
                decision_vars[w_name] = solver.IntVar(0,1,w_name)

                if student_id not in x_decision_vars:
                    x_decision_vars[student_id] = []
                if student_id not in w_decision_vars:
                    w_decision_vars[student_id] = []

                x_decision_vars[student_id].append(decision_vars[x_name])
                w_decision_vars[student_id].append(decision_vars[w_name])

                course_ref[cs].append(decision_vars[x_name])
                course_w_ref[cs].append(decision_vars[w_name])

                course_student_ref[cs][student_id].append(decision_vars[x_name])
                course_student_w_ref[cs][student_id].append(decision_vars[w_name])
    
        # This addresses all values associated with courses. Sections will come later, but for now let's establish a baseline.
        #*pcs_weighting_ratio[cs]
        tsppcrs = total_student_population_per_crs[f"{a_sem}-{cs}"]
        #pcsw = pcs_weighting_ratio[cs]
        print(f"Total Student Population on {a_sem} for course {cs}: {tsppcrs}")
        #print(f"Weighting ratio: {pcsw}")
        const_vars[f"01_course_with_w_{cs}_constr_ratio"] = solver.Add(solver.Sum(course_ref[cs]) <= total_student_population_per_crs[f"{a_sem}-{cs}"] + solver.Sum(course_w_ref[cs]), name=f"01_course_with_w_{cs}_constr_ratio")
        #const_vars[f"02_course_with_w_{cs}_constr"] = solver.Add(solver.Sum(course_w_ref[cs]) <= course_avg_dict[cs], name=f"02_course_with_w_{cs}_constr")
        for student_id in range(1,CAP_s): 
            const_vars[f"03_course_{cs}_sections_waitlist_constr"] = solver.Add(solver.Sum(course_student_w_ref[cs][student_id]) <= 1, name=f"03_course_{cs}_sections_waitlist_constr")
            const_vars[f"04_course_{cs}_sections_constr"] = solver.Add(solver.Sum(course_student_ref[cs][student_id]) <= 1, name=f"04_course_{cs}_sections_constr")

            const_vars[f"05_one_regstat_{student_id}"] = solver.Add(solver.Sum(course_student_ref[cs][student_id]) + solver.Sum(course_student_w_ref[cs][student_id]) == 1, name=f"05_one_regstat_{student_id}")

            a_name = f"a({cs},{a_sem})"
            b_name = f"b({cs},{a_sem})"
            a_decision_vars[a_name] = solver.IntVar(0,s_inf,a_name)
            b_decision_vars[b_name] = solver.IntVar(0,s_inf,b_name)

            const_vars[f"06_student_max_classes_bounds_{student_id}"] = solver.Add(solver.Sum(x_decision_vars[student_id]) <= 3, name=f"06_student_max_classes_bounds_{student_id}")
            #const_vars[f"07_student_min_classes_bounds_{student_id}"] = solver.Add(solver.Sum(x_decision_vars[student_id]) >= 1, name=f"07_student_min_classes_bounds_{student_id}")
        const_vars[f"07_organization_course_{cs}_const"] = solver.Add(solver.Sum([solver.Sum(course_student_ref[cs][student_id]) for student_id in range(1,CAP_s)]) == a_decision_vars[a_name],name=f"07_organization_course_{cs}_const")
        const_vars[f"08_organization_wait_course_{cs}_const"] = solver.Add(solver.Sum([solver.Sum(course_student_w_ref[cs][student_id]) for student_id in range(1,CAP_s)]) == b_decision_vars[b_name],name=f"08_organization_wait_course_{cs}_const")

    
    # Minimization function 
    # -> For later * a_decision_vars[student_id]
    
    solver.Minimize(solver.Sum([solver.Sum(w_decision_vars[student_id])
                                for student_id in range(1,CAP_s)]))
    solver.Solve()
    
    return solver, x_decision_vars, w_decision_vars, a_decision_vars, b_decision_vars

In [None]:
passed_student_info = {}
student_results_table = []
program_results_table = []
results_table = []
for a_sem in all_semesters:
    cxt_grp = reg_term_courses_context.get_group(a_sem)
    prg_list = cxt_grp["stu_prog"].drop_duplicates().tolist()
    for p in prg_list:
        solver, x_decision_vars, w_decision_vars, \
            a_decision_vars,b_decision_vars = modelSolve(a_sem,cxt_grp.loc[cxt_grp["stu_prog"] == p,:],passed_student_info)
        #print('Objective value =', solver.Objective().Value())
        #print(f"Registered Students under the Classes taken for Program {p} on {a_sem}:")
        for i in list(a_decision_vars.values()):
            #print(f"{i.name()} -> {i.SolutionValue()}")
            course_res = re.findall(r"\(([A-Z0-9]+),",i.name())[0]
            program_results_table.append((a_sem,p,course_res,"F",i.SolutionValue()))
        #print(f"Waitlisted Students under the Classes taken for Program {p} on {a_sem}:")
        for i in list(b_decision_vars.values()):
            #print(f"{i.name()} -> {i.SolutionValue()}")
            course_res = re.findall(r"\(([A-Z0-9]+),",i.name())[0]
            program_results_table.append((a_sem,p,course_res,"T",i.SolutionValue()))
        
        #print(f"Retrieving individual students under a given class for Program {p} on {a_sem}:")
        all_students = list(x_decision_vars.keys())
        
        #NOTE: Value for international students set to true because we haven't gotten to that point yet
        for i in all_students:
            rdec = range(0,len(x_decision_vars[i]))
            for k in rdec:
                solVal = x_decision_vars[i][k].SolutionValue()
                if solVal == 0:
                    continue
                retained_val = re.findall(r"\(([A-Z0-9]+),([0-9]*),([0-9]*)",x_decision_vars[i][k].name())
                #print(retained_val)
                for r in retained_val:
                    course = r[0]
                    sect = r[1]
                    stu_id = r[2]
                    req_val = "T" if required_dictionary[f"({p},{course})"] == 1 else "F"

                    student_results_table.append((a_sem,p,course,stu_id,"T","F",req_val))
                #print(f"Course: {course} - Required {req_val}, Sect: {sect}, Stu_ID: {stu_id} -> {solVal}")
            
        #print(f"Retrieving waitlisted individual students under a given class for Program {p} on {a_sem}:")
        for i in all_students:
            rdec = range(0,len(w_decision_vars[i]))
            for k in rdec:
                solVal = w_decision_vars[i][k].SolutionValue()
                if solVal == 0:
                    continue
                retained_val = re.findall(r"\(([A-Z0-9]+),([0-9]*),([0-9]*)",w_decision_vars[i][k].name())
                for r in retained_val:
                    course = r[0]
                    sect = r[1]
                    stu_id = r[2]
                    req_val = "T" if required_dictionary[f"({p},{course})"] == 1 else "F"

                    student_results_table.append((a_sem,p,course,stu_id,"T","T",req_val))
                #print(f"{i.name()} -> {i.SolutionValue()}")
        
        results_table.append((a_sem,p,solver.Objective().Value()))
        break
    #process_connection.executemany("INSERT INTO prior_class_table(student_id, course_code)")
    #process_connection.commit()

    process_connection.executemany("INSERT INTO results_table(semester, program, min_waitlisted_students) VALUES(?,?,?)",
                                  results_table)
    process_connection.commit()
    
    process_connection.executemany("INSERT INTO program_results_table(semester, program, course_code, waitlisted, number_of_students) VALUES(?,?,?,?,?)",
                                  program_results_table)
    process_connection.commit()
    
    process_connection.executemany("INSERT INTO student_results_table(semester, program, course_code, student_id, international, waitlisted, core_course) VALUES(?,?,?,?,?,?,?)",
                                  student_results_table)
    process_connection.commit()
    
    break
#