In [2]:
import csv
import glob
import os
import pprint

import cvxpy as opt
import numpy as np
import pandas as pd

In [3]:
"""
This cell will develop the time constraint matrix. We define this matrix as a 168 * 5 (i, j) 
matrix where i represents the 5 minute block of time and j represents the day of the week. 
In this 1 and 0 matrix, 1 represents a scheduled class and 0 implies an empty spot. 
We can set a linear constraint in our linear program that says every entry must be less than 2. This 
constraint will be used later in our optimization. 
"""
# Weekly grid: 168 rows (five-minute blocks) by 5 columns (days), all zeros to start.
time_matrix = np.zeros(shape=(168, 5))
# Elementwise "at most one class per slot" check; on a NumPy array this yields a
# boolean array of the same shape.
time_constraint = np.less_equal(time_matrix, 1)



In [4]:
"""
In this cell we will import all of the csv files and turn each one into a pandas DataFrame.
We will need to iterate through all of the files in the data subfolder. 
"""
# Need to make a list of all file names.
# glob returns matches in arbitrary (filesystem-dependent) order, so sort them to
# keep the department order stable from one run to the next.
course_files = sorted(glob.glob('data/*.csv'))

def pull_names(course_files):
    """
    Extract department names from course CSV paths.

    Each path is expected to end in ``<DEPARTMENT>-<QUARTER>.csv`` (for example
    ``data/MATH-WINTER2019.csv``). Only the file name is inspected, so the
    directory part may be nested or use the platform's native separator.

    Parameters
    ----------
    course_files : iterable of str
        Paths to course CSV files, e.g. the result of ``glob.glob('data/*.csv')``.

    Returns
    -------
    list of str
        One department name per input path, in input order.

    Raises
    ------
    ValueError
        If a file name has no ``-`` separating a department from a quarter.
    """
    names = []
    for file in course_files:
        # Drop the directory and the extension without assuming exactly one "/" or ".".
        stem, _extension = os.path.splitext(os.path.basename(file))
        # The quarter is whatever follows the last "-"; the rest is the department.
        department, separator, _quarter = stem.rpartition("-")
        if not separator or not department:
            raise ValueError(
                "expected a file named <DEPARTMENT>-<QUARTER>.csv, got " + repr(file)
            )
        names.append(department)
    return names

# Department names recovered from the discovered CSV paths, echoed as a quick check.
course_file_names = pull_names(course_files)
print("The following datasets have been properly uploaded " + repr(course_file_names))

#Now we need to turn these bad boys into data frames. 
def create_dataframes(names_list, quarter, data_dir="data"):
    """
    Load one CSV per department into a dictionary of pandas frames.

    For every name in ``names_list`` the file ``<data_dir>/<name>-<quarter>.csv``
    is read, its rows are put in reverse order, and the index is renumbered.

    Parameters
    ----------
    names_list : iterable of str
        Department names, as returned by ``pull_names``.
    quarter : str
        Quarter tag that appears in the file names, e.g. ``"WINTER2019"``.
    data_dir : str, optional
        Directory that holds the CSV files. Defaults to ``"data"``.

    Returns
    -------
    dict
        Maps each department name to its DataFrame. Because ``reset_index()`` is
        called without ``drop=True``, every frame also carries an ``"index"``
        column holding the row numbers from the file.
    """
    pandas_frames = {}
    for file_name in names_list:
        csv_path = os.path.join(data_dir, file_name + "-" + quarter + ".csv")
        frame = pd.read_csv(csv_path)
        # Reverse the row order, then renumber from 0.
        pandas_frames[file_name] = frame.reindex(index=frame.index[::-1]).reset_index()
    return pandas_frames

# Quarter tag that appears in every CSV file name.
quarter = "WINTER2019"
# Department name -> DataFrame for the chosen quarter.
frames = create_dataframes(names_list=course_file_names, quarter=quarter)

The following datasets have been properly uploaded ['MATH', 'PSTAT', 'CMPSC']


In [65]:
"""
Now we need to quantify and vectorize the course data so that we can formulate the optimization problem. 
"""

def dataframe_cleaner_vectorizer(df, department):
    """
    Convert a raw department course table into columns that are easy to vectorize.

    Reads the "Time", "Code" and "Days" columns of ``df`` and returns a new frame.
    COLUMNS ARE AS FOLLOW:
        start  -- five-minute block at which the meeting starts, counted from 8:00
        end    -- five-minute block at which the meeting ends, counted from 8:00
        dep    -- the ``department`` argument, repeated on every row
        cnum   -- last space-separated token of "Code"
        dayarr -- length-5 NumPy vector of 1s and 0s for M, T, W, R, F
        object -- a Course built from the row's other columns
    """

    def to_block(moment):
        """Map a timestamp to its five-minute block number, with 8:00 as block 0."""
        return (moment.hour - 8) * 12 + moment.minute / 5

    def time_str_to_blocks(time_string):
        """
        INPUT: Time string from the CSV, written as "start - end"
        OUTPUT: (start_block, end_block) in five-minute blocks for our matrix
        """
        begin_text, finish_text = time_string.split(" - ")
        begin, finish = pd.to_datetime(begin_text), pd.to_datetime(finish_text)
        return to_block(begin), to_block(finish)

    def day_str_to_np_array(string):
        """
        INPUT: A string in the {M T W R F} format
        OUTPUT: Length-5 NumPy vector with 1 for each day letter found in the string
        """
        return np.array([1.0 if letter in string else 0.0 for letter in "MTWRF"])

    # One (start, end) pair per row, split into two numeric columns below.
    block_pairs = df["Time"].apply(time_str_to_blocks)

    new_df = pd.DataFrame()
    new_df["start"] = block_pairs.apply(lambda pair: pair[0])
    new_df["end"] = block_pairs.apply(lambda pair: pair[1])
    new_df["dep"] = department
    new_df["cnum"] = df["Code"].apply(lambda code: code.split(" ")[-1])
    new_df["dayarr"] = df["Days"].apply(day_str_to_np_array)
    # Wrap every row (all columns built so far) in a Course object.
    new_df["object"] = new_df.apply(lambda row: Course(row), axis=1)
    return new_df

In [66]:
class Course:
    """Attribute-style view of one course row, so fields read as ``course.start`` etc."""

    def __init__(self, df_row):
        # Copy the fields off the row in a fixed order; df_row only has to support
        # lookup by these keys.
        for field in ("dep", "start", "end", "cnum", "dayarr"):
            setattr(self, field, df_row[field])
        

def encode_class_to_timemat(matrix, course_object):
    """
    Takes an existing schedule matrix and a course object and adds them together.

    Parameters
    ----------
    matrix : numpy.ndarray
        Schedule matrix of shape (168, 5): time blocks by days.
    course_object : object
        Must expose ``start`` and ``end`` (block numbers, truncated with ``int``)
        and ``dayarr`` (five 0/1 day flags).

    Returns
    -------
    numpy.ndarray
        A new (168, 5) matrix equal to ``matrix`` plus 1 in every cell the course
        occupies: rows ``int(start)`` up to but not including ``int(end)``, on each
        flagged day. ``matrix`` itself is not modified.

    Raises
    ------
    AssertionError
        If ``matrix`` does not have shape (168, 5).
    """
    assert matrix.shape == (168, 5), "schedule matrix must have shape (168, 5)"
    # Column vector with 1s on the time blocks the course occupies.
    time_vec = np.zeros(168)
    time_vec[int(course_object.start):int(course_object.end)] = 1
    time_vec = time_vec.reshape((168, 1))
    # Row vector of day flags; the product is the course's (168, 5) footprint.
    day_vec = np.asarray(course_object.dayarr).reshape((1, 5))
    added = time_vec @ day_vec
    return matrix + added
    
    

In [77]:
"""
Next step is defining vectors that represent mandatory classes and elective lists.



"""

mandatory = ["MATH 8", "MATH 104A"]
elective = ["MATH 108A", "MATH 120"]

#Vectors to ensure enrollment in a class (one entry per course, 1 = enrolled)
man_lec = np.zeros(len(mandatory))
elec_lec = np.zeros(len(elective))

#Vectors to ensure enrollment in sections (one entry per course)
man_sec = np.zeros(len(mandatory))
elec_sec = np.zeros(len(elective))

# NOTE(review): the vectors above are plain zero arrays, so the constraints below are
# evaluated immediately to booleans rather than kept as symbolic constraints --
# presumably they become solver variables later; confirm.

#Need to implement the correct class number constraint
min_classes = 4 
max_classes = 4 
# The number of enrolled lectures must lie between min_classes and max_classes,
# inclusive. Both bounds point the same way: lower <= total <= upper.
class_req_cons = (min_classes <= (sum(man_lec) + sum(elec_lec)) <= max_classes)

#Need to implement the constraint that all mandatory classes must be enrolled
#Must be one for every point in man_lec 
mandatory_filled_cons = (len(man_lec) == sum(man_lec))

#####Need a constraint that says if a lecture is enrolled then that given lecture has a section######

def section_confirm(lec, sec):
    """
    Check that a section flag agrees with its lecture flag.

    Section value is 1 if scheduled, 0 if not scheduled, 2 if section not needed.
    Returns whether ``sec == 1`` when ``lec == 1``, and whether ``sec != 1`` otherwise.
    """
    #######TO DO: IMPLEMENT A 2 for our section vector if a section is not needed #########
    lecture_enrolled = (lec == 1)
    return (sec == 1) if lecture_enrolled else (sec != 1)

#ensure that for mandatory and elective classes sections are confirmed

# Each constraint holds only when every lecture/section pair passes section_confirm.
section_match_mandatory_con = (
    sum(section_confirm(lec, man_sec[i]) for i, lec in enumerate(man_lec)) == len(man_lec)
)
section_match_elec_con = (
    sum(section_confirm(lec, elec_sec[i]) for i, lec in enumerate(elec_lec)) == len(elec_lec)
)




    


In [None]:
"""
In this cell we will define the parameters that we need to optimize over.
We need to optimize:
man_sec -- A vector that tells us which section to enroll in for our mandatory classes
elec_lec -- A vector that represents which elective courses we choose
elec_sec -- A vector that represents which section to enroll in for our electives
"""

In [68]:
#test cell
# Smoke test: place the first MATH course (row label 0) onto an empty weekly grid.
obj = dataframe_cleaner_vectorizer(frames["MATH"], "MATH").loc[0, "object"]
empty_mat = np.zeros(shape=(168, 5))
new = encode_class_to_timemat(empty_mat, obj)


In [76]:
#Optimization Solver Cell

# Human inputs -- still Ellipsis placeholders, to be filled in before solving.
min_classes = ...
max_classes = ...
mandatory_courses = ...
elective_courses = ...

# Constraints collected in one list for the solver.
cons = [
    time_constraint,
    mandatory_filled_cons,
    section_match_mandatory_con,
    section_match_elec_con,
    class_req_cons,
]
