In [15]:
import numpy as np
import cvxpy as opt
import pandas as pd
import csv
import pprint
import glob

In [16]:
"""
This cell will develop the time constraint matrix. We define this matrix as a 168 * 5 (i, j) 
matrix where i represent the 5 minute block of time and 
In this 1 and 0 matrix, 1 represents a scheduled class and 0 implies an empty spot. 
We can set a linear constraint in our linear program that says all must be less than 2. This 
constraint will be later in our optimization. 
"""
time_matrix = np.zeros((168,5))
time_constraint = (time_matrix <= 1)



In [23]:
"""
In this cell we will import all of the csv files and turn them into a parsable dataframe.
We will need to iterate through all of the files in data subfolder. 
"""
# Need to make a list of all file names
course_files = glob.glob('data/*.csv')

def pull_names(course_files):
    """
    Takes in a set of course files that have format {'data/*.csv'} and outputs a set of just department names
    """
    names = [] 
    for file in course_files:
        #take out the data
        pre, rest = file.split("/")
        #take out CSV
        index_name, blah = rest.split('.')
        #take out quarter
        department, quarter = index_name.split("-")
        names.append(department)
    return names

course_file_names = pull_names(course_files)
print("The following datasets have been properly uploaded " + str(course_file_names))

#Now we need to turn these bad boys into data frames. 
def create_dataframes(names_list, quarter):
    """
    Takes a set of CSV names (as a list) and outputs a dictionary of pandas frames 
    that can be easily accessed by department. 
    """
    pandas_frames = {}
    for file_name in names_list:
        pandas_frames[file_name] = pd.read_csv("data/" + file_name + "-" + quarter + ".csv")
        pandas_frames[file_name] = pandas_frames[file_name].reindex(index=pandas_frames[file_name].index[::-1]).reset_index()
    return pandas_frames

quarter = "WINTER2019"
frames = create_dataframes(course_file_names, quarter)

The following datasets have been properly uploaded ['MATH', 'PSTAT', 'CMPSC']


In [69]:
"""
Now need to convert to quantify and vectorize for formulating optimization problem. 
"""

def dataframe_cleaner_vectorizer(df, department):
    """
    Takes in a plaintext csv as imported from UCSB website and converts to columns that we can vectorize 
    easily.
    COLUMNS ARE AS FOLLOW:
    
    
    """
    new_df = pd.DataFrame()
    def time_str_to_blocks(time_string):
        """
        INPUT: Time String from CSV
        OUTPUT: 2 numbers representing start and stop written as five minute block (0-168) for our matrix
        takes the current time string that we have and converts into two separate entries with start
        start_time and end_end
        """
        start_str, end_str = time_string.split(" - ")
        start_dt, end_dt = pd.to_datetime(start_str), pd.to_datetime(end_str)
        def dt_to_fiver(dt): 
            """
            Take a datetime object and convert to our matrix notation
            """
            hour = (dt.hour - 8) * 6
            block = dt.minute/5
            return (hour+block)
        start_vec, end_vec = dt_to_fiver(start_dt), dt_to_fiver(end_dt)
        return start_vec, end_vec
    new_df["tups"] = df["Time"].apply(lambda string: time_str_to_blocks(string))
    new_df["start"] = new_df["tups"].apply(lambda x: x[0])
    new_df["end"] = new_df["tups"].apply(lambda x: x[1])
    new_df = new_df.drop(columns = "tups")
    new_df["dep"] = department
    new_df["cnum"] = df["Code"].apply(lambda x: x.split(" ")[-1])
    return new_df

In [70]:
dataframe_cleaner_vectorizer(frames["MATH"], "MATH").head()

Unnamed: 0,start,end,dep,cnum
0,12.0,22.0,MATH,137A
1,36.0,45.0,MATH,124A
2,30.0,39.0,MATH,122A
3,18.0,27.0,MATH,122A
4,24.0,33.0,MATH,120


In [62]:
#Optimization Solver Cell
cons = [time_constraint]
frames["MATH"].head()

Unnamed: 0,index,Code,Title,Time,Location,Professor,Size,Days
0,63,MATH 137A,Graph and Network Theory,10:00am - 10:50am,ARTS 1349,CASTEELS K L,49 / 50,M W F
1,62,MATH 124A,Partial Differential Equations,2:00pm - 3:15pm,PHELP2516,SIDERIS T C,50 / 50,T R
2,61,MATH CS 122A,Complex Variables I,1:00pm - 2:15pm,CRST 164B,LABUTIN D A,9 / 25,T R
3,60,MATH 122A,Introduction to Theory of Complex Variables,11:00am - 12:15pm,GIRV 1115,PUTINAR M,45 / 45,T R
4,59,MATH CS 120,Special Topics,12:00pm - 1:15pm,CRST 164B,BUENO CACHADI,11 / 25,M W
