# Let's create a dataframe of classes for students

In [410]:
import random
from random import sample
import pandas as pd

### Create student class

In [411]:
class Student:
    """One student's class list together with two extracurricular flags.

    Attributes (all stored exactly as passed to the constructor):
        classes: the collection of class names the student is taking.
        athlete: flag marking the student as an athlete.
        chorus: flag marking the student as a chorus member.
    """

    def __init__(self, classes, athlete, chorus):
        self.classes, self.athlete, self.chorus = classes, athlete, chorus

    def __str__(self):
        # A student prints as the string form of its class collection.
        return "{!s}".format(self.classes)

### Creating Students

In [412]:
# Catalogue of regular classes a student can draw from; every student ends
# up with 6 classes in total.
possibleClasses = [
    'History',
    'Biology',
    'Chemistry',
    'Physics',
    'English',
    'Optimization',
    'Machine Learning',
    'Business',
    'Astro Engineering',
    'Aero Engineering',
    'Political Science',
    'Social Science',
    'Chinese',
    'French',
    'Spanish',
    'Algebra',
]


def genStudent(num):
    """Return one randomly generated Student.

    With probability 0.2 the student is a chorus member: 4 distinct classes
    drawn from possibleClasses followed by "Chorus1" and "Chorus2", with
    athlete=0 and chorus=1.  Otherwise the student gets 6 distinct classes
    from possibleClasses, chorus=0, and athlete=1 with probability 0.3.

    `num` is accepted but not used.
    """
    # random.random() is the same draw random.uniform(0, 1) performs.
    if random.random() > .8:
        schedule = sample(possibleClasses, 4) + ["Chorus1", "Chorus2"]
        return Student(schedule, 0, 1)

    schedule = sample(possibleClasses, 6)
    isAthlete = int(random.random() > 0.7)
    return Student(schedule, isAthlete, 0)


In [413]:
# Generate 1000 random students, then tabulate them: one row per student with
# the six class names plus the Athlete and Chorus flags.
students = [genStudent(i) for i in range(1000)]

# All rows are collected first and the frame is built in a single call:
# DataFrame.append was removed in pandas 2.0, and appending row by row
# re-copied the whole frame on every iteration.
df = pd.DataFrame(
    [
        {
            'Class 1': student.classes[0],
            'Class 2': student.classes[1],
            'Class 3': student.classes[2],
            'Class 4': student.classes[3],
            'Class 5': student.classes[4],
            'Class 6': student.classes[5],
            'Athlete': student.athlete,
            'Chorus': student.chorus,
        }
        for student in students
    ],
    columns=["Class 1", "Class 2", "Class 3", "Class 4", "Class 5", "Class 6", "Athlete", "Chorus"],
)

### Creating Classroom Capacities

In [414]:
# capacity for each classroom: 30 classrooms (rows 0-29), each with a random
# integer capacity between 20 and 50 inclusive.
# The column is built in one call because DataFrame.append was removed in
# pandas 2.0 (and copied the whole frame on every appended row).
classrooms = pd.DataFrame(
    {"Capacity": [random.randint(20, 50) for _ in range(30)]}
)

In [430]:
classrooms

Unnamed: 0,Capacity
0,26
1,21
2,29
3,38
4,32
5,25
6,20
7,26
8,23
9,34


### Which classroom can have which class

In [415]:
# which classroom each class can be in: every class gets a random list of
# 6 to 20 distinct classroom indices drawn from the 30 classrooms (0-29).
# Rows are gathered first and the frame is built once, since DataFrame.append
# was removed in pandas 2.0.
subjectClassrooms = pd.DataFrame(
    [
        {
            'Class': className,
            'Classrooms': sample(range(30), random.randint(6, 20)),
        }
        for className in possibleClasses
    ],
    columns=['Class', 'Classrooms'],
)

### Which days (A or B) have classes available

In [416]:
# For every class decide on which day(s) it is offered.  A class is offered
# on both days with probability 0.65; otherwise a second draw picks exactly
# one of day A or day B with equal probability, so no class ends up with
# zero days.
daysAvailableRows = []
for className in possibleClasses:
    if random.uniform(0, 1) > .35:
        A, B = 1, 1
    elif random.uniform(0, 1) > .5:  # second draw happens only for single-day classes
        A, B = 1, 0
    else:
        A, B = 0, 1
    daysAvailableRows.append({'Class': className, 'A': A, 'B': B})

# Build the frame once from the collected rows: DataFrame.append was removed
# in pandas 2.0.
daysAvailable = pd.DataFrame(daysAvailableRows, columns=["Class", "A", "B"])

### Which periods each class can be in

In [417]:
# The school day has 8 periods, numbered 0-7.
periods = range(8)

# For each class pick 2 to 4 distinct periods on every day it is offered
# (per daysAvailable); a day on which the class is not offered gets an
# empty list.
classPeriodRows = []
for className in possibleClasses:
    offered = daysAvailable.loc[daysAvailable['Class'] == className]
    PeriodsA = sample(periods, random.randint(2, 4)) if offered['A'].any() else []
    PeriodsB = sample(periods, random.randint(2, 4)) if offered['B'].any() else []
    classPeriodRows.append({
        'Class': className,
        'PeriodsA': PeriodsA,
        'PeriodsB': PeriodsB,
    })

# Build the frame once from the collected rows: DataFrame.append was removed
# in pandas 2.0.
classPeriods = pd.DataFrame(classPeriodRows, columns=["Class", "PeriodsA", "PeriodsB"])

In [418]:
classPeriods

Unnamed: 0,Class,PeriodsA,PeriodsB
0,History,"[5, 3, 4, 2]","[4, 1, 5]"
1,Biology,"[7, 4, 5]",[]
2,Chemistry,[],"[3, 1, 2, 4]"
3,Physics,"[3, 0, 2]","[5, 6]"
4,English,"[7, 4, 3]","[4, 1]"
5,Optimization,"[6, 5, 7]","[2, 7, 0]"
6,Machine Learning,"[6, 3, 1, 2]","[2, 4, 0]"
7,Business,"[5, 6, 7]","[3, 6]"
8,Astro Engineering,[],"[6, 7, 2, 3]"
9,Aero Engineering,[],"[6, 0, 2]"


### Ensuring this is a feasible problem

##### Count the number of students enrolled in each class

In [419]:
# Number of students enrolled in each class: how many cells of the first six
# (class) columns of df equal the class name.
# One vectorised comparison per class replaces the original cell-by-cell
# df.iloc reads (classes x students x 6 scalar lookups).
classColumns = df.iloc[:, :6]
cnt = {
    className: int((classColumns == className).to_numpy().sum())
    for className in possibleClasses
}


In [420]:
cnt

{'History': 350,
 'Biology': 356,
 'Chemistry': 371,
 'Physics': 356,
 'English': 365,
 'Optimization': 357,
 'Machine Learning': 332,
 'Business': 354,
 'Astro Engineering': 331,
 'Aero Engineering': 347,
 'Political Science': 348,
 'Social Science': 352,
 'Chinese': 343,
 'French': 338,
 'Spanish': 366,
 'Algebra': 344}

##### Count num of possible students in each class

In [421]:
# Total number of seats available to each class: the sum of the capacities of
# all classrooms that the class is allowed to use.
totCap = {
    className: sum(
        classrooms.iloc[room, :].values[0]
        for room in subjectClassrooms.loc[subjectClassrooms['Class'] == className, 'Classrooms'].values[0]
    )
    for className in possibleClasses
}

In [422]:
# This is only for one day and not taking into account multiple periods
totCap

{'History': 480,
 'Biology': 444,
 'Chemistry': 432,
 'Physics': 567,
 'English': 318,
 'Optimization': 505,
 'Machine Learning': 490,
 'Business': 600,
 'Astro Engineering': 357,
 'Aero Engineering': 269,
 'Political Science': 699,
 'Social Science': 668,
 'Chinese': 504,
 'French': 255,
 'Spanish': 381,
 'Algebra': 290}

# Creating a function to make multiple datasets

In [434]:
from math import floor

def createDataFrames(numStudents):
    """Generate one random scheduling dataset sized for `numStudents` students.

    Args:
        numStudents: number of students to generate.  The number of
            classrooms is numStudents // 30.

    Returns:
        A 5-tuple of DataFrames, in this order:
          df                -- one row per student with columns "Class 1" ..
                               "Class 6", "Athlete", "Chorus".
          classroomCapacity -- column "Capacity": a random capacity of 30-70
                               (inclusive) for each classroom.
          subjectClassrooms -- columns "Class", "Classrooms": for each class,
                               a list of distinct classroom indices it may
                               use (6-20 of them, fewer if there are not
                               that many classrooms).
          daysAvailable     -- columns "Class", "A", "B": 1 if the class is
                               offered on that day; every class is offered
                               on at least one day.
          classPeriods      -- columns "Class", "PeriodsA", "PeriodsB": lists
                               of 2-4 distinct periods (0-7) for each day the
                               class is offered, [] for a day it is not.
    """
    # Every table below is built from a list of rows in a single DataFrame
    # call: DataFrame.append was removed in pandas 2.0, and appending row by
    # row re-copied the frame on every iteration.

    numClassrooms = max(numStudents // 30, 0)

    # generate the students
    students = [genStudent(i) for i in range(numStudents)]
    df = pd.DataFrame(
        [
            {
                'Class 1': student.classes[0],
                'Class 2': student.classes[1],
                'Class 3': student.classes[2],
                'Class 4': student.classes[3],
                'Class 5': student.classes[4],
                'Class 6': student.classes[5],
                'Athlete': student.athlete,
                'Chorus': student.chorus,
            }
            for student in students
        ],
        columns=["Class 1", "Class 2", "Class 3", "Class 4", "Class 5", "Class 6", "Athlete", "Chorus"],
    )

    # capacity for each classroom
    classroomCapacity = pd.DataFrame(
        {"Capacity": [random.randint(30, 70) for _ in range(numClassrooms)]},
        columns=["Capacity"],
    )

    # which classroom each class can be in
    # The 6-20 bounds are clamped to the number of classrooms that exist;
    # without the clamp, sample() raises ValueError for small numStudents
    # because it is asked for more rooms than there are.
    fewestRooms = min(6, numClassrooms)
    mostRooms = min(20, numClassrooms)
    subjectClassrooms = pd.DataFrame(
        [
            {
                'Class': className,
                'Classrooms': sample(range(numClassrooms), random.randint(fewestRooms, mostRooms)),
            }
            for className in possibleClasses
        ],
        columns=['Class', 'Classrooms'],
    )

    # which day A or B class is available
    # Both days with probability 0.65; otherwise a second draw picks exactly
    # one of the two days with equal probability.
    dayRows = []
    for className in possibleClasses:
        if random.uniform(0, 1) > .35:
            A, B = 1, 1
        elif random.uniform(0, 1) > .5:
            A, B = 1, 0
        else:
            A, B = 0, 1
        dayRows.append({'Class': className, 'A': A, 'B': B})
    daysAvailable = pd.DataFrame(dayRows, columns=["Class", "A", "B"])

    # which periods people have classes
    periodRows = []
    for className in possibleClasses:
        offered = daysAvailable.loc[daysAvailable['Class'] == className]
        PeriodsA = sample(range(8), random.randint(2, 4)) if offered['A'].any() else []
        PeriodsB = sample(range(8), random.randint(2, 4)) if offered['B'].any() else []
        periodRows.append({
            'Class': className,
            'PeriodsA': PeriodsA,
            'PeriodsB': PeriodsB,
        })
    classPeriods = pd.DataFrame(periodRows, columns=["Class", "PeriodsA", "PeriodsB"])

    return df, classroomCapacity, subjectClassrooms, daysAvailable, classPeriods

    

In [441]:
# Build a dataset for 10000 students.  This rebinds subjectClassrooms,
# daysAvailable and classPeriods, replacing the frames built cell by cell
# earlier in the notebook.
(
    studentClasses,
    classroomCapacity,
    subjectClassrooms,
    daysAvailable,
    classPeriods,
) = createDataFrames(10000)

In [447]:
classPeriods

Unnamed: 0,Class,PeriodsA,PeriodsB
0,History,"[2, 6, 7, 0]","[0, 1, 3, 7]"
1,Biology,[],"[1, 4]"
2,Chemistry,"[2, 1]","[4, 0, 5]"
3,Physics,"[0, 5]","[3, 7, 1, 0]"
4,English,"[1, 4, 3]","[6, 7, 5, 3]"
5,Optimization,"[0, 5, 6]","[6, 5, 1, 7]"
6,Machine Learning,"[5, 6, 4]","[4, 3]"
7,Business,"[7, 5, 3, 2]","[6, 7, 5]"
8,Astro Engineering,"[7, 5, 1]","[7, 0, 6, 5]"
9,Aero Engineering,"[6, 3, 7, 0]",[]


### Writing the dataframes to CSV files

In [423]:
# Write each generated table to <outputDir>\<table name>.csv.
# The output directory is spelled once here instead of being repeated in
# every to_csv call; the resulting paths and the write order are unchanged.
outputDir = "C:\\Users\\PC\\Documents\\MIT\\Opt Project\\Data"
tablesToSave = {
    "daysAvailable": daysAvailable,
    "studentClasses": studentClasses,
    "subjectClassrooms": subjectClassrooms,
    "classroomCapacity": classroomCapacity,
    "classPeriods": classPeriods,
}
for tableName, table in tablesToSave.items():
    table.to_csv(outputDir + "\\" + tableName + ".csv")