In [11]:
import pandas as pd
import numpy as np
import random

# Course Selection Process
**Two programs of the university are chosen for the course selection simulation:** <br>
1.) Political Science (B.Sc.) <br>
2.) Educational Science (B.Sc.) <br>
<br>
For simplicity a set of assumptions is made to facilitate the simulation of Course choices for both programs:
1. Choices start in Autumn
2. at least 30 CP are taken per semester
3. Mandatory Courses are given priority
4. No Exchange Semester, no MOOCs
5. Finish the degree in six semesters


## 1) Political Science Bachelor Program
Study Program can be found here:
https://www.unige.ch/sciences-societe/formations/bachelors/sciences-politiques/plan-detudes-et-reglement/ <br>
(Study Program Version might be subject to change, version used: Academic Year 2021/2022)

In [13]:
# Mapping of selection rules:
# # Electives in Part 1:
elecp1 = ['T108000','T108002','T106001','T105001'] # choose 3 out of 4
elec_seminars_p1 = ['T108001','T108003','T106006','T105006'] #choose 1

# Electives in Part 2
# choose 5 out of 7
elec_pol_p = ['T207012', 'T207035', 'T207037','T207038']                  
elec_pol_a =  ['T207034','T207036','T207004'] #choose 2 out of 3

 # Choose 2. Some Electives are also among elec_pol 
elec_part2_p = ['T207012','T207035','T207037','T207038','J2P273','J2D034','T207001','J2P234','T207039','J2P302']
elec_part2_a = ['T207034','T207036','T207004','J2D035','T207002','J2P201','J2P015','T207059']
# Choose 12 CP of free options from open faculty T206054
open_faculty_a = ['T206007','T205011','T206000','T208013','T208002','T207003','T214006','T206059','T208016','T208017','T208006','S102012','T208033','T208015','T205000','T205023']
open_faculty_p = ['T206024','T206001','T208003','T208012','T206056','T206058','T206006','T208014','S102013','T208000','T206002','T206057','T208011','T205004','T205020','T205027']
three_CP_Course_selection_a = ['12E050SCIENCES', '5869','']

In [23]:
# If a certain course is selected that represents a prerequisitve, the corresponding seminar is selected
def course_first_year():
    first_year = ['T100007','T100005','T107005','T100000','T100001','T100004','T100006','T107009','T100003']
    choice = []
    choice = np.random.choice(elecp1, size = 3,replace = False )
    for i in choice:
        first_year.append(i)
    sem_count = 0
    if 'T106001' in first_year:
        first_year.append('T106006')
        sem_count += 1
    elif 'T108002' in first_year and sem_count == 0:
        first_year.append('T108003') 
        sem_count += 1
    else: 
        sem = np.random.choice(elec_seminars_p1, size = 1)
        first_year.append(sem)
    return first_year

In [24]:
check = course_first_year()

In [25]:
len(check)
#First Year correct number of classes taken, exactly 60 CP

13

In [26]:
print(check)

['T100007', 'T100005', 'T107005', 'T100000', 'T100001', 'T100004', 'T100006', 'T107009', 'T100003', 'T106001', 'T105001', 'T108002', 'T106006']


In [27]:
def course_second_year():
    first_year = course_first_year()
    second_part_second_year = ['T200002','T200000','T200001','T207041','T207033','T207040','T200003','T207000']
    choice = []
    reduce_choice = False
    geography = False
    choice = np.random.choice(elec_pol_p, size = 3, replace = False)
    for i in choice:
        second_part_second_year.append(i)
    if 'T207038' in second_part_second_year: #Political Theory taken in spring
        second_part_second_year.append('T207002') #Ethic and politics game in Autumn third year
        reduce_choice = True 
    if 'T106006' in first_year: #geography path
        second_part_second_year.append('T206060') # Environmental game Autum second year first sem!
        geography = True
    courses = first_year + second_part_second_year
    return courses, geography, reduce_choice

In [28]:
test2,bol1,bol2 = course_second_year()
print(test2)


['T100007', 'T100005', 'T107005', 'T100000', 'T100001', 'T100004', 'T100006', 'T107009', 'T100003', 'T105001', 'T106001', 'T108002', 'T106006', 'T200002', 'T200000', 'T200001', 'T207041', 'T207033', 'T207040', 'T200003', 'T207000', 'T207012', 'T207038', 'T207037', 'T207002', 'T206060']


In [29]:
print(len(test2)) 
print(bol1)
print(bol2)

26
True
True


In [58]:
def all_courses_pol_science():
    second_part_second_year, geography, reduce_choice = course_second_year()
    second_part_third_year = [] #54 CP to choose
    choice = []
    choice = np.random.choice(elec_pol_a, size = 2, replace = False)
    for i in choice:
        second_part_third_year.append(i)
    count = 1
    while count < 3: 
        choice = np.random.choice(elec_part2_a, size = 1, replace = False)
        if choice not in second_part_third_year:
            for i in choice:
                second_part_third_year.append(i)
                count += 1
    choice = []
    if reduce_choice == True: #no geography, ethics game,take at most 3CP more
        choice = np.random.choice(three_CP_Course_selection_a, size = 1, replace = False, p =[0.4, 0.3, 0.3])
        for i in choice:
            second_part_third_year.append(i)
    elif geography == True and reduce_choice == False : # geography and no Ethics game
        second_part_third_year.append('T206054')# political geography
    elif geography == True and reduce_choice == True:
        second_part_third_year.append('') #empty, do nothing
    else:
        choice = np.random.choice(open_faculty_a, size = 1) #random choice unless geography!
        for i in choice:
            second_part_third_year.append(i)
    choice = []
    if '5869' in second_part_third_year:
        second_part_third_year.append('5870')
        choice = np.random.choice(open_faculty_p, size = 3, replace = False) #only 27 is needed
    elif '12E050SCIENCES' in second_part_third_year:
        second_part_third_year.append('12E051SCIENCES')
        choice = np.random.choice(open_faculty_p, size = 3, replace = False)
    elif reduce_choice == True: # 36 ECTS in Sem1 Second year
        choice = np.random.choice(open_faculty_p, size = 3, replace = False)
    else: # usecase if no specialties are taken
        choice = np.random.choice(open_faculty_p, size = 4,replace = False)
    for i in choice:
        second_part_third_year.append(i)
    second_part_third_year = list(filter(None, second_part_third_year))
    courses = second_part_second_year + second_part_third_year
    return courses, geography

In [59]:
full_hist, geo = all_courses_pol_science()
print(len(full_hist))
print(full_hist)
print(geo)

32
['T100007', 'T100005', 'T107005', 'T100000', 'T100001', 'T100004', 'T100006', 'T107009', 'T100003', 'T108000', 'T105001', 'T108002', 'T108003', 'T200002', 'T200000', 'T200001', 'T207041', 'T207033', 'T207040', 'T200003', 'T207000', 'T207038', 'T207012', 'T207035', 'T207002', 'T207004', 'T207034', 'J2P015', 'J2D035', 'T208003', 'T205027', 'T206057']
False


In [71]:
grades = [4.0, 4.25, 4.5, 4.75, 5, 5.25, 5.5, 5.75, 6.0]
p_grades = [1/34,2/34,4/34,6/34,8/34,6/34,4/34,2/34,1/34]
len(p_grades)
print(p_grades)

[0.029411764705882353, 0.058823529411764705, 0.11764705882352941, 0.17647058823529413, 0.23529411764705882, 0.17647058823529413, 0.11764705882352941, 0.058823529411764705, 0.029411764705882353]


### Obtain Training Data

In [60]:
df = pd.DataFrame()

for i in range(1,301):
    df_temp = pd.DataFrame()
    course_selection, geography = all_courses_pol_science()
    df_temp['CourseID'] = course_selection
    df_temp['StudentID'] = [i] * len(df_temp)
    df_temp['Grades'] = np.random.choice(grades, size = len(df_temp), p = p_grades)
    df_temp['Geography'] = [geography] * len(df_temp)
    df = pd.concat([df, df_temp], ignore_index = True)
df.head(10)    

Unnamed: 0,CourseID,StudentID,Grades,Geography
0,T100007,1,5.75,True
1,T100005,1,5.25,True
2,T107005,1,4.75,True
3,T100000,1,5.25,True
4,T100001,1,5.0,True
5,T100004,1,4.75,True
6,T100006,1,5.75,True
7,T107009,1,5.5,True
8,T100003,1,5.0,True
9,T106001,1,4.5,True


In [68]:
len(df.StudentID.unique()) # 300 Training subjects for political Science

300

In [80]:
df[df.StudentID ==1].Geography.unique()

array([ True])

In [87]:
# Look up table is created to check for Geography Orientation at a later step
look_up_table = pd.DataFrame()

for i in df.StudentID.unique():
    df_temp = pd.DataFrame()
    df_temp['StudentID'] = [i]
    df_temp['Geography'] = df[df.StudentID == i].Geography.unique()
    look_up_table = pd.concat([look_up_table, df_temp])

look_up_table.set_index('StudentID')

In [94]:
look_up_table.tail(10)

Unnamed: 0,StudentID,Geography
0,291,True
0,292,True
0,293,True
0,294,False
0,295,False
0,296,True
0,297,True
0,298,True
0,299,True
0,300,True


In [91]:
look_up_table.Geography.value_counts()

True     230
False     70
Name: Geography, dtype: int64

In [67]:
df.to_csv('obtained_data/train_pol_BSc.csv',index = False)

In [95]:
look_up_table.to_csv('obtained_data/train_pol_BSc.csv',index = False)

## 2) Educational Science Bachelor Program
The program outline is to be found at: <br>
https://pgc.unige.ch/main/study-plans?year=2022&fac=464&studyPlanDep=default-value

In [4]:
# First Cycle courses

# Domain 1: Choose 4
dom1 = ['742000', '742001', '742002', '742003', '742004', '742006', '742111', '742007']
# Domain 2: Choose 4
dom2 = ['742060','7420601', '742062', '742064', '742066', '742067']
# Domain Psy: Choose 1 (or 1 from Dom 1 or 2)
dom_psy = ['71105', '71120', '71121', '74112', '71133']
# Seminar in SITS: choose 1
sem_sits = ['7420AA', '7420AB', '7420AC', '7420AD', '7420AE', '7420AF', '7420AH', '7421AA', '7421AB', '7421AC', '7421AD', '7421AE', '7421AG']
# Seminar in education: choose 1
sem_edu = ['742120', '742121']


In [58]:
def first_cycle():
    first_cycle = []
    domains = [dom1, dom2]
    names = ['dom1', 'dom2', 'dom_psy']
    for i in domains:
        choice = []
        choice = np.random.choice(i, size = 4, replace = False)
        [first_cycle.append(j) for j in choice]
    choice = np.random.choice(names, size = 1, replace = False)
    if choice == 'dom1':
        course = np.random.choice(dom1, size = 1, replace = False)
        first_cycle.append(course[0])
    elif choice == 'dom2':
        course = np.random.choice(dom2, size = 1, replace = False)
        first_cycle.append(course[0])
    else:
        course = np.random.choice(dom_psy, size = 1, replace = False)
        first_cycle.append(course[0])
    sem = np.random.choice(sem_sits, size = 1, replace = False)
    first_cycle.append(sem[0])
    sem = np.random.choice(sem_edu, size = 1, replace = False)
    first_cycle.append(sem[0])
    return first_cycle

In [59]:
test1 = first_cycle()
len(test1) # 11 courses are chosen during the first cycle

11

In [2]:
# Second Cycle Course (BSEF Only)

# Domain 1 (choose 6)
dom1_cyc2 = ['742215', '742205', '742206', '742208', '742209', '742211', '742327', '742214', 'J2M292', '7414H']

# Domain 2 (choose 6)
dom2_cyc2 = ['742261', '742262', '742272', '742270', '742840','742841', '742851', '742271', '742871']

# Domain 3 (choose 6)
dom3_cyc2 = ['742329', '742331', '742334', '742336', '742337', '742338','742340', '742342', '742343', '742344']

# Domain 4 (choose 6)
dom4_cyc2 = ['742380', '742382', '742383', '742385', '742386', '742396', '742397', '742389', '752254', '742861', '742870', '74110', '74146']

# Domain 5 (choose all)
dom5_cyc2 = ['7422A5', '7422A8', '7422A7', '7417I']

# Domain 6 
# (Methods: choose 4 in total, at least one of each subdomain)
m1 = ['742450', '742452']
m2 = ['742481', '742481','742483', '742514']
m3 = ['742515', '742516', '742517', '742518']

# Seminar: Choose 1
sem_cyc2 = ['7422AA', '7422AG', '7422AH', '7422AN', '7422AQ', '7422AR', '7422AS', '7422AT', '7422AU', '7422AV', '7422AK']


In [46]:
def domVI():
    domains = [m1, m2, m3]
    names = ['m1', 'm2', 'm3']
    dom6 = []
    for i in domains:
        methods = []
        methods = np.random.choice(i, size = 1, replace = False)
        [dom6.append(j) for j in methods]
    while len(dom6) < 4:
        extra = np.random.choice(names, size = 1, replace = False)
        methods = []
        if extra == 'm1': 
            methods = np.random.choice(m1, size = 1, replace = False)
            [dom6.append(j) for j in methods if j not in dom6]
        elif extra == 'm2':
            methods = np.random.choice(m2, size = 1, replace = False)
            [dom6.append(j) for j in methods if j not in dom6]
        else:
            methods = np.random.choice(m3, size = 1, replace = False)
            [dom6.append(j) for j in methods if j not in dom6]    
    return dom6

In [51]:
test6 = domVI()
len(test6)

4

In [52]:
test6

['742450', '742481', '742517', '742452']

In [55]:
def second_cycle():
    domains = [dom1_cyc2, dom2_cyc2, dom3_cyc2, dom4_cyc2] # Domains 1-5
    second_cycle = []
    for i in domains:
        choice = []
        choice = np.random.choice(i, size = 6, replace = False )
        [second_cycle.append(j) for j in choice]
    [second_cycle.append(k) for k in dom5_cyc2]
    sem = np.random.choice(sem_cyc2, size = 1, replace = False)
    second_cycle.append(sem[0])
    dom6 = domVI()
    [second_cycle.append(l) for l in dom6]
    return second_cycle

In [56]:
test = second_cycle()
set(dom5_cyc2) & set(test) # testing with intersection of sets to validate the number of choices made

{'7417I', '7422A5', '7422A7', '7422A8'}

In [69]:
len(test)

33

In [66]:
def all_courses_edu():
    all_courses = []
    first = first_cycle()
    second = second_cycle()
    [all_courses.append(x) for x in first]
    [all_courses.append(y) for y in second]
    return all_courses

In [67]:
complete_record = all_courses_edu()

In [68]:
len(complete_record)
# 11 from first cycle
# 33 from second cycle

44

In [72]:
df_edu = pd.DataFrame()

for i in range(301,601):
    df_temp = pd.DataFrame()
    course_selection = all_courses_edu()
    df_temp['CourseID'] = course_selection
    df_temp['StudentID'] = [i] * len(df_temp)
    df_temp['Grades'] = np.random.choice(grades, size = len(df_temp), p = p_grades)
    df_edu = pd.concat([df_edu, df_temp], ignore_index = True)
df_edu.head(10)  

Unnamed: 0,CourseID,StudentID,Grades
0,742004,301,5.75
1,742111,301,4.25
2,742003,301,5.0
3,742000,301,4.75
4,742066,301,5.25
5,742064,301,5.25
6,742062,301,4.75
7,742060,301,5.25
8,71105,301,4.5
9,7421AB,301,4.5
