# Parse CSE File to Get Course List

In [None]:
import PyPDF2
import pandas as pd

course_type = 'CSE'

# Pull the text out of every page. The context manager closes the PDF even if
# extraction fails part-way through.
with open('ComputingScienceSummaries.pdf', 'rb') as pdfFileObj:
    pdfReader = PyPDF2.PdfReader(pdfFileObj)
    n_pages = len(pdfReader.pages)
    text = ''.join(pdfReader.pages[n].extract_text() for n in range(n_pages))

# Maps "CODE: TITLE" -> [comma-separated prerequisite codes].
courses = {}
course = None           # most recent course heading seen
use_next_line = False   # True when the previous line was a "Prerequisites" line

for line in text.splitlines():
    # Course heading: starts with the course type and has a space at index 9
    # of the raw line. Slicing (not indexing) keeps short lines from raising.
    if line.strip().startswith(course_type) and line[9:10] == ' ':
        course = line.replace('  ', ' ').strip()
    if '©' in line and course_type in line:
        # A page footer can carry the heading, e.g.
        # 100 © Alberta Education, Alberta, Canada   Revised 2010  CSE2910:  CSE PROJECT B
        course = course_type + line.split(course_type)[1]
        course = course.replace('  ', ' ').strip()
    if course is None:
        # Nothing to attach prerequisites to until a heading has been seen.
        continue
    if 'Prerequ' in line:
        fields = line.split(': ')
        # Use the text that follows the first ': '; a line without ': ' is
        # treated as having no prerequisite instead of raising IndexError.
        preq = fields[1].strip().replace(' ', '') if len(fields) > 1 else ''
        if 'None' in preq:
            preq = ''
        courses[course] = [preq]
    if use_next_line:
        # The line after a "Prerequisites" line may hold one more entry; only
        # the part before ': ' is kept.
        preq2 = line.split(': ')[0].strip().replace(' ', '')
        if preq2:  # in case there is no prereq on this line
            preq = preq + ',' + preq2
        courses[course] = [preq]
        use_next_line = False
    if 'Prerequisites' in line:
        use_next_line = True

cse = pd.DataFrame.from_dict(courses, orient='index', columns=['Prerequisites']).reset_index()
cse.columns = ['Course', 'Prerequisites']
cse

In [None]:
# List every line containing the copyright sign together with its line index.
for idx, footer in enumerate(text.splitlines()):
    if '©' not in footer:
        continue
    print(idx, footer)

In [None]:
# Rows whose prerequisite string mentions the current course type.
cse.loc[cse['Prerequisites'].str.contains(course_type)]

In [None]:
# Rows whose course string has '3' as its fourth character.
cse.loc[cse['Course'].str.get(3) == '3']

In [None]:
# Save the parsed table as <course_type>.csv without the index column.
cse.to_csv(f'{course_type}.csv', index=False)

## Import from CSE.csv file

In [None]:
import pandas as pd
# Empty prerequisite cells are read back as NaN; replace them with '' so the
# column holds only strings (the combining cell loads this file the same way).
cse = pd.read_csv('CSE.csv').fillna('')
#df.sort_values('Course', ascending=False)

In [None]:
# add graphviz to path
import os

# Only on Windows: append the Graphviz install directory to PATH.
if os.name == 'nt':
    os.environ["PATH"] = os.environ["PATH"] + os.pathsep + 'C:/Program Files/Graphviz/bin'

In [None]:
import graphviz

dot = graphviz.Digraph(format='png')  # or 'pdf' or 'svg'

# Node shape is chosen from the fourth character of the course number.
level_shapes = {'3': 'diamond', '2': 'ellipse'}

for row in cse.itertuples():
    course_number = row.Course.split(':')[0]
    if '950' in course_number:
        # Course numbers containing '950' are left out of the graph.
        continue

    # Slices instead of indexes so an unexpectedly short number cannot raise.
    shape = level_shapes.get(course_number[3:4], 'box')
    # A '9' as the fifth character gets a red outline.
    color = 'red' if course_number[4:5] == '9' else 'black'
    dot.node(course_number, shape=shape, color=color)

    prerequisites = row.Prerequisites
    # An empty CSV cell is read back as NaN (a float), so only real strings
    # are split. The original "longer than 3 characters" guard is kept.
    if isinstance(prerequisites, str) and len(prerequisites) > 3:
        for preq in prerequisites.split(','):
            if preq:  # a stray comma must not create an unnamed node
                dot.edge(preq, course_number)
dot

In [None]:
# render the graph as a png
import os

# render() writes the DOT source to file_string and the image to
# file_string + '.png'; cleanup=True has graphviz delete the source file
# after a successful render, so no manual os.remove is needed.
file_string = '../images/CSE'
dot.render(file_string, cleanup=True)

# NET Courses

In [None]:
import PyPDF2
import pandas as pd

course_type = 'NET'

# Pull the text out of every page. The context manager closes the PDF even if
# extraction fails part-way through.
with open('NetworkingSummary.pdf', 'rb') as pdfFileObj:
    pdfReader = PyPDF2.PdfReader(pdfFileObj)
    n_pages = len(pdfReader.pages)
    text = ''.join(pdfReader.pages[n].extract_text() for n in range(n_pages))

# Maps "CODE: TITLE" -> [comma-separated prerequisite codes].
courses = {}
course = None           # most recent course heading seen
use_next_line = False   # True while lines are being appended as prerequisites

for line in text.splitlines():
    if line.strip().startswith(course_type):
        course = line.replace('  ', ' ').strip()
        use_next_line = False
    if '©' in line and course_type in line and ':' in line:
        # A page footer can carry the heading: keep the last word before the
        # first ':' (the course code) and the text up to the next ':'.
        head, title = line.split(':')[:2]
        course = head.strip().split(' ')[-1] + ':' + title.strip()
        use_next_line = False
    if course is None:
        # Nothing to attach prerequisites to until a heading has been seen.
        continue
    if 'Prerequ' in line:
        fields = line.split(':')
        # A line without ':' is treated as having no prerequisite.
        preq = fields[1].strip().replace(' ', '') if len(fields) > 1 else ''
        if 'None' in preq:
            preq = ''
        courses[course] = [preq]
    if use_next_line:
        # NOTE(review): unlike the CSE parser, the flag is only cleared at the
        # next course heading, so every line up to that heading is appended
        # here. Confirm this is intended for multi-line prerequisite lists.
        preq2 = line.split(': ')[0].strip().replace(' ', '')
        if preq2:  # in case there is no prereq on this line
            preq = preq + ',' + preq2
        courses[course] = [preq]
    if 'Prerequisites' in line:
        use_next_line = True

net = pd.DataFrame.from_dict(courses, orient='index', columns=['Prerequisites']).reset_index()
net.columns = ['Course', 'Prerequisites']
net

In [None]:
# Wherever NET2030 appears in a prerequisite string, list NET2040-NET2070
# right after it as well. The search text is a plain literal.
net_2030_group = 'NET2030,NET2040,NET2050,NET2060,NET2070'
net['Prerequisites'] = net['Prerequisites'].str.replace('NET2030', net_2030_group, regex=False)

In [None]:
# Rows whose prerequisite string mentions the current course type.
net.loc[net['Prerequisites'].str.contains(course_type)]

In [None]:
# Save the parsed table as <course_type>.csv without the index column.
net.to_csv(f'{course_type}.csv', index=False)

## Read from NET.csv

In [None]:
# Empty prerequisite cells are read back as NaN; replace them with '' so the
# graph cell's `!= ''` check sees them as empty.
net = pd.read_csv('NET.csv').fillna('')

In [None]:
import graphviz
dot = graphviz.Digraph(format='png')  # or 'pdf' or 'svg'

# Node shape is chosen from the fourth character of the course number.
level_shapes = {'3': 'diamond', '2': 'ellipse'}

for row in net.itertuples():
    course_number = row.Course.split(':')[0]
    if '950' in course_number:
        # Course numbers containing '950' are left out of the graph.
        continue

    # Slices instead of indexes so an unexpectedly short number cannot raise.
    shape = level_shapes.get(course_number[3:4], 'box')
    # A '9' as the fifth character gets a red outline.
    color = 'red' if course_number[4:5] == '9' else 'black'
    dot.node(course_number, shape=shape, color=color)

    prerequisites = row.Prerequisites
    # An empty CSV cell is read back as NaN (a float), so only real strings
    # are split; this replaces the blanket try/except.
    if isinstance(prerequisites, str):
        for preq in prerequisites.split(','):
            if preq:  # '' or a stray comma must not create an unnamed node
                dot.edge(preq, course_number)
dot

In [None]:
# render the graph as a png
import os

# render() writes the DOT source to file_string and the image to
# file_string + '.png'; cleanup=True has graphviz delete the source file
# after a successful render, so no manual os.remove is needed.
file_string = '../images/NET'
dot.render(file_string, cleanup=True)

# ELT Courses

In [None]:
import PyPDF2
import pandas as pd

course_type = 'ELT'

# Pull the text out of every page. The context manager closes the PDF even if
# extraction fails part-way through.
with open('ElectroTechnologiesSummaries.pdf', 'rb') as pdfFileObj:
    pdfReader = PyPDF2.PdfReader(pdfFileObj)
    n_pages = len(pdfReader.pages)
    text = ''.join(pdfReader.pages[n].extract_text() for n in range(n_pages))

# Maps "CODE: TITLE" -> [comma-separated prerequisite codes].
courses = {}
course = None           # most recent course heading seen
use_next_line = False   # True while lines are being appended as prerequisites

for line in text.splitlines():
    if line.strip().startswith(course_type):
        course = line.replace('  ', ' ').strip()
        use_next_line = False
    if '©' in line and course_type in line and ':' in line:
        # A page footer can carry the heading: keep the last word before the
        # first ':' (the course code) and the text up to the next ':'.
        head, title = line.split(':')[:2]
        course = head.strip().split(' ')[-1] + ':' + title.strip()
        use_next_line = False
    if course is None:
        # Nothing to attach prerequisites to until a heading has been seen.
        continue
    if 'Prerequ' in line:
        fields = line.split(':')
        # A line without ':' is treated as having no prerequisite.
        preq = fields[1].strip().replace(' ', '') if len(fields) > 1 else ''
        if 'None' in preq:
            preq = ''
        courses[course] = [preq]
    if use_next_line:
        # NOTE(review): the flag is only cleared at the next course heading,
        # so every line up to that heading is appended here. Confirm this is
        # intended for multi-line prerequisite lists.
        preq2 = line.split(': ')[0].strip().replace(' ', '')
        if preq2:  # in case there is no prereq on this line
            preq = preq + ',' + preq2
        courses[course] = [preq]
    if 'Prerequisites' in line:
        use_next_line = True

elt = pd.DataFrame.from_dict(courses, orient='index', columns=['Prerequisites']).reset_index()
elt.columns = ['Course', 'Prerequisites']
elt

In [None]:
# Save the parsed table as <course_type>.csv without the index column.
elt.to_csv(f'{course_type}.csv', index=False)

## Read from ELT.csv

In [None]:
import pandas as pd
# Empty prerequisite cells are read back as NaN; replace them with '' so the
# graph cell's `!= ''` check sees them as empty.
elt = pd.read_csv('ELT.csv').fillna('')

In [None]:
recommended = ['1010','1130','1140','2010','2140','2160','2240','2170','3170','3150','3180','3190','3200','1910','2910','2920','3910','3920']
design_courses = ['DES1020','DES1040','DES2045']

import graphviz
dot = graphviz.Digraph(format='png')  # or 'pdf' or 'svg'

# Node shape is chosen from the fourth character of the course number.
level_shapes = {'3': 'diamond', '2': 'ellipse'}

# Design courses: grey labels, chained in list order. previous_course starts
# as None on every run, so re-running the cell cannot draw an edge from a
# value left over from the previous run.
previous_course = None
for course_number in design_courses:
    shape = level_shapes.get(course_number[3:4], 'box')
    dot.node(course_number, shape=shape, color='black', fontcolor='grey')
    if previous_course is not None:
        dot.edge(previous_course, course_number)
    previous_course = course_number

for row in elt.itertuples():
    course_number = row.Course.split(':')[0]
    if '950' in course_number:
        # Course numbers containing '950' are left out of the graph.
        continue

    # Only recommended courses get an explicitly styled node.
    if course_number[3:] in recommended:
        shape = level_shapes.get(course_number[3:4], 'box')
        # A '9' as the fifth character gets a red outline.
        color = 'red' if course_number[4:5] == '9' else 'black'
        dot.node(course_number, shape=shape, color=color, fontcolor='black')

    # NOTE(review): as in the original, edges are added even when the course
    # itself is not recommended (only the prerequisite is checked). Confirm
    # whether this should sit inside the `recommended` branch above.
    prerequisites = row.Prerequisites
    # An empty CSV cell is read back as NaN (a float), so only real strings
    # are split; this replaces the blanket try/except.
    if isinstance(prerequisites, str):
        for preq in prerequisites.split(','):
            if preq[3:] in recommended:
                dot.edge(preq, course_number)
dot

In [None]:
# render the graph as a png
import os

# render() writes the DOT source to file_string and the image to
# file_string + '.png'; cleanup=True has graphviz delete the source file
# after a successful render, so no manual os.remove is needed.
file_string = '../images/ELT'
dot.render(file_string, cleanup=True)

# Combining All Together

Download CSV files from PASI

In [None]:
import pandas as pd
from time import sleep
students_df = pd.read_excel('../../../StudentCoursesAndMarks/studentRosterReport.xlsx')
student_id_list = students_df['Id'].tolist()

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
driver = webdriver.Chrome()
# Open the site and pause so the user can log in by hand before continuing.
driver.get('https://extranet.education.alberta.ca/PASI/PASIprep/')
input("Press Enter to continue...")

for student_id in student_id_list:
    url = 'https://extranet.education.alberta.ca/PASI/PASIprep/view-student/'+str(student_id) + '?left=Courses%20%26%20Marks'
    driver.get(url)
    sleep(7)  # fixed wait for the page to finish loading
    try:
        # Click the first <span> whose markup mentions "Export".
        for span in driver.find_elements(By.TAG_NAME, 'span'):
            # get_attribute may return None; treat that as empty markup.
            if 'Export' in (span.get_attribute('innerHTML') or ''):
                span.click()
                break
        else:
            # The loop finished without finding an Export control.
            print(student_id, 'not found')
    except WebDriverException as err:
        # Browser-side failure (stale element, click intercepted, ...):
        # report it and carry on with the next student.
        print(student_id, 'export failed:', err)

In [None]:
# Read each student's exported CSV, tag its rows with the student id, and
# stack everything into one table.
frames = []
for student_id in student_id_list:
    filename = '../../../StudentCoursesAndMarks/' + str(student_id)+' - CoursesAndMarks.csv'
    try:
        new_df = pd.read_csv(filename)
    except FileNotFoundError:
        print(filename, 'not found')
        continue
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        # The file exists but is not a usable CSV; say so instead of
        # reporting it as missing.
        print(filename, 'could not be read:', err)
        continue
    new_df['StudentId'] = student_id
    frames.append(new_df)

# Concatenate once at the end; pd.concat rejects an empty list, so fall back
# to an empty frame when nothing was loaded.
student_marks = pd.concat(frames) if frames else pd.DataFrame()
student_marks

In [None]:
import pandas as pd

# Load the three parsed tables; empty prerequisite cells become ''.
c = pd.read_csv('CSE.csv').fillna('')
n = pd.read_csv('NET.csv').fillna('')
e = pd.read_csv('ELT.csv').fillna('')

# Keep only the ELT courses whose four-digit number is in `recommended`
# (the first seven characters of 'Course' are the course code).
recommended = ['1010','1130','1140','1910','2010','2140','2160','2240','2170','2910','2920','3170','3150','3180','3190','3200','3910','3920']
elt_recommended_list = ['ELT' + x for x in recommended]
e = e[e['Course'].str[:7].isin(elt_recommended_list)]

# Hand-written design courses: "CODE: TITLE" -> prerequisite code.
design_courses = {'DES1020: THE DESIGN PROCESS':'','DES1040: 3-D DESIGN 1':'DES1020','DES2045: 3-D DESIGN 2':'DES1040'}
d = pd.DataFrame(list(design_courses.items()), columns=['Course', 'Prerequisites'])

df = pd.concat([c, e, n, d], ignore_index=True)

# Split "CODE: TITLE" once, at the first ':' only, so a title that itself
# contains a colon is kept whole.
code_and_name = df['Course'].str.split(':', n=1)
df['Course Number'] = code_and_name.str[0].str.strip()
df['Course Name'] = code_and_name.str[1].str.strip()
df

In [None]:
# Write the combined course table to all_courses.csv (index column omitted);
# the visualisation cells read this file back.
df.to_csv('all_courses.csv', index=False)

In [None]:
import pandas as pd
import graphviz
import os
from PIL import Image

# Empty prerequisite cells are read back as NaN; replace them with '' so the
# `!= ''` check in coursesVisualized sees them as empty.
df = pd.read_csv('all_courses.csv').fillna('')

def coursesVisualized(filtered_df):
    """Build a prerequisite graph for the courses in *filtered_df*.

    Each row needs a ``Course`` string of the form "CODE: TITLE" and a
    ``Prerequisites`` value holding comma-separated course codes (or an
    empty string / NaN when there are none). One node is added per row and
    one edge per prerequisite, pointing from the prerequisite to the course.

    Returns the ``graphviz.Digraph``; nothing is rendered here.
    """
    # Node shape is chosen from the fourth character of the course number.
    level_shapes = {'3': 'diamond', '2': 'ellipse'}

    dot = graphviz.Digraph(format='png')  # or 'pdf' or 'svg'
    for row in filtered_df.itertuples():
        course_number = row.Course.split(':')[0]

        # Slice instead of index so a short course number cannot raise.
        shape = level_shapes.get(course_number[3:4], 'box')
        dot.node(course_number, shape=shape, color='black', fontcolor='black')

        prerequisites = row.Prerequisites
        # An empty CSV cell is read back as NaN (a float), so only real
        # strings are split; this replaces the blanket try/except.
        if isinstance(prerequisites, str):
            for preq in prerequisites.split(','):
                if preq:  # '' or a stray comma must not create an unnamed node
                    dot.edge(preq, course_number)
    return dot


# Draw four graphs (CSE, DES+ELT, first NET set, remaining NET courses) and
# render them to images/0.png .. images/3.png.
course_numbers = df['Course Number']

dot0 = coursesVisualized(df[course_numbers.str.contains('CSE')])
dot0.render('images/0')

des_or_elt = course_numbers.str.contains('DES') | course_numbers.str.contains('ELT')
dot1 = coursesVisualized(df[des_or_elt].sort_values(by='Course Number'))
dot1.render('images/1')

net_set_1 = ['NET2030', 'NET2040', 'NET2050', 'NET2060', 'NET2070', 'NET3050', 'NET3060', 'NET3070', 'NET3080', 'NET3090']
in_net_set_1 = course_numbers.isin(net_set_1)
dot2 = coursesVisualized(df[in_net_set_1])
dot3 = coursesVisualized(df[course_numbers.str.contains('NET') & ~in_net_set_1])
dot2.render('images/2')
dot3.render('images/3')


# Stack the four PNGs top to bottom on a white canvas as wide as the widest.
images = [Image.open('images/'+str(x)+'.png') for x in range(4)]
widths = tuple(picture.size[0] for picture in images)
heights = tuple(picture.size[1] for picture in images)
max_width = max(widths)
total_height = sum(heights)
new_im = Image.new('RGB', (max_width, total_height), color = (255, 255, 255))
y_offset = 0
for im, height in zip(images, heights):
    new_im.paste(im, (0,y_offset))
    y_offset += height
new_im.save('images/combined.png')

# Remove the per-graph files: the DOT source (no extension) and its PNG.
for x in range(4):
    for leftover in ('images/'+str(x), 'images/'+str(x)+'.png'):
        os.remove(leftover)

## Create an image for each student

### This works

In [None]:
import pandas as pd
import graphviz
import os
from PIL import Image

# Empty prerequisite cells are read back as NaN; replace them with '' so the
# `!= ''` check in coursesVisualized sees them as empty.
df = pd.read_csv('all_courses.csv').fillna('')

def coursesVisualized(filtered_df, completed_courses):
    """Build and render a prerequisite graph, marking completed courses.

    Each row of *filtered_df* needs a ``Course`` string of the form
    "CODE: TITLE" and a ``Prerequisites`` value holding comma-separated
    course codes (or an empty string / NaN when there are none). Courses
    whose code is in *completed_courses* are drawn in green; all others in
    black.

    Side effect: the graph is rendered to ``images/<k>.png``, where ``k`` is
    the number of entries in ``images/`` at the time of the call.

    Returns the ``graphviz.Digraph``.
    """
    # Node shape is chosen from the fourth character of the course number.
    level_shapes = {'3': 'diamond', '2': 'ellipse'}

    dot = graphviz.Digraph(format='png')  # or 'pdf' or 'svg'
    for row in filtered_df.itertuples():
        course_number = row.Course.split(':')[0]

        # Slice instead of index so a short course number cannot raise.
        shape = level_shapes.get(course_number[3:4], 'box')
        color = fontcolor = 'green' if course_number in completed_courses else 'black'
        dot.node(course_number, shape=shape, color=color, fontcolor=fontcolor)

        prerequisites = row.Prerequisites
        # An empty CSV cell is read back as NaN (a float), so only real
        # strings are split; this replaces the blanket try/except.
        if isinstance(prerequisites, str):
            for preq in prerequisites.split(','):
                if preq:  # '' or a stray comma must not create an unnamed node
                    dot.edge(preq, course_number)

    # Name the output after the current number of entries in images/.
    file_count = len(os.listdir('images/'))
    file_string = 'images/' + str(file_count)
    # cleanup=True deletes the intermediate DOT source after rendering, so
    # only the .png is left behind.
    dot.render(file_string, cleanup=True)
    return dot

def make_image(student_id, completed_courses):
    """Render the four course graphs for one student and stack them.

    The combined picture is saved as ``student_images/<student_id>.png`` and
    the per-graph PNGs in ``images/`` are deleted afterwards.

    NOTE(review): the number of images is taken from the number of entries
    in ``images/``, which appears to assume the directory holds nothing but
    0.png, 1.png, ... when this runs. Confirm before relying on it.
    """
    course_numbers = df['Course Number']
    net_set_1 = ['NET2030', 'NET2040', 'NET2050', 'NET2060', 'NET2070', 'NET3050', 'NET3060', 'NET3070', 'NET3080', 'NET3090']
    in_net_set_1 = course_numbers.isin(net_set_1)

    # Same four selections, in the same order: CSE, DES+ELT, NET set 1, other NET.
    selections = [
        df[course_numbers.str.contains('CSE')],
        df[course_numbers.str.contains('DES') | course_numbers.str.contains('ELT')].sort_values(by='Course Number'),
        df[in_net_set_1],
        df[course_numbers.str.contains('NET') & ~in_net_set_1],
    ]
    for selection in selections:
        coursesVisualized(selection, completed_courses)

    number_of_images = len(os.listdir('images/'))
    png_paths = ['images/'+str(x)+'.png' for x in range(number_of_images)]

    # Stack the PNGs top to bottom on a white canvas as wide as the widest.
    images = [Image.open(path) for path in png_paths]
    max_width = max(picture.size[0] for picture in images)
    heights = [picture.size[1] for picture in images]
    new_im = Image.new('RGB', (max_width, sum(heights)), color = (255, 255, 255))
    y_offset = 0
    for picture, height in zip(images, heights):
        new_im.paste(picture, (0,y_offset))
        y_offset += height
    new_im.save('student_images/'+str(student_id)+'.png')

    # Delete the temporary per-graph images.
    for path in png_paths:
        os.remove(path)

students_df = pd.read_excel('../../../StudentCoursesAndMarks/studentRosterReport.xlsx')
student_id_list = students_df['Id'].tolist()

# Read each student's exported CSV, tag its rows with the student id, and
# stack everything into one table.
frames = []
for student_id in student_id_list:
    filename = '../../../StudentCoursesAndMarks/' + str(student_id)+' - CoursesAndMarks.csv'
    try:
        new_df = pd.read_csv(filename)
    except FileNotFoundError:
        print(filename, 'not found')
        continue
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        # The file exists but is not a usable CSV; say so instead of
        # reporting it as missing.
        print(filename, 'could not be read:', err)
        continue
    new_df['StudentId'] = student_id
    frames.append(new_df)

if frames:
    student_marks = pd.concat(frames)
else:
    # Keep the two columns the loop below reads, so the lookups yield empty
    # results instead of raising KeyError when no marks file was loaded.
    student_marks = pd.DataFrame(columns=['StudentId', ' Code'])

# One image per distinct student name; the file is named after the student.
# When a name occurs more than once, the first matching Id is used.
for student_name in students_df['Name'].unique():
    student_id = students_df[students_df['Name'] == student_name]['Id'].tolist()[0]
    completed_courses = student_marks[(student_marks['StudentId'] == student_id)][' Code'].unique().tolist()
    make_image(student_name, completed_courses)

print('done')

In [None]:
# Disabled experiment, kept as a string literal so it never executes: draws
# the ELT courses as an undirected graph with the 'neato' layout.
'''
#completed = ['CSE1010']
completed = []

import graphviz
dot = graphviz.Graph()

for row in elt.itertuples():
    course_number = row.Course
    if '950' not in course_number:
        if course_number in completed:
            color = 'red'
        else:
            color = 'black'
        if course_number[3] == '3':
            d = dot.node(course_number, shape='diamond',color=color)
            #d.attr('node', color='blue')
        else:
            d = dot.node(course_number, shape='box',color=color)
        
        if row.Prerequisites != '':
            for preq in row.Prerequisites.split(','):
                dot.edge(preq, course_number)

#dot.attr('graph', ratio='compress')
#dot.attr('graph', ratio='0.5')
#dot.attr('graph', size='80,80')
#dot.attr('graph', ratio='1.5')
#dot.attr('graph', layout='neato', defaultdist='2.0', overlap='false', splines='true', sep='+2.0', nodesep='0.5', ranksep='0.5', concentrate='true', size='8,8', ratio='0.5', orientation='landscape')
dot.attr('graph', layout='neato', pack='false', defaultdist='800')
#dot.attr('graph', orientation='landscape')
dot
'''

In [None]:
# Disabled helper, kept as a string literal so it never executes: walks the
# parent directory ("..") recursively and, for every .md file, prints the
# path, the 0-based line index and the line for each line containing 'goals'.
'''
import os
for root, dirs, files in os.walk(".."):
    for filename in files:
        if filename.endswith(".md"):
            with open(os.path.join(root, filename), encoding='latin1') as f:
                for i, line in enumerate(f):
                    if 'goals' in line:
                        print(os.path.join(root, filename), i, line)
'''