In [453]:
import pandas as pd
import math
import io
from datetime import datetime
from alive_progress import alive_bar


### Read Data

In [454]:
### Note: if using Google Colab, this must be set to the filename of the 
# file you will load. This is a quirk of Colab. 
filename = "2020-2022 Training Records.xlsx"

try:
  import google.colab
  from google.colab import files
  IN_COLAB = True
except:
  IN_COLAB = False

if IN_COLAB:
  print("Colab")
  uploaded = files.upload()
  source = pd.read_excel(io.BytesIO(uploaded.get(filename)))
else:
  print("Not Colab")
  source = pd.read_excel(filename)

Not Colab


In [455]:
FILTER = "RANDOM"
SIZE = 20

if FILTER == "RANDOM":
    source = source.sample(SIZE)
elif FILTER == "HEAD":
    source = source.head(SIZE)
elif FILTER == "TAIL":
    source = source.tail(SIZE)


In [456]:
source.head(15)

Unnamed: 0,Employee Number,Employee Name,Waived Notes,Employee email,Job Title,Department Code,Department Name,MANAGER,Activity Type,OFFERING,Activity Name,COURSE_VERSION,Score,Attendance Status,Pass/Fail,Completion Status,Completion Date,Expiration Date,Note
145753,61803,"VISNESKIE, Kevin",,kevin.visneskie@cnl.ca,Millwright,8270.0,CRL DECOMM PROGRAM EXECUTION,"PYOLI, Jason",ILT Course,RP-Misc-39,Air Supplied Suit Operation,,,Attended,Pass,Completed,2021-03-04,NaT,
72209,90339,"ADIBE, Dani",,danielle.adibe@cnl.ca,Radiation Surveyor,8182.0,RP OPERATIONS SUPPORT,"EBSARY, Jamie",ILT Class,OSH-1007,Virtual Offering of Asbestos Module 6E,,,Attended,Pass,Completed,2021-01-12,NaT,
129982,54477,"KUEHL, Chental",,Chental.Kuehl@cnl.ca,Office Assistant III,8335.0,FIRE PROTECTION PROGRAM,"BILTON, Adrian",Versional,EMP-1037-Online,Emergency Procedure Refresher,,100.0,Attended,Pass,Completed,2022-01-13,2023-01-14,
127016,51772,"DEMERS, Nancy",,Nancy.Demers@cnl.ca,Senior Contracts Officer,467.0,CORPORATE MATERIALS & SERVICES,"SIMPSON, Candice",ILT Class,IT-1024,SharePoint Online - General User Training (Ins...,,,Attended,Pass,Completed,2021-01-20,NaT,
5098,20945,"STOLBERG, Lorne",,Lorne.Stolberg@cnl.ca,Chemical Scientist,3001.0,HYDROGEN TECHNOLOGIES,"RYLAND, Donald",Versional,OSH-9045-Online,Hand Safety,,82.0,Attended,Fail,Completed,2022-03-29,NaT,
70715,90453,"CONROY, Ryan",,ryan.conroy@cnl.ca,Project Control Coordinator,8462.0,CAPITAL PROJECT PROGRAM,"HADLEY, Heather",Versional,EMP-1037-Online,Emergency Procedure Refresher,,100.0,Attended,Pass,Completed,2022-04-05,2023-04-06,
110184,92483,"SATTAR, Abdul",,abdul.sattar@cnl.ca,Project Control Coordinator,8632.0,INTEGRATED SERVICES,"INNES, Kellie",Versional,NMMS-1011-Online,Nuclear Criticality Safety Program Awareness,,62.5,Attended,Fail,Completed,2021-11-11,NaT,
146177,61936,"DROUIN, Marc",,marc.drouin@cnl.ca,Security Officer - NRF,8241.0,CRL PHYSICAL SECURITY OPERATNS,"FROMENT, Nathan",ILT Course,SECU-1058,Nuclear Response Force (NRF) Firearms Theory,,,Attended,Pass,Completed,2022-02-28,NaT,
10490,52695,"SERVAIS, Shawn",,Shawn.Servais@cnl.ca,Mechanical System Specialist,6902.0,ENERGY & INFRASTRUCTURE MANAGE,"RANGER, Scott",Course,SECU-2008-Online (REV 1.0),Security Awareness Refresher (1.0),1.0,,Attended,,Completed,2021-04-22,NaT,
35969,57923,"SURNOSKIE, Dennis",,Dennis.Surnoskie@cnl.ca,Preventative Maintenance Secti,8589.0,OPERATIONS TECHNICAL SERVICES,"MELDRUM, David",Versional,OSH-9045-Online,Hand Safety,,100.0,Attended,Pass,Completed,2021-08-09,NaT,


In [457]:
source[["Completion Status", "Attendance Status", "Pass/Fail"]].groupby(["Pass/Fail","Completion Status", "Attendance Status"], dropna=False, as_index=False).size()

Unnamed: 0,Pass/Fail,Completion Status,Attendance Status,size
0,Fail,Completed,Attended,3
1,Pass,Completed,Attended,12
2,,Completed,Attended,3
3,,,Cancelled,2


### Read SABA Data

#### Course Data

In [458]:
courses = pd.read_csv("saba_courses.csv")

#subset = courses[["Course ID", "Version"]]
#tuples = [tuple(x) for x in subset.to_numpy()]

#course_id = set(tuples)
course_id = courses[["Course ID", "Version"]]
ids_only = pd.Series(courses["Course ID"].unique())
#course_id = set((courses["Course ID"], courses["Version"]))
#courses.head()
len(course_id)
#course_id

1457

In [459]:
("IT-9050", 1.0) in course_id

False

#### Employee Data

In [460]:
people = pd.read_csv("saba_people.csv")
people_un = set(people["Username"].astype("int64", errors="ignore").unique())
#people.head()
len(people_un)

3869

In [461]:
"91003" in people_un

True

### Process Data

In [462]:
def procStatus(pass_fail, comp_status, att_status):
    status = -1
    ## Fail
    if pass_fail == "Fail":
        status = 400
    elif (pass_fail == "Pass") & (comp_status == "Not Completed"):
        status = 400
    #elif (math.isnan(pass_fail)) & (math.isnan(comp_status)) & ((att_status == "No-show") | (att_status == "Replaced")):
    #    status = 400
    ## Pass
    elif (pass_fail == "Pass") & (comp_status != "Not Completed"):
        status = 200
    #elif (math.isnan(pass_fail)) & (comp_status == "Completed"):
    #    status = 200
    #elif (math.isnan(pass_fail)) & (math.isnan(comp_status)) & (att_status == "Attended"):
    #    status = 200
    return status

def checkPerson(unList, person):
    is_in = False
    if str(person) in unList:
        is_in = True
    return is_in

def checkCourse(courseList, courseID, version):
    is_in = False
    replace_version = version
    for _, row in courseList.iterrows():
        #print(courseID, version, row["Course ID"], row["Version"])
        if (courseID == row["Course ID"]):
            print("Course Match", courseID, version, row["Course ID"], row["Version"])
            replace_version = row["Version"]
            is_in = True
            if (version == row["Version"]):
                print("Version Match")
                is_in = True
                #break
    return is_in, replace_version

In [463]:
# Helpers

class TranscriptRow:
    def __init__(self, username, courseID, courseVersion, compDate, status, courseTitle, inCourse, inPerson):
        self.username = username
        self.courseID = courseID
        self.courseVersion = courseVersion
        self.compDate = compDate
        #self.status = self.makeStatus(status)
        self.status = status
        self.courseTitle = courseTitle
        self.inCourse = inCourse
        self.inPerson = inPerson
    
    def makeStatus(self, status):
        newStat = -1
        if status == "Completed":
            newStat = 200
        return newStat

In [464]:
USERNAME = "Employee Number"
COURSEID = "OFFERING"
COURSEVERSION = "COURSE_VERSION"
COMPDATE = "Completion Date"
STATUS = "Completion Status"
COURSETITLE = "Activity Name"
PASSFAIL = "Pass/Fail"
ATTSTATUS = "Attendance Status"

In [465]:
## Read transcripts in

transcripts = []
for index, trans in source.iterrows():
    username = trans[USERNAME]
    courseID = trans[COURSEID]
    course_ver = trans[COURSEVERSION]
    comp_date = trans[COMPDATE].strftime('%Y-%m-%d')
    status = procStatus(trans[PASSFAIL], trans[STATUS], trans[ATTSTATUS])
    course_title = trans[COURSETITLE]
    in_course, rep_ver = checkCourse(course_id, trans[COURSEID], trans[COURSEVERSION])
    in_pers = checkPerson(people_un, trans[USERNAME])

    #Update Version
    course_ver = rep_ver
    
    trans_entry = TranscriptRow(username, courseID, course_ver, comp_date, status, course_title, in_course, in_pers)
    transcripts.append(trans_entry)
    #print(courseID, type(courseID), courseID in ids_only.values)

print(len(transcripts))

Course Match OSH-1007 nan OSH-1007 1.0
Course Match IT-1024 nan IT-1024 1.0
Course Match NFO-508000-M-02 nan NFO-508000-M-02 1.0
Course Match WL-306 nan WL-306 1.0
Course Match TD-2000-WL nan TD-2000-WL 1.0
Course Match PMO-9013 nan PMO-9013 1.0
Course Match RP-G3-FCO nan RP-G3-FCO 1.0
Course Match WL-120 nan WL-120 1.0
20


### Print to Output

In [466]:
def write_row(df_output, trans, test=False, subset="ALL"):
    #df = df_output
    
    #inCourse = trans.inCourse
    #inPerson = trans.inPerson
    #if subset == "BOTH":
        #if ()

    tmp_dict = {
        "LEARNER": trans.username,
        "COURSE": trans.courseID,
        "COURSE_VERSION": trans.courseVersion,
        "COMPLETION_DATE": trans.compDate,
        "COMPLETION_STATUS": trans.status,
        "CREATE_ADHOC_COURSE": "FALSE",
        "COURSE_TITLE": trans.courseTitle
    }
    if test == True:
        tmp_dict = {
            "LEARNER": trans.username,
            "COURSE": trans.courseID,
            "COURSE_VERSION": trans.courseVersion,
            "COMPLETION_DATE": trans.compDate,
            "COMPLETION_STATUS": trans.status,
            "CREATE_ADHOC_COURSE": "FALSE",
            "COURSE_TITLE": trans.courseTitle,
            "IN_COURSE": trans.inCourse,
            "IN_PERSON": trans.inPerson
        }
    #df = df.append(tmp_dict, ignore_index=True)
    df = df_output.append(tmp_dict, ignore_index=True)
    return df
    

In [467]:
# Output Values
out_header = [
    "LEARNER",
    "COURSE",
    "COURSE_VERSION",
    "COMPLETION_DATE",
    "COMPLETION_STATUS",
    "CREATE_ADHOC_COURSE",
    "COURSE_TITLE"
]

In [468]:
out = pd.DataFrame(columns = out_header)
n = 0
with alive_bar(len(transcripts), force_tty=True) as bar:
    for ts in transcripts:
        out = write_row(out, ts, False)
        #n += 1
        bar()
out["ID"] = out.index
out.head(10).T

|████████████████████████████████████████| 20/20 [100%] in 0.1s (172.46/s)                                              


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
LEARNER,61803,90339,54477,51772,20945,90453,92483,61936,52695,57923
COURSE,RP-Misc-39,OSH-1007,EMP-1037-Online,IT-1024,OSH-9045-Online,EMP-1037-Online,NMMS-1011-Online,SECU-1058,SECU-2008-Online (REV 1.0),OSH-9045-Online
COURSE_VERSION,,1.0,,1.0,,,,,1.0,
COMPLETION_DATE,2021-03-04,2021-01-12,2022-01-13,2021-01-20,2022-03-29,2022-04-05,2021-11-11,2022-02-28,2021-04-22,2021-08-09
COMPLETION_STATUS,200,200,200,200,400,200,400,200,-1,200
CREATE_ADHOC_COURSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
COURSE_TITLE,Air Supplied Suit Operation,Virtual Offering of Asbestos Module 6E,Emergency Procedure Refresher,SharePoint Online - General User Training (Ins...,Hand Safety,Emergency Procedure Refresher,Nuclear Criticality Safety Program Awareness,Nuclear Response Force (NRF) Firearms Theory,Security Awareness Refresher (1.0),Hand Safety
ID,0,1,2,3,4,5,6,7,8,9


In [469]:
out.to_csv("out_transcript.csv", index=False)

### Write Failiures