In [513]:
import pandas as pd
import math

### State Variables and Defaults

In [514]:
# Input column names: headers looked up in the spreadsheet loaded into `source`.
CERT_NUMBER = "Cert_number"  # only appears in a commented-out groupby in the visible notebook
CERT_NAME = "Cert_name"
PATH = "Path"
PATH_RECERT = "Path_recert"
COURSE_CODE = "Course_code"
COURSE_TITLE = "Course_title"
COURSE_VERSION = "Course_version"
TARGET_DAYS = "Target_days"
EXPIRES_IN = "Expires_in"
CERT_DESC = "Cert_desc"
RECERT_DAYS = "Recert_days"
PAST_CREDIT = "Past_credit"

# Default values: fixed settings that write_cert copies into every output row.
Version = "V0"
Domain = "CNL Global"
Avail_from = "1900-01-01"
Status = "in effect"
Display_learn = True
Expiry_type = 1
# NOTE(review): the name looks like a typo for "Enforce_target"; nothing in the
# visible notebook reads it, so ENFORCE_TARGET_DATE is never filled in — confirm intent.
Eenforce_target = False
One_click = True  # NOTE(review): not read anywhere in the visible notebook
Mod_reg = True  # NOTE(review): not read anywhere in the visible notebook
Past_cred_pol = 1
Mod_name = "Main"
Mod_req = True


In [506]:
# Output columns, in the order they are created on the empty result frame.
out_header = [
    # certification fields
    "ID", "NAME", "VERSION", "DESCRIPTION", "DOMAIN",
    "AVAILABLE_FROM", "STATUS", "DISPLAY_TO_LEARNER",
    "TARGET_DAYS", "EXPIRY_TYPE", "EXPIRE_IN_DAYS", "NOTIFY_BEFORE_DAYS",
    "ENFORCE_TARGET_DATE", "ONE_CLICK_REG",
    # path and module fields
    "PATH_NAME", "MODULE_NAME", "NO_OF_MODULE_ELEMENTS", "MODULE_REQUIRED",
    # course fields
    "COURSE_ELEMENT", "COURSE_VERSION", "ELEMENT_SEQUENCE",
]

### Helper Classes

In [507]:
class Certification:
    """A certification's settings plus the paths that have been attached to it."""

    def __init__(self, cert_name, target_days, expires_days, description, recert, past_credit):
        """Store the certification fields and start with no paths.

        Note that the ``recert`` argument is kept under the attribute
        name ``recert_days``.
        """
        self.cert_name = cert_name
        self.target_days = target_days
        self.expires_days = expires_days
        self.description = description
        self.recert_days = recert
        self.past_credit = past_credit
        self.paths = list()

    def add_path(self, path):
        """Attach ``path`` after any paths added earlier."""
        self.paths.append(path)

    def print_info(self):
        """Print the name, path count, target, expiry and past-credit values, one per line."""
        summary = (
            ("Cert Name:", self.cert_name),
            ("Paths: ", len(self.paths)),
            ("Target: ", self.target_days),
            ("Expires: ", self.expires_days),
            ("Past Credit: ", self.past_credit),
        )
        for label, value in summary:
            print(label, value)

class Path:
    """A named path, its recertification flag, and the modules added to it."""

    def __init__(self, path_name, is_recert):
        """Record the name and flag; the module list starts empty."""
        self.path_name = path_name
        self.is_recert = is_recert
        self.modules = list()

    def add_module(self, module):
        """Append ``module`` to the end of this path's module list."""
        self.modules.append(module)

class Module:
    """A container for the courses that have been added to one module."""

    def __init__(self):
        """Start with an empty course list."""
        self.courses = list()

    def add_course(self, course):
        """Append ``course`` to the end of the course list."""
        self.courses.append(course)

    def print_info(self):
        """Print the course list as a single line."""
        print(self.courses)

class Course:
    """A single course entry identified by code, title and version."""

    def __init__(self, code, title, version):
        """Store the three course fields unchanged."""
        self.code = code
        self.title = title
        self.version = version

    def __repr__(self):
        """Show the fields instead of the default ``<Course object at 0x...>``.

        Lists of courses are printed directly (see ``Module.print_info``), and
        list printing uses each element's ``repr``.
        """
        return (
            f"Course(code={self.code!r}, title={self.title!r}, "
            f"version={self.version!r})"
        )

def MergeDict(dict1, dict2):
    """Return a new dict holding dict1's entries overlaid with dict2's.

    Neither input is modified; on a shared key the value from dict2 wins.
    """
    merged = dict1.copy()
    merged.update(dict2)
    return merged

### Load Input File

In [508]:
source = pd.read_excel("Input.xlsx")
# A blank Path_recert cell is read as NaN, and NaN converts to True under
# astype("bool"). Mask missing cells to False so that only rows explicitly
# marked as recertification are flagged.
source[PATH_RECERT] = source[PATH_RECERT].notna() & source[PATH_RECERT].astype("bool")
source.head()

Unnamed: 0,Cert_number,Cert_name,Target_days,Expires_in,Cert_desc,Recert_days,Past_credit,Unnamed: 7,Path,Path_recert,Unnamed: 10,Course_code,Course_title,Course_version
0,1,zzz Aerial Work Platform Practical - Articulat...,42,1095,This certification qualifies workers to operat...,90,365.0,,Main,False,,OSH-1003-Online,Aerial Work Platform Theory (R3),1.6
1,1,zzz Aerial Work Platform Practical - Articulat...,42,1095,This certification qualifies workers to operat...,90,365.0,,Main,False,,OSH-3003-A,Aerial Work Platform Practical - Articulating ...,1.0
2,1,zzz Aerial Work Platform Practical - Articulat...,42,1095,This certification qualifies workers to operat...,90,365.0,,Recertification,True,,OSH-1003-Online,Aerial Work Platform Theory (R3),1.6
3,1,zzz Aerial Work Platform Practical - Articulat...,42,1095,This certification qualifies workers to operat...,90,365.0,,Recertification,True,,OSH-3003-A,Aerial Work Platform Practical - Articulating ...,1.0
4,2,zzz Aerial Work Platform Practical - Articulat...,42,1095,This certification qualifies workers to operat...,90,,,Main,False,,OSH-1003-Online,Aerial Work Platform Theory (R3),1.6


### Set Variables

In [509]:
## Split the sheet into one DataFrame per certification.
# NOTE(review): rows are grouped by certificate name, not by Cert_number, so rows
# sharing a name land in the same certification even if their numbers differ.
# A scalar key (not a one-element list) keeps the group keys plain values.
df_list = [d for _, d in source.groupby(CERT_NAME)]

certs = []
## For each cert, generate paths
for cert in df_list:
    # Certification-level fields are read from the first row of the group.
    cert_name = cert[CERT_NAME].iloc[0]
    cert_target = cert[TARGET_DAYS].iloc[0]
    cert_exp = cert[EXPIRES_IN].iloc[0]
    cert_des = cert[CERT_DESC].iloc[0]
    cert_recert = cert[RECERT_DAYS].iloc[0]
    cert_pastCred = cert[PAST_CREDIT].iloc[0]
    # A blank past-credit value falls back to the expiry period.
    # pd.isna also accepts None / pd.NA, which math.isnan rejects with TypeError.
    if pd.isna(cert_pastCred):
        cert_pastCred = cert_exp
    curr_cert = Certification(cert_name, cert_target, cert_exp, cert_des, cert_recert, cert_pastCred)

    ## Read paths for the cert
    path_list = [d for _, d in cert.groupby(PATH)]
    ## For each path, generate list of courses and module
    for path in path_list:
        path_name = path[PATH].iloc[0]
        # Always store a plain Python bool, whatever scalar type the column holds.
        path_recert = bool(path[PATH_RECERT].iloc[0])
        curr_path = Path(path_name, path_recert)

        # Every course row of the path goes into a single module.
        mod = Module()
        for _, row in path.iterrows():
            course_code = row[COURSE_CODE]
            course_title = row[COURSE_TITLE]
            course_version = row[COURSE_VERSION]
            tmp_course = Course(course_code, course_title, course_version)
            mod.add_course(tmp_course)
        curr_path.add_module(mod)
        curr_cert.add_path(curr_path)
    certs.append(curr_cert)

# Print a short summary of every certification as a sanity check.
for cert in certs:
    cert.print_info()


Cert Name: zzz Aerial Work Platform Practical - Articulating Boom 135 ft
Paths:  2
Target:  42
Expires:  1095
Past Credit:  365.0
Cert Name: zzz Aerial Work Platform Practical - Articulating Boom 60 ft or Less
Paths:  2
Target:  42
Expires:  1095
Past Credit:  1095


### Write Output

In [510]:
def write_cert(df_output, cert):
    """Return ``df_output`` extended by one row per course of ``cert``.

    Each row repeats the certification fields, then the fields of the path
    and module the course belongs to, then the course code and version.
    Only the keys set below are filled; any other column already present in
    ``df_output`` is left missing on the new rows, and keys that are not yet
    columns of ``df_output`` are added as new columns after the existing ones.

    Parameters:
        df_output: DataFrame holding the rows written so far (not modified).
        cert: object exposing ``cert_name``, ``description``, ``target_days``,
            ``past_credit``, ``expires_days``, ``recert_days`` and ``paths``;
            each path exposes ``path_name``, ``is_recert`` and ``modules``,
            each module ``courses``, each course ``code`` and ``version``.

    Returns:
        A new DataFrame with the rows appended and a fresh 0..n-1 index, or
        ``df_output`` itself when ``cert`` contributes no course rows.
    """
    cert_fields = {
        "NAME": cert.cert_name,
        "VERSION": Version,
        "DESCRIPTION": cert.description,
        "DOMAIN": Domain,
        "AVAILABLE_FROM": Avail_from,
        "STATUS": Status,
        "DISPLAY_TO_LEARNER": Display_learn,
        "TARGET_DAYS": cert.target_days,
        "PAST_CREDIT_DAYS": cert.past_credit,
        "PAST_CREDIT_POLICY": Past_cred_pol,
        "EXPIRY_TYPE": Expiry_type,
        "EXPIRE_IN_DAYS": cert.expires_days,
        "RECERTIFY_IN_DAYS": cert.recert_days
    }

    # Collect all rows first and concatenate once: DataFrame.append was removed
    # in pandas 2.0, and growing a frame row by row copies it on every step.
    records = []
    for path in cert.paths:
        path_fields = MergeDict(cert_fields, {
            "PATH_NAME": path.path_name,
            "IS_RECERT_PATH": bool(path.is_recert)
        })
        for mod in path.modules:
            mod_fields = MergeDict(path_fields, {
                "MODULE_NAME": Mod_name,
                "NO_OF_MODULE_ELEMENTS": len(mod.courses),
                "MODULE_REQUIRED": Mod_req
            })
            for course in mod.courses:
                records.append(MergeDict(mod_fields, {
                    "COURSE_ELEMENT": course.code,
                    "COURSE_VERSION": course.version
                }))

    if not records:
        return df_output

    # dtype=object keeps every value exactly as supplied (no int -> float
    # upcasting when rows from different certifications are combined).
    new_rows = pd.DataFrame(records, dtype=object)
    return pd.concat([df_output, new_rows], ignore_index=True)

In [511]:
# Start from an empty frame whose columns are the expected output header.
out = pd.DataFrame(columns=out_header)
# write_cert returns the extended frame rather than mutating its argument,
# so `out` has to be rebound on every iteration.
for cert in certs:
    out = write_cert(out, cert)
# Fill the ID column with the row index of the assembled frame.
out["ID"] = out.index
# Preview the first rows transposed, so that each column is shown as a row.
out.head(10).T

Unnamed: 0,0,1,2,3,4,5,6,7
ID,0,1,2,3,4,5,6,7
NAME,zzz Aerial Work Platform Practical - Articulat...,zzz Aerial Work Platform Practical - Articulat...,zzz Aerial Work Platform Practical - Articulat...,zzz Aerial Work Platform Practical - Articulat...,zzz Aerial Work Platform Practical - Articulat...,zzz Aerial Work Platform Practical - Articulat...,zzz Aerial Work Platform Practical - Articulat...,zzz Aerial Work Platform Practical - Articulat...
VERSION,V0,V0,V0,V0,V0,V0,V0,V0
DESCRIPTION,This certification qualifies workers to operat...,This certification qualifies workers to operat...,This certification qualifies workers to operat...,This certification qualifies workers to operat...,This certification qualifies workers to operat...,This certification qualifies workers to operat...,This certification qualifies workers to operat...,This certification qualifies workers to operat...
DOMAIN,CNL Global,CNL Global,CNL Global,CNL Global,CNL Global,CNL Global,CNL Global,CNL Global
AVAILABLE_FROM,1900-01-01,1900-01-01,1900-01-01,1900-01-01,1900-01-01,1900-01-01,1900-01-01,1900-01-01
STATUS,in effect,in effect,in effect,in effect,in effect,in effect,in effect,in effect
DISPLAY_TO_LEARNER,True,True,True,True,True,True,True,True
TARGET_DAYS,42,42,42,42,42,42,42,42
EXPIRY_TYPE,1,1,1,1,1,1,1,1


### Write to CSV

In [512]:
# Save the assembled table as output.csv; index=False leaves the DataFrame index out of the file.
out.to_csv("output.csv", index=False)