# Detailed Documentation for the Utils script

# Import the Dependencies
>```json```: Used to serialise the final course recommendations to a JSON string

>```re```: Necessary for using regular expressions

In [64]:
import json
import re

# Function Descriptions

## splitAlpha
    Separates course abbreviations and codes into separate lists

In [65]:
def splitAlpha(string: str) -> dict:
    """
    Separates course abbreviations and codes into separate lists

    Consecutive letters form one abbreviation and consecutive digits are cut
    into codes of at most three digits, so "CYB267270CSCI201" yields the
    abbreviations ["CYB", "CSCI"] and the codes ["267", "270", "201"].
    Any other character (space, comma, ...) only acts as a separator.

    :param string: The input string containing a combination of course abbreviations and codes
    :type string: str
    :return: A dictionary containing two lists: 'course' for course abbreviations and 'code' for numeric codes
    :rtype: dict

    """
    courses = []
    codes = []
    alpha = ""
    integer = ""

    for item in string:
        if item.isalpha():
            # A letter ends whatever code was being collected.
            if integer:
                codes.append(integer)
                integer = ""
            alpha += item
            continue

        # Anything that is not a letter ends the current abbreviation.
        if alpha:
            courses.append(alpha)
            alpha = ""

        if item.isdigit():
            # Codes are three digits long; a fourth digit starts the next one.
            if len(integer) >= 3:
                codes.append(integer)
                integer = ""
            integer += item
        elif integer:
            # Separators are never part of a code.
            codes.append(integer)
            integer = ""

    # Flush what is still pending. The last code is kept even when it repeats
    # an earlier one, so every abbreviation keeps its own code.
    if alpha:
        courses.append(alpha)
    if integer:
        codes.append(integer)

    return {"course": courses, "code": codes}

## Usage

In [66]:
# Two abbreviations ("CYB", "CSCI") mixed with three 3-digit codes in one string
inputString = "CYB267270CSCI201"
print(splitAlpha(inputString))

{'course': ['CYB', 'CSCI'], 'code': ['267', '270', '201']}


## extractCourseCode
    Utilizes regular expressions to process a string and extract course codes that adhere to the format used at St. Cloud State University

In [67]:
def extractCourseCode(text: str) -> list:
    """
    This function utilizes regular expressions to process a string and extract course codes
    that adhere to the format used at St. Cloud State University

    A course is a 2-4 letter prefix followed by a 3-digit number. The prefix may
    be written once for a whole list of numbers ("ASTR106,120,205") and may be
    separated from the first number by whitespace ("AHS 104,105", "TH   270").

    :param text: The string to be parsed
    :type text: str
    :return: A sorted list of unique course codes found in the text, e.g. "AHS104"
    :rtype: list
    """
    # A single course written without a space, e.g. "GEOG272".
    single_pattern = re.compile(r"\b[A-Za-z]{2,4}\d{3}\b")
    # One prefix shared by one or more numbers. "\s+" (rather than a single
    # whitespace character) also accepts short prefixes padded with several
    # spaces, such as "TH   270" or "FS   175,260,270".
    list_pattern = re.compile(
        r"\b[A-Za-z]{2,4}(?:\s+|\d{3}(?:,|\s))\d{3}(?:,\d{3})*\b"
    )
    separators = re.compile(r"[\s,]+")
    # After the separators are removed a match reads "<prefix><3n digits>".
    prefix_and_numbers = re.compile(r"([A-Za-z]+)((?:\d{3})+)")

    courselist = set()

    for pattern in (single_pattern, list_pattern):
        for match in pattern.findall(text):
            compact = separators.sub("", match)

            for prefix, numbers in prefix_and_numbers.findall(compact):
                # Cut the digit run into 3-digit numbers and pair each one
                # with the prefix it was listed under.
                for start in range(0, len(numbers), 3):
                    courselist.add(prefix + numbers[start:start + 3])

    return sorted(courselist)

## Usage

In [68]:
# Excerpt of a goal section: several numbers may share one department prefix,
# with or without a space after the prefix ("ASTR106,120,205", "AHS 104,105")
inputString = """GOAL 3: Natural and Physical Sciences
                     (2 courses or experiences)
     -R    Lab
           SELECT FROM:        ASTR106,120,205 BIOL101,102,103,104,107,152
                               CHEM151,160,207,210,211,307 AHS 104,105,
                               AHS 109,220,230,260 GEOG272 PHYS103,231,
                               PHYS232,234,235"""

print(extractCourseCode(inputString))

['AHS104', 'AHS105', 'AHS109', 'AHS220', 'AHS230', 'AHS260', 'ASTR106', 'ASTR120', 'ASTR205', 'BIOL101', 'BIOL102', 'BIOL103', 'BIOL104', 'BIOL107', 'BIOL152', 'CHEM151', 'CHEM160', 'CHEM207', 'CHEM210', 'CHEM211', 'CHEM307', 'GEOG272', 'PHYS103', 'PHYS231', 'PHYS232', 'PHYS234', 'PHYS235']


## extractGoalAreas
    Extracts goal areas and their associated courses from a user's Degree Audit using regular expressions

In [69]:
def extractGoalAreas(text: list) -> dict:
    """
    Extracts goal areas and their associated courses from a user's Degree Audit using regular expressions

    A goal starts at a header line such as "NO   GOAL 3: Natural and Physical
    Sciences": the first token is the progress status, the number is the goal
    area and the rest is its description. A following "(2 courses or
    experiences)" line completes the description and files the goal under its
    status. All other lines up to the next header are collected as the goal's
    course listing. Parsing stops at "UNIVERSITY REQUIREMENTS-DIVERSITY".

    :param text: The Degree Audit content read from a text file, one line per item
    :type text: list
    :return: A dictionary with 'Goal-Courses' (goal number -> list of course codes) and
             'Progress-Goal' (status -> list of {goal number: description})
    :rtype: dict
    """
    goal_header = re.compile(r"GOAL\s*(?P<goal_number>\d+):")
    requirement = re.compile(r"\(\d+ courses? or experience[s]?\)")

    current = None
    status = None
    description = ""
    sortGoalAreas = {"NO": [], "YES": [], "IP": []}
    goalCourses = {}

    for line in text:
        header = goal_header.search(line)

        if header:
            # Read the number from the regex group instead of a fixed word
            # position, so a header without a status token still parses.
            current = int(header.group("goal_number"))
            leading = line[: header.start()].split()
            # Headers with no status token are filed under "UNKNOWN".
            status = leading[0] if leading else "UNKNOWN"
            description = " ".join(line[header.end():].split())
            goalCourses[current] = ""

        elif requirement.search(line):
            # A requirement line before any goal header belongs to no goal.
            if current is None:
                continue

            description += f" {line.strip()}"
            sortGoalAreas.setdefault(status, []).append({current: description})

        elif "UNIVERSITY REQUIREMENTS-DIVERSITY" in line:
            break

        elif current is not None:
            # Keep a separator between lines; without it a code ending one
            # line fuses with text starting the next and is no longer found.
            goalCourses[current] += line + "\n"

    for goal in goalCourses:
        goalCourses[goal] = extractCourseCode(goalCourses[goal])

    return {"Goal-Courses": goalCourses, "Progress-Goal": sortGoalAreas}


## Usage

In [70]:
# The header line carries the progress status ("NO"), the goal number and its title
inputString = """NO           GOAL 3: Natural and Physical Sciences
             (2 courses or experiences)
     -R    Lab
           SELECT FROM:        ASTR106,120,205 BIOL101,102,103,104,107,152
                               CHEM151,160,207,210,211,307 AHS 104,105,
                               AHS 109,220,230,260 GEOG272 PHYS103,231,
                               PHYS232,234,235"""

# extractGoalAreas iterates over its argument line by line, so split the text into a list of lines
inputString = inputString.split("\n")
print(extractGoalAreas(inputString))

{'Goal-Courses': {3: ['AHS104', 'AHS105', 'AHS109', 'AHS220', 'AHS230', 'AHS260', 'ASTR106', 'ASTR120', 'ASTR205', 'BIOL101', 'BIOL102', 'BIOL103', 'BIOL104', 'BIOL107', 'BIOL152', 'CHEM151', 'CHEM160', 'CHEM207', 'CHEM210', 'CHEM211', 'CHEM307', 'GEOG272', 'PHYS103', 'PHYS231', 'PHYS232', 'PHYS234', 'PHYS235']}, 'Progress-Goal': {'NO': [{3: 'Natural and Physical Sciences (2 courses or experiences)'}], 'YES': [], 'IP': []}}


## extractCourseGoals
    Creates a dictionary mapping each course code the user has not yet taken to the list of goal areas it fulfills

In [71]:
def extractCourseGoals(goalCourses: dict, coursesTaken: list):
    """
    Inverts the goal -> courses mapping into a course -> goals mapping

    Courses listed in ``coursesTaken`` are left out of the result.

    :param goalCourses: A dictionary where keys are goal areas, and values are lists of associated course codes
    :type goalCourses: dict
    :param coursesTaken: A list of courses that the user has already taken
    :type coursesTaken: list
    :return: A dictionary where keys are course codes, and values are lists of the goal areas they fulfill
    :rtype: dict
    """
    goals_by_course = {}

    for goal_area, offered in goalCourses.items():
        for code in offered:
            # Courses already on the transcript are not candidates.
            if code in coursesTaken:
                continue

            goals_by_course.setdefault(code, []).append(goal_area)

    return goals_by_course

## Usage

In [72]:
# Goal area number -> course codes listed for that goal
goal_course = {
    3: [
        "AHS104",
        "AHS105",
        "AHS109",
        "AHS220",
        "AHS230",
        "AHS260",
        "ASTR106",
        "ASTR120",
        "ASTR205",
        "BIOL101",
        "BIOL102",
        "BIOL103",
        "BIOL104",
        "BIOL107",
        "BIOL152",
        "CHEM151",
        "CHEM160",
        "CHEM207",
        "CHEM210",
        "CHEM211",
        "CHEM307",
        "GEOG272",
        "PHYS103",
        "PHYS231",
        "PHYS232",
        "PHYS234",
        "PHYS235",
    ]
}

# Courses already taken; "AHS104" and "AHS105" also appear under goal 3 above
coursesUserhasTaken = [
    "AHS104",
    "AHS105",
    "CSCI201",
    "CYB267",
    "CYB268",
    "CYB270",
    "EAP150",
    "EAP151",
    "EAP202",
    "MATH271",
    "STAT239",
]

print(extractCourseGoals(goalCourses=goal_course, coursesTaken=coursesUserhasTaken))

{'AHS109': [3], 'AHS220': [3], 'AHS230': [3], 'AHS260': [3], 'ASTR106': [3], 'ASTR120': [3], 'ASTR205': [3], 'BIOL101': [3], 'BIOL102': [3], 'BIOL103': [3], 'BIOL104': [3], 'BIOL107': [3], 'BIOL152': [3], 'CHEM151': [3], 'CHEM160': [3], 'CHEM207': [3], 'CHEM210': [3], 'CHEM211': [3], 'CHEM307': [3], 'GEOG272': [3], 'PHYS103': [3], 'PHYS231': [3], 'PHYS232': [3], 'PHYS234': [3], 'PHYS235': [3]}


## findOptimalCourse
    Builds a JSON string that maps each unfulfilled goal area to its candidate courses, ordered by how many unfulfilled goal areas each course covers (most first)

In [73]:
def findOptimalCourse(courses: dict, goalLeft: list, userNotCompleted: dict) -> str:
    """
    Ranks the available courses for every unfulfilled goal area

    A course scores one point for each goal in ``goalLeft`` that it fulfills.
    Courses with no points are dropped; the others are listed under each goal
    they fulfill, highest score first (ties keep the order of ``courses``).

    :param courses: A dictionary containing course codes as keys and lists of associated goal areas as values
    :type courses: dict
    :param goalLeft: A list of goal areas that the user needs to fulfill
    :type goalLeft: list
    :param userNotCompleted: A dictionary with goal areas as keys and their descriptions as values.
    :type userNotCompleted: dict
    :return: A JSON object whose keys are "<goal> <description>" and whose values are lists of
             {course: [goal areas]} entries; a goal without any candidate course maps to []
    :rtype: str
    """
    scores = {
        course: sum(1 for goal in goalLeft if goal in fulfilled)
        for course, fulfilled in courses.items()
    }

    # sorted() is stable, so equally scored courses keep their input order.
    ranked = sorted(
        (course for course, points in scores.items() if points > 0),
        key=scores.get,
        reverse=True,
    )

    candidates = {}

    for course in ranked:
        entry = {course: courses[course]}

        for goal in courses[course]:
            candidates.setdefault(goal, []).append(entry)

    recommendations = {}

    for goal in goalLeft:
        if goal in userNotCompleted:
            label = f"{goal} {userNotCompleted[goal]}"
        else:
            # No description available: fall back to the bare goal number.
            label = str(goal)

        # A goal may have no remaining course to offer; report it as empty
        # instead of failing with a KeyError.
        recommendations[label] = candidates.get(goal, [])

    return json.dumps(recommendations)

## Usage

In [74]:
# Goal areas the user still has to fulfill
goalAreasUserNeeds = [3]

# Course code -> goal areas the course fulfills
course_goalList = {
    "AHS109": [3],
    "AHS220": [3],
    "AHS230": [3],
    "AHS260": [3],
    "ASTR106": [3],
    "ASTR120": [3],
    "ASTR205": [3],
    "BIOL101": [3],
    "BIOL102": [3],
    "BIOL103": [3],
    "BIOL104": [3],
    "BIOL107": [3],
    "BIOL152": [3],
    "CHEM151": [3],
    "CHEM160": [3],
    "CHEM207": [3],
    "CHEM210": [3],
    "CHEM211": [3],
    "CHEM307": [3],
    "GEOG272": [3],
    "PHYS103": [3],
    "PHYS231": [3],
    "PHYS232": [3],
    "PHYS234": [3],
    "PHYS235": [3],
}

# Goal area -> description used to label the output
goalInfo = {3: "Natural and Physical Sciences (2 courses or experiences)"}

print(
    findOptimalCourse(
        courses=course_goalList,
        goalLeft=goalAreasUserNeeds,
        userNotCompleted=goalInfo,
    )
)

{"3 Natural and Physical Sciences (2 courses or experiences)": [{"AHS109": [3]}, {"AHS220": [3]}, {"AHS230": [3]}, {"AHS260": [3]}, {"ASTR106": [3]}, {"ASTR120": [3]}, {"ASTR205": [3]}, {"BIOL101": [3]}, {"BIOL102": [3]}, {"BIOL103": [3]}, {"BIOL104": [3]}, {"BIOL107": [3]}, {"BIOL152": [3]}, {"CHEM151": [3]}, {"CHEM160": [3]}, {"CHEM207": [3]}, {"CHEM210": [3]}, {"CHEM211": [3]}, {"CHEM307": [3]}, {"GEOG272": [3]}, {"PHYS103": [3]}, {"PHYS231": [3]}, {"PHYS232": [3]}, {"PHYS234": [3]}, {"PHYS235": [3]}]}


## pipeline
    Takes in a Degree Audit and transcript, and returns a JSON representation of optimal courses for unfulfilled goal areas

In [75]:
def pipeline(audit: list, transcript: list) -> str:
    """
    Takes in a Degree Audit and transcript, and returns a JSON representation of optimal courses for unfulfilled goal areas

    The audit supplies the goal areas, their courses and the goals still marked
    "NO"; the transcript supplies the courses already taken, which are removed
    from the candidates before ranking.

    :param audit: The Degree Audit content read from a text file
    :type audit: list
    :param transcript: The transcript content read from a text file
    :type transcript: list
    :return: A JSON representation of optimal courses for unfulfilled goal areas
    :rtype: str
    """
    auditData = extractGoalAreas(audit)

    # Parse the transcript's actual text rather than the repr of the list:
    # repr() escapes characters such as tabs, which hides course codes from
    # the regular expressions. A plain string is still accepted as-is.
    if isinstance(transcript, str):
        transcriptText = transcript
    else:
        transcriptText = "\n".join(map(str, transcript))

    transcriptData = extractCourseCode(transcriptText)

    courseContents = auditData["Goal-Courses"]

    goalsLeft = []
    descriptions = {}

    # Each entry is a one-item {goal number: description} dictionary.
    for entry in auditData["Progress-Goal"]["NO"]:
        for goal, description in entry.items():
            goalsLeft.append(goal)
            descriptions[goal] = description

    courseGoals = extractCourseGoals(courseContents, transcriptData)

    return findOptimalCourse(courseGoals, goalsLeft, descriptions)

In [76]:
transcript = """
                 ***** St. Cloud State University *****                            |
                                                                                   |
Spring 2023                                                                        |
Major: Cybersecurity                                                               |
  Freshman                                                                         |
        MATH 271 Discrete Mathematics         3.00 Z           0.00     0.00 0.00 |
        CYB 267 Intro to Programming          3.00 Z           0.00     0.00 0.00 |
        CYB 268 Intro to Programming Lab      1.00 Z           0.00     0.00 0.00 |
        EAP 151 Admin Orie Intl Student       1.00 S           1.00     0.00 0.00 |
        EAP 150 Cultural Ori Intl Student 2.00 Z               0.00     0.00 0.00 |
        EAP 202 Reading and Writing II        4.00 Z           0.00     0.00 0.00 |
 UNDG Term Att:   14.00 Earn:   1.00 GPA Crs:    0.00 GPA Pts:      0.00 GPA: 0.00 |
 **** Cum Att:    14.00 Earn:   1.00 GPA Crs:    0.00 GPA Pts:      0.00 GPA: 0.00 |
        """


audit = """
        *****************************************************************
Goal 3: Natural & Physical Sciences
Minimum of two courses or experiences, with no more than one
from any rubric or academic area. Include one laboratory course.
*****************************************************************
NO           GOAL 3: Natural and Physical Sciences
             (2 courses or experiences)
     -R    Lab
           SELECT FROM:        ASTR106,120,205 BIOL101,102,103,104,107,152
                               CHEM151,160,207,210,211,307 AHS 104,105,
                               AHS 109,220,230,260 GEOG272 PHYS103,231,
                               PHYS232,234,235
     -     Anthropology
           SELECT FROM:        ANTH140
     +     Astronomy
           Term Course         Credits Grade    Title
           F 23 ASTR107            3.0 Z     IP Concepts Stars/Universe
     -     Biology
           SELECT FROM:        BIOL101,102,103,104,106,107,152
     -     Chemistry
           SELECT FROM:        CHEM101,105,151,160,207,210,211,307
     -     Atmospheric and Hydrologic Sciences
           SELECT FROM:        AHS 104,105,106,109,220,230,260
     -     Health
           SELECT FROM:        HLTH210
     -     Physics
           SELECT FROM:        PHYS101,103,208,231,232,234,235
     -     Geography
           SELECT FROM:        GEOG272
IP        GOAL 4: Mathematical/Logical Reasoning
          (1 course or experience)
     +     Mathematical/Logical Reasoning
           Term Course         Credits Grade    Title
           F 23 STAT239            3.0 Z     IP Stat Method I for Nat Sci

*****************************************************************
Goal 5: History & the Social and Behavioral Sciences
Two courses or experiences with no more than 4 credits from one


                                            Page 3 of 10
rubric or academic area
*****************************************************************
NO            GOAL 5: History & the Social and Behavioral Sciences
              (2 courses or experiences)
   -       AFST
           SELECT FROM:      AFST250
   -       ANTH
           SELECT FROM:      ANTH101,130,188,175,198,250
   -       ART
           SELECT FROM:      ART 231
   -       BRIT
   -       CMST
           SELECT FROM:      CMST220
   -       CMTY
           SELECT FROM:      CMTY200,222,394
   -       COLL
   -       CPSY
           SELECT FROM:      CPSY101,262,330
   -       CEEP
   -       CFS
           SELECT FROM:      CFS 220
   -       CJS
           SELECT FROM:      CJS 111
   -       CSD
           SELECT FROM:      CSD 130
   -       EAST
           SELECT FROM:      EAST250
   -       ECON
           SELECT FROM:      ECON201,205,206,381
   -       ETHS
           SELECT FROM:      ETHS312,335,345,410,425
   -       ETS
           SELECT FROM:      ETS 182,183,375
   -       GERO
           SELECT FROM:      GERO208,405,415,470
   -       GEOG
           SELECT FROM:      GEOG271,394
   -       GWS
           SELECT FROM:      GWS 201
   -       HIST
           SELECT FROM:      HIST101,106,109,110,111,140,141,150,197,336,
                             HIST352,358
   -       HLTH


                                       Page 4 of 10
           SELECT FROM:      HLTH215
   -       LAST
           SELECT FROM:      LAST250
   -       POL
           SELECT FROM:      POL 111
   -       PSY
           SELECT FROM:      PSY 115,225,228,240
   -       SOC
           SELECT FROM:      SOC 111,160,200,211,273,276
   -       SPED
           SELECT FROM:      SPED203
   -       TH
           SELECT FROM:      TH   270
*****************************************************************
Goal 6: The Humanities & Fine Arts
Two courses or experiences with no more than 4 credits from one
rubric or academic area
*****************************************************************
NO            GOAL 6: The Humanities & Fine Arts
              (2 courses or experiences)
   -       ART
           SELECT FROM:      ART 101,102,110,111,120,121,130,131,230,231,
                             ART 333,433,434,435,490
   -       CMST
           SELECT FROM:      CMST210,211,306,316
   -       COLL
   -       CSD
           SELECT FROM:      CSD 171
   -       DANC
   -       ENGL
           SELECT FROM:      ENGL184,201,202,203,215,216,280,305,306
   -       ETHS
           SELECT FROM:      ETHS301,307,345
   -       FREN
           SELECT FROM:      FREN101,102,110,201,202
   -       FS
           SELECT FROM:      FS   175,260,270
   -       GEOG
           SELECT FROM:      GEOG270
   -       GER
           SELECT FROM:      GER 101,102,110,201,202
   -       GWS
           SELECT FROM:      GWS 330


                                        Page 5 of 10
     -   HIST
         SELECT FROM:         HIST136
     -   HUMS
     -   IM
         SELECT FROM:         IM      260
     -   JPN
         SELECT FROM:         JPN 101,102,201,202
     -   LC
         SELECT FROM:         LC      101,102,201,202
     -   MCOM
         SELECT FROM:         MCOM146,200,275
     -   MUSM
         SELECT FROM:         MUSM100,123,125,126,229
     -   MUSP
         SELECT FROM:         MUSP110,112,114,116,118,120,122,124,126,128,
                              MUSP130,132,134,138,140,142,144,146,148,155,
                              MUSP156,157,158,159,161,162,163,164,165,166,
                              MUSP167,168,353,355,356,357,358,360,362,364,
                              MUSP367
     -   PHIL
         SELECT FROM:         PHIL111,112,116,211,212,221,222,251,252
     -   REL
         SELECT FROM:         REL 100,150,260
     -   RUSS
         SELECT FROM:         RUSS101,102,110,201,202
     -   SPAN
         SELECT FROM:         SPAN101,102,103,110,201,202
     -   TH
         SELECT FROM:         TH      148,231,242,258,270
NO       GOAL 7: Human Diversity
         (1 course or experience)
     -   Racial Issues Graduation Requirement Goal 7 Courses
         SELECT FROM:          CMTY222 CPSY384 ENGL216 ETHS111,201,205,
                               ETHS210,215,220,310,470 GWS 220 HIST109
                               HURL102 POL 310 SOC 268
     -   Additional Goal 7 Courses
         SELECT FROM:          JWST180 CSD 130 REL 180 ETHS308,408
                               HIST140,141,352,358,420
NO       GOAL 8: Global Perspective
         (1 course or experience)
     -   Global Perspective
         SELECT FROM:         ART 131,230,433,434,435,490 AFST250
                              ANTH101,175,250 ASTR120 BRIT101,201
                              CFS 260 CMST330 CMTY200 EAST250 ECON350

                                            Page 6 of 10
                                   ENTR200 ETS 182,183,185 FREN101,102,110,
                                   FREN201,202 GEOG111 GER 101,102,110,201,
                                   GER 202 GERO470 GLST190 GWS 340,430
                                   HIST101,106,110,111,150 HTSM111,215
                                   HURL303 IM 204 JPN 101,102,201,202
                                   LAST250 LC 101,102,201,202 LIB 290
                                   MCOM218 MKTG100 MUSM125 MUSP159 PHIL111,
                                   PHIL211,327 POL 101,251,333,334,335,337,
                                   POL 434,436 REL 100,225 RUSS101,102,110,
                                   RUSS201,202 SOC 273,276 SPAN100,101,102,
                                   SPAN103,110,201,202
        """

# pipeline() is annotated to take both documents as lists, so split each string into lines
transcript = transcript.split("\n")
audit = audit.split("\n")

print(pipeline(audit=audit, transcript=transcript))

{"3 Natural and Physical Sciences (2 courses or experiences)": [{"ASTR120": [3, 8]}, {"AHS104": [3]}, {"AHS105": [3]}, {"AHS106": [3]}, {"AHS109": [3]}, {"AHS220": [3]}, {"AHS230": [3]}, {"AHS260": [3]}, {"ANTH140": [3]}, {"ASTR106": [3]}, {"ASTR107": [3]}, {"ASTR205": [3]}, {"BIOL101": [3]}, {"BIOL102": [3]}, {"BIOL103": [3]}, {"BIOL104": [3]}, {"BIOL106": [3]}, {"BIOL107": [3]}, {"BIOL152": [3]}, {"CHEM101": [3]}, {"CHEM105": [3]}, {"CHEM151": [3]}, {"CHEM160": [3]}, {"CHEM207": [3]}, {"CHEM210": [3]}, {"CHEM211": [3]}, {"CHEM307": [3]}, {"GEOG272": [3]}, {"HLTH210": [3]}, {"PHYS101": [3]}, {"PHYS103": [3]}, {"PHYS208": [3]}, {"PHYS231": [3]}, {"PHYS232": [3]}, {"PHYS234": [3]}, {"PHYS235": [3]}], "5 History & the Social and Behavioral Sciences (2 courses or experiences)": [{"AFST250": [5, 8]}, {"ANTH101": [5, 8]}, {"ANTH175": [5, 8]}, {"ANTH250": [5, 8]}, {"ART231": [5, 6]}, {"CMTY200": [5, 8]}, {"CMTY222": [5, 7]}, {"CSD130": [5, 7]}, {"EAST250": [5, 8]}, {"ETHS345": [5, 6]}, {"ETS