# **Automating Course Descriptor**

# IT9502 Thesis

**Student ID:** 22201670

**Student Name:** Sachith M. Gunawardane

### Connect to Google Drive

*Google Drive holds training data for this research*
* PDF files
* PDF files created using images

In [1]:
# Connect Google Drive
# Colab-only step: mounts the user's Drive under /content/gdrive/ so the
# handbook PDF referenced in the global parameters can be opened by path.
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


### Install External Packages

* The *PyPDF2* library was chosen over *tika* because PyPDF2 can read a PDF page by page, using specific page numbers. As a consequence, MS Word files would require a separate implementation using the *docx* library.

In [2]:
# Install PyPDF2
# NOTE(review): the version is not pinned; the recorded run resolved to PyPDF2 3.0.1.
!pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━[0m [32m143.4/232.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [3]:
# Install MySQL connector
# NOTE(review): the version is not pinned; the recorded run installed 8.2.0 and
# replaced protobuf 3.20.3 with 4.21.12, which pip reported as a dependency conflict.
!pip install mysql-connector-python

Collecting mysql-connector-python
  Downloading mysql_connector_python-8.2.0-cp310-cp310-manylinux_2_17_x86_64.whl (31.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.6/31.6 MB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting protobuf<=4.21.12,>=4.21.1 (from mysql-connector-python)
  Downloading protobuf-4.21.12-cp37-abi3-manylinux2014_x86_64.whl (409 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m409.8/409.8 kB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf, mysql-connector-python
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you h

### Import Libraries

In [4]:
# Import Libraries
import PyPDF2
import numpy as np
import nltk
nltk.download('punkt')
from nltk import sent_tokenize
from nltk import word_tokenize
import re
import mysql
import mysql.connector as msql
from mysql.connector import Error
from datetime import datetime

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### Global Parameters

In [5]:
# Global Parameters
## Ground Truth Parameter details
# Handbook PDF on the mounted Google Drive that is used as ground truth.
ground_truth_file = '/content/gdrive/MyDrive/OWR/data/files/2023 Programme Handbook.pdf'
# First and last page to process (inclusive). The values are used as zero-based
# indexes into the PDF reader's page list, so index 40 is printed page 41.
start_page = 40
end_page   = 130




In [7]:
# establish database connection
import os
from getpass import getpass

# The password must not be stored in the notebook: read it from the
# MYSQL_PASSWORD environment variable, or prompt for it when the variable is unset.
db_password = os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: ')

# Both names always exist after this cell; they stay None when the connection
# fails, so later cells can test for that instead of hitting a NameError.
conn = None
cursor = None
try:
  conn = msql.connect(host='db4free.net', database='education_nz', user='whitireia_admin',
                      password=db_password)
  cursor = conn.cursor()
except Error as e:
  print("Error while connecting to MySQL now ", e)

### Implementation

*

In [8]:
def get_pageContent(pdf_reader, page_no):
  ''' Read one page and return its text in upper case
      Input:
              1. pdf reader - object exposing an indexable "pages" collection
              2. Page number - index into "pages"; converted with int() first
      Returns the upper-cased text extracted from the requested page'''

  page_index = int(page_no)
  return pdf_reader.pages[page_index].extract_text().upper()

In [9]:
def get_courseCode(text):
  '''
  Extract the Course Code and locate the Level / Credits markers
  Input: Page text
  Output: tuple (Course Code or None, list of marker token positions)
          The list is normally [Level index, Credits index]; it is empty when
          no such pair exists, and holds a single index when the text ends
          within 3 tokens of a "Level" that has no matching "Credits".
  Logic:  1. Scan the tokens for "Level" followed by "Credits" fewer than
             3 tokens later (case-insensitive)
          2. Walk backwards from the Level token to the first token that
                a. contains a 6 character word
                b. starts with 2 letters
                c. ends with 2 digits
  '''

  tokens = word_tokenize(text)

  # marker words and the maximum distance allowed between them
  search_str = ['Level','Credits']
  level_word = search_str[0].lower()
  credit_word = search_str[1].lower()
  word_gap = 3

  marker_pos = []
  for idx, token in enumerate(tokens):
    lowered = token.lower()
    if lowered == level_word:
      marker_pos.append(idx)
    if lowered == credit_word and marker_pos:
      marker_pos.append(idx)

    if len(marker_pos) >= 2:
      if marker_pos[1] - marker_pos[0] < word_gap:
        break
      # pair is too far apart: start over
      marker_pos = []
    elif marker_pos and idx - marker_pos[0] > word_gap:
      # "Level" without a nearby "Credits": discard it
      marker_pos = []

  # all three patterns must match for a token to count as a course code
  code_patterns = (
    r"\b\w{6}\b",          # word with 6 positions
    r"\b^[a-zA-Z]{2}\w+",  # start with 2 characters
    r"\w+\d{2}$",          # end with 2 numbers
  )

  code = None
  if marker_pos:
    for idx in range(marker_pos[0], -1, -1):
      candidate = tokens[idx]
      if all(re.findall(expr, candidate) for expr in code_patterns):
        code = candidate
        break

  return (code, marker_pos)

In [10]:
def get_courseTitle(text, code, pos):
  '''
  This function is designed to return the Course Title
  Prerequisite: get_courseCode
  Input: Page text, Course Code and Position of Level and Credit
  Output: Course Title, or None when no token lies between code and Level
  Logic: 1. Title starts right after the first occurrence of the Course Code
         2. Title ends right before the Level token (pos[0])
  Both positions come from the prerequisite function
  '''

  tokens = word_tokenize(text)

  code_at = tokens.index(code)
  level_at = pos[0]

  # tokens strictly between the course code and the Level marker
  title_tokens = tokens[code_at + 1:level_at] if code_at < level_at else []

  return ' '.join(title_tokens) if title_tokens else None

In [11]:
def get_courseLevel(text, pos):
  '''
  This function is designed to return the Course Level
  Prerequisite: get_courseCode
  Input: Page text and Position of Level and Credit
  Output: Course Level as integer, 0 when no level is found
  Logic: 1. Start at the Level token (pos[0])
         2. Return the first token that starts with a digit and converts to int
         3. Give up once the "Aim" token is reached
  '''
  end_search = 'Aim'
  pattern = r"\b^\d{1,2}" # token starts with 1 or 2 digits

  tokens = word_tokenize(text)

  for idx in range(pos[0], len(tokens)):
    token = tokens[idx]

    if re.search(pattern, token):
      try:
        return int(token)
      except ValueError:
        pass  # digits followed by other characters: keep scanning

    if token.lower() == end_search.lower():
      break

  return 0

In [12]:
def get_courseCredit(text, pos):
  '''
  This function is designed to return the Course Credits
  Prerequisite: get_courseCode
  Input: Page text and Position of Level and Credit
  Output: Course Credits as integer, 0 when no value is found
  Logic: 1. Start at the Credits token (pos[1])
         2. Return the first token that starts with a digit and converts to int
         3. Give up once the "Aim" token is reached
  '''
  end_search = 'Aim'
  pattern = r"\b^\d{1,2}" # token starts with 1 or 2 digits

  tokens = word_tokenize(text)

  for idx in range(pos[1], len(tokens)):
    token = tokens[idx]

    if re.search(pattern, token):
      try:
        return int(token)
      except ValueError:
        pass  # digits followed by other characters: keep scanning

    if token.lower() == end_search.lower():
      break

  return 0

In [13]:
def get_courseTutorHrs(text):
  '''
  This function is designed to return the tutor directed learning hours
  Input: Page text
  Output: Tutor directed hours as integer, 0 when no value is found
  Logic: 1. Find the last "learning" / "tutor" / "tutor-directed" token that
            occurs before the "aim" / "aims" token
         2. From there return the first token that starts with a digit and
            converts to int
         3. Stop at the "aim" / "aims" token so numbers from later sections
            (e.g. learning outcome numbering) are never picked up
  '''

  # search text
  search_text = ('learning', 'tutor','tutor-directed')
  stop_text   = ('aim','aims')
  pattern = r"\b^\d{1,2}" # token starts with 1 or 2 digits

  # extract words
  words = word_tokenize(text)

  # position of the last keyword that appears before the stop word
  last_keyword = None
  for i, item in enumerate(words):
    token = item.lower()
    if token in search_text:
      last_keyword = i

    if token in stop_text:
      break

  if last_keyword is None:
    return 0

  for word in words[last_keyword:]:
    if re.search(pattern, word):
      try:
        return int(word)
      except ValueError:
        pass  # digits followed by other characters: keep scanning

    # membership test: the original compared the token with the whole tuple,
    # which is never equal, so the scan ran on to the end of the page
    if word.lower() in stop_text:
      break

  return 0

In [14]:
def get_courseSelfHrs(text):
  '''
  This function is designed to return the self directed learning hours
  Input: Page text
  Output: Self directed hours as integer, 0 when no value is found
  Logic: 1. Find the last "learning" / "self" / "self-directed" token that
            occurs before the "aim" / "aims" token
         2. From there return the first token that starts with a digit and
            converts to int, stopping at "aim" / "aims"
         3. If nothing was found, repeat the scan starting one token earlier
            (covers a value printed immediately before the keyword)
  '''

  # search text
  search_text = ('learning', 'self','self-directed')
  stop_text   = ('aim','aims')
  pattern = r"\b^\d{1,2}" # token starts with 1 or 2 digits

  # extract words
  words = word_tokenize(text)

  # position of the last keyword that appears before the stop word
  last_keyword = None
  for i, item in enumerate(words):
    token = item.lower()
    if token in search_text:
      last_keyword = i

    if token in stop_text:
      break

  if last_keyword is None:
    return 0

  def _first_hours_from(start):
    # First integer token at or after `start`, 0 if the stop word comes first.
    for word in words[start:]:
      if re.search(pattern, word):
        try:
          return int(word)
        except ValueError:
          pass  # digits followed by other characters: keep scanning

      # membership test: the original compared the token with the whole tuple,
      # which is never equal, so the scan ran on to the end of the page
      if word.lower() in stop_text:
        break
    return 0

  self_hrs = _first_hours_from(last_keyword)
  if self_hrs == 0:
    # clamp at 0: a start of -1 would otherwise read the last token of the page
    self_hrs = _first_hours_from(max(last_keyword - 1, 0))

  return self_hrs

In [15]:
def get_endPoint4Exceptions(sentence):
  '''
  Locate a known irregular end marker inside a sentence
  Input: Sentence text
  Output: Position of the first marker (in list order, case-insensitive)
          that occurs in the sentence, or len(sentence) when none occurs
  '''
  end_text_exception = ['learning outco mes','lear ning outcomes',
                        'identify and explain contemporary','critically analyse ethical issues'
                       ]

  lowered = sentence.lower()
  for marker in end_text_exception:
    found_at = lowered.find(marker)
    if found_at != -1:
      return found_at

  return len(sentence)

def get_courseAim(text):
  '''
  This function is designed to return the Course Aim
  Input: Page text
  Output: Aim text, one extracted sentence per line ('' when "aim" never occurs)
  Logic: 1. Start at the first sentence containing "aim"; when a line break
            follows within 6 characters, start at that line break instead
         2. Append every following sentence from its beginning
         3. Cut and stop at "learning outcomes", or at one of the irregular
            markers known to get_endPoint4Exceptions
  '''

  # search text
  search_text  = 'aim'
  end_text = 'learning outcomes'

  pieces = []
  started = False
  for sentence in sent_tokenize(text):
    lowered = sentence.lower()

    if started:
      begin = 0
    else:
      begin = lowered.find(search_text)
      if begin == -1:
        continue
      started = True
      # skip the heading word itself when a line break follows closely
      newline_at = sentence[begin:].find('\n')
      if newline_at != -1 and newline_at < 6:
        begin += newline_at

    finish = lowered.find(end_text)
    finished = finish != -1
    if not finished:
      finish = get_endPoint4Exceptions(sentence)
      finished = finish != len(sentence)

    pieces.append(sentence[begin:finish] + '\n')

    if finished:
      break

  return ''.join(pieces).strip("\n")

In [16]:
def get_coursePrerequisite(text, course_credit, tutor_directed):
  '''
  This function is designed to return the Course Pre-requisites
  Prerequisite: get_courseCredit and get_courseTutorHrs
  Input: Page text, Course Credits and Tutor directed hours
  Output: List of [course code, course title] pairs, in reverse page order
          (the scan runs backwards); empty when nothing is found
  Logic: 1. Anchor on the first token equal to the tutor directed hours
         2. Walk backwards until a stop token (pre-requisites heading, the
            credit value or "credits") is reached
         3. Tokens ending in 2 digits start a code; other tokens are collected
            as title words. Short code fragments are joined until the result
            looks like a course code (6 characters, 2 leading letters,
            2 trailing digits)
  '''

  ignore_text = ('learning', 'hours', 'tutor','tutor-directed','directed','-directed',
                 'pre-requisites', '-requisites','requisites' ,'none',str(tutor_directed))
  stop_text = ('pre-requisites', '-requisites','requisites', str(course_credit), 'credits' )

  # regular expression patterns for code
  pattern1 = r"\b\w{6}\b" # word with 6 positions
  pattern2 = r"\b^[a-zA-Z]{2}\w+" # start with 2 characters
  pattern3 = r"\w+\d{2}$" # end with 2 numbers

  # extract words
  words = word_tokenize(text)

  # Anchor position. The original scanned from the leaked enumerate variable,
  # which is the last token of the page when the anchor is missing and is
  # undefined for empty text; scanning now always starts from the anchor.
  start_pos = 0
  for i, item in enumerate(words):
    if item.lower() == str(tutor_directed):
      start_pos = i
      break

  code = []
  title = []
  code_flag = False
  output = []
  for idx in range(start_pos, 0, -1):
    token = words[idx]

    if token.lower() in stop_text:
      break

    if token.lower() not in ignore_text:
      if re.findall(pattern3, token) or code_flag:
        code.append(token)
      else:
        title.append(token)

    if code:
      # tokens were collected back to front: restore reading order
      course_code = ''.join(reversed(code))

      if len(course_code) < 6:
        # code is split across tokens: keep adding the preceding tokens
        code_flag = True

      if (re.findall(pattern1, course_code)
          and re.findall(pattern2, course_code)
          and re.findall(pattern3, course_code)):
        output.append([course_code, ' '.join(reversed(title))])
        code = []
        title = []
        code_flag = False

  return output

In [17]:
def get_courseLearningOutcome(text):
  '''
  This function is designed to return the Course Learning Outcomes
  Input: Page text
  Output: List of [outcome number, outcome text] pairs; empty when the
          "Learning Outcomes" heading is not found
  Logic: 1. Heading = two consecutive tokens from the heading word list
            (includes the split spellings produced by the PDF extraction)
         2. Section ends at the first content heading / bullet token after
            the heading, or at the end of the page
         3. A single digit token starts a new outcome; the "." directly
            after the number is dropped
  '''

  # search text
  search_text = ('learning', 'outcomes','outco', 'mes','lear' ,'ning')
  stop_text   = ('indicative', 'content', 'conte', 'nt', '•')

  # regular expression patterns for code
  pattern = r"\d{1}$" # end with 1 numbers

  # extract words
  words = word_tokenize(text)

  # stop one short of the end so words[i + 1] always exists
  start = None
  for i in range(len(words) - 1):
    if words[i].lower() in search_text and words[i + 1].lower() in search_text:
      start = i + 1
      break

  if start is None:
    return []

  # look for the end marker only after the heading; a marker earlier on the
  # page must not cut the section off
  end = len(words)
  for i in range(start + 1, len(words)):
    if words[i].lower() in stop_text:
      end = i
      break

  def _is_outcome_number(token):
    # Outcome numbers are single-character digit tokens.
    return len(token) == 1 and len(re.findall(pattern, token)) > 0

  learning_outcome = []
  index = -1
  outcome = []
  for i in range(start, end):
    if _is_outcome_number(words[i]):
      if index != -1:
        learning_outcome.append([index, ' '.join(outcome)])
        outcome = []
      index = int(words[i])
      continue

    # text is collected only once the first number has been seen
    if index != -1 and not (_is_outcome_number(words[i - 1]) and words[i] == '.'):
      outcome.append(words[i])

  if index != -1:
    learning_outcome.append([index, ' '.join(outcome)])

  return learning_outcome

In [18]:
# Scratch check: expand a 'start-end' string into every integer of the
# inclusive range (prints 3, 4, 5 for '3-5').
string1 = '3-5'
string2 = '-'

if string2 in string1:
  string3 = string1.split(string2)
  for i in range(int(string3[0]), int(string3[1]) +1 ):
    print(i)



3
4
5


In [51]:
def get_courseCompletion(text):
  '''
  This function is designed to return the course completion requirements
  Input: Page text
  Output: List of strings, one per bullet of the section that follows the
          "Successful completion of course" heading. The final item is always
          appended, so the list holds '' when the section is empty.
  Logic: 1. Work line by line; locate the assessment table heading
            (at least 3 heading words on lines close to each other)
         2. After it, locate the lines holding the words of the
            "Successful completion of course" heading
         3. The section ends at the first later line containing "RESOURCES",
            or at the end of the page
         4. Tokens are joined per bullet; a bullet as first token of a line
            closes the previous item
  NOTE(review): when the completion heading is not found the section starts
  at the second line of the page - confirm this is acceptable to callers.
  '''

  output = []
  specialChar = '•'

  # search text
  text_array = text.splitlines()

  search_text = ('ASSESSMENTS' , 'ASSESSMENT',  'METHOD',  'WEIGHTING',  'LEARNING',  'OUTCOME/S')
  search_text2 = ('SUCCESSFUL', 'COMPLETION', 'OF', 'COURSE')
  # one-element tuple: without the trailing comma this is a plain string and
  # "in" becomes a substring test (matching tokens such as 'OUR' or 'S')
  end_text = ('RESOURCES',)

  start_pos = []
  mid_pos = []
  end_pos = []

  section_found = False
  for i, item in enumerate(text_array):

    for word in word_tokenize(item):
      token = word.upper()

      if token in search_text:
        # keep only heading words that sit within 3 lines of each other
        if not (max(start_pos, default = 0) + 3 > i):
          start_pos.clear()
        start_pos.append(i)

      if token in search_text2 and not (len(mid_pos) > 3):
        # completion heading only counts after the assessment heading
        if not (max(mid_pos, default = 0) + 2 > i and len(start_pos) > 2):
          mid_pos.clear()
        mid_pos.append(i)

      if token in end_text and len(mid_pos) > 2:
        end_pos.append(i)

      if len(mid_pos) > 3 and len(end_pos) > 0:
        section_found = True
        break
    if section_found:
      break

  if len(end_pos) == 0:
    end_pos.append(len(text_array))

  assess_start = max(mid_pos, default = 0) + 1
  assess_end = max(end_pos, default = 0)

  outtext = []
  for sentance in text_array[assess_start:assess_end]:
    for position, word in enumerate(word_tokenize(sentance)):
      if specialChar in word and position == 0 and len(outtext) > 0:
        # a leading bullet closes the item collected so far
        output.append(' '.join(outtext))
        outtext.clear()
      else:
        tmpStr = word.strip()
        if len(tmpStr) > 0:
          outtext.append(tmpStr)
  output.append(' '.join(outtext))

  return output

In [19]:
def get_courseAssessments(text):
  '''
  This function is designed to return the Course Assessments
  Input: Page text
  Output: tuple (method, weight, learning) of three parallel lists
            method   - assessment name, tokens joined without separator
            weight   - token directly before the '%' sign
            learning - list of learning outcome numbers (as strings) per
                       assessment; 'ALL' is recorded as '99' and ranges
                       such as '1-3' are expanded
  Logic: 1. Work line by line; the table starts after the assessment heading
            and ends at the "Successful completion of course" heading
         2. Only lines containing a '%' token are treated as assessments
  '''

  method = []
  weight = []
  learning = []

  SpecialChar = '-'

  # search text

  text_array = text.splitlines()

  search_text = ('ASSESSMENTS' , 'ASSESSMENT',  'METHOD',  'WEIGHTING',  'LEARNING',  'OUTCOME/S')
  end_text = ('SUCCESSFUL', 'COMPLETION', 'OF', 'COURSE')

  start_pos = []
  end_pos = []

  heading_found = False
  for i, item in enumerate(text_array):

    for word in word_tokenize(item):
      token = word.upper()

      if token in search_text:
        # keep only heading words that sit within 3 lines of each other
        if not (max(start_pos, default = 0) + 3 > i):
          start_pos.clear()
        start_pos.append(i)

      if token in end_text:
        # end heading only counts after the assessment heading
        if not (max(end_pos, default = 0) + 2 > i and len(start_pos) > 2):
          end_pos.clear()
        end_pos.append(i)

      if len(end_pos) > 3:
        heading_found = True
        break
    if heading_found:
      break

  assess_start = max(start_pos, default = 0) + 1
  assess_end = max(end_pos, default = 0)

  for line_no in range(assess_start, assess_end):
    words = word_tokenize(text_array[line_no])
    try:
      percentagePos =  words.index('%')
    except ValueError:
      # not an assessment line
      continue

    if percentagePos == 0:
      # no weight token in front of '%': nothing sensible to extract
      continue

    method.append(''.join(words[:percentagePos - 1]))
    weight.append(words[percentagePos -1])

    learningOutcome = []
    for word in words[percentagePos +1:]:
      if word.upper() == 'ALL':
        learningOutcome.append('99')
      elif SpecialChar in word:
        bounds = word.split(SpecialChar)
        try:
          first, last = int(bounds[0]), int(bounds[1])
        except ValueError:
          # hyphenated word that is not a numeric range
          continue
        learningOutcome.extend(str(number) for number in range(first, last + 1))
      elif word.isdigit():
        learningOutcome.append(word)
    learning.append(learningOutcome)


  return   method, weight, learning


### Implementation of DB Functionality

In [18]:
def insert_version(cur, name, des, connection=None):
  '''
  Insert one row into education_nz.version and commit it
  Input:
          1. cur        - database cursor used to execute the INSERT
          2. name       - version name
          3. des        - version description
          4. connection - (optional) connection that owns the cursor; the
                          commit is issued on it. When omitted, the commit is
                          attempted on cur, as the original code did.
  Output: None
  Raises: AttributeError when neither connection nor cur offers commit()
  '''

  # Current date and time formatted as a MySQL datetime string
  dt_string = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

  insert_query = 'INSERT INTO education_nz.version (name, description, created_datetime) VALUES (%s, %s, %s)'
  values = (name, des, dt_string)

  cur.execute(insert_query, values)

  # Transactions are committed on the connection, not on the cursor; the
  # cursor is only tried as a fallback to keep existing callers working.
  target = connection if connection is not None else cur
  commit = getattr(target, 'commit', None)
  if commit is None:
    raise AttributeError(
      "cursor has no commit(); pass the owning connection via 'connection='")
  commit()

  return None

In [None]:
def insert_course(course_details, cur):
  '''
  Placeholder for storing a course record - not implemented yet
  Input: Course details and database cursor (both currently unused)
  Output: None
  '''
  return

### Ground Truth Creation

In [20]:
# Number of pages to be read
no_of_pages = np.arange(start_page, end_page+ 1, dtype=int)

# Open Ground Truth File
gt_pdf_file = open(ground_truth_file, 'rb')

# Create a PDF reader object
gt_pdf_reader = PyPDF2.PdfReader(gt_pdf_file)


In [54]:
# Development driver: runs every extractor on one hard-coded page and prints
# the results. The commented-out lines below are the loop over several pages
# that the `if True:` block temporarily replaces.
#no_of_pages= [45,46,66,67,68,71,72]
#for page in no_of_pages:
if True:
  page = 65
  # get the text of given page
  passed_page = get_pageContent(gt_pdf_reader,page)
  print(passed_page)

  # get course code
  # level_credit_pos holds the token positions of the Level / Credits markers
  course_code, level_credit_pos = get_courseCode(passed_page)
  print(course_code)

  # if course code extracted successful proceed
  if course_code != None:
    # get course title
    course_title = get_courseTitle(passed_page, course_code, level_credit_pos)
    print(course_title)

    # get course level
    course_level = get_courseLevel(passed_page,level_credit_pos)
    print(course_level)

    # get course credit
    course_credit = get_courseCredit(passed_page,level_credit_pos)
    print(course_credit)

    # get tutor directed
    course_tutor_directed = get_courseTutorHrs(passed_page)
    print(course_tutor_directed)

    # get self directed
    course_self_directed = get_courseSelfHrs(passed_page)

    # get aim
    course_aim = get_courseAim(passed_page)

    # get course pre-requisite
    # needs the credit and tutor-hour values extracted above as scan boundaries
    course_prerequisite = get_coursePrerequisite(passed_page, course_credit, course_tutor_directed)
    print(course_prerequisite)

    # get Learning outcome
    course_learning_outcomes = get_courseLearningOutcome(passed_page)
    print(course_learning_outcomes)

    # get Assessments
    print(get_courseAssessments(passed_page))

    # get completion requirements
    print(get_courseCompletion(passed_page))


    #print('Credit', get_courseCredit(passed_page,level_credit_pos))

    ## DB functinality
    #DB_versionID =

    # DB_courseID = insert_course([course_code,
    #                              course_title,
    #                              course_credit,
    #                              course_level,
    #                              course_tutor_directed,
    #                              course_self_directed,


    # ], cursor)
    # print(DB_courseID)




#print('*'*15 ,'Start Doc','*'*15 )
#print(passed_page)

#print('*'*15 ,'Start Words','*'*15 )
#words = word_tokenize(passed_page)
#print(words)

#print('*'*15 ,'Start Sentences','*'*15 )
#sentences = sent_tokenize(passed_page)
#print(sentences)

NEW ZEALAND CERTIFICATE IN INFORMATION TECHNOLOGY ESSENTIALS (LEVEL 4)  – PROGRAMME DOCUMENT  66 CS6503  DIGITAL FORENSICS  
LEVEL  6 CREDITS  15 
PRE-REQUISITES  IT5504 INFORMATION SECURITY I  
 IT5506 INTRODUCTION TO NETWORKING  
LEARNING HOURS  TUTOR DIRECTED    52 HOURS   
 SELF-DIRECTED    98 HOURS  
AIM 
TO PROVIDE LEARNERS WITH A COMPREHENSIVE UNDERSTANDING OF DIGITAL FORENSIC PRINCIPLES AND THE COLLECTION, 
PRESERVATION, AND ANALYSIS OF DIGITAL EVIDENCE.  
 
LEARNING OUTCOMES  
ON SUCCESSFUL COMPLETION OF THIS COURSE, THE LEARNER WILL BE ABLE TO:  
1. IDENTIFY THE  ATTRIBUTES OF FILE SYSTEMS AND STORAGE MEDIA AND PERFORM ANALYSIS ON AT LEAST TWO COMMON 
FILE SYSTEMS  
2. IDENTIFY AND ANALYSE POTENTIAL SOURCES OF ELECTRONIC EVIDENCE  
3. DESCRIBE THE IMPORTANCE OF MAINTAINING THE INTEGRITY OF DIGITAL EVIDENCE  
4. PERFORM BASIC FORE NSIC DATA ACQUISITION AND ANALYSIS USING COMPUTER AND NETWORK -BASED APPLICATIONS 
AND UTILITIES  
5. ACCURATELY DOCUMENT FORENSIC PROCEDURES AND RE

In [53]:
  # Spot check of get_courseCompletion on page index 67
  page = 67
  # get the text of given page
  passed_page = get_pageContent(gt_pdf_reader,page)
  print(passed_page)

  print(get_courseCompletion(passed_page))



NEW ZEALAND CERTIFICATE IN INFORMATION TECHNOLOGY ESSENTIALS (LEVEL 4)  – PROGRAMME DOCUMENT  68 DS6501  SOCIAL DATA ANALYTICS  
 
LEVEL 6  CREDITS 15  
PRE-REQUISITES  IT5507 FUNDAMENTALS OF DATA SCIENCE  
LEARNING HOURS  TUTOR DIRECTED    52 HOURS  
 SELF-DIRECTED    98 HOURS  
AIM 
TO INTRODUCE LEARNERS TO THE ANALYSIS OF SOCIAL DATA USING TOOLS AND TECHNIQUES TO EXTRACT KNOWLEDGE AND INSIGHTS FROM 
SOCIAL MEDIA NETWORKS.  
LEARNING OUTCO MES  
ON SUCCESSFUL COMPLETION OF THIS COURSE, THE LEARNER WILL BE ABLE TO:  
1. IDENTIFY AND EXPLAIN CONTEMPORARY TEXT MINING TASKS TYPICALLY APPLIED TO DOCUMENT COLLECTIONS  
2. PERFORM INTRODUCTORY TEXT MINING TASKS ON PUBLICLY  AVAILABLE SOCIAL MEDIA DATA  
3. IDENTIFY AND EXPLAIN THE VISUAL ANALYTICAL CONCEPTS APPLIED TO LARGE SOCIAL DATA SETS  
4. ANALYSE AND DISCUSS CURRENT SOCIAL, ETHICAL, SECURITY AND PRIVACY ISSUES RELATING TO LARGE -SCALE SOCIAL DATA 
ANALYTICS  
INDICATIVE CONTENT  
• SOCIAL DATA ANALYTICS AND THE FACTORS OF CONTEXT, CO

In [52]:
  # Spot check of get_courseCompletion on page index 40
  page = 40
  # get the text of given page
  passed_page = get_pageContent(gt_pdf_reader,page)
  print(passed_page)
  print(get_courseCompletion(passed_page))

NEW ZEALAND CERTIFICATE IN INFORMATION TECHNOLOGY ESSENTIALS (LEVEL 4)  – PROGRAMME DOCUMENT  41 IT5116  DATABASE ADMINISTRATION  
 
LEVEL  5  CREDITS  15 
LEARNING HOURS  TUTOR -DIRECTED   85   
 SELF-DIRECTED   65 
AIM 
THIS COURSE INTRODUCES STUDENTS TO KEY DATABASE CONCEPTS AS WELL AS DEVELOPING SKILLS TO MANAGE AND ADMINISTRATE A 
RELATIONAL DATABASE.  
LEARNING OUTCOMES  
BY THE END OF THIS COURSE THE STUDENT WILL BE ABLE TO:  
1. DESCRIBE  AND APPLY  DATABASE ADMINISTRATION AND QUERY LANGUAGES (SQL) TO MEET ORGANISATIONAL DATA STORAGE 
AND RETRIEVAL REQUIREMENTS, INCLUDING DATABASE MANAGEMENT (DBMS) OPTIMISATION, CLEANSING, SECURITY, AND 
BACKUPS.  
2. IMPLEMENT THE FUNDAMENTAL  KNOWLEDGE OF DATA MODELLING.  
3. APPLY FUNDAMENTAL MATHEMATICAL AND LOGICAL CONCEPTS FOR A RELATIONAL DATABASE.  
4. APPLY P ROBLEM -SOLVING TECHNIQUES TO DATABASE RELATED ISSUES.  
CONTENT  
• RELATIONAL DATABASE CONCEPTS  
• DATABASE MANAGEMENT SYSTEM (DBMS)  
• BASIC SQL COMMA NDS 
• RELATIONAL DATAB

In [89]:
# Spot check of get_courseAssessments on whichever page is currently held in
# passed_page (set by an earlier cell); prints the three parallel lists.
method, weight, learning = get_courseAssessments(passed_page)

print(method)
print(weight)
print(learning)

# print(type(passed_page))
# print(len(passed_page))

# string_array = passed_page.splitlines()

# print(string_array)
# print(len(string_array))
# index = 0
# for word in string_array:
#   index += 1
#   print(index ,word)

['ASSIGNMENT1', 'ASSIGNMENT2', 'EXAMINATION']
['30', '30', '40']
[['1', '2', '4'], ['2', '3', '5'], ['1', '2', '3', '4', '5']]


In [None]:
# Close the PDF file
gt_pdf_file.close()

# Disconnect from MySQL database
conn.close()

In [None]:
# Scratch check: which 6-character words does the course-code pattern find in
# a page header line? Recorded output: ['IT5119']
string = 'New Zealand Certificate in Information Technology Essentials (Level 4)  – Programme Document  44 IT5119  IT Technical Support'
#string = '44'
pattern = r"\b\w{6}\b"

matches = re.findall(pattern, string)

print(matches)



['IT5119']


In [None]:
# Scratch check: the pattern needs at least one word character followed by
# four trailing digits.
string = "IT519"
#pattern = r"\b\w{6}\b"
#pattern = r"\b^[a-zA-Z]{2}\w+"
pattern = r"\w+\d{4}$"
matches = re.findall(pattern, string)

print(matches)  # Output: [] - "IT519" ends with only three digits

[]


In [None]:
# Scratch check: '^' anchors the pattern to the start of the string, so at
# most the first one or two leading digits can match.
string = "1234 is a valid code, XY7890 too, 1A2345 and ABCD are not 1 23"
pattern = r"\b^\d{1,2}"
matches = re.findall(pattern, string)

print(matches)  # Output: ['12']

['12']


In [None]:
# Scratch check: counting down with range() and skipping values with
# continue; prints 5 down to 0.
for i in range(10,-1,-1):
  if i > 5: continue
  print(i)

5
4
3
2
1
0


In [None]:
  # regular expression patterns for code
  pattern1 = r"\b\w{6}\b" # word with 6 positions
  pattern2 = r"\b^[a-zA-Z]{2}\w+" # start with 2 characters
  pattern3 = r"\d{1}$" # ends with a single digit

  # test input; the same name is reused for the result below
  matches3 = '123n'

  #matches1 = re.findall(pattern1, course_code)
  #matches2 = re.findall(pattern2, course_code)
  matches3 = re.findall(pattern3, matches3)

  print(matches3)  # Output: [] - '123n' ends with a letter


[]
