## Installing necessary libraries

In [2]:
%%bash

# Dependencies
pip install docx2txt
pip install strsimpy
pip install python-docx
pip install pandas

if ls docx2csv >/dev/null 2>&1; then
    echo "docx2csv exists."
else
    echo "Folder does not exist. Cloning docx2csv."
    git clone https://github.com/ivbeg/docx2csv.git
fi

docx2csv exists.


In [3]:
%%bash

source .env
cd docx2csv && echo "$PASSWORD" | sudo -S python3 setup.py install

running install
running bdist_egg
running egg_info
writing docx2csv.egg-info/PKG-INFO
writing dependency_links to docx2csv.egg-info/dependency_links.txt
writing entry points to docx2csv.egg-info/entry_points.txt
writing requirements to docx2csv.egg-info/requires.txt
writing top-level names to docx2csv.egg-info/top_level.txt
reading manifest file 'docx2csv.egg-info/SOURCES.txt'
writing manifest file 'docx2csv.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build/bdist.linux-x86_64/egg
creating build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/converter.py -> build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/core.py -> build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/__init__.py -> build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/__main__.py -> build/bdist.linux-x86_64/egg/docx2csv
byte-compiling build/bdist.linux-x86_64/egg/docx2csv/converter.py to co

[sudo] password for edward: 

In [4]:
# ----- TEST DATA INPUT -----

# Computer Science Test Data.
# CURRENT_MAPPING="Lists_ComputerScience.docx"
# ORIGINAL_MAPPING="Original-Mapping-ComputerScience.csv"

# InformationSecurity Test Data.
CURRENT_MAPPING="Lists_MonashEngineering.docx"
ORIGINAL_MAPPING="Original-Mapping-MonashEngineering.csv"

In [5]:
# importing necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [6]:
# extract tables from word document
from docx2csv import extract_tables, extract
tables = extract_tables(CURRENT_MAPPING)

In [7]:
from docx import Document
document = Document(CURRENT_MAPPING)

In [8]:
def read_docx_table(document,table_num):
  table = document.tables[table_num-1]
  data = [[cell.text for cell in row.cells] for row in table.rows]
  df = pd.DataFrame(data)
  return df

## PLO TABLE

In [9]:
# Creating a dataframe for PLOs and it will accept 'n' number of PLOs
table_num=1
df = read_docx_table(document,table_num)
df.head(n=12)

Unnamed: 0,0,1
0,PO1,"Apply knowledge of mathematics, natural scienc..."
1,PO2,"Identify, formulate, survey research literatur..."
2,PO3,Design solutions for complex Software engineer...
3,PO4,Conduct investigations of complex Software eng...
4,PO5,"Create, select and apply appropriate technique..."
5,PO6,Apply reasoning informed by contextual knowled...
6,PO7,Understand and evaluate the sustainability and...
7,PO8,Apply ethical principles and commit to profess...
8,PO9,Communicate effectively on complex Software en...
9,PO10,"Function effectively as an individual, and as ..."


In [10]:
q1 = df.copy()
q1

Unnamed: 0,0,1
0,PO1,"Apply knowledge of mathematics, natural scienc..."
1,PO2,"Identify, formulate, survey research literatur..."
2,PO3,Design solutions for complex Software engineer...
3,PO4,Conduct investigations of complex Software eng...
4,PO5,"Create, select and apply appropriate technique..."
5,PO6,Apply reasoning informed by contextual knowled...
6,PO7,Understand and evaluate the sustainability and...
7,PO8,Apply ethical principles and commit to profess...
8,PO9,Communicate effectively on complex Software en...
9,PO10,"Function effectively as an individual, and as ..."


In [11]:
# assigning count vectorizer
count_vectorizer = CountVectorizer(stop_words='english', min_df=0.005)

In [12]:
# Remove integers

# Data preprocessing for PLO dataframe
q1[1] = q1[1].str.lower()
corpus = q1[1].tolist()
corpii = count_vectorizer.fit_transform(corpus)
corpus

['apply knowledge of mathematics, natural science, engineering fundamentals and specialization in software engineering to the solution of complex engineering problems;\n',
 'identify, formulate, survey research literature and analyze complex software engineering problems reaching substantiated conclusions using first principles of mathematics, natural sciences and engineering sciences;\n',
 'design solutions for complex software engineering problems and design systems, components or processes that meet specified needs;',
 'conduct investigations of complex software engineering problems using research-based knowledge and research methods including design of experiments, analysis and interpretation of data, and synthesis of information to provide valid conclusions;',
 'create, select and apply appropriate techniques, resources, and modern engineering and it tools, including prediction and modelling, to complex software engineering problems, with an understanding of the limitations;',
 'a

In [13]:
corpii

<12x128 sparse matrix of type '<class 'numpy.int64'>'
	with 182 stored elements in Compressed Sparse Row format>

In [14]:
# extracting features names from PLO table
feature_names = count_vectorizer.get_feature_names_out()
feature_names

array(['ability', 'able', 'activities', 'analysis', 'analyze', 'apply',
       'appropriate', 'assess', 'based', 'broadest', 'change', 'clear',
       'commit', 'communicate', 'community', 'complex', 'components',
       'comprehend', 'conclusions', 'conduct', 'consequent', 'context',
       'contexts', 'contextual', 'create', 'cultural', 'data', 'decision',
       'demonstrate', 'design', 'disciplinary', 'diverse',
       'documentation', 'economic', 'effective', 'effectively', 'engage',
       'engineering', 'environmental', 'ethical', 'ethics', 'evaluate',
       'experiments', 'formulate', 'function', 'fundamentals', 'health',
       'identify', 'impact', 'including', 'independent', 'individual',
       'information', 'informed', 'instructions', 'interpretation',
       'investigations', 'issues', 'knowledge', 'large', 'leader',
       'learning', 'legal', 'life', 'limitations', 'literature', 'long',
       'make', 'making', 'manage', 'management', 'mathematics', 'meet',
       'me

In [15]:
len(feature_names)

128

In [16]:
# Converting features to vector form and create a dataframe
X1 = pd.DataFrame(corpii.toarray(), columns=feature_names)

## CLO TABLE

In [17]:
# Creating a dataframe for CLOs and it will accept 'n' number of CLOs
table_num=2
df1 = read_docx_table(document,table_num)
p1 = df1.copy()
p1

Unnamed: 0,0,1
0,LO1-MA,Describe the underlying theoretical basis of t...
1,LO2-MA,Design a relational database model based on th...
2,LO3-MA,Implement a database based on a sound database...
3,LO4-MA,Contrast the differences between non-relationa...
4,LO5-MA,Develop programming structures within a databa...
...,...,...
82,LO2-MA,Evaluate and apply the basic tools and techniq...
83,LO3-MA,"Evaluate the interconnection of quality, heath..."
84,LO4-MA,Understand and evaluate the need for sustainab...
85,LO5-MA,Explain the contract laws and apply ethical pr...


In [18]:
# Data preprocessing for CLO dataframe
p1[1] = p1[1].str.lower()
corpus11 = p1[1].tolist()
corpii11 = count_vectorizer.fit_transform(corpus11)

In [19]:
# extracting features names from CLO table
feature_names1 = count_vectorizer.get_feature_names_out()

In [20]:
len(feature_names1)

454

In [21]:
# Converting features to vector form and create a dataframe
X2 = pd.DataFrame(corpii11.toarray(), columns=feature_names1)
X2

Unnamed: 0,ability,abstract,acceptable,accordance,achieve,achieved,actions,activities,adopt,advanced,...,visualisations,vocabulary,wired,wireless,work,working,worst,write,writing,written
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
83,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
84,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
85,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
X2.head()

Unnamed: 0,ability,abstract,acceptable,accordance,achieve,achieved,actions,activities,adopt,advanced,...,visualisations,vocabulary,wired,wireless,work,working,worst,write,writing,written
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# adding column index to the CLO table
U2 = pd.concat([df1[0], X2], axis=1)
U2.set_index(0, inplace=True)

In [24]:
U2.head()

Unnamed: 0_level_0,ability,abstract,acceptable,accordance,achieve,achieved,actions,activities,adopt,advanced,...,visualisations,vocabulary,wired,wireless,work,working,worst,write,writing,written
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
LO1-MA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LO2-MA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LO3-MA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LO4-MA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LO5-MA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# adding column index to the PLO table
U1 = pd.concat([df[0], X1], axis=1)
U1.set_index(0, inplace=True)

In [26]:
U1

Unnamed: 0_level_0,ability,able,activities,analysis,analyze,apply,appropriate,assess,based,broadest,...,teams,techniques,technological,tools,understand,understanding,using,valid,work,write
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PO1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PO2,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
PO3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PO4,0,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,1,1,0,0
PO5,0,0,0,0,0,1,1,0,0,0,...,0,1,0,1,0,1,0,0,0,0
PO6,0,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
PO7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
PO8,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PO9,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
PO10,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


## Intersection method for both CLOs and PLOs

### Generalised list of words

In [27]:
append_words = list(map(str.lower,['Cite', 'Define', 'Describe', 'Draw', 'Enumerate', 'Identify' 'Index', 'Indicate', 'Label', 'List', 'Match', 'Meet', 'Name', 'Outline', 'Point', 'Quote', 'Read', 'Recall', 'Recite', 'Recognize', 'Record', 'Repeat', 'Reproduce','Review',
'Select', 'State', 'Study', 'Tabulate', 'Trace', 'Write', 'Add', 'Approximate', 'Articulate', 'Associate', 'Characterize', 'Clarify', 'Classify', 'Compare', 'Compute', 'Contrast', 'Convert', 'Defend', 'Detail', 'Differentiate',
'Discuss', 'Distinguish', 'Elaborate', 'Estimate', 'Example', 'Explain', 'Express', 'Extend', 'Extrapolate', 'Factor', 'Generalize', 'Give', 'Infer', 'Interact', 'Interpolate', 'Interpret', 'Observe', 'Paraphrase', 'Picture graphically',
'Predict', 'Rewrite', 'Subtract', 'Summarize', 'Translate', 'Visualize', 'Acquire', 'Adapt', 'Allocate', 'Alphabetize', 'Apply', 'Ascertain', 'Assign', 'Attain', 'Avoid', 'Back up', 'Calculate', 'Capture', 'Change', 'Complete', 'Construct', 
'Customize', 'Demonstrate', 'Depreciate', 'Derive', 'Determine', 'Diminish', 'Discover', 'Employ', 'Examine', 'Exercise', 'Explore', 'Expose', 'Figure', 'Graph', 'Handle', 'Illustrate', 'Interconvert', 'Investigate', 'Manipulate', 'Modify', 
'Operate', 'Personalize', 'Plot','Practice', 'Prepare', 'Price', 'Process', 'Produce', 'Project', 'Provide', 'Relate', 'Round off', 'Sequence', 'Show', 'Simulate', 'Sketch', 'Solve', 'Subscribe', 'Transcribe', 'Use', 'Analyze', 'Audit', 
'Blueprint', 'Breadboard', 'Break down', 'Confirm', 'Correlate', 'Detect', 'Diagnose', 'Diagram', 'Discriminate', 'Dissect', 'Document', 'Ensure', 'Figure out', 'File', 'Group', 'Interrupt', 'Inventory', 'Layout', 'Manage', 'Maximize', 
'Minimize', 'Optimize', 'Order', 'Point out', 'Prioritize', 'Proofread', 'Query', 'Separate', 'Subdivide', 'Train', 'Transform', 'Appraise', 'Assess', 'Conclude', 'Counsel', 'Criticize', 'Critique', 'Evaluate', 'Grade', 'Hire', 'Judge', 
'Justify', 'Measure', 'Prescribe', 'Rank', 'Rate', 'Recommend', 'Release', 'Support', 'Test', 'Validate', 'Verify', 'Abstract', 'Animate', 'Arrange', 'Assemble', 'Budget', 'Categorize', 'Code', 'Combine', 'Compile', 'Compose', 'Cope', 
'Correspond', 'Create', 'Cultivate', 'Debug', 'Depict', 'Design', 'Develop', 'Devise', 'Dictate', 'Enhance', 'Facilitate', 'Format', 'Formulate', 'Generate', 'Import', 'Improve', 'Incorporate', 'Integrate', 'Interface', 'Join', 'Lecture', 
'Model', 'Network', 'Organize', 'Overhaul', 'Plan', 'Portray', 'Program', 'Rearrange', 'Reconstruct', 'Reorganize', 'Revise', 'Specify']))

In [28]:
# using + operator to concat the generalised list of words to the PLO list
train_column = list(feature_names) + append_words

In [29]:
# CLO list of words
test_column = feature_names1
test_column

array(['ability', 'abstract', 'acceptable', 'accordance', 'achieve',
       'achieved', 'actions', 'activities', 'adopt', 'advanced', 'agreed',
       'algebra', 'algorithmic', 'algorithms', 'allocate', 'analyse',
       'analysis', 'applications', 'apply', 'approach', 'approaches',
       'appropriate', 'appropriately', 'architectural', 'architectures',
       'artifacts', 'aspects', 'assembly', 'assess', 'assumptions',
       'assurance', 'attributes', 'australia', 'automated',
       'availability', 'available', 'backend', 'balances', 'based',
       'basic', 'basis', 'beam', 'behaviour', 'behaviours', 'best', 'big',
       'board', 'boundaries', 'brief', 'business', 'c3', 'c4', 'c5', 'c6',
       'calculus', 'capability', 'carry', 'case', 'chemical', 'choose',
       'circuit', 'circuits', 'civil', 'classes', 'client', 'clients',
       'code', 'codes', 'collaboration', 'collection', 'combination',
       'commit', 'commitment', 'common', 'communicate', 'communicated',
       'comm

In [30]:
# Intersection method for extracting common column names from the tables (both CLO AND PLO)
# comparing whether the CLO column name is present in the PLO column names or not
train_column = list(feature_names) + append_words # (PLO table ) (# using + operator to concat PLO words and list of generalized words)
test_column = list(feature_names1)   # (CLO table)

In [31]:
# This is the column names from both the tables (using intersection)
common_column = list(set(train_column).intersection(set(test_column)))
common_column

['allocate',
 'contrast',
 'needs',
 'discuss',
 'processes',
 'techniques',
 'model',
 'meet',
 'community',
 'systems',
 'professional',
 'justify',
 'explain',
 'limitations',
 'context',
 'relevant',
 'effectively',
 'work',
 'consequent',
 'investigate',
 'ability',
 'define',
 'health',
 'activities',
 'recognize',
 'validate',
 'including',
 'support',
 'sciences',
 'multi',
 'norms',
 'compare',
 'solutions',
 'evaluate',
 'resources',
 'environmental',
 'manage',
 'use',
 'analysis',
 'mathematics',
 'devise',
 'responsibilities',
 'select',
 'member',
 'ethical',
 'software',
 'plan',
 'using',
 'life',
 'understand',
 'design',
 'solve',
 'ethics',
 'issues',
 'verify',
 'components',
 'problems',
 'communicate',
 'conduct',
 'modern',
 'develop',
 'natural',
 'write',
 'solution',
 'identify',
 'abstract',
 'debug',
 'engineering',
 'cultural',
 'need',
 'based',
 'safety',
 'legal',
 'network',
 'apply',
 'project',
 'sustainability',
 'knowledge',
 'practice',
 'principle

In [32]:
print(common_column)

['allocate', 'contrast', 'needs', 'discuss', 'processes', 'techniques', 'model', 'meet', 'community', 'systems', 'professional', 'justify', 'explain', 'limitations', 'context', 'relevant', 'effectively', 'work', 'consequent', 'investigate', 'ability', 'define', 'health', 'activities', 'recognize', 'validate', 'including', 'support', 'sciences', 'multi', 'norms', 'compare', 'solutions', 'evaluate', 'resources', 'environmental', 'manage', 'use', 'analysis', 'mathematics', 'devise', 'responsibilities', 'select', 'member', 'ethical', 'software', 'plan', 'using', 'life', 'understand', 'design', 'solve', 'ethics', 'issues', 'verify', 'components', 'problems', 'communicate', 'conduct', 'modern', 'develop', 'natural', 'write', 'solution', 'identify', 'abstract', 'debug', 'engineering', 'cultural', 'need', 'based', 'safety', 'legal', 'network', 'apply', 'project', 'sustainability', 'knowledge', 'practice', 'principles', 'leader', 'management', 'process', 'produce', 'code', 'documentation', 'det

In [33]:
len(common_column)

105

In [34]:
# Filter the common column values from the CLO table
U3 = U2.filter(list(common_column), axis=1)

In [35]:
U3.head()

Unnamed: 0_level_0,allocate,contrast,needs,discuss,processes,techniques,model,meet,community,systems,...,organize,tools,data,complex,commit,ensure,fundamentals,assess,societal,diverse
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
LO1-MA,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LO2-MA,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LO3-MA,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
LO4-MA,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
LO5-MA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
# extracting first row from PLO table and make a dataframe
Cs = []
for x in range(len(df)):
    Cs.append(U1.loc[['PO'+str(x+1)]])

In [37]:
# Concatenating these extracted each PLOs with 'n' number of CLOs
Dds = []
for x in range(len(df)):
    Dds.append(pd.concat([Cs[x],U3], sort=True))

In [38]:
# Filling the nan values of the concatenated dataframes
Ds = []
for x in range(len(df)):
    Ds.append(Dds[x].fillna(0))

## Calculate Cosine similarity

In [39]:
# Calculate cosine similarity for concatenated dataframes and create a new dataframe
for x in range(len(df)):
    Dds[x] = pd.DataFrame(cosine_similarity(Ds[x], dense_output=True))
Ds

[        ability  able  abstract  activities  allocate  analysis  analyze  \
 0                                                                          
 PO1           0   0.0       0.0           0       0.0         0      0.0   
 LO1-MA        0   0.0       0.0           0       0.0         0      0.0   
 LO2-MA        0   0.0       0.0           0       0.0         0      0.0   
 LO3-MA        0   0.0       0.0           0       0.0         0      0.0   
 LO4-MA        0   0.0       0.0           0       0.0         0      0.0   
 ...         ...   ...       ...         ...       ...       ...      ...   
 LO2-MA        0   0.0       0.0           0       0.0         0      0.0   
 LO3-MA        0   0.0       0.0           0       0.0         0      0.0   
 LO4-MA        0   0.0       0.0           0       0.0         0      0.0   
 LO5-MA        0   0.0       0.0           0       0.0         0      0.0   
 LO6-MA        0   0.0       0.0           1       0.0         0      0.0   

In [40]:
# Extract the '0'th column because it has the CLO-PLO  cosine similarity values. We are neglecting the remaining ones.
# Renaming the '0'th column name to 'Pn' ['P1, P2, P3, P4, ... 'Pn']
for x in range(len(df)):
    Dds[x].rename(columns = {0 :'PO'+str(x+1)}, inplace = True)

Dds

[         PO1         1         2         3         4    5    6         7  \
 0   1.000000  0.000000  0.000000  0.000000  0.000000  0.0  0.0  0.372678   
 1   0.000000  1.000000  0.577350  0.000000  0.707107  0.0  0.0  0.000000   
 2   0.000000  0.577350  1.000000  0.666667  0.408248  0.0  0.0  0.000000   
 3   0.000000  0.000000  0.666667  1.000000  0.000000  0.0  0.0  0.000000   
 4   0.000000  0.707107  0.408248  0.000000  1.000000  0.0  0.0  0.000000   
 ..       ...       ...       ...       ...       ...  ...  ...       ...   
 83  0.070711  0.000000  0.000000  0.000000  0.000000  0.0  0.0  0.000000   
 84  0.212132  0.000000  0.000000  0.000000  0.000000  0.0  0.0  0.105409   
 85  0.461880  0.000000  0.000000  0.000000  0.000000  0.0  0.0  0.258199   
 86  0.269680  0.000000  0.000000  0.000000  0.000000  0.0  0.0  0.100504   
 87  0.521749  0.000000  0.000000  0.000000  0.000000  0.0  0.0  0.222222   
 
            8         9  ...        78        79        80        81      

In [41]:
# Concatenating each  '0'th column from different cosine similarity dataframes
Ddn = []
for x in range(len(df)):
    Ddn.append(Dds[x]['PO'+str(x+1)])

d = pd.concat(Ddn, axis=1)
d

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.298142,0.230940,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.447214,0.230940,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
83,0.070711,0.064550,0.081650,0.000000,0.212132,0.063246,0.079057,0.100000,0.000000,0.000000,0.000000,0.182574
84,0.212132,0.129099,0.081650,0.063246,0.141421,0.569210,0.316228,0.400000,0.121716,0.000000,0.000000,0.182574
85,0.461880,0.263523,0.200000,0.154919,0.288675,0.309839,0.839146,0.244949,0.198762,0.000000,0.143223,0.149071
86,0.269680,0.184637,0.077850,0.060302,0.202260,0.361814,0.226134,0.953463,0.116052,0.000000,0.000000,0.261116


In [42]:
# '0'th column gives us 1 which means each PLO map with own PLO.
# So we are removing that column.
dd = d[1:]
dd

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.298142,0.230940,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.447214,0.230940,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
83,0.070711,0.064550,0.081650,0.000000,0.212132,0.063246,0.079057,0.100000,0.000000,0.000000,0.000000,0.182574
84,0.212132,0.129099,0.081650,0.063246,0.141421,0.569210,0.316228,0.400000,0.121716,0.000000,0.000000,0.182574
85,0.461880,0.263523,0.200000,0.154919,0.288675,0.309839,0.839146,0.244949,0.198762,0.000000,0.143223,0.149071
86,0.269680,0.184637,0.077850,0.060302,0.202260,0.361814,0.226134,0.953463,0.116052,0.000000,0.000000,0.261116


In [43]:
# resetting index
dd.reset_index(inplace = True)
dd.drop(['index'], axis=1, inplace = True)
dd

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dd.drop(['index'], axis=1, inplace = True)


Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.298142,0.230940,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.447214,0.230940,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
82,0.070711,0.064550,0.081650,0.000000,0.212132,0.063246,0.079057,0.100000,0.000000,0.000000,0.000000,0.182574
83,0.212132,0.129099,0.081650,0.063246,0.141421,0.569210,0.316228,0.400000,0.121716,0.000000,0.000000,0.182574
84,0.461880,0.263523,0.200000,0.154919,0.288675,0.309839,0.839146,0.244949,0.198762,0.000000,0.143223,0.149071
85,0.269680,0.184637,0.077850,0.060302,0.202260,0.361814,0.226134,0.953463,0.116052,0.000000,0.000000,0.261116


In [44]:
# store the matrix into csv file
dd.to_csv('pseudocodematrix.csv', index=False)

In [45]:
# print the matrix
print(dd)

         PO1       PO2       PO3       PO4       PO5       PO6       PO7  \
0   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1   0.000000  0.000000  0.298142  0.230940  0.000000  0.000000  0.000000   
2   0.000000  0.000000  0.447214  0.230940  0.000000  0.000000  0.000000   
3   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
4   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
..       ...       ...       ...       ...       ...       ...       ...   
82  0.070711  0.064550  0.081650  0.000000  0.212132  0.063246  0.079057   
83  0.212132  0.129099  0.081650  0.063246  0.141421  0.569210  0.316228   
84  0.461880  0.263523  0.200000  0.154919  0.288675  0.309839  0.839146   
85  0.269680  0.184637  0.077850  0.060302  0.202260  0.361814  0.226134   
86  0.521749  0.340207  0.258199  0.200000  0.372678  0.333333  0.416667   

         PO8       PO9      PO10      PO11      PO12  
0   0.000000  0.000000  0.000000

In [52]:
# TODO: Classify the verbs in each of the learning outcomes
p1

# Iteration 1: Assume both CLOs and POs are classifiable into Cognitive, Affective, Psychomotor Levels. 
# If levels match, boost the CLO to PO coefficient by 0.1
# If levels different, don't boost

# Iteration 2: 

# for x in range(len(df)):
#     print(dd['PO'+str(x+1)])

Unnamed: 0,0,1
0,LO1-MA,describe the underlying theoretical basis of t...
1,LO2-MA,design a relational database model based on th...
2,LO3-MA,implement a database based on a sound database...
3,LO4-MA,contrast the differences between non-relationa...
4,LO5-MA,develop programming structures within a databa...
...,...,...
82,LO2-MA,evaluate and apply the basic tools and techniq...
83,LO3-MA,"evaluate the interconnection of quality, heath..."
84,LO4-MA,understand and evaluate the need for sustainab...
85,LO5-MA,explain the contract laws and apply ethical pr...


## Setting threshold value (taking min and max of each column and divided by 2)
## threshold value = (min +max)/2

In [47]:

# TODO: Alter the threshold based on matching hierarchy type & bloom verb instead of simply using (column_max+column_min)/2

# Setting threshold value 
# Taking min max average of each column and set that as a threshold value

# This will change the coefficients into 0 or 1 mappings in the dd dataframe
for x in range(len(df)):
    tes = dd['PO'+str(x+1)].values.min()
    tes1 = dd['PO'+str(x+1)].values.max()
    tt1 = (tes+tes1)/2
    
    if tt1 == 0:
      dd['PO'+str(x+1)] = dd['PO'+str(x+1)] 
    else:
      dd['PO'+str(x+1)] = dd['PO'+str(x+1)].apply(lambda x: 1 if x >= tt1 else 0)
# dd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dd['PO'+str(x+1)] = dd['PO'+str(x+1)].apply(lambda x: 1 if x >= tt1 else 0)


In [48]:
# dd

dd.to_csv('PLO-CLOmapping.csv', index=False)

PermissionError: [Errno 13] Permission denied: 'PLO-CLOmapping.csv'

In [None]:
dd.head()

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,0,0
2,0,0,1,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# human generated output
d= pd.read_csv(ORIGINAL_MAPPING)
d.head()

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,1,0,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,0,0


In [None]:
df3 = d.copy()

In [None]:
dd

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,0,0
2,0,0,1,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
82,0,0,0,0,0,0,0,0,0,0,0,0
83,0,0,0,0,0,1,0,0,0,0,0,0
84,1,0,0,0,1,1,1,0,0,0,1,0
85,0,0,0,0,0,1,0,1,0,0,0,1


In [None]:
df3

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,1,0,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
82,0,0,0,0,0,0,0,0,0,0,0,1
83,0,0,0,0,0,1,0,0,0,0,0,0
84,0,0,0,0,0,0,1,0,0,0,0,0
85,0,0,0,0,0,0,0,1,0,0,0,0


In [None]:
for x in range(len(df)):
  df3['PO'+str(x+1)] = np.where(dd['PO'+str(x+1)] == df3['PO'+str(x+1)], 'True', 'False')

In [None]:
df3.head()

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,False,True,True,True,True,True,True,True,True,True,True,True
1,True,False,True,False,True,True,True,True,True,True,True,True
2,True,True,True,False,True,True,True,True,True,True,True,True
3,False,True,True,True,True,True,True,True,True,True,True,True
4,True,True,False,True,True,True,True,True,True,True,True,True


In [None]:
for x in range(len(df)):
  df3['PO'+str(x+1)] = df3['PO'+str(x+1)].replace('True', 1)
  df3['PO'+str(x+1)] = df3['PO'+str(x+1)].replace('False', 0)

In [None]:
df3.head()

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,0,1,1,1,1,1,1,1,1,1,1,1
1,1,0,1,0,1,1,1,1,1,1,1,1
2,1,1,1,0,1,1,1,1,1,1,1,1
3,0,1,1,1,1,1,1,1,1,1,1,1
4,1,1,0,1,1,1,1,1,1,1,1,1


In [None]:
# calculating accuracy of the table
df3['acc'] = df3.mean(axis=1)
df3.head(n=100)

Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12,acc
0,0,1,1,1,1,1,1,1,1,1,1,1,0.916667
1,1,0,1,0,1,1,1,1,1,1,1,1,0.833333
2,1,1,1,0,1,1,1,1,1,1,1,1,0.916667
3,0,1,1,1,1,1,1,1,1,1,1,1,0.916667
4,1,1,0,1,1,1,1,1,1,1,1,1,0.916667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,1,1,1,1,1,1,1,1,1,1,1,0,0.916667
83,1,1,1,1,1,1,1,1,1,1,1,1,1.000000
84,0,1,1,1,0,0,1,1,1,1,0,1,0.666667
85,1,1,1,1,1,0,1,1,1,1,1,0,0.833333


In [None]:
df4 = pd.concat([df1[0], df3], axis=1)
df4.head(n=100)

Unnamed: 0,0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12,acc
0,LO1-MA,0,1,1,1,1,1,1,1,1,1,1,1,0.916667
1,LO2-MA,1,0,1,0,1,1,1,1,1,1,1,1,0.833333
2,LO3-MA,1,1,1,0,1,1,1,1,1,1,1,1,0.916667
3,LO4-MA,0,1,1,1,1,1,1,1,1,1,1,1,0.916667
4,LO5-MA,1,1,0,1,1,1,1,1,1,1,1,1,0.916667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,LO2-MA,1,1,1,1,1,1,1,1,1,1,1,0,0.916667
83,LO3-MA,1,1,1,1,1,1,1,1,1,1,1,1,1.000000
84,LO4-MA,0,1,1,1,0,0,1,1,1,1,0,1,0.666667
85,LO5-MA,1,1,1,1,1,0,1,1,1,1,1,0,0.833333


In [None]:
df4.set_index(0, inplace=True)
df4.head(n=100)

df4.to_csv('WasMappingSuccessful.csv')

In [None]:
df4['acc'].mean()

0.8544061302681993