In [1]:
import pandas as pd
import numpy as np

import nltk 
from nltk.corpus import wordnet as wn

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from nltk.corpus import wordnet_ic

In [2]:
pos_df = pd.read_csv('#pos.csv',header=None)
pos_df.columns=['POs']
pos_df.shape

(12, 1)

In [3]:
pos_df.head()

Unnamed: 0,POs
0,Engineering knowledge: Apply the knowledge of ...
1,"Problem analysis: Identify, formulate, review ..."
2,Design/development of solutions: Design soluti...
3,Conduct investigations of complex problems: Us...
4,"Modern tool usage: Create, select, and apply a..."


In [4]:
po_tokens=[]
for po in pos_df['POs']:
    po_tokens.append(word_tokenize(po.lower()))

In [5]:
print(pos_df['POs'][0])

Engineering knowledge: Apply the knowledge of mathematics, science, engineering fundamentals, and an engineering specialization to the solution of complex engineering problems.


In [6]:
print(po_tokens[0])

['engineering', 'knowledge', ':', 'apply', 'the', 'knowledge', 'of', 'mathematics', ',', 'science', ',', 'engineering', 'fundamentals', ',', 'and', 'an', 'engineering', 'specialization', 'to', 'the', 'solution', 'of', 'complex', 'engineering', 'problems', '.']


In [7]:
eng_stopwords = stopwords.words('english')
eng_stopwords.extend([',','.',"'",":"])

In [8]:
words = []
for list_1 in po_tokens:
    main_words = []
    for word in list_1:
        if word not in eng_stopwords:
            if word not in main_words:
                main_words.append(word)
    words.append(main_words)

In [9]:
print(words[0])

['engineering', 'knowledge', 'apply', 'mathematics', 'science', 'fundamentals', 'specialization', 'solution', 'complex', 'problems']


In [10]:
wnet = WordNetLemmatizer()

In [11]:
for i in range(len(words)):
    for j in range(len(words[i])):
        lemm = wnet.lemmatize(words[i][j], pos='n')
        words[i][j] = lemm

In [12]:
print(words[0])

['engineering', 'knowledge', 'apply', 'mathematics', 'science', 'fundamental', 'specialization', 'solution', 'complex', 'problem']


In [13]:
po_synonym=[]
for i in range(len(words)):
    po_syn = []
    for j in range(len(words[i])):
        for syn in wn.synsets(words[i][j]): 
#             print("--WORD-- : ",words[i][j])
            if syn not in po_syn:
#                 print("syn : ",syn.name())
                po_syn.append(syn)
    po_synonym.append(po_syn)

In [14]:
print(po_synonym[0])

[Synset('technology.n.01'), Synset('engineering.n.02'), Synset('engineering.n.03'), Synset('engineer.v.01'), Synset('mastermind.v.01'), Synset('cognition.n.01'), Synset('use.v.01'), Synset('apply.v.02'), Synset('apply.v.03'), Synset('put_on.v.07'), Synset('lend_oneself.v.01'), Synset('give.v.20'), Synset('practice.v.04'), Synset('enforce.v.01'), Synset('apply.v.09'), Synset('apply.v.10'), Synset('mathematics.n.01'), Synset('science.n.01'), Synset('skill.n.02'), Synset('fundamental.n.01'), Synset('fundamental.n.02'), Synset('cardinal.s.01'), Synset('fundamental.s.02'), Synset('fundamental.s.03'), Synset('specialization.n.01'), Synset('specialization.n.02'), Synset('specialization.n.03'), Synset('solution.n.01'), Synset('solution.n.02'), Synset('solution.n.03'), Synset('solution.n.04'), Synset('solution.n.05'), Synset('complex.n.01'), Synset('complex.n.02'), Synset('complex.n.03'), Synset('building_complex.n.01'), Synset('complex.a.01'), Synset('problem.n.01'), Synset('problem.n.02'), Sy

In [15]:
## EXAMPLE
dog=wn.synsets('dog', pos=wn.NOUN)[0] #get the first noun synonym of the word "dog"
cat=wn.synsets('cat', pos=wn.NOUN)[0]
rose=wn.synsets('rose', pos=wn.NOUN)[0]
flower=wn.synsets('flower', pos=wn.NOUN)[0]

brown_ic = wordnet_ic.ic('ic-brown.dat') #load the brown corpus to compute the IC

print(rose.res_similarity(flower, brown_ic),rose.res_similarity(dog, brown_ic),cat.res_similarity(dog, brown_ic))

6.0283161048744525 2.2241504712318556 7.911666509036577


In [16]:
cos_df = pd.read_csv('#new_cos.csv',header=None,encoding = 'unicode_escape')
cos_df.columns=['category','sub_code','Cos']

In [17]:
cos_df.shape

(287, 3)

In [18]:
cos_df.head()

Unnamed: 0,category,sub_code,Cos
0,c1,ETMA 101 CO1,Students will be able to apply the knowledge o...
1,c1,ETMA 101 CO2,Students will gain thorough knowledge of the a...
2,c1,ETMA 101 CO3,"Students will learn theory of matrices, its in..."
3,c1,ETMA 101 CO4,Students will be acquainted with first order d...
4,c6,ETPH 103 CO1,Will be able to understand the concepts of int...


In [19]:
cos = cos_df['Cos']
cos.shape

(287,)

In [20]:
co_tokens=[]
for co in cos_df['Cos']:
    co_tokens.append(word_tokenize(co.lower()))

In [21]:
print(cos_df['Cos'][0])

Students will be able to apply the knowledge of successive differentiation in various problems, different tests to find the nature of the infinite series and Taylors and Maclaurins series to find the expansion of mathematical functions.


In [22]:
print(co_tokens[0])

['students', 'will', 'be', 'able', 'to', 'apply', 'the', 'knowledge', 'of', 'successive', 'differentiation', 'in', 'various', 'problems', ',', 'different', 'tests', 'to', 'find', 'the', 'nature', 'of', 'the', 'infinite', 'series', 'and', 'taylor\x92s', 'and', 'maclaurin\x92s', 'series', 'to', 'find', 'the', 'expansion', 'of', 'mathematical', 'functions', '.']


In [23]:
co_words = []
for list_1 in co_tokens:
    co_main_words = []
    for word in list_1:
        if word not in eng_stopwords:
            if word not in co_main_words:
                co_main_words.append(word)
    co_words.append(co_main_words)

In [24]:
print(co_words[0])

['students', 'able', 'apply', 'knowledge', 'successive', 'differentiation', 'various', 'problems', 'different', 'tests', 'find', 'nature', 'infinite', 'series', 'taylor\x92s', 'maclaurin\x92s', 'expansion', 'mathematical', 'functions']


In [25]:
for i in range(len(co_words)):
    for j in range(len(co_words[i])):
        lemm = wnet.lemmatize(co_words[i][j], pos='n')
        co_words[i][j] = lemm

In [26]:
print(co_words[0])

['student', 'able', 'apply', 'knowledge', 'successive', 'differentiation', 'various', 'problem', 'different', 'test', 'find', 'nature', 'infinite', 'series', 'taylor\x92s', 'maclaurin\x92s', 'expansion', 'mathematical', 'function']


In [27]:
cat=['c1','c2','c3','c4','c5','c6','c7','c8','c9']
cat_df = pd.read_csv('#categories.csv',encoding = 'unicode_escape',dtype=np.float64)
cat_df.head()

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,0.75,0.5,0.15,0.2,0.4,0.625,0.175,0.3,0.45
1,0.75,0.5,0.15,0.2,0.4,0.625,0.175,0.3,0.45
2,0.75,0.5,0.15,0.2,0.4,0.625,0.175,0.3,0.45
3,0.75,0.5,0.15,0.2,0.4,0.625,0.175,0.3,0.45
4,0.5,0.85,0.1,0.15,0.5,0.675,0.125,0.325,0.383333


In [28]:
cat_df.shape

(12, 9)

In [29]:
similarity = np.zeros([cos_df.shape[0],pos_df.shape[0]])
for x in range(len(co_words)):
    print("CO:",x)
    for y in range(len(co_words[x])):
        if len(wn.synsets(co_words[x][y])) != 0 :
            for i in range(len(po_synonym)):
                for j in range(len(po_synonym[i])):
                    if (wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic) != None:
                        similarity[x][i] = similarity[x][i]+((wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic))

CO: 0
CO: 1
CO: 2
CO: 3
CO: 4
CO: 5
CO: 6
CO: 7
CO: 8
CO: 9
CO: 10
CO: 11
CO: 12
CO: 13
CO: 14
CO: 15
CO: 16
CO: 17
CO: 18
CO: 19
CO: 20
CO: 21
CO: 22
CO: 23
CO: 24
CO: 25
CO: 26
CO: 27
CO: 28
CO: 29
CO: 30
CO: 31
CO: 32
CO: 33
CO: 34
CO: 35
CO: 36
CO: 37
CO: 38
CO: 39
CO: 40
CO: 41
CO: 42
CO: 43
CO: 44
CO: 45
CO: 46
CO: 47
CO: 48
CO: 49
CO: 50
CO: 51
CO: 52
CO: 53
CO: 54
CO: 55
CO: 56
CO: 57
CO: 58
CO: 59
CO: 60
CO: 61
CO: 62
CO: 63
CO: 64
CO: 65
CO: 66
CO: 67
CO: 68
CO: 69
CO: 70
CO: 71
CO: 72
CO: 73
CO: 74
CO: 75
CO: 76
CO: 77
CO: 78
CO: 79
CO: 80
CO: 81
CO: 82
CO: 83
CO: 84
CO: 85
CO: 86
CO: 87
CO: 88
CO: 89
CO: 90
CO: 91
CO: 92
CO: 93
CO: 94
CO: 95
CO: 96
CO: 97
CO: 98
CO: 99
CO: 100
CO: 101
CO: 102
CO: 103
CO: 104
CO: 105
CO: 106
CO: 107
CO: 108
CO: 109
CO: 110
CO: 111
CO: 112
CO: 113
CO: 114
CO: 115
CO: 116
CO: 117
CO: 118
CO: 119
CO: 120
CO: 121
CO: 122
CO: 123
CO: 124
CO: 125
CO: 126
CO: 127
CO: 128
CO: 129
CO: 130
CO: 131
CO: 132
CO: 133
CO: 134
CO: 135
CO: 136
CO: 137
CO: 13

In [30]:
category = list(cos_df['category'])
sim = np.zeros([cos_df.shape[0],pos_df.shape[0]])
for i in range(similarity.shape[0]):
    for j in range(similarity.shape[1]):
        for k in range(len(cat)):
            if category[i]==cat[k]:
                c=list(cat_df[cat[k]])
                sim[i][j] = (similarity[i][j]*c[j])/(len(co_words[i])*len(po_synonym[j]))
                sim[i][j] = (sim[i][j]*100/4.85)
                sim[i][j] = round(sim[i][j],ndigits=1)

In [31]:
map_df = pd.DataFrame(sim, columns = ['PO1', 'PO2', 'PO3', 'PO4', 'PO5', 'PO6', 'PO7', 'PO8', 'PO9', 'PO10', 'PO11', 'PO12'])
map_df.insert(0,"Subject code",cos_df['sub_code'])
map_df.insert(1,"Category",cos_df['category'])

In [32]:
map_df.head(10)

Unnamed: 0,Subject code,Category,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,ETMA 101 CO1,c1,2.0,1.6,1.7,2.1,1.0,0.3,0.4,0.3,0.3,0.3,0.6,0.6
1,ETMA 101 CO2,c1,2.5,2.1,2.1,2.6,1.3,0.4,0.5,0.4,0.4,0.4,0.7,0.8
2,ETMA 101 CO3,c1,2.4,2.0,2.1,2.5,1.3,0.4,0.4,0.4,0.5,0.4,0.7,0.8
3,ETMA 101 CO4,c1,1.8,1.5,1.5,1.8,0.9,0.3,0.3,0.3,0.3,0.3,0.5,0.6
4,ETPH 103 CO1,c6,1.9,1.7,1.7,1.9,1.8,0.4,0.4,0.4,0.9,0.8,1.2,1.3
5,ETPH 103 CO2,c6,1.8,1.4,1.5,1.9,1.4,0.3,0.4,0.3,0.6,0.4,0.9,0.9
6,ETPH 103 CO3,c6,2.5,2.3,2.3,2.6,2.3,0.5,0.6,0.5,1.1,1.1,1.7,1.8
7,ETPH 103 CO4,c6,2.0,1.9,1.9,2.0,2.0,0.5,0.5,0.4,1.1,1.0,1.4,1.6
8,ETME 105 CO1,c6,2.5,2.2,2.2,2.5,2.2,0.5,0.6,0.5,1.2,1.1,1.6,1.7
9,ETME 105 CO2,c6,1.9,1.7,1.8,2.0,1.8,0.4,0.5,0.4,0.9,0.7,1.2,1.3


In [33]:
for i in range(sim.shape[1]):
    a = i+1
    b = max(s[1] for s in sim)
    c = min(r[1] for r in sim)
    print(a,b,c)

1 3.0 0.3
2 3.0 0.3
3 3.0 0.3
4 3.0 0.3
5 3.0 0.3
6 3.0 0.3
7 3.0 0.3
8 3.0 0.3
9 3.0 0.3
10 3.0 0.3
11 3.0 0.3
12 3.0 0.3


In [34]:
map_df.to_csv('map.csv',index=False)