## Installing necessary libraries

In [132]:
!pip install docx2txt



In [133]:
!pip install strsimpy



In [134]:
!pip install python-docx



In [135]:
%%bash

if ls docx2csv >/dev/null 2>&1; then
    echo "docx2csv exists."
else
    echo "Folder does not exist. Cloning docx2csv."
    git clone https://github.com/ivbeg/docx2csv.git
fi

docx2csv exists.


In [136]:
%%bash

source .env
echo "Linux Password: $PASSWORD"
cd docx2csv && echo "$PASSWORD" | sudo -S python3 setup.py install

Linux Password: lori12345
running install
running bdist_egg
running egg_info
writing docx2csv.egg-info/PKG-INFO
writing dependency_links to docx2csv.egg-info/dependency_links.txt
writing entry points to docx2csv.egg-info/entry_points.txt
writing requirements to docx2csv.egg-info/requires.txt
writing top-level names to docx2csv.egg-info/top_level.txt
reading manifest file 'docx2csv.egg-info/SOURCES.txt'
writing manifest file 'docx2csv.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build/bdist.linux-x86_64/egg
creating build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/converter.py -> build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/core.py -> build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/__init__.py -> build/bdist.linux-x86_64/egg/docx2csv
copying build/lib/docx2csv/__main__.py -> build/bdist.linux-x86_64/egg/docx2csv
byte-compiling build/bdist.linux-x86_64/egg/d

In [137]:
# ----- TEST DATA INPUT -----

# Computer Science Test Data.
# CURRENT_MAPPING="Lists_ComputerScience.docx"
# ORIGINAL_MAPPING="Original-Mapping-ComputerScience.csv"

# InformationSecurity Test Data.
CURRENT_MAPPING="Lists_InformationSecurity.docx"
ORIGINAL_MAPPING="Original-Mapping-InfoSecurity.csv"

In [138]:
# extract tables from word document
from docx2csv import extract_tables, extract
tables = extract_tables(CURRENT_MAPPING)

In [139]:
from docx import Document
document = Document(CURRENT_MAPPING)

In [140]:
def read_docx_table(document,table_num):
  table = document.tables[table_num-1]
  data = [[cell.text for cell in row.cells] for row in table.rows]
  df = pd.DataFrame(data)
  return df

In [141]:
# importing necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

## PLO TABLE

In [142]:
# Creating a dataframe for PLOs and it will accept 'n' number of PLOs
table_num=1
df = read_docx_table(document,table_num)
df.head()

Unnamed: 0,0,1
0,P1,Analyze a complex computing problem and to app...
1,P2,"Design, implement, and evaluate a computing-ba..."
2,P3,Communicate effectively in a variety of profes...
3,P4,Recognize professional responsibilities and ma...
4,P5,Function effectively as a member or leader of ...


In [143]:
q1 = df.copy()

In [144]:
# assigning count vectorizer
count_vectorizer = CountVectorizer(stop_words='english', min_df=0.005)

In [145]:
# Data preprocessing for PLO dataframe
q1[1] = q1[1].str.lower()
corpus = q1[1].tolist()
corpii = count_vectorizer.fit_transform(corpus)

In [146]:
corpii

<6x49 sparse matrix of type '<class 'numpy.int64'>'
	with 59 stored elements in Compressed Sparse Row format>

In [147]:
# extracting features names from PLO table
feature_names = count_vectorizer.get_feature_names_out()
feature_names

array(['activities', 'analyze', 'apply', 'appropriate', 'based',
       'communicate', 'complex', 'computing', 'context', 'contexts',
       'design', 'discipline', 'disciplines', 'effectively', 'engaged',
       'ethical', 'evaluate', 'function', 'given', 'identify',
       'implement', 'informed', 'judgments', 'leader', 'legal',
       'maintain', 'make', 'meet', 'member', 'operations', 'practice',
       'practices', 'presence', 'principles', 'problem', 'professional',
       'program', 'recognize', 'relevant', 'requirements',
       'responsibilities', 'risks', 'security', 'set', 'solution',
       'solutions', 'team', 'threats', 'variety'], dtype=object)

In [148]:
len(feature_names)

49

In [149]:
# Converting features to vector form and create a dataframe
X1 = pd.DataFrame(corpii.toarray(), columns=feature_names)

## CLO TABLE

In [150]:
# Creating a dataframe for CLOs and it will accept 'n' number of CLOs
table_num=2
df1 = read_docx_table(document,table_num)
p1 = df1.copy()

In [151]:
# Data preprocessing for CLO dataframe
p1[1] = p1[1].str.lower()
corpus11 = p1[1].tolist()
corpii11 = count_vectorizer.fit_transform(corpus11)

In [152]:
# extracting features names from CLO table
feature_names1 = count_vectorizer.get_feature_names_out()

In [153]:
len(feature_names1)

433

In [154]:
# Converting features to vector form and create a dataframe
X2 = pd.DataFrame(corpii11.toarray(), columns=feature_names1)

In [155]:
X2.head()

Unnamed: 0,access,accuracy,achieve,active,activities,address,addressing,affecting,algebra,algorithm,...,viruses,vpn,vulnerabilities,vulnerability,web,workers,workplace,write,writing,written
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [156]:
# adding column index to the CLO table
U2 = pd.concat([df1[0], X2], axis=1)
U2.set_index(0, inplace=True)

In [157]:
U2.head()

Unnamed: 0_level_0,access,accuracy,achieve,active,activities,address,addressing,affecting,algebra,algorithm,...,viruses,vpn,vulnerabilities,vulnerability,web,workers,workplace,write,writing,written
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [158]:
# adding column index to the PLO table
U1 = pd.concat([df[0], X1], axis=1)
U1.set_index(0, inplace=True)

In [159]:
U1

Unnamed: 0_level_0,activities,analyze,apply,appropriate,based,communicate,complex,computing,context,contexts,...,requirements,responsibilities,risks,security,set,solution,solutions,team,threats,variety
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P1,0,1,1,0,0,0,1,2,0,0,...,0,0,0,0,0,0,1,0,0,0
P2,0,0,0,0,1,0,0,2,1,0,...,1,0,0,0,1,1,0,0,0,0
P3,0,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1
P4,0,0,0,0,1,0,0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
P5,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
P6,0,0,1,0,0,0,0,0,0,0,...,0,0,1,1,0,0,0,0,1,0


## Intersection method for both CLOs and PLOs

### Generalised list of words

In [160]:
append_words = list(map(str.lower,['Cite', 'Define', 'Describe', 'Draw', 'Enumerate', 'Identify' 'Index', 'Indicate', 'Label', 'List', 'Match', 'Meet', 'Name', 'Outline', 'Point', 'Quote', 'Read', 'Recall', 'Recite', 'Recognize', 'Record', 'Repeat', 'Reproduce','Review',
'Select', 'State', 'Study', 'Tabulate', 'Trace', 'Write', 'Add', 'Approximate', 'Articulate', 'Associate', 'Characterize', 'Clarify', 'Classify', 'Compare', 'Compute', 'Contrast', 'Convert', 'Defend', 'Detail', 'Differentiate',
'Discuss', 'Distinguish', 'Elaborate', 'Estimate', 'Example', 'Explain', 'Express', 'Extend', 'Extrapolate', 'Factor', 'Generalize', 'Give', 'Infer', 'Interact', 'Interpolate', 'Interpret', 'Observe', 'Paraphrase', 'Picture graphically',
'Predict', 'Rewrite', 'Subtract', 'Summarize', 'Translate', 'Visualize', 'Acquire', 'Adapt', 'Allocate', 'Alphabetize', 'Apply', 'Ascertain', 'Assign', 'Attain', 'Avoid', 'Back up', 'Calculate', 'Capture', 'Change', 'Complete', 'Construct', 
'Customize', 'Demonstrate', 'Depreciate', 'Derive', 'Determine', 'Diminish', 'Discover', 'Employ', 'Examine', 'Exercise', 'Explore', 'Expose', 'Figure', 'Graph', 'Handle', 'Illustrate', 'Interconvert', 'Investigate', 'Manipulate', 'Modify', 
'Operate', 'Personalize', 'Plot','Practice', 'Prepare', 'Price', 'Process', 'Produce', 'Project', 'Provide', 'Relate', 'Round off', 'Sequence', 'Show', 'Simulate', 'Sketch', 'Solve', 'Subscribe', 'Transcribe', 'Use', 'Analyze', 'Audit', 
'Blueprint', 'Breadboard', 'Break down', 'Confirm', 'Correlate', 'Detect', 'Diagnose', 'Diagram', 'Discriminate', 'Dissect', 'Document', 'Ensure', 'Figure out', 'File', 'Group', 'Interrupt', 'Inventory', 'Layout', 'Manage', 'Maximize', 
'Minimize', 'Optimize', 'Order', 'Point out', 'Prioritize', 'Proofread', 'Query', 'Separate', 'Subdivide', 'Train', 'Transform', 'Appraise', 'Assess', 'Conclude', 'Counsel', 'Criticize', 'Critique', 'Evaluate', 'Grade', 'Hire', 'Judge', 
'Justify', 'Measure', 'Prescribe', 'Rank', 'Rate', 'Recommend', 'Release', 'Support', 'Test', 'Validate', 'Verify', 'Abstract', 'Animate', 'Arrange', 'Assemble', 'Budget', 'Categorize', 'Code', 'Combine', 'Compile', 'Compose', 'Cope', 
'Correspond', 'Create', 'Cultivate', 'Debug', 'Depict', 'Design', 'Develop', 'Devise', 'Dictate', 'Enhance', 'Facilitate', 'Format', 'Formulate', 'Generate', 'Import', 'Improve', 'Incorporate', 'Integrate', 'Interface', 'Join', 'Lecture', 
'Model', 'Network', 'Organize', 'Overhaul', 'Plan', 'Portray', 'Program', 'Rearrange', 'Reconstruct', 'Reorganize', 'Revise', 'Specify']))

In [164]:
# using + operator to concat the generalised list of words to the PLO list
train_column = list(feature_names) + append_words

In [165]:
# CLO list of words
test_column = feature_names1

In [168]:
# Intersection method for extracting common column names from the tables (both CLO AND PLO)
# comparing whether the CLO column name is present in the PLO column names or not
train_column = list(feature_names) + append_words # (PLO table ) (# using + operator to concat PLO words and list of generalized words)
test_column = list(feature_names1)   # (CLO table)

In [169]:
# This is the column names from both the tables (using intersection)
common_column = list(set(train_column).intersection(set(test_column)))
common_column

['write',
 'identify',
 'define',
 'ethical',
 'manage',
 'prepare',
 'develop',
 'graph',
 'project',
 'professional',
 'practices',
 'sequence',
 'solutions',
 'network',
 'defend',
 'given',
 'communicate',
 'solve',
 'security',
 'assess',
 'cultivate',
 'integrate',
 'derive',
 'criticize',
 'principles',
 'evaluate',
 'convert',
 'discover',
 'test',
 'contexts',
 'discuss',
 'use',
 'file',
 'plan',
 'investigate',
 'threats',
 'problem',
 'process',
 'explain',
 'code',
 'customize',
 'computing',
 'effectively',
 'manipulate',
 'solution',
 'requirements',
 'apply',
 'relevant',
 'team',
 'responsibilities',
 'demonstrate',
 'recognize',
 'program',
 'activities',
 'translate',
 'operations',
 'compare',
 'appropriate',
 'create',
 'legal',
 'produce',
 'function',
 'based',
 'interface',
 'implement',
 'model',
 'select',
 'query',
 'design',
 'analyze',
 'diagram']

In [170]:
print(common_column)

['write', 'identify', 'define', 'ethical', 'manage', 'prepare', 'develop', 'graph', 'project', 'professional', 'practices', 'sequence', 'solutions', 'network', 'defend', 'given', 'communicate', 'solve', 'security', 'assess', 'cultivate', 'integrate', 'derive', 'criticize', 'principles', 'evaluate', 'convert', 'discover', 'test', 'contexts', 'discuss', 'use', 'file', 'plan', 'investigate', 'threats', 'problem', 'process', 'explain', 'code', 'customize', 'computing', 'effectively', 'manipulate', 'solution', 'requirements', 'apply', 'relevant', 'team', 'responsibilities', 'demonstrate', 'recognize', 'program', 'activities', 'translate', 'operations', 'compare', 'appropriate', 'create', 'legal', 'produce', 'function', 'based', 'interface', 'implement', 'model', 'select', 'query', 'design', 'analyze', 'diagram']


In [171]:
len(common_column)

71

In [172]:
# Filter the common column values from the CLO table
U3 = U2.filter(list(common_column), axis=1)

In [173]:
U3.head()

Unnamed: 0_level_0,write,identify,define,ethical,manage,prepare,develop,graph,project,professional,...,function,based,interface,implement,model,select,query,design,analyze,diagram
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
C5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [174]:
# extracting first row from PLO table and make a dataframe
Cs = []
for x in range(len(df)):
    Cs.append(U1.loc[['P'+str(x+1)]])

In [175]:
# Concatenating these extracted each PLOs with 'n' number of CLOs
Dds = []
for x in range(len(df)):
    Dds.append(pd.concat([Cs[x],U3], sort=True))

In [176]:
# Filling the nan values of the concatenated dataframes
Ds = []
for x in range(len(df)):
    Ds.append(Dds[x].fillna(0))

## Calculate Cosine similarity

In [177]:
# Calculate cosine similarity for concatenated dataframes and create a new dataframe
for x in range(len(df)):
    Dds[x] = pd.DataFrame(cosine_similarity(Ds[x], dense_output=True))

In [178]:
# Extract the '0'th column because it has the CLO-PLO  cosine similarity values. We are neglecting the remaining ones.
# Renaming the '0'th column name to 'Pn' ['P1, P2, P3, P4, ... 'Pn']
for x in range(len(df)):
    Dds[x].rename(columns = {0 :'P'+str(x+1)}, inplace = True)


In [179]:
# Concatenating each  '0'th column from different cosine similarity dataframes
Ddn = []
for x in range(len(df)):
    Ddn.append(Dds[x]['P'+str(x+1)])

d = pd.concat(Ddn, axis=1)

In [180]:
# '0'th column gives us 1 which means each PLO map with own PLO.
# So we are removing that column.
dd = d[1:]

In [181]:
# resetting index
dd.reset_index(inplace = True)
dd.drop(['index'], axis=1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dd.drop(['index'], axis=1, inplace = True)


In [182]:
# store the matrix into csv file
dd.to_csv('pseudocodematrix.csv', index=False)

In [183]:
# print the matrix
print(dd)

           P1        P2        P3        P4        P5       P6
0    0.320256  0.288675  0.000000  0.166667  0.000000  0.00000
1    0.554700  0.500000  0.000000  0.288675  0.000000  0.00000
2    0.277350  0.000000  0.000000  0.000000  0.000000  0.00000
3    0.000000  0.000000  0.000000  0.204124  0.000000  0.00000
4    0.138675  0.125000  0.000000  0.000000  0.158114  0.00000
..        ...       ...       ...       ...       ...      ...
168  0.160128  0.000000  0.000000  0.166667  0.000000  0.19245
169  0.000000  0.000000  0.000000  0.000000  0.000000  0.00000
170  0.000000  0.000000  0.000000  0.000000  0.000000  0.00000
171  0.000000  0.250000  0.000000  0.000000  0.000000  0.00000
172  0.000000  0.000000  0.516398  0.000000  0.365148  0.00000

[173 rows x 6 columns]


## Setting threshold value (taking min and max of each column and divided by 2)
## threshold value = (min +max)/2

In [184]:
# Setting threshold value 
# Taking min max average of each column and set that as a threshold value
for x in range(len(df)):
    tes = dd['P'+str(x+1)].values.min()
    tes1 = dd['P'+str(x+1)].values.max()
    tt1 = (tes+tes1)/2
    if tt1 == 0:
      dd['P'+str(x+1)] = dd['P'+str(x+1)] 
    else:
      dd['P'+str(x+1)] = dd['P'+str(x+1)].apply(lambda x: 1 if x >= tt1 else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dd['P'+str(x+1)] = dd['P'+str(x+1)].apply(lambda x: 1 if x >= tt1 else 0)


In [185]:
dd.to_csv('PLO-CLOmapping.csv', index=False)

In [186]:
dd.head()

Unnamed: 0,P1,P2,P3,P4,P5,P6
0,1,1,0,0,0,0
1,1,1,0,1,0,0
2,1,0,0,0,0,0
3,0,0,0,1,0,0
4,0,0,0,0,0,0


In [187]:
# human generated output
d= pd.read_csv(ORIGINAL_MAPPING)
d.head()

Unnamed: 0,P1,P2,P3,P4,P5,P6
0,0,0,0,0,0,0
1,0,0,0,0,0,0
2,0,0,0,0,0,0
3,0,0,0,1,0,0
4,0,0,0,0,0,0


In [188]:
df3 = d.copy()

In [189]:
dd

Unnamed: 0,P1,P2,P3,P4,P5,P6
0,1,1,0,0,0,0
1,1,1,0,1,0,0
2,1,0,0,0,0,0
3,0,0,0,1,0,0
4,0,0,0,0,0,0
...,...,...,...,...,...,...
168,0,0,0,0,0,0
169,0,0,0,0,0,0
170,0,0,0,0,0,0
171,0,1,0,0,0,0


In [190]:
df3

Unnamed: 0,P1,P2,P3,P4,P5,P6
0,0,0,0,0,0,0
1,0,0,0,0,0,0
2,0,0,0,0,0,0
3,0,0,0,1,0,0
4,0,0,0,0,0,0
...,...,...,...,...,...,...
168,0,0,0,0,0,0
169,0,0,0,0,0,0
170,1,0,0,0,0,0
171,0,1,0,0,0,0


In [191]:
for x in range(len(df)):
  df3['P'+str(x+1)] = np.where(dd['P'+str(x+1)] == df3['P'+str(x+1)], 'True', 'False')

In [192]:
df3.head()

Unnamed: 0,P1,P2,P3,P4,P5,P6
0,False,False,True,True,True,True
1,False,False,True,False,True,True
2,False,True,True,True,True,True
3,True,True,True,True,True,True
4,True,True,True,True,True,True


In [193]:
for x in range(len(df)):
  df3['P'+str(x+1)] = df3['P'+str(x+1)].replace('True', 1)
  df3['P'+str(x+1)] = df3['P'+str(x+1)].replace('False', 0)

In [194]:
df3.head()

Unnamed: 0,P1,P2,P3,P4,P5,P6
0,0,0,1,1,1,1
1,0,0,1,0,1,1
2,0,1,1,1,1,1
3,1,1,1,1,1,1
4,1,1,1,1,1,1


In [195]:
# calculating accuracy of the table
df3['acc'] = df3.mean(axis=1)
df3.head()

Unnamed: 0,P1,P2,P3,P4,P5,P6,acc
0,0,0,1,1,1,1,0.666667
1,0,0,1,0,1,1,0.5
2,0,1,1,1,1,1,0.833333
3,1,1,1,1,1,1,1.0
4,1,1,1,1,1,1,1.0


In [196]:
df4 = pd.concat([df1[0], df3], axis=1)
df4.head()

Unnamed: 0,0,P1,P2,P3,P4,P5,P6,acc
0,C1,0,0,1,1,1,1,0.666667
1,C2,0,0,1,0,1,1,0.5
2,C3,0,1,1,1,1,1,0.833333
3,C4,1,1,1,1,1,1,1.0
4,C5,1,1,1,1,1,1,1.0


In [197]:
df4.set_index(0, inplace=True)
df4.head()

Unnamed: 0_level_0,P1,P2,P3,P4,P5,P6,acc
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C1,0,0,1,1,1,1,0.666667
C2,0,0,1,0,1,1,0.5
C3,0,1,1,1,1,1,0.833333
C4,1,1,1,1,1,1,1.0
C5,1,1,1,1,1,1,1.0


In [198]:
df4['acc'].mean()

0.884393063583815