## Loading Data

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import os
import markdown

data = dict()
# Load all the files present in the data folder
for root, dirs, files in os.walk('../../data'):
    for file in files:
        # with open(os.path.join(root, file), 'rb') as f:
        #     # print(f.read())
        #     text = markdown.markdown(f.read())
        #     data[file] = text
            # break
        with open(os.path.join(root, file), "r", encoding="utf-8") as file:
            data[file] = file.read()

In [5]:
import os

for root, dirs, files in os.walk('../../data'):
    for file in files:
        print(root,dirs,file)

../../data\business_docs [] Business Proposal.md
../../data\business_docs [] Marketing Plan.md
../../data\business_docs [] Progress Report.md
../../data\company_bylaws [] About Instagram.md
../../data\company_bylaws [] Board of Directors.md
../../data\company_bylaws [] Diversity, Equity, and Inclusion.md
../../data\company_bylaws [] Shareholders.md
../../data\financial_docs [] Balance Sheet.md
../../data\financial_docs [] Income Statement.md
../../data\financial_docs [] Tax Return.md
../../data\hr_docs_filled ['employee_contracts', 'employee_esops', 'employee_payslips'] Employee Handbook.md
../../data\hr_docs_filled\employee_contracts [] employee_contract_Ava Thomas.md
../../data\hr_docs_filled\employee_contracts [] employee_contract_David Lee.md
../../data\hr_docs_filled\employee_contracts [] employee_contract_Emily Brown.md
../../data\hr_docs_filled\employee_contracts [] employee_contract_Ethan Rodriguez.md
../../data\hr_docs_filled\employee_contracts [] employee_contract_Eva Kim.md


In [6]:
len(data)

77

## PKE Models

In [1]:
import pke
from pke.lang import stopwords
import string

In [7]:
def keyword(text, method='Yake'):
    if method=='Yake':
        extractor = pke.unsupervised.YAKE()
        stoplist = stopwords.get('english')
        extractor.load_document(input=text,
                                language='en',
                                stoplist=stoplist,
                                normalization=None)
        extractor.candidate_selection(n=3)
        window = 2
        use_stems = False
        extractor.candidate_weighting(window=window,
                                    use_stems=use_stems)
        threshold = 0.8
        keyphrases = extractor.get_n_best(n=10, threshold=threshold)
    elif method=='TextRank':
        pos = {'NOUN', 'PROPN', 'ADJ'}
        extractor = pke.unsupervised.TextRank()
        extractor.load_document(input=text,
                                language='en',
                                normalization=None)
        extractor.candidate_weighting(window=2,
                                    pos=pos,
                                    top_percent=0.33)
        keyphrases = extractor.get_n_best(n=10)
    elif method=='SingleRank':
        pos = {'NOUN', 'PROPN', 'ADJ'}
        extractor = pke.unsupervised.SingleRank()
        extractor.load_document(input=text,
                                language='en',
                                normalization=None)
        extractor.candidate_selection(pos=pos)
        extractor.candidate_weighting(window=10,
                                    pos=pos)
        keyphrases = extractor.get_n_best(n=10)
    elif method=='TopicRank':
        extractor = pke.unsupervised.TopicRank()
        stoplist = list(string.punctuation)
        stoplist += pke.lang.stopwords.get('en')
        extractor.load_document(input=text,
                                stoplist=stoplist)
        pos = {'NOUN', 'PROPN', 'ADJ'}
        extractor.candidate_selection(pos=pos)
        extractor.candidate_weighting(threshold=0.74, method='average')
        keyphrases = extractor.get_n_best(n=10)
    elif method=='PositionRank':
        pos = {'NOUN', 'PROPN', 'ADJ'}
        grammar = "NP: {<ADJ>*<NOUN|PROPN>+}"
        extractor = pke.unsupervised.PositionRank()
        extractor.load_document(input=text,
                                language='en',
                                normalization=None)
        extractor.candidate_selection(grammar=grammar,
                                    maximum_word_number=3)
        extractor.candidate_weighting(window=10,
                                    pos=pos)
        keyphrases = extractor.get_n_best(n=10)
    elif method=='MultipartiteRank':
        extractor = pke.unsupervised.MultipartiteRank()
        stoplist = list(string.punctuation)
        stoplist += pke.lang.stopwords.get('en')
        extractor.load_document(input=text,
                                stoplist=stoplist)
        pos = {'NOUN', 'PROPN', 'ADJ'}
        extractor.candidate_selection(pos=pos)
        extractor.candidate_weighting(alpha=1.1,
                                    threshold=0.74,
                                    method='average')
        keyphrases = extractor.get_n_best(n=10)
    else:
        pass
    return keyphrases

In [8]:
import pandas as pd
df = pd.DataFrame(data.items(), columns=['File', 'Text'])
df

Unnamed: 0,File,Text
0,Business Proposal.md,"<p>b""\r\n### Company Name: Instagram\r\n### Ca..."
1,Marketing Plan.md,<p>b'\r\n### Company Name: Instagram\r\n### Ca...
2,Progress Report.md,<p>b'# Instagram Progress Report\r\n\r\n| Proj...
3,About Instagram.md,"<p>b""# About Instagram\r\n\r\n<strong>Overview..."
4,Board of Directors.md,"<p>b""# Board of Directors\r\n\r\nThe Board of ..."
...,...,...
72,employee_payslip_William Anderson.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...
73,employee_payslip_Zoe Lee.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...
74,employee_payslip_template.md,<p>b'# Employee Payslip\r\n\r\n## Employee Inf...
75,employee_stock_ownership_plan_template.md,<p>b'# Employee Stock Ownership Plan (ESOP)\r\...


In [9]:
keyword_methods = ['Yake', 'TextRank', 'SingleRank', 'TopicRank', 'PositionRank', 'MultipartiteRank']
keywords = []
for method in keyword_methods:
    df[method] = df['Text'].apply(lambda x: keyword(x, method=method))
    keywords.append(df[method].values)



In [10]:
df

Unnamed: 0,File,Text,Yake,TextRank,SingleRank,TopicRank,PositionRank,MultipartiteRank
0,Business Proposal.md,"<p>b""\r\n### Company Name: Instagram\r\n### Ca...","[(social media marketing, 0.002417962165297132...","[(visual content, 0.05635809323251077), (# # #...",[(comprehensive social media marketing service...,"[(strong, 0.10865335318122646), (visual conten...","[(# company name, 0.07905466811226274), (socia...","[(strong, 0.08479113790373548), (brand presenc..."
1,Marketing Plan.md,<p>b'\r\n### Company Name: Instagram\r\n### Ca...,"[(social media marketing, 1.007278240849013e-0...",[(marketing objectives:</strong>\r\n- increase...,"[(current social media marketing trends, 0.088...","[(strong, 0.1533084555366908), (relevant influ...","[(# company name, 0.08739246617167683), (# doc...","[(strong, 0.1237602253731655), (relevant influ..."
2,Progress Report.md,<p>b'# Instagram Progress Report\r\n\r\n| Proj...,"[(direct messaging update, 8.708934970056568e-...","[(| user feedback, 0.19982836181623714), (| us...",[(video duration limit | content moderation po...,"[(developers, 0.22126828410600863), (designers...","[(progress |, 0.2822538647359515), (| integrat...","[(developers, 0.22126828410600868), (designers..."
3,About Instagram.md,"<p>b""# About Instagram\r\n\r\n<strong>Overview...","[(instagram, 0.02607871663367337), (strong, 0....","[(visual content, 0.03562114105346172), (socia...","[(advocacy campaigns\r\n\r\n < strong, 0.05634...","[(users, 0.05817594536900966), (instagram, 0.0...","[(instagram community today, 0.055395335208253...","[(users, 0.05473342842746924), (instagram, 0.0..."
4,Board of Directors.md,"<p>b""# Board of Directors\r\n\r\nThe Board of ...","[(instagram, 0.01280533000974738), (board, 0.0...","[(brand awareness.\r\n\r\n # # contributions, ...","[(# board composition\r\n\r\nthe board, 0.0950...","[(instagram, 0.0979706959405189), (board membe...","[(# board, 0.1028890006977222), (directors\r\n...","[(instagram, 0.08740494042510544), (board memb..."
...,...,...,...,...,...,...,...,...
72,employee_payslip_William Anderson.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...,"[(employee payslip, 0.0022734633413415174), (c...","[(# # employee, 0.19218371052598202), (# # net...",[(customer support\r\n\r\n # # pay period:\r\n...,"[(strong, 0.21439941069804597), (september, 0....","[(b'\r\n # employee, 0.2145245199455731), (wil...","[(strong, 0.21620878655182965), (september, 0...."
73,employee_payslip_Zoe Lee.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...,"[(employee payslip, 0.002274198026492155), (cu...","[(# # employee, 0.19452824309451516), (# # net...",[(customer success\r\n\r\n # # pay period:\r\n...,"[(strong, 0.2323191868586359), (pay period, 0....","[(b'\r\n # employee, 0.22242752466112778), (# ...","[(strong, 0.24799160485023408), (pay period, 0..."
74,employee_payslip_template.md,<p>b'# Employee Payslip\r\n\r\n## Employee Inf...,"[(employee payslip, 0.0022529423857244176), (s...","[(# # employee, 0.1728828564205594), (# # net,...","[(# employee information:\r\n- < strong, 0.266...","[(strong, 0.18167362159262998), (earnings, 0.1...","[(# employee payslip\r\n\r\n, 0.19604071167975...","[(strong, 0.18641698980184895), (earnings, 0.1..."
75,employee_stock_ownership_plan_template.md,<p>b'# Employee Stock Ownership Plan (ESOP)\r\...,"[(employee stock ownership, 0.0001152205051629...","[(# # employee information:\r\n- <, 0.23273271...","[(# employee information:\r\n- < strong, 0.204...","[(employee, 0.11620143584420564), (strong, 0.0...","[(# esop details:\r\n-, 0.11220397399007889), ...","[(employee, 0.11684321550977161), (strong, 0.0..."


In [43]:
# df['Yake'][0]
for i in range(len(df)):
    df['Yake'][i] = [x[0] for x in df['Yake'][i]]
    df['TextRank'][i] = [x[0] for x in df['TextRank'][i]]
    df['SingleRank'][i] = [x[0] for x in df['SingleRank'][i]]
    df['TopicRank'][i] = [x[0] for x in df['TopicRank'][i]]
    df['PositionRank'][i] = [x[0] for x in df['PositionRank'][i]]
    df['MultipartiteRank'][i] = [x[0] for x in df['MultipartiteRank'][i]]

In [44]:
df

Unnamed: 0,File,Text,Yake,TextRank,SingleRank,TopicRank,PositionRank,MultipartiteRank
0,Business Proposal.md,"<p>b""\r\n### Company Name: Instagram\r\n### Ca...","[social media marketing, media marketing servi...","[visual content, # # # company, influencer col...",[comprehensive social media marketing services...,"[strong, visual content, brand presence, influ...","[# company name, social media strategy, social...","[strong, brand presence, engagement, visual co..."
1,Marketing Plan.md,<p>b'\r\n### Company Name: Instagram\r\n### Ca...,"[social media marketing, media marketing trend...",[marketing objectives:</strong>\r\n- increase ...,"[current social media marketing trends, social...","[strong, relevant influencers, brand advocacy,...","[# company name, # document title, content mar...","[strong, relevant influencers, marketing plan,..."
2,Progress Report.md,<p>b'# Instagram Progress Report\r\n\r\n| Proj...,"[direct messaging update, explore algorithm en...","[| user feedback, | user adoption, | content m...",[video duration limit | content moderation pol...,"[developers, designers, hold, encryption, secu...","[progress |, | integration issues, moderation ...","[developers, designers, hold, encryption, secu..."
3,About Instagram.md,"<p>b""# About Instagram\r\n\r\n<strong>Overview...","[instagram, strong, users, platform, community...","[visual content, social media platform, user e...","[advocacy campaigns\r\n\r\n < strong, instagra...","[users, instagram, platform, features, strong,...","[instagram community today, social media platf...","[users, instagram, platform, features, strong,..."
4,Board of Directors.md,"<p>b""# Board of Directors\r\n\r\nThe Board of ...","[instagram, board, chief marketing officer, hi...","[brand awareness.\r\n\r\n # # contributions, c...","[# board composition\r\n\r\nthe board, popular...","[instagram, board members, directors, strong, ...","[# board, directors\r\n\r\nthe board, board me...","[instagram, board members, directors, company,..."
...,...,...,...,...,...,...,...,...
69,employee_payslip_Robert Martinez.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...,"[employee payslip, robert martinez, strong, em...","[# # employee, # # net, # # pay, # employee, #...","[# employee information:\r\n- < strong, # pay ...","[strong, july, pay period, earnings, deduction...","[b'\r\n # employee, employee name:</strong, ro...","[strong, july, pay period, earnings, robert ma..."
70,employee_payslip_Sarah Wilson.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...,"[employee payslip, sarah wilson, strong, emplo...","[human resources\r\n\r\n # # pay, # # employee...","[# employee information:\r\n- < strong, # pay ...","[strong, june, pay period, earnings, employee,...","[b'\r\n # employee, employee id:</strong, empl...","[strong, june, pay period, employee, earnings,..."
71,employee_payslip_Sophia Garcia.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...,"[employee payslip, sophia garcia, strong, empl...","[# # employee, # # net, # # pay, # employee, #...","[# employee information:\r\n- < strong, # pay ...","[strong, november, pay period, earnings, deduc...","[b'\r\n # employee, sophia garcia\r\n- <, 5,45...","[strong, november, pay period, earnings, deduc..."
72,employee_payslip_William Anderson.md,<p>b'\r\n# Employee Payslip\r\n\r\n## Employee...,"[employee payslip, customer support, william a...","[# # employee, # # net, # # pay, # #, # employ...",[customer support\r\n\r\n # # pay period:\r\n-...,"[strong, september, pay period, earnings, dedu...","[b'\r\n # employee, william anderson\r\n- <, #...","[strong, september, pay period, employee, earn..."


In [45]:
df.drop('Text', axis=1, inplace=True)

In [46]:
df.to_csv('keywords.csv', index=False)

## Evaluation

In [51]:
import pandas as pd
df_true = pd.read_csv('keywords_truelabels.csv')
df_pred = pd.read_csv('keywords.csv')

In [52]:
df_true

Unnamed: 0,File,Gold Standard Keywords
0,Board of Directors.md,"['instagram', 'board members', 'strategic gu..."
1,"Diversity, Equity, and Inclusion.md","['diverse', 'equity', 'inclusive workplace', '..."
2,Balance Sheet.md,"['current assests', 'non-current assests', 'in..."
3,employee_esop_Eva Kim.md,"['employee stock ownership', 'content moderati..."
4,employee_payslip_Ethan Rodriguez.md,"['employee payslip', 'security officer', 'allo..."
5,Marketing Plan.md,"['social media marketing', 'advertising campai..."
6,Business Proposal.md,"['social media marketing', 'brand presence', '..."
7,Tax Return.md,"['tax rate', 'total income', 'employment incom..."
8,employee_contract_David Lee.md,"['employment contract', 'administration', 'res..."
9,Employee Handbook.md,"['employees', 'mission', 'discrimination', 'ha..."


In [53]:
print(df_true.dtypes)

File                      object
Gold Standard Keywords    object
dtype: object


In [54]:
import ast

# df['Your_Column'] = df['Your_Column'].apply(ast.literal_eval)
df_true['Gold Standard Keywords'] = df_true['Gold Standard Keywords'].apply(ast.literal_eval)
df_pred['Yake'] = df_pred['Yake'].apply(ast.literal_eval)
df_pred['TextRank'] = df_pred['TextRank'].apply(ast.literal_eval)
df_pred['SingleRank'] = df_pred['SingleRank'].apply(ast.literal_eval)
df_pred['TopicRank'] = df_pred['TopicRank'].apply(ast.literal_eval)
df_pred['PositionRank'] = df_pred['PositionRank'].apply(ast.literal_eval)
df_pred['MultipartiteRank'] = df_pred['MultipartiteRank'].apply(ast.literal_eval)


In [55]:
type(df_true['Gold Standard Keywords'][0])

list

In [56]:
type(df_pred['Yake'][0])

list

In [57]:
df_merged = df_true.merge(df_pred, how='left', on='File')

In [58]:
df_merged

Unnamed: 0,File,Gold Standard Keywords,Yake,TextRank,SingleRank,TopicRank,PositionRank,MultipartiteRank
0,Board of Directors.md,"[instagram, board members, strategic guidance,...","[instagram, board, chief marketing officer, hi...","[brand awareness.\r\n\r\n # # contributions, c...","[# board composition\r\n\r\nthe board, popular...","[instagram, board members, directors, strong, ...","[# board, directors\r\n\r\nthe board, board me...","[instagram, board members, directors, company,..."
1,"Diversity, Equity, and Inclusion.md","[diverse, equity, inclusive workplace, dei, br...","[diverse, strong, equity, dei, instagram, dive...","[diverse content, diverse candidate, diverse u...","[< strong, diverse content creators, diverse c...","[inclusive workplace, diverse, committed, equi...","[# diversity, diverse content creators, divers...","[inclusive workplace, diverse, equity, committ..."
2,Balance Sheet.md,"[current assests, non-current assests, intangi...","[non-current assets, current assets, total ass...","[|\r\n| non-current assets |, |\r\n| non-curre...","[|\r\n| non-current assets |, |\r\n| non-curre...","[december, strong, category, intangible assets...","[|\r\n| current assets, |\r\n| total assets, |...","[december, strong, category, intangible assets..."
3,employee_esop_Eva Kim.md,"[employee stock ownership, content moderation,...","[employee stock ownership, stock ownership pla...","[# # employee information:\r\n- <, # employee ...","[# employee information:\r\n- < strong, b'\r\n...","[employee, rights, strong, hereinafter, alloca...","[# esop details:\r\n-, employee name:</strong,...","[employee, strong, rights, hereinafter, alloca..."
4,employee_payslip_Ethan Rodriguez.md,"[employee payslip, security officer, allowance...","[employee payslip, ethan rodriguez, strong, em...","[# # employee, # # net, # # pay, # #, # employ...","[# employee information:\r\n- < strong, # pay ...","[strong, december, pay period, earnings, emplo...","[b'\r\n # employee, employee id:</strong, empl...","[strong, december, pay period, employee, earni..."
5,Marketing Plan.md,"[social media marketing, advertising campaigns...","[social media marketing, media marketing trend...",[marketing objectives:</strong>\r\n- increase ...,"[current social media marketing trends, social...","[strong, relevant influencers, brand advocacy,...","[# company name, # document title, content mar...","[strong, relevant influencers, marketing plan,..."
6,Business Proposal.md,"[social media marketing, brand presence, visua...","[social media marketing, media marketing servi...","[visual content, # # # company, influencer col...",[comprehensive social media marketing services...,"[strong, visual content, brand presence, influ...","[# company name, social media strategy, social...","[strong, brand presence, engagement, visual co..."
7,Tax Return.md,"[tax rate, total income, employment income, in...","[employment income, investment income, taxable...","[|\r\n| tax, |\r\n| total, |\r\n|, total incom...","[|\r\n| employment income, |\r\n| taxable inco...","[category, december, , , , , , , , ]","[|\r\n| tax owed, |\r\n| tax rate, |\r\n| empl...","[december, category, , , , , , , , ]"
8,employee_contract_David Lee.md,"[employment contract, administration, responsi...","[david lee, intellectual property, hereinafter...","[# # employee information:\r\n- <, # #, # empl...","[# employee information:\r\n- < strong, compan...","[strong, employee, employer, hereinafter, empl...","[# terms, employee, employment contract, compa...","[strong, employee, employer, hereinafter, empl..."
9,Employee Handbook.md,"[employees, mission, discrimination, harassmen...","[employee, table, company, employee handbook, ...","[contact information\r\n\r\n # #, # # company,...","[# employee handbook\r\n\r\n, # company name, ...","[employees, time, company policies, benefits, ...","[# employee handbook\r\n\r\n, # company name, ...","[employees, time, company policies, mission, c..."


In [59]:
keyword_methods = ['Yake', 'TextRank', 'SingleRank', 'TopicRank', 'PositionRank', 'MultipartiteRank']

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

def evaluate_keywords(gold_standard, model_predictions):
    gold_binary = [1 if keyword in gold_standard else 0 for keyword in model_predictions]
    model_binary = [1 if keyword in model_predictions else 0 for keyword in gold_standard]
    cm = confusion_matrix(gold_binary, model_binary)
    # Calculate precision, recall, and F1-score
    precision = precision_score(gold_binary, model_binary)
    recall = recall_score(gold_binary, model_binary)
    f1 = f1_score(gold_binary, model_binary)

    return precision, recall, f1,cm

In [69]:
results = {}

for method in keyword_methods:
    init_results = []
    for i in range(len(df_merged)):
        precision, recall, f1, cm = evaluate_keywords(df_merged['Gold Standard Keywords'][i], df_merged[method][i])
        init_results.append({'Precision': precision, 'Recall': recall, 'F1-score': f1})
    results[method] = init_results

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

In [70]:
results

{'Yake': [{'Precision': 0.3333333333333333,
   'Recall': 0.3333333333333333,
   'F1-score': 0.3333333333333333},
  {'Precision': 0.6666666666666666,
   'Recall': 0.6666666666666666,
   'F1-score': 0.6666666666666666},
  {'Precision': 0.6, 'Recall': 0.6, 'F1-score': 0.6},
  {'Precision': 0.75, 'Recall': 0.75, 'F1-score': 0.75},
  {'Precision': 0.75, 'Recall': 0.75, 'F1-score': 0.75},
  {'Precision': 0.6666666666666666,
   'Recall': 0.6666666666666666,
   'F1-score': 0.6666666666666666},
  {'Precision': 0.5, 'Recall': 0.5, 'F1-score': 0.5},
  {'Precision': 0.8888888888888888,
   'Recall': 0.8888888888888888,
   'F1-score': 0.8888888888888888},
  {'Precision': 0.5, 'Recall': 0.5, 'F1-score': 0.5},
  {'Precision': 0.6666666666666666,
   'Recall': 0.8,
   'F1-score': 0.7272727272727273}],
 'TextRank': [{'Precision': 0.0, 'Recall': 0.0, 'F1-score': 0.0},
  {'Precision': 0.0, 'Recall': 0.0, 'F1-score': 0.0},
  {'Precision': 0.0, 'Recall': 0.0, 'F1-score': 0.0},
  {'Precision': 0.0, 'Recall': 

In [71]:
average_results = {}

for method in keyword_methods:
    precision_list = [result['Precision'] for result in results[method]]
    recall_list = [result['Recall'] for result in results[method]]
    f1_list = [result['F1-score'] for result in results[method]]

    average_precision = sum(precision_list) / len(precision_list)
    average_recall = sum(recall_list) / len(recall_list)
    average_f1 = sum(f1_list) / len(f1_list)

    average_results[method] = {'Average Precision': average_precision, 'Average Recall': average_recall, 'Average F1-score': average_f1}

average_results

{'Yake': {'Average Precision': 0.6322222222222222,
  'Average Recall': 0.6455555555555554,
  'Average F1-score': 0.6382828282828282},
 'TextRank': {'Average Precision': 0.0,
  'Average Recall': 0.0,
  'Average F1-score': 0.0},
 'SingleRank': {'Average Precision': 0.05,
  'Average Recall': 0.05,
  'Average F1-score': 0.05},
 'TopicRank': {'Average Precision': 0.46797619047619043,
  'Average Recall': 0.4875000000000001,
  'Average F1-score': 0.47681623931623934},
 'PositionRank': {'Average Precision': 0.06666666666666667,
  'Average Recall': 0.06666666666666667,
  'Average F1-score': 0.06666666666666667},
 'MultipartiteRank': {'Average Precision': 0.3373809523809524,
  'Average Recall': 0.3576190476190476,
  'Average F1-score': 0.3464468864468865}}

In [72]:
df = pd.DataFrame(average_results)

In [73]:
df

Unnamed: 0,Yake,TextRank,SingleRank,TopicRank,PositionRank,MultipartiteRank
Average Precision,0.632222,0.0,0.05,0.467976,0.066667,0.337381
Average Recall,0.645556,0.0,0.05,0.4875,0.066667,0.357619
Average F1-score,0.638283,0.0,0.05,0.476816,0.066667,0.346447


In [74]:
df.to_csv('keyword_method_results.csv', index=False)

## Overall Yake algorithm gives the best results over keywords, followed by TopicRank and Multipartite Rank