In [46]:
import pandas as pd

In [47]:
# read the sheet listing the candidate articles (the first two rows are skipped)
df = pd.read_excel(
    '../data/nlp4re-tools-data.xlsx',
    sheet_name='Potential Articles',
    skiprows=2,
)

# keep only the rows whose verdict is 'included'
dfinc = df.loc[df['Verdict'].eq('included')]

In [48]:
# build the lookup from license code to license link out of the 'Codes License' sheet
# (only the two needed columns are read; the last row of the sheet is skipped)
license_codes = (
    pd.read_excel(
        '../data/nlp4re-tools-data.xlsx',
        sheet_name='Codes License',
        usecols=['Code', 'License Link'],
        skipfooter=1,
    )
    .set_index('Code')
)
links_by_column = license_codes.to_dict()
licenses: dict = links_by_column['License Link']

# discard the entries that are irrelevant for linking
# (a KeyError here means the code is no longer present in the sheet)
for irrelevant_code in ('Unknown', 'None'):
    del licenses[irrelevant_code]

In [49]:
# introductory section of the generated markdown file; the tool count is taken from the filtered frame
header = f"# NLP4RE Tools\n\nThis list contains the {len(dfinc)} NLP4RE (natural language processing for requirements engineering) tools extracted from the 12 most prominent venues according to Zhao et al. [1] from 2019-2023.\n\n[1] Zhao, L., Alhoshan, W., Ferrari, A., Letsholo, K. J., Ajagbe, M. A., Chioasca, E. V., & Batista-Navarro, R. T. (2021). Natural language processing for requirements engineering: A systematic mapping study. ACM Computing Surveys (CSUR), 54(3), 1-41."

In [50]:
# heading, explanatory sentence and table header of the tools section
tools = "## Tools\n\nIn the following list, the R-ID column refers to the [reference](#references) in which the respective tool has been presented.\n\n| R-ID | Name | License | Description | RE Activity | Task |\n|---|---|---|---|---|---|"


def _cell(value) -> str:
    """Return the text of one table cell; empty spreadsheet cells (NaN/None) become ''.

    Without this, a missing value would be interpolated into the table as the literal text "nan".
    """
    return "" if pd.isna(value) else str(value)


# collect one markdown table row per included tool and append them in a single join
tool_rows = []
for _, row in dfinc.iterrows():
    # add the reference to the containing article
    tool = f"\n| {_cell(row['ID'])} | "

    # add the tool's name (and optionally a link to the tool's source, if available)
    if row['Availability'] in ['Archived', 'Open Source', 'Reachable']:
        if pd.isna(row['Source']):
            # marked as available but no source recorded: plain name instead of a "(nan)" link
            tool += f"{_cell(row['Name'])} |"
        else:
            tool += f"[{_cell(row['Name'])}]({row['Source']}) |"
        # in case the tool's license is specified, name and link to the license
        # (a missing license is NaN, which is never a key of `licenses`)
        if row['License'] in licenses:
            tool += f"[{row['License']}]({licenses[row['License']]}) |"
        else:
            tool += " |"
    else:
        # unavailable tool: name only, license column left empty
        tool += f"{_cell(row['Name'])} | | "

    # add the tool's description, activity, and task type attribute
    tool += f"{_cell(row['Description'])} | {_cell(row['RE Activity'])} | {_cell(row['Tool Task'])} |"
    tool_rows.append(tool)

tools += "".join(tool_rows)

In [54]:
# heading, explanatory sentence and table header of the references section
references = "## References\n\nThe tools were extracted from the following publications.\n\n| R-ID | Citation | Venue | Year|\n|---|---|---|---|"

# append one table row per included article: ID, citation string, venue and year
references += "".join(
    f"\n| {entry['ID']} | {entry['Reference']} | {entry['Venue']} | {entry['Year']} |"
    for _, entry in dfinc.iterrows()
)

In [56]:
# the document consists of the three sections, each separated by a blank line
paragraphs = [header, tools, references]
document = "\n\n".join(paragraphs)

# write the assembled markdown to the tools list file (overwrites any existing content)
with open('./../../tools/nlp4re-tools.md', 'w', encoding='utf-8') as md_file:
    md_file.write(document)