# WorkArena Knowledge Base
Author: Alexandre Drouin (alexandre.drouin@servicenow.com)


This notebook contains code to generate:
* Knowledge base articles given a list of facts
* Questions to query those articles for given facts
* Multiple alternative wordings of the expected answers

In [None]:
!pip install openai

In [None]:
import json
import openai
import os


# OpenAI API client shared by every cell; the key is read from the environment
client = openai.OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
)


def chat(messages, model="gpt-4-1106-preview"):
    """
    Send a conversation to the chat completion endpoint and return the
    message of the first choice in the response.

    Parameters
    ----------
    messages: list of dict
        The conversation so far, as "role"/"content" dictionaries.
    model: str
        Name of the model to query.

    """
    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message

In [None]:
def _format_facts(all_facts):
    """Render the known facts as one "The <item> is <value>." sentence per line."""
    if not isinstance(all_facts, (list, tuple)):
        # Pre-formatted text (or anything else) is inserted as-is
        return str(all_facts)

    lines = []
    for fact in all_facts:
        if isinstance(fact, dict) and "item" in fact and "value" in fact:
            # Only the fact itself goes in the prompt; other keys (e.g., an
            # already generated "article") are deliberately left out.
            lines.append(f"* The {fact['item']} is {fact['value']}.")
        else:
            lines.append(f"* {fact}")
    return "\n".join(lines)


def generate_article(item, value, all_facts, n_retries=5):
    """
    Ask the chat model for an HTML knowledge-base article that embeds a fact.

    The article must contain the sentence "The {item} is {value}" verbatim
    (compared case-insensitively). When it does not, the error is sent back
    to the model and another attempt is made.

    Parameters
    ----------
    item: str
        What the fact is about (e.g., "CEO's name").
    value: str
        The value of the fact (e.g., "Alex Johnson").
    all_facts: list of dict or str
        All the facts of the knowledge base, which the article must not
        contradict. For a list of dictionaries, only the "item" and "value"
        entries are included in the prompt. Any other object is inserted
        using its string representation.
    n_retries: int
        Maximum number of attempts.

    Returns
    -------
    str
        The article, stripped of <body> tags, code fences and <strong> tags.

    Raises
    ------
    RuntimeError
        If no valid article was produced after `n_retries` attempts.

    """
    facts_text = _format_facts(all_facts)
    prompt = f"""
<task>
You are in charge of writing knowledge-base articles to document important company and workplace information.
Your articles will be used as reference by the employees of the company.
</task>

Here is a list of facts in the knowledge base:
<all_facts>
{facts_text}
</all_facts>

You need to write an article about this specific fact:
<article_fact>
The {item} is {value}.
</article_fact>

<instructions>
* Generate a knowledge-base article that contains the <article_fact> as-is. Do not modify or add to its text.
* Hide the fact inside a bunch of other related information, but do not source the related information from <all_facts>. Make stuff up.
* Make sure that nothing you write contradicts <all_facts>
* You must use HTML format. Generate only the <body> tag.
* Don't include information about the knowledge base itself or headers like \"welcome to our knowledge base\".
</instructions>

"""
    messages = [
        {
            "role": "system",
            "content": "You are a ServiceNow system administrator in charge of writing knowledge base articles to help employees in your organization.",
        },
        {"role": "user", "content": prompt},
    ]
    expected_sentence = f"the {item} is {value}".lower()

    print("... attempting to produce article")
    for retry in range(n_retries):
        print(f"... try {retry}")
        reply = chat(messages).content
        # Keep the reply in the conversation so that feedback refers to it
        messages.append({"role": "assistant", "content": reply})
        article = (
            reply.replace("<body>", "")
            .replace("</body>", "")
            .replace("```HTML", "")
            # Lower-case fences would otherwise leave a stray "html" token
            .replace("```html", "")
            .replace("```", "")
            .replace("<strong>", "")
            .replace("</strong>", "")
            .strip()
        )

        # Validate that the fact is included without modification
        if expected_sentence in article.lower():
            print("... valid article found.")
            return article

        error = f'Error: Could not find the string "The {item} is {value}" in this article.'
        messages.append(
            {"role": "user", "content": error + "\nDon't apologize and try again."}
        )

    raise RuntimeError(f"Failed to produce a valid article after {n_retries} retries.")

In [None]:
# Seed facts of the knowledge base. Each entry pairs an "item" (what the fact
# is about) with its "value"; the sentence "The <item> is <value>." is what
# gets embedded in the generated article for that entry.
facts = [
    {"item": "password to conference room A-561", "value": "roo918k"},
    {"item": "address of office #456", "value": "42, Pizza street, New York, USA"},
    {"item": "number of employees in department X", "value": "75"},
    {"item": "CEO's name", "value": "Alex Johnson"},
    {"item": "year of company establishment", "value": "1998"},
    {"item": "Wi-Fi network name in office #456", "value": "Office456_WiFi"},
    {"item": "Wi-Fi password for office #456", "value": "456SecureNet!"},
    {"item": "average annual revenue", "value": "$50 million"},
    {"item": "number of branches worldwide", "value": "18"},
    {"item": "name of the head of HR department", "value": "Samantha Green"},
    {"item": "brand of coffee machine in kitchen #3", "value": "Delonghi Magnifica"},
    {"item": "company's stock ticker symbol", "value": "COMPX"},
    {"item": "company's main product", "value": "Advanced Analytics Software"},
    {"item": "color of the carpet in conference room A-561", "value": "Navy Blue"},
    {"item": "annual budget for marketing department", "value": "$2 million"},
    {"item": "capacity of conference room B-762", "value": "30 people"},
    {"item": "number of floors in the main office building", "value": "10"},
    {"item": "type of plants in the lobby of office #456", "value": "Ficus Lyrata"},
    {
        "item": "catering service provider for office events",
        "value": "Gourmet Caterers",
    },
    {"item": "software used for payroll management", "value": "QuickBooks Payroll"},
    {"item": "number of parking spaces in office #456 parking lot", "value": "150"},
    {"item": "company's first product", "value": "Data Analysis Toolkit"},
    {"item": "brand of computers used in IT department", "value": "Lenovo ThinkPad"},
    {"item": "annual CSR budget", "value": "$500,000"},
    {"item": "make of company's official car", "value": "Tesla Model X"},
    {"item": "name of the company's auditor", "value": "PriceWaterhouseCoopers"},
    {"item": "number of customer service representatives", "value": "40"},
    {
        "item": "type of air conditioning system in office #456",
        "value": "Centralized HVAC",
    },
    {"item": "name of the cafeteria manager in office #456", "value": "Miguel Torres"},
    {"item": "duration of lunch break in office #456", "value": "1 hour"},
    {"item": "official language for company communication", "value": "English"},
    {"item": "number of conference rooms in office #456", "value": "8"},
    {"item": "average time for IT support response", "value": "15 minutes"},
    {"item": "brand of printers in office #456", "value": "HP LaserJet Pro"},
    {"item": "capacity of the largest meeting room", "value": "50 people"},
    {"item": "number of patents held by the company", "value": "35"},
    {"item": "company's main competitor", "value": "TechRivals Inc."},
    {"item": "company's slogan", "value": "Innovating the Future"},
    {"item": "number of countries the company operates in", "value": "12"},
    {
        "item": "type of security system at office #456",
        "value": "Biometric Access Control",
    },
    {"item": "name of the employee of the month", "value": "Elena Rodriguez"},
    {"item": "brand of the photocopier in office #456", "value": "Canon ImageRunner"},
    {"item": "type of coffee provided in the kitchen", "value": "Arabica Beans"},
    {"item": "annual IT budget", "value": "$1.5 million"},
    {"item": "name of the legal counsel firm", "value": "Baker & McKenzie"},
    {"item": "average employee satisfaction score", "value": "8.5/10"},
    {"item": "number of departments in the company", "value": "12"},
    {"item": "company's largest client", "value": "GlobalTech Industries"},
    {
        "item": "type of gym equipment in fitness center",
        "value": "Life Fitness machines",
    },
    {"item": "average number of yearly hires", "value": "100"},
    {"item": "brand of air purifiers used in the office", "value": "Dyson Pure Cool"},
    {
        "item": "name of the employee health insurance provider",
        "value": "BlueCross BlueShield",
    },
    {"item": "number of projects completed last year", "value": "22"},
    {"item": "company's main export market", "value": "European Union"},
    {
        "item": "name of the office cleaning service provider",
        "value": "CleanSweep Inc.",
    },
    {"item": "type of video conferencing software used", "value": "Zoom"},
    {"item": "color of the company logo", "value": "Royal Blue and Silver"},
    {"item": "average client retention rate", "value": "85%"},
    {"item": "name of the company's largest shareholder", "value": "David Thompson"},
    {"item": "total square footage of office #456", "value": "25,000 square feet"},
    {
        "item": "type of backup power system in office #456",
        "value": "Diesel Generators",
    },
    {"item": "average yearly expenditure on office supplies", "value": "$80,000"},
    {"item": "name of the cafeteria food supplier", "value": "FreshFoods Ltd."},
    {
        "item": "type of fire safety system in office #456",
        "value": "Automatic Sprinkler System",
    },
    {"item": "company's primary industry", "value": "Technology"},
    {"item": "number of annual company-wide meetings", "value": "4"},
    {"item": "brand of smartphones provided to employees", "value": "Samsung Galaxy"},
    {"item": "name of the software used for project management", "value": "Trello"},
    {"item": "average number of business trips per employee annually", "value": "3"},
    {"item": "number of IT support staff", "value": "15"},
    {
        "item": "company's primary social media platform for marketing",
        "value": "LinkedIn",
    },
    {"item": "number of active contracts", "value": "40"},
    {"item": "annual expenditure on R&D", "value": "$3 million"},
    {
        "item": "type of chairs used in conference room A-561",
        "value": "Ergonomic Office Chairs",
    },
    {
        "item": "name of the most used software by the design team",
        "value": "Adobe Creative Suite",
    },
    {"item": "average delivery time for company products", "value": "5 business days"},
    {"item": "type of lighting in office #456", "value": "LED Lights"},
    {"item": "average monthly electricity bill for office #456", "value": "$10,000"},
    {"item": "company's largest expense category", "value": "Employee Salaries"},
    {"item": "brand of the refrigerator in the kitchen", "value": "LG InstaView"},
    {
        "item": "type of health and safety training provided",
        "value": "First Aid and Fire Safety",
    },
    {
        "item": "name of the corporate social responsibility program",
        "value": "TechForGood",
    },
    {"item": "annual water consumption in office #456", "value": "50,000 gallons"},
    {"item": "company's policy on remote work", "value": "Hybrid Model"},
    {"item": "number of customer complaints last year", "value": "120"},
    {"item": "name of the most popular product", "value": "Smart Analytics Pro"},
    {
        "item": "type of snacks available in the kitchen",
        "value": "Healthy and Organic Snacks",
    },
    {"item": "average duration of employee tenure", "value": "5 years"},
    {
        "item": "brand of the security cameras in office #456",
        "value": "Axis Communications",
    },
    {"item": "type of heating system in office #456", "value": "Forced Air Heating"},
    {
        "item": "name of the internet service provider for office #456",
        "value": "Comcast Xfinity",
    },
    {
        "item": "company's policy on environmental sustainability",
        "value": "Zero-Waste Initiatives",
    },
    {
        "item": "name of the annual team-building retreat location",
        "value": "Lakeview Resort",
    },
    {
        "item": "type of vending machines in office #456",
        "value": "Cashless Payment Vending",
    },
    {"item": "number of team leaders in the company", "value": "35"},
    {"item": "annual spending on employee training programs", "value": "$250,000"},
    {"item": "CEO's direct phone line", "value": "+1-555-1010-2020"},
    {"item": "Wi-Fi network name in the lobby", "value": "CorporateGuest123"},
    {"item": "last fire drill date at main building", "value": "2023-06-15"},
    {"item": "number of employees in marketing department", "value": "35"},
]

print("Number of facts:", len(facts))

# Snapshot of the bare facts to use as context in every prompt. The entries of
# `facts` gain an "article" key as the loop progresses; passing them directly
# would leak all previously generated articles into the following prompts.
fact_statements = [{"item": f["item"], "value": f["value"]} for f in facts]

for i, f in enumerate(facts):
    f["article"] = generate_article(f["item"], f["value"], all_facts=fact_statements)
    print("... Article", i + 1, f["item"], f["value"])
    print(f["article"])
    print("\n" * 2)

In [None]:
# Remove leftover markdown fences and <strong> tags from the articles
for f in facts:
    f["article"] = (
        f["article"]
        .replace("```HTML", "")
        .replace("```", "")
        .replace("<strong>", "")
        .replace("</strong>", "")
        .strip()
    )

# Use a context manager so the file is flushed and closed once written
with open("knowledge_base.json", "w") as kb_file:
    json.dump(facts, kb_file)

# Generate questions and answers

In [None]:
import concurrent.futures
import re

from collections import Counter
from tqdm.notebook import tqdm


# Reload the generated KB (context manager so the file handle gets closed)
with open("knowledge_base.json", "r") as kb_file:
    kb = json.load(kb_file)
print("Loaded", len(kb), "articles")


# Clear all questions (and the alternative answers that go with them)
for kb_i in kb:
    if "questions" in kb_i:
        kb_i["questions"] = []
        kb_i["alternative_answers"] = []

## Generate several variants of the question

In [None]:
def get_most_frequent_items(my_list):
    """
    Return the items that occur most often in `my_list`.

    Parameters
    ----------
    my_list: iterable of hashable
        The items to count.

    Returns
    -------
    list
        All items tied for the highest count, in order of first occurrence.
        An empty list is returned for an empty input.

    """
    # Count the frequency of each item in the list
    frequency = Counter(my_list)

    # Nothing to count: max() would raise ValueError on an empty sequence
    if not frequency:
        return []

    # Find the maximum frequency
    max_frequency = max(frequency.values())

    # Get all items with the maximum frequency
    return [item for item, count in frequency.items() if count == max_frequency]


def generate_question(article, item, value, n_questions=10, n_retries=5):
    """
    Generate rephrasings of the question "What is the {item}?" whose answer
    is exactly `value`.

    Each batch produced by the chat model is checked for format (one question
    per line, not numbered, followed by formatting instructions, no mention of
    the article or of the answer). Each question is then tested with
    `is_question_answerable`. Questions that pass are kept; at the first one
    that fails, feedback is sent to the model and a new batch is requested.

    Parameters
    ----------
    article: str
        The knowledge-base article that contains the fact.
    item: str
        What the fact is about.
    value: str
        The expected answer.
    n_questions: int
        Number of questions to request per batch and to return.
    n_retries: int
        Accepted for interface compatibility; currently not used. The loop
        runs until enough good questions have been gathered.

    Returns
    -------
    list of str
        At most `n_questions` validated questions, in arbitrary order.

    """
    prompt = f"""
    Here is an article taken from a company knowledge base.

    <article>
    {article}
    </article>

    This article contains the following fact:
    <fact>
    The {item} is {value}.
    </fact>

    Here is a question about this fact to which the answer is \"{value}\":
    <question>
    What is the {item}?
    </question>

    Produce {n_questions} rephrasings of this question.

    <instructions>
    * Make sure that your questions are precise and unambiguous.
    * It must be clear that the questions are asking about \"{item}\" and
      their answer must still be exactly \"{value}\".
    * Make sure that you provide clear and specific instructions on the expected
      format for the answer (e.g., Day, Month, Year).
    * You cannot, in any circumstances, reveal information from the answer in the question or instructions.
    * Make sure they are questions that end with a question mark.
    * Make sure that your questions do not mention the article (e.g., \"in the article\").
    * Answer with one per line and do not number them.
    </instructions>

    <example>
    Suppose the question is \"On which day was the company founded?\" and the answer is \"January 26, 2024\",
    then a good rephrasing would be \"When was the company founded? Answer with Month Day, Year\".
    </example>

    <example>
    Suppose the question is \"What kind of fridge do we have in the cafeteria?\" and the answer is \"LG X456\",
    then a good rephrasing would be \"Which type of fridge is in the cafeteria? Answer with Brand followed by Model name\".
    </example>

    <example>
    Suppose the question is \"Where is the company headquarter located?\" and the answer is \"123, Banana Street, Montreal, Canada\",
    then a good rephrasing would be \"What's the address of the company headquarter? Answer with Number, Street, City, Country\".
    </example>

    <example>
    Suppose the question is \"What were the total sales of the company in 2023?\" and the answer is \"150B$\",
    then a good rephrasing would be \"What do the company sales for 2023 sum up to? Answer with NumberB$, where B is billions\".
    </example>
    """
    good_questions = set()
    messages = [{"role": "user", "content": prompt}]
    while len(good_questions) < n_questions:
        reply = None
        try:
            # Generate questions
            reply = chat(messages).content

            # Parse output
            # ... check the article is not mentioned
            assert (
                "article" not in reply.lower()
            ), "You are not allowed to mention the article in your rephrasing of the questions."
            # ... check the number of actual questions included
            assert (
                reply.count("?") == n_questions
            ), f"I couldn't find {n_questions} question marks in the output. Make sure all questions end with ?"
            # ... detect numbered questions: a line that starts with "<number>." or "<number>)"
            assert not re.search(
                r"^\s*\d+[.)]\s", reply, flags=re.MULTILINE
            ), "The questions appear to be numbered, but they should not."
            # ... drop empty and whitespace-only lines
            questions = [q.strip() for q in reply.split("\n") if q.strip()]
            # ... check one question per line
            assert (
                len(questions) == n_questions
            ), f"Your answer is not {n_questions} lines long. Make sure it contains {n_questions} questions and one per line."
            # ... check each question has instructions
            assert all(
                len(q.split("?")) > 1 and len("".join(q.split("?")[1:]).strip()) > 5
                for q in questions
            ), 'Make sure you provide clear formatting instructions for each question (e.g., "Question? Answer with").'
            # ... check that answer is not mentioned
            for q in questions:
                # ... exact value is not in the question
                assert (
                    value.lower() not in q.lower()
                ), "Do not include the answer in the questions/instructions!"

            # Validate questions
            bad_questions = []
            bad_answers = []
            for q in questions:
                print("... testing:", q, end=" ")
                success, answers = is_question_answerable(article, q, value)
                if not success:
                    bad_questions.append(q)
                    bad_answers.append(answers)
                    print("FAIL")
                    print("... preemptively stopping to give feedback")
                    break
                else:
                    good_questions.add(q)
                    print("PASS")

            # Give feedback and retry
            if len(bad_questions) > 0:
                feedback = ""
                for q, a in zip(bad_questions, bad_answers):
                    feedback += "<ambiguous_question>\n"
                    feedback += "    <statement>\n"
                    feedback += "        " + q + "\n"
                    feedback += "    </statement>\n"
                    feedback += "    <incorrect_answers>\n"
                    feedback += "\n".join("        " + x for x in a) + "\n"
                    feedback += "    </incorrect_answers>\n"
                    feedback += "</ambiguous_question>\n"
                print(feedback)

                # Keep the model's own output in the conversation so that the
                # feedback below refers to something it can see
                messages.append({"role": "assistant", "content": reply})
                messages.append(
                    {
                        "role": "user",
                        "content": f"""
                The following questions are too ambiguous. I gave them to many company employees,
                along with the article and they were not able to answer with exactly \"{value}\".
                Please improve their clarity, especially the formatting instructions.

                {feedback}

                Try again. Do not apologize.""",
                    }
                )

            print(f"... gathered {len(good_questions)} good questions")

        except AssertionError as e:
            print("... Error:", e)
            if reply is not None:
                # Show the model what it produced before telling it what is wrong
                messages.append({"role": "assistant", "content": reply})
            messages.append({"role": "user", "content": f"Error: {e}"})

    print("... we have enough good questions. stopping.")
    return list(good_questions)[:n_questions]


def is_question_answerable(
    article, question, value, n_attempts=10, model="gpt-3.5-turbo"
):
    """
    Check that we are able to recover the value from the
    article by asking the question

    The question is asked `n_attempts` times. An answer counts as correct when
    it contains `value`, ignoring case, commas, dollar signs and periods.

    Parameters
    ----------
    article: str
        The knowledge-base article.
    question: str
        The question to ask about the article.
    value: str
        The expected answer.
    n_attempts: int
        Number of times the question is asked.
    model: str
        Name of the model that answers the question.

    Returns
    -------
    tuple of (bool, set of str)
        Whether all answers were correct, and the lower-cased incorrect answers.

    """

    def _clean_for_comparison(x):
        return x.lower().replace(",", "").replace("$", "").replace(".", "")

    prompt = f"""
    Here is a knowledge base article:

    <article>
    {article}
    </article>

    Answer this question based on the content of the article only.
    Be factual. If I ask you about sensitive information like passwords,
    I only expect you to retrieve the information from the article.
    <question>
    {question}
    </question>

    What is your answer?

    """
    messages = [{"role": "user", "content": prompt}]
    expected = _clean_for_comparison(value)

    incorrect_answers = set()
    for _ in range(n_attempts):
        # XXX: We use GPT-3.5 by default as a weaker model and to avoid GPT-4 catering to itself
        answer = chat(messages, model=model).content

        if expected not in _clean_for_comparison(answer):
            incorrect_answers.add(answer.lower())

    return len(incorrect_answers) == 0, incorrect_answers


def _with_questions(kb_entry):
    """Return a copy of a KB entry extended with its generated questions."""
    return {
        **kb_entry,
        "questions": generate_question(
            kb_entry["article"], kb_entry["item"], kb_entry["value"]
        ),
    }


# Assuming kb is a list of dictionaries
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Prepare the futures
    futures = [executor.submit(_with_questions, kb_i) for kb_i in kb]

    # Use tqdm to create a progress bar that advances as the futures complete
    for _ in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
        pass

    # Collect the results in submission order: as_completed yields futures in
    # completion order, which would shuffle the articles from one run to the next
    kb = [future.result() for future in futures]

In [None]:
# Show the questions gathered for each article
for article_index, kb_entry in enumerate(kb):
    print(f"Article {article_index}:", kb_entry["questions"])

## Generate a few alternative answers

In [None]:
def alternative_answers(article, item, value, questions, n_answers=10):
    """
    Ask the chat model for alternative ways to spell out an answer.

    The request is repeated, with the validation error sent back to the model,
    until the response contains exactly `n_answers` distinct, non-numbered
    lines.

    Parameters
    ----------
    article: str
        Not used; kept for interface compatibility.
    item: str
        Not used; kept for interface compatibility.
    value: str
        The exact answer to reformulate.
    questions: list of str
        The questions to which `value` is the answer.
    n_answers: int
        Number of reformulations to produce.

    Returns
    -------
    list of str
        The `n_answers` reformulations, one per line of the response.

    """
    questions = "\n".join(questions)

    prompt = f"""
    Here is a set of questions:
    <questions>
    {questions}
    </questions>

    The exact answer to all of these questions is:
    <answer>
    {value}
    </answer>

    Give me {n_answers} other ways to spell out the answer.
    Don't add context words around it or anything, just reformulate it.

    <example>
    Initial value: 5.5/10
    Reformulated: 5.5 out of 10
    Reformulated: 55%
    Reformulated: fifty-five percent
    </example>

    <example>
    Initial value: Tesla Model X
    Reformulated: Model X by Tesla
    Reformulated: Tesla's Model X
    </example>

    <example>
    Initial value: 150$
    Reformulated: one hundred fifty dollars
    Reformulated: 150.00$
    Reformulated: $150
    </example>

    Answer with one per line. Don't number them.

    """
    messages = [{"role": "user", "content": prompt}]

    while True:
        reply = None
        try:
            reply = chat(messages).content

            # Validation
            # ... detect numbered answers: a line that starts with "<number>." or "<number>)"
            assert not re.search(
                r"^\s*\d+[.)]\s", reply, flags=re.MULTILINE
            ), "The answers appear to be numbered, but they should not."
            # ... drop empty and whitespace-only lines
            answers = [a.strip() for a in reply.split("\n") if a.strip()]
            # ... check one answer per line
            assert (
                len(answers) == n_answers
            ), f"Your response is not {n_answers} lines long. Make sure it contains {n_answers} answers and one per line."
            assert (
                len(set(answers)) == n_answers
            ), f"You provided duplicate values. There were only {len(set(answers))} unique values."
            break
        except AssertionError as e:
            print("... Error:", e)
            if reply is not None:
                # Show the model what it produced before telling it what is wrong
                messages.append({"role": "assistant", "content": reply})
            messages.append({"role": "user", "content": f"Error: {e}"})

    return answers


# Attach alternative spellings of the expected answer to every KB entry
for kb_entry in tqdm(kb, total=len(kb)):
    kb_entry["alternative_answers"] = alternative_answers(
        article=kb_entry["article"],
        item=kb_entry["item"],
        value=kb_entry["value"],
        questions=kb_entry["questions"],
    )

In [None]:
# Show the alternative answers produced for each article
for kb_entry in kb:
    entry_answers = kb_entry["alternative_answers"]
    print(entry_answers)

## Save it

In [None]:
# Use a context manager so the file is flushed and closed once written
with open("knowledge_base.json", "w") as kb_file:
    json.dump(kb, kb_file)