# User-Story Driven Requirements with GPT-4o
This notebook explores using GPT-4o on a domain-specific set of requirements. The model is used downstream to manage requirement changes driven by user stories.

### Setting Up

In [44]:
# Constants
MODEL_NAME = "gpt-4o"

# Paths of the input documents, relative to the notebook's working directory
EDIT_MD_PATH = "inputs/reformatted_srs.md"
IEEE_830 = "inputs/IEEE 830-1998.pdf"
EARS_SYNTAX = "inputs/EARS_syntax.md"
BRIDGE_INSPECTION = "inputs/a-state-of-the-art-review-of-bridge-inspection-planning-current-situation-and-future-needs.pdf"
BRIDGE_CONDITION = "inputs/BridgeConditionEvaluation.pdf"
BRIDGE_CONFERENCE = "inputs/bridge-management-conference.pdf"

# Path of the JSON file holding the user stories
USER_STORIES_PATH = "inputs/user_story_set.json"

# Documents made available to the assistant for retrieval, one per line
RAG_DOCUMENTS = [
    EDIT_MD_PATH,
    IEEE_830,
    EARS_SYNTAX,
    BRIDGE_INSPECTION,
    BRIDGE_CONDITION,
    BRIDGE_CONFERENCE,
]

In [45]:
# Read the instructions from the prompt files
def _read_prompt(path):
    """Return the full text of the prompt file at ``path``.

    The file is opened in a ``with`` block so its handle is closed as soon as
    the text has been read, instead of being left open for the garbage collector.
    """
    with open(path, "r") as prompt_file:
        return prompt_file.read()


SYSTEM_INSTRUCTIONS = _read_prompt("prompts/system_instructions.txt")
VERIFIABILITY_INSTRUCTIONS = _read_prompt("prompts/verifiable_instructions.txt")
UNAMBIGUOUS_INSTRUCTIONS = _read_prompt("prompts/unambiguous_instructions.txt")
SYNTAX_INSTRUCTIONS = _read_prompt("prompts/syntax_instructions.txt")
CONSISTENCY_INSTRUCTIONS = _read_prompt("prompts/consistency_instructions.txt")
COMPARISON_INSTRUCTIONS = _read_prompt("prompts/comparison_instructions.txt")

In [46]:
# Get the OpenAI api key
import json

# Read the key from config.json; the context manager closes the file handle
# right after parsing instead of leaving it open.
with open("config.json") as config_file:
    config_data = json.load(config_file)
openai_api_key = config_data["OPENAI_API_KEY"]

In [47]:
# Create the OpenAI Client
# `client` is the single SDK handle every helper in this notebook calls;
# it is authenticated with the `openai_api_key` value loaded from config.json.
from openai import OpenAI
client = OpenAI(api_key=openai_api_key)

In [48]:
# Get the user story and save locations (update CURRENT_STORY for each run to change the loaded user story)
import json

CURRENT_STORY = 3

# Parse the user-story file; the context manager closes the handle after loading.
with open(USER_STORIES_PATH) as user_stories_file:
    USER_STORIES = json.load(user_stories_file)

# Look the selected entry up once instead of re-indexing it for every field.
_story_entry = USER_STORIES["user-stories"][CURRENT_STORY]
USER_STORY = _story_entry["user-story"]
LOG_FILE = _story_entry["log-file-path"]
OUTPUT_FILE = _story_entry["output-file-path"]
ITERATION_FILE = _story_entry["iteration-file-path"]

# Some feedback
print("Running Story2Req for:\n",USER_STORY)


Running Story2Req for:
 As an advanced user, I want to export bridge analysis results to a PowerPoint presentation, so that I can present risk analysis and recommendations to management personnel in a digestible format.


# Creating Custom Virtual Assistants

Create a new assistant with access to the existing system requirements

#### Virtual Assistant Agents for Story2Req
- ```StoryInterpreter```: converts user stories into a use case and a set of comprehensive functional requirements and nonfunctional requirements.
- ```BetterReqSelector```: Compares two sets of requirements for the same user story and chooses the better one
- ```AmbiguityRemover```: Rewrites a given requirement statement into an unambiguous requirement
- ```VerifyEnhancer```: Rewrites a given requirement statement to be verifiable
- ```SyntaxFormatter```: Rewrites a given requirement statement to be formatted into the EARS requirement syntax
- ```ConsistencyEnforcer```: Rewrites a given set of requirements to use consistent terminology and sentence flow

In [49]:
# StoryInterpreter
def create_story_assistant():
    """Create the StoryInterpreter assistant through the OpenAI Assistants API.

    The assistant is named "User-Story Driven Requirements Generator", takes
    ``SYSTEM_INSTRUCTIONS`` as its instructions, runs on ``MODEL_NAME`` and has
    the ``file_search`` tool enabled.

    Returns:
        The object returned by ``client.beta.assistants.create``.
    """
    file_search_tool = {"type": "file_search"}
    return client.beta.assistants.create(
        name="User-Story Driven Requirements Generator",
        instructions=SYSTEM_INSTRUCTIONS,
        model=MODEL_NAME,
        tools=[file_search_tool],
    )

In [50]:
# BetterReqSelector
def get_better_requirement_set(requirement_1: str, requirement_2: str, user_story: str):
    """Ask the model which of two requirement sets better fits a user story.

    ``COMPARISON_INSTRUCTIONS`` is filled in with both requirement sets and the
    user story and sent as a single user message at temperature 0.

    Args:
        requirement_1: First candidate requirement set (text).
        requirement_2: Second candidate requirement set (text).
        user_story: The user story both candidates were generated from.

    Returns:
        ``requirement_1`` when the model's answer is "1" (ignoring surrounding
        whitespace); ``requirement_2`` for any other answer.
    """
    prompt = COMPARISON_INSTRUCTIONS.format(
        requirement_1=requirement_1,
        requirement_2=requirement_2,
        user_story=user_story
    )
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=200,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    # The reply can carry stray whitespace or a trailing newline, and the
    # content field may be None; normalise it so a genuine "1" verdict is not
    # silently treated as a vote for the second set.
    verdict = (response.choices[0].message.content or "").strip()
    return requirement_1 if verdict == "1" else requirement_2

In [51]:
# AmbiguityRemover
def get_unambiguous_requirement(requirement: str, user_story: str):
    """Run one requirement through the AmbiguityRemover prompt.

    Fills ``UNAMBIGUOUS_INSTRUCTIONS`` with the requirement and the user story,
    sends it as a single user message to ``MODEL_NAME`` (temperature 0, at most
    200 completion tokens) and returns the content of the first choice.
    """
    filled_prompt = UNAMBIGUOUS_INSTRUCTIONS.format(
        requirement=requirement, user_story=user_story
    )
    request_options = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": filled_prompt}],
        "temperature": 0,
        "max_tokens": 200,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [52]:
# VerifyEnhancer
def get_verifiable_requirement(requirement: str, user_story: str, requirement_type: str):
    """Run one requirement through the VerifyEnhancer prompt.

    Fills ``VERIFIABILITY_INSTRUCTIONS`` with the requirement, the user story
    and the requirement type, sends it as a single user message to
    ``MODEL_NAME`` (temperature 0, at most 200 completion tokens) and returns
    the content of the first choice.
    """
    filled_prompt = VERIFIABILITY_INSTRUCTIONS.format(
        requirement=requirement,
        user_story=user_story,
        requirement_type=requirement_type,
    )
    request_options = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": filled_prompt}],
        "temperature": 0,
        "max_tokens": 200,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [53]:
# SyntaxFormatter
def get_formatted_requirements(requirement: str):
    """Run one requirement through the SyntaxFormatter prompt.

    Fills ``SYNTAX_INSTRUCTIONS`` with the requirement, sends it as a single
    user message to ``MODEL_NAME`` (temperature 0, at most 200 completion
    tokens) and returns the content of the first choice.
    """
    filled_prompt = SYNTAX_INSTRUCTIONS.format(requirement=requirement)
    request_options = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": filled_prompt}],
        "temperature": 0,
        "max_tokens": 200,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [54]:
# ConsistencyEnforcer
def get_consistent_requirements(requirement_artifacts: str, user_story: str):
    """Run a whole requirement set through the ConsistencyEnforcer prompt.

    Fills ``CONSISTENCY_INSTRUCTIONS`` with the requirement artifacts and the
    user story and sends it as a single user message to ``MODEL_NAME``
    (temperature 0, at most 1500 completion tokens), requesting the
    ``json_object`` response format.

    Returns:
        The content of the first choice, as returned by the API (a string the
        caller is expected to parse).
    """
    filled_prompt = CONSISTENCY_INSTRUCTIONS.format(
        requirement_artifacts=requirement_artifacts, user_story=user_story
    )
    request_options = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": filled_prompt}],
        "response_format": {"type": "json_object"},
        "temperature": 0,
        "max_tokens": 1500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content


In [55]:
# Function to summarize differences between strings (for notes & evaluation)
import diff_match_patch as dmp_module

def difference(string1, string2):
    """Summarise how ``string2`` differs from ``string1``.

    Computes a diff with ``diff_match_patch``, applies its semantic cleanup,
    and keeps only the entries whose operation code is not 0.

    Returns:
        "No changes detected" when no such entry remains; otherwise the
        ``str()`` of each kept entry, one per line.
    """
    engine = dmp_module.diff_match_patch()
    diffs = engine.diff_main(string1, string2)
    engine.diff_cleanupSemantic(diffs)  # cleans up the list in place

    changed = [entry for entry in diffs if entry[0] != 0]
    if not changed:
        return "No changes detected"
    return "\n".join(map(str, changed))

#### Supporting Functions for Virtual Assistants

In [56]:
# Render a requirements JSON object as plain text for the evaluator
def get_requirement_set_string(req_json):
    """Flatten a requirements dictionary into one readable string.

    The output has three sections, in this order: the use cases (description
    followed by success end condition, each on its own line), the functional
    requirements, and the non-functional requirements. Both requirement lists
    are numbered from 1.

    Args:
        req_json: Mapping with the keys "use-cases", "functional-requirements"
            and "non-functional-requirements", each holding a list of dicts.

    Returns:
        The assembled text.
    """
    pieces = ["General Use Case:"]
    for use_case in req_json["use-cases"]:
        pieces.append(f"\n{use_case['description']}")
        pieces.append(f"\n{use_case['success-end-condition']}")

    pieces.append("\n\nFunctional Requirements:")
    pieces.extend(
        f"\n{number}. {entry['description']}"
        for number, entry in enumerate(req_json["functional-requirements"], start=1)
    )

    pieces.append("\n\nNon-Functional Requirements:")
    pieces.extend(
        f"\n{number}. {entry['description']}"
        for number, entry in enumerate(req_json["non-functional-requirements"], start=1)
    )

    return "".join(pieces)

Upload the files and add them to a vector store. Then, update the assistant with the new vector store.

In [57]:
# Inject the StoryInterpreter assistant with domain knowledge and necessary context information
def knowledge_injection(vector_name, file_paths_list, assistant):
    """Upload documents into a new vector store and attach it to an assistant.

    Args:
        vector_name: Name given to the newly created vector store.
        file_paths_list: Paths of the local files to upload.
        assistant: Assistant object whose ``id`` identifies the assistant to update.

    Returns:
        The assistant object returned by ``client.beta.assistants.update``,
        with the new vector store set as its ``file_search`` resource.
    """
    # Local import keeps this block self-contained.
    from contextlib import ExitStack

    # Create a vector store
    vector_store = client.beta.vector_stores.create(name=vector_name)

    # Open every file inside an ExitStack so all handles are closed once the
    # upload helper returns, even if opening or uploading one of them fails.
    with ExitStack() as open_files:
        file_streams = [
            open_files.enter_context(open(path, "rb")) for path in file_paths_list
        ]

        # Use the upload and poll SDK helper to upload the files, add them to the
        # vector store, and poll the status of the file batch for completion.
        file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
            vector_store_id=vector_store.id, files=file_streams
        )

    # Print the status and the file counts of the batch to show the result of the upload.
    print(file_batch.status)
    print(file_batch.file_counts)
    domain_assistant = client.beta.assistants.update(
        assistant_id=assistant.id,
        tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    )
    return domain_assistant

Create a thread

In [58]:

# Create a thread and attach the file to the message
def create_thread(user_story):
    """Create a thread whose first message is the user story with the SRS attached.

    The file at ``EDIT_MD_PATH`` is uploaded with purpose "assistants" and
    attached to the message for use by the ``file_search`` tool.

    Args:
        user_story: Text used as the content of the thread's first user message.

    Returns:
        The thread object returned by ``client.beta.threads.create``.
    """
    # Upload the file; the context manager closes the local handle once the
    # upload call has returned instead of leaking it on every call.
    with open(EDIT_MD_PATH, "rb") as srs_file:
        message_file = client.files.create(file=srs_file, purpose="assistants")

    thread = client.beta.threads.create(
        messages=[
            {
                "role": "user",
                "content": user_story,
                # Attach the new file to the message.
                "attachments": [
                    {"file_id": message_file.id, "tools": [{"type": "file_search"}]}
                ],
            }
        ]
    )
    return thread
 


Create a run and check the output

In [59]:
# Use the create and poll SDK helper to create a run and poll the status of
# the run until it's in a terminal state.
def generate_requirements(thread, assistant):
    """Run the assistant on a thread and return the text of its reply.

    Each annotation's text in the reply is replaced by a bracketed index
    (``[0]``, ``[1]``, ...) in annotation order.

    Args:
        thread: Thread object; its ``id`` selects the thread to run.
        assistant: Assistant object; its ``id`` selects the assistant to run.

    Returns:
        The reply text with annotation markers replaced.

    Raises:
        RuntimeError: If the run produced no messages.
    """
    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id, assistant_id=assistant.id
    )

    messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
    if not messages:
        # Report the run status instead of failing with a bare IndexError below.
        raise RuntimeError(
            f"Run {run.id} finished with status {run.status!r} and produced no messages"
        )

    message_content = messages[0].content[0].text
    reply_text = message_content.value
    # The cited file names were previously fetched with one API call per
    # annotation and then discarded; only the marker replacement affects the
    # returned text, so those lookups are skipped.
    for index, annotation in enumerate(message_content.annotations):
        reply_text = reply_text.replace(annotation.text, f"[{index}]")

    return reply_text

# print(message_content.value)
# print("\n".join(citations))

In [60]:
# Create and configure the StoryInterpreter assistant.
# The name is rebound to the value returned by knowledge_injection, which is
# called with the RAG documents and the freshly created assistant.
story_interpreter = create_story_assistant()
story_interpreter = knowledge_injection(
    vector_name="Software Requirements and Domain Knowledge",
    file_paths_list=RAG_DOCUMENTS,
    assistant=story_interpreter
)

completed
FileCounts(cancelled=0, completed=6, failed=0, in_progress=0, total=6)


# Generate High-Quality Requirements from User Story

Create the virtual assistant for user story translation to requirements and choose the best requirement set out of 5 iterations

In [61]:
# Generate requirements artifacts from the same user story multiple times
import re

# Just grab the json output (sometimes the model likes to provide reasoning before and after)
delim_start = "```json"
delim_end = "```"

previous_requirements = ""
best_requirement_set = ""
best_requirement_json = None  # stays None until one iteration parses successfully
better_requirements_log = ""
requirement_iterations = []
for i in range(5):
    better_requirements_log += f"\n{'-'*25}"
    # Create a thread using the original SRS and the chosen user story
    thread = create_thread(USER_STORY)

    # Run the thread to generate the requirements artifacts
    response = generate_requirements(thread, story_interpreter)

    try:
        # Extract the fenced JSON; fall back to the whole reply when the model
        # answered with bare JSON and no code fence.
        if delim_start in response:
            req_string = response.split(delim_start, 1)[1].split(delim_end, 1)[0]
        else:
            req_string = response

        # Convert to JSON object
        req_json = json.loads(req_string)
        requirement_set = get_requirement_set_string(req_json)
        requirement_iterations.append(requirement_set)
        better_requirements_log += f"\nIteration {i+1}:\n{req_string}"
        better_requirements_log += f"\n==="

        # Determine which is the best requirement set so far (winner used downstream).
        # "No winner yet" is tracked through best_requirement_json, so a failed
        # earlier iteration cannot skip the initialisation of the first winner.
        if best_requirement_json is None:
            best_requirement_set = requirement_set
            best_requirement_json = req_json
        else:
            best_requirement_set = get_better_requirement_set(best_requirement_set, requirement_set, USER_STORY)
            if (best_requirement_set == requirement_set):
                better_requirements_log += f"\nNew best found!"
                best_requirement_json = req_json
            else:
                better_requirements_log += f"\nPrevious set was better..."

        # Only record a set that was actually produced in this iteration.
        previous_requirements = requirement_set

    except Exception as error:
        # Best effort: log the failure and continue with the next iteration.
        better_requirements_log += f"\nERROR: Failed to convert to JSON and evaluate"
        better_requirements_log += f"\n{type(error).__name__}: {error}"
        better_requirements_log += f"\n--"
        better_requirements_log += f"\nRESPONSE"
        better_requirements_log += f"\n{response}"

    print(f"Completed iteration {i}")

# Fail here with a clear message rather than later with a NameError/TypeError.
if best_requirement_json is None:
    raise RuntimeError(
        "No iteration produced parsable requirements; see better_requirements_log for details"
    )

Completed iteration 0
Completed iteration 1
Completed iteration 2
Completed iteration 3
Completed iteration 4


### Validate and optimize the requirement artifacts [ReqValidate]

In [62]:
# Improve the functional requirements: each description goes through the
# ambiguity, verifiability and syntax passes in turn, and is then overwritten
# with the final formatted text. Every intermediate version is logged.
functional_improvement_log = ""
for fr in best_requirement_json['functional-requirements']:
    unambiguous_fr = get_unambiguous_requirement(fr["description"], USER_STORY)
    verifiable_fr = get_verifiable_requirement(unambiguous_fr, USER_STORY, "functional")
    formatted_fr = get_formatted_requirements(verifiable_fr)
    functional_improvement_log += "\n".join([
        "",
        f"- Original: {fr['description']}",
        f"- Unambiguous: {unambiguous_fr}",
        f"- Verifiable: {verifiable_fr}",
        f"- Formatted: {formatted_fr}",
        "---",
    ])
    fr["description"] = formatted_fr

In [63]:
# Improve the non-functional requirements: each description goes through the
# ambiguity, verifiability and syntax passes in turn, and is then overwritten
# with the final formatted text. Every intermediate version is logged.
nonfunctional_improvement_log = ""
for nfr in best_requirement_json['non-functional-requirements']:
    unambiguous_nfr = get_unambiguous_requirement(nfr["description"], USER_STORY)
    verifiable_nfr = get_verifiable_requirement(unambiguous_nfr, USER_STORY, "nonfunctional")
    formatted_nfr = get_formatted_requirements(verifiable_nfr)
    nonfunctional_improvement_log += "\n".join([
        "",
        f"- Original: {nfr['description']}",
        f"- Unambiguous: {unambiguous_nfr}",
        f"- Verifiable: {verifiable_nfr}",
        f"- Formatted: {formatted_nfr}",
        "---",
    ])
    nfr["description"] = formatted_nfr
    

In [64]:
# Run the consistency pass over the whole requirement set, log what it changed,
# and make its JSON output the new best requirement set.
consistency_improvement_log = ""

# The model receives the indented JSON text and its reply is parsed back into a dict.
consistency_reply = get_consistent_requirements(
    json.dumps(best_requirement_json, indent=4),
    USER_STORY,
)
consistent_req_json = json.loads(consistency_reply)

# The diff is taken on the compact serialisations of both versions.
changes = difference(
    json.dumps(best_requirement_json),
    json.dumps(consistent_req_json),
)
consistency_improvement_log = changes
best_requirement_json = consistent_req_json

### Save the iteration content for review to refine future iterations

In [65]:
# Find next iteration increment for saving the output response
import os

# Without an "{i}" placeholder every candidate name is the same, so the search
# below would never terminate once that file exists. Fail loudly instead.
if LOG_FILE.format(i=1) == LOG_FILE.format(i=2) and os.path.exists(LOG_FILE.format(i=1)):
    raise ValueError(
        f"LOG_FILE {LOG_FILE!r} has no '{{i}}' placeholder and the file already exists"
    )

# Pick the first index whose formatted path is not taken yet.
i = 1
while os.path.exists(LOG_FILE.format(i=i)):
    i += 1

output_filename = LOG_FILE.format(i=i)

In [66]:
# Document the inputs into this set of generated requirements

# Add a placeholder for observations
final_output = "# Observations\nPlaceholder for developer observations...\n\n"

# Capture the list of documents shared for RAG
document_input_header = "# Configuration"
document_list = "".join(f"{document}\n" for document in RAG_DOCUMENTS)
final_output += f"{document_input_header}\n## RAG Files:\n{document_list}"
final_output += f"## Model Name\n{MODEL_NAME}\n"
final_output += f"## Prompt\n{USER_STORY}"

# Capture the system instructions prompt
system_instructions_header = "# System Instructions"
final_output += f"\n{system_instructions_header}\n{SYSTEM_INSTRUCTIONS}\n"

# Capture the log of the requirement-set comparisons
better_requirements_header = "# Requirement Comparison Log"
final_output += f"\n{better_requirements_header}\n{better_requirements_log}"

# Capture the intermediate changes for functional requirement improvements
functional_improvement_header = "# Functional Requirement Improvement Log"
final_output += f"\n{functional_improvement_header}\n{functional_improvement_log}"

# Capture the intermediate changes for nonfunctional requirement improvements
nonfunctional_improvement_header = "# Non-Functional Requirement Improvement Log"
final_output += f"\n{nonfunctional_improvement_header}\n{nonfunctional_improvement_log}"

# Capture the intermediate changes for consistency requirement improvements
consistency_improvement_header = "# Consistency Requirement Improvement Log"
final_output += f"\n{consistency_improvement_header}\n{consistency_improvement_log}"

# Capture the final output message
llm_output_header = "# Final Output Message"
pretty_message = json.dumps(best_requirement_json, indent=4)
final_output += f"\n{llm_output_header}\n```json\n{pretty_message}\n```"

# Save the final message to a file; the context manager flushes and closes
# the handle deterministically instead of relying on garbage collection.
with open(output_filename, "w") as log_file:
    log_file.write(final_output)

41604

In [67]:
# Save the final output to a JSON for further evaluation
# Serialise first so a serialisation error cannot leave a truncated file behind,
# then write through a context manager so the handle is flushed and closed.
final_requirements_text = json.dumps(best_requirement_json, indent=4)
with open(OUTPUT_FILE, "w") as output_file:
    output_file.write(final_requirements_text)

5025

In [68]:
# Save the iterations of requirements sets for sensitivity analysis
# Serialise first so a serialisation error cannot leave a truncated file behind,
# then write through a context manager so the handle is flushed and closed.
iterations_text = json.dumps(requirement_iterations, indent=4)
with open(ITERATION_FILE, "w") as iteration_file:
    iteration_file.write(iterations_text)

9648