# Continuous Requirements Generation with GPT-4
This notebook explores the use of utilizing GPT 4 on a domain-specific set of requirements. This model will be used downstream to manage requirements changes provided by external sources.

### Setting Up

In [162]:
# %pip install pymupdf4llm

In [163]:
# Constants
MODEL_NAME = "gpt-4-turbo-preview"

IEEE_830 = "data/IEEE 830-1998.pdf"
# BRIDGE_INSPECTION = "data/a-state-of-the-art-review-of-bridge-inspection-planning-current-situation-and-future-needs.pdf"
SRS_PDF_PATH = "data/2005 - pontis.pdf"
SRS_MD_PATH = "data/output_srs.md"
SRS_TXT_PATH = "data/output_srs.txt"
FINAL_OUTPUT_PATH = "outputs/final_output.md"

JIRA_ISSUE = """
    As a highway information analyst, 
    I want to see traffic data in the map display
    so that I can make better decisions for traffic impacts on bridges.
"""

# RAG_DOCUMENTS = [SRS_PDF_PATH, SRS_TXT_PATH, IEEE_830, BRIDGE_INSPECTION]
RAG_DOCUMENTS = [SRS_PDF_PATH, SRS_MD_PATH, IEEE_830]

In [164]:
# Get the OpenAI api key
import json

config_data = json.load(open("config.json"))
openai_api_key = config_data["OPENAI_API_KEY"]

In [165]:
# Create the OpenAI Client
from openai import OpenAI
client = OpenAI(api_key=openai_api_key)

In [166]:
# Read the system instructions from the prompt file
with open("prompts/system_instructions.txt", "r") as file:
    system_instructions = file.read()

### Data Pre-Processing
The data format for fine-tuning GPT 3.5 must be in the conversational chat format seen below. The data will undergo cleaning and pre-processing to match the expected data format.
```
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
```

In [167]:
# Convert the SRS pdf file into a markdown file
import pymupdf4llm
import pathlib

# Parse the pdf file into markdown format
srs_md_text = pymupdf4llm.to_markdown(SRS_PDF_PATH)

# Remove the repeating footers from the file
srs_md_text = srs_md_text.replace("_Functional Requirements Specification v1.0_", "")
srs_md_text = srs_md_text.replace("_Pontis 5.0_", "")

# Save as a markdown file and text file for later usee
pathlib.Path(SRS_MD_PATH).write_bytes(srs_md_text.encode())
pathlib.Path(SRS_TXT_PATH).write_bytes(srs_md_text.encode())

125977

# Promping GPT 3.5 Turbo for Requirements Change Management

In [168]:
# User prompt
user_story_example = """
    As a user, 
    I want to add donation centers as favorites on my profile, 
    so that I can view them later.
"""

In [169]:
response = client.chat.completions.create(
    model=MODEL_NAME, 
    messages = [
        {
            "role" : "system",
            "content": system_instructions
        },
        {
            "role" : "user",
            "content": user_story_example
        }
    ],
    temperature = 0.7,
    max_tokens = 150,
    top_p = 1
)
print(response)

ChatCompletion(id='chatcmpl-9iWHPmXWXg52U0mNB9260y5cNkUd7', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content="For the user story provided, we can break down the requirements into several artifacts as follows:\n\n### Functional Requirements:\n\n1. **FR1:** The system must allow users to add donation centers to a list of favorites.\n   - **Location:** [output_srs.md: Functional Requirements Section]\n\n2. **FR2:** The system must provide users with the ability to view their list of favorited donation centers.\n   - **Location:** [output_srs.md: Functional Requirements Section]\n\n### Non-Functional Requirements:\n\n1. **NFR1:** The system should update the user's list of favorites in real-time to ensure immediate access after a donation center is added.\n   - **Location:** [output_srs.md: Non-Functional Requirements Section", role='assistant', function_call=None, tool_calls=None))], created=1720397543, model='gpt-4-0125-preview', objec

# Creating a Custom Assistant with File Search Capabilities

Create a new assistant with access to the existing system requirements

In [170]:
assistant = client.beta.assistants.create(
    name="Continuous Requirements Generator",
    instructions=system_instructions,
    model=MODEL_NAME,
    tools=[{"type": "file_search"}]
)

Upload the file and add them to a Vector Store

In [171]:
# Create a vector store caled "Financial Statements"
vector_store = client.beta.vector_stores.create(name="Gemini Software System Requirements")
 
# Ready the files for upload to OpenAI
file_paths = RAG_DOCUMENTS
file_streams = [open(path, "rb") for path in file_paths]
 
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion.
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=file_streams
)
 
# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=3, failed=0, in_progress=0, total=3)


Update the assistand to use the new Vector Store

In [172]:
assistant = client.beta.assistants.update(
  assistant_id=assistant.id,
  tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
)

Create a thread

In [173]:
# Upload the user provided file to OpenAI
message_file = client.files.create(
  file=open(SRS_TXT_PATH, "rb"), purpose="assistants"
)
 
# Create a thread and attach the file to the message
thread = client.beta.threads.create(
  messages=[
    {
      "role": "user",
      "content": JIRA_ISSUE,
      # Attach the new file to the message.
      "attachments": [
        { "file_id": message_file.id, "tools": [{"type": "file_search"}] }
      ],
    }
  ]
)
 
# The thread now has a vector store with that file in its tool resources.
print(thread.tool_resources.file_search)

ToolResourcesFileSearch(vector_store_ids=['vs_VxhElziE5mCs5myTkmgaZ140'])


Create a run and check the output

In [174]:
# Use the create and poll SDK helper to create a run and poll the status of
# the run until it's in a terminal state.

run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id
)

messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))

message_content = messages[0].content[0].text
annotations = message_content.annotations
citations = []
for index, annotation in enumerate(annotations):
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    if file_citation := getattr(annotation, "file_citation", None):
        cited_file = client.files.retrieve(file_citation.file_id)
        citations.append(f"[{index}] {cited_file.filename}")

print(message_content.value)
print("\n".join(citations))

Based on the user story provided by the highway information analyst desiring to see traffic data in the map display for making better traffic impact decisions on bridges, the following requirement artifacts have been prepared:

### Functional Requirements

1. **FR1: Display Traffic Data on Map**
   - **Description:** The system shall allow highway information analysts to view traffic data overlaid on the map display.
   - **Rationale:** To enable analysts to assess traffic impacts on bridge infrastructure visually.
   - **Priority:** High
   - **Location:** Section 4.0 Functional Requirements, after requirement 1.8 .

### Use Case

1. **Use Case: View Traffic Data on Map Display**
   - **Goal:** Enable highway information analysts to make informed decisions regarding traffic impacts on bridges.
   - **Actors:** Highway information analysts.
   - **Preconditions:** The user is logged into the system with highway information analyst permissions.
   - **Triggers:** The user selects the op

In [175]:
# Document the inputs into this set of generated requirements

# Add a placeholder for observations
final_output = "# Observations\nPlaceholder for developer observations...\n\n"

# Capture the list of documents shared for RAG
document_input_header = "# Configuration"
document_list = ""
for document in RAG_DOCUMENTS:
    document_list += f"{document}\n"
final_output += f"{document_input_header}\n## RAG Files:\n{document_list}"
final_output += f"## Model Name\n{MODEL_NAME}\n"
final_output += f"## Prompt{JIRA_ISSUE}"

# Capture the system instructions prompt
system_instructions_header = "# System Instructions"
final_output += f"\n{system_instructions_header}\n{system_instructions}\n"

# Capture the final output message
llm_output_header = "# Final Output Message"
final_output += f"\n{llm_output_header}\n{message_content.value}\n"

# Save the final message to a file
pathlib.Path(FINAL_OUTPUT_PATH).write_bytes(final_output.encode())

8273

In [176]:
# from typing_extensions import override
# from openai import AssistantEventHandler, OpenAI
 
# client = OpenAI(api_key=openai_api_key)
 
# class EventHandler(AssistantEventHandler):
#     @override
#     def on_text_created(self, text) -> None:
#         print(f"\nassistant > ", end="", flush=True)

#     @override
#     def on_tool_call_created(self, tool_call):
#         print(f"\nassistant > {tool_call.type}\n", flush=True)

#     @override
#     def on_message_done(self, message) -> None:
#         # print a citation to the file searched
#         message_content = message.content[0].text
#         annotations = message_content.annotations
#         citations = []
#         for index, annotation in enumerate(annotations):
#             message_content.value = message_content.value.replace(
#                 annotation.text, f"[{index}]"
#             )
#             if file_citation := getattr(annotation, "file_citation", None):
#                 cited_file = client.files.retrieve(file_citation.file_id)
#                 citations.append(f"[{index}] {cited_file.filename}")

#         print(message_content.value)
#         print("\n".join(citations))


# # Then, we use the stream SDK helper
# # with the EventHandler class to create the Run
# # and stream the response.

# with client.beta.threads.runs.stream(
#     thread_id=thread.id,
#     assistant_id=assistant.id,
#     instructions=system_instructions,
#     event_handler=EventHandler(),
# ) as stream:
#     stream.until_done()