## Content Enrichment Prompt

In [None]:
import json

def build_chunk_prompt(requirements_chunk):
    """Build the content-enrichment prompt for one chunk of requirements.

    The chunk is serialised with ``json.dumps(..., indent=2)`` and embedded in
    an instruction prompt asking the model to rewrite every requirement in
    plain, non-technical language.

    The prompt asks for a JSON *array* with one object per requirement,
    because several requirements are sent at once and the parsing cell
    flattens each reply with ``list.extend``.

    Args:
        requirements_chunk: JSON-serialisable collection of requirements.

    Returns:
        str: The complete prompt text to send to the model.
    """
    requirements_text = json.dumps(requirements_chunk, indent=2)

    prompt = f"""

You are an expert business analyst specializing in requirement analysis and natural language processing. Your task is to convert technical requirements into clear, understandable natural language descriptions.
You are given multiple requirements in JSON format:
{requirements_text}
INSTRUCTIONS:
1. Read and analyze the provided requirement content thoroughly
2. Identify the core business intent and functionality described
3. Convert any technical variable names, method names, or system identifiers into human-readable terms by understanding their semantic meaning
4. Transform camelCase, PascalCase, or snake_case variables into natural language (e.g., "HornActivationRequest" → "horn activation request", "getUserProfile" → "get user profile", "payment_status_check" → "payment status check")
5. Remove all technical jargon, code syntax, and variable references
6. Use active voice and clear, concise language
7. Ensure the output is understandable to non-technical stakeholders

OUTPUT FORMAT:
Return your response as a JSON array containing one object per requirement, each with the following structure:
[
  {{
    "req_id": "<requirement_id>",
    "original_content": "<The exact original requirement text>",
    "natural_language_description": "<Clear, jargon-free description of what the requirement means in business terms>"
  }}
]

EXAMPLES:
Input: "When HornActivationRequest is triggered and vehicleSpeed > 0, execute soundAlert() function"
Output: [
  {{
    "req_id": "<requirement_id>",
    "original_content": "When HornActivationRequest is triggered and vehicleSpeed > 0, execute soundAlert() function",
    "natural_language_description": "When a horn activation request is made while the vehicle is moving, the system should produce a sound alert"
  }}
]
"""

    return prompt


In [None]:
import json
import ollama

def load_requirements(file_path):
    """Open ``file_path`` as UTF-8 text and return the JSON value it contains."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        parsed = json.load(source)
    return parsed

def chunk_requirements(reqs, chunk_size=2):
    """Lazily yield consecutive slices of ``reqs`` holding at most ``chunk_size`` items.

    The final slice is shorter when ``len(reqs)`` is not a multiple of
    ``chunk_size``.
    """
    yield from (
        reqs[start:start + chunk_size]
        for start in range(0, len(reqs), chunk_size)
    )

def call_llm_model(chunk):
    """Send the enrichment prompt for ``chunk`` through ``ollama.chat`` and return the reply text.

    The prompt comes from ``build_chunk_prompt``; the request uses the
    "mistral" model with temperature 0. The returned value is the ``content``
    field of the response's ``message``.
    """
    user_message = {"role": "user", "content": build_chunk_prompt(chunk)}
    reply = ollama.chat(
        model="mistral",  # Use any installed model like llama2, etc.
        messages=[user_message],
        options={"temperature": 0},
    )
    return reply['message']['content']

def main(input_file, output_file, chunk_size=2):
    """Run the model over every chunk of the requirements in ``input_file``.

    Args:
        input_file: Path of the JSON file handed to ``load_requirements``.
        output_file: Accepted for interface compatibility; this function
            does not read or write it.
        chunk_size: Number of requirements sent to the model per call.

    Returns:
        list: One raw model reply per chunk, in chunk order.
    """
    requirements = load_requirements(input_file)
    return [
        call_llm_model(batch)
        for batch in chunk_requirements(requirements, chunk_size)
    ]

if __name__ == "__main__":
    # Enrichment run: `y` keeps the raw model replies for the cells that follow.
    input_json_file = "requirements.json"  # your input file
    output_json_file = "processed_requirements.json"  # final output file
    y = main(
        input_file=input_json_file,
        output_file=output_json_file,
        chunk_size=2,
    )

In [None]:
# Persist the raw, still-unparsed model replies held in `y`.
with open("req_ids.json", "w") as raw_dump:
    raw_dump.write(json.dumps(y, indent=2))

In [None]:
import re
# Parse every raw model reply and collect the decoded requirements in one flat list.
scenario_json1 = []
for raw_reply in y:
    # Prefer the payload of a ```json fenced block when the reply contains one;
    # otherwise treat the whole reply as JSON.
    fenced = re.search(r"```json\n(.*?)\n```", raw_reply, re.DOTALL)
    payload = fenced.group(1) if fenced else raw_reply
    parsed = json.loads(payload)
    if isinstance(parsed, dict):
        # A lone JSON object must be appended: list.extend() on a dict would
        # add its keys (plain strings) instead of the requirement itself.
        scenario_json1.append(parsed)
    else:
        scenario_json1.extend(parsed)

with open("actor_pupose1.json", "w") as json_file:
    json.dump(scenario_json1, json_file, indent=4)

### Extract nouns and verbs

In [None]:
def prompt(requirements_chunk):
    """Build the actor/target/verb/noun extraction prompt for one chunk.

    The chunk is serialised with ``json.dumps(..., indent=2)`` and spliced
    into a fixed instruction template; the finished prompt text is returned.
    """
    requirements_text = json.dumps(requirements_chunk, indent=2)
    prompt_text = f"""
You are a requirements analysis expert. Your task is to extract actors, targets, verbs, and nouns from the given requirement content.

You are given multiple requirements in JSON format:
{requirements_text}

STRICT RULES:
1. Analyze ONLY the provided requirement content.
2. Do NOT add any external information or assumptions.
3. Do NOT include variable names, method names, or technical identifiers in any category.
4. Convert technical terms to natural language concepts only.
5. Extract only meaningful business entities and actions.
6. Output must be in strict JSON format.
7. Use lowercase for consistency.

DEFINITIONS:
- actors: who performs the action (users, systems, roles that initiate or perform actions)
- targets: what is being acted upon (objects, systems, data that receive actions)
- verbs: business actions or operations (what is being done)
- nouns: business entities or concepts (things involved in the process)

EXTRACTION GUIDELINES:
- Convert "UserAuthenticationService" → actor: "authentication service", noun: "authentication"
- Convert "validateCredentials()" → verb: "validate", noun: "credentials"
- Convert "HornActivationRequest" → actor: "driver/user", target: "horn system", verb: "activate", noun: "horn"
- Convert "updateAccountBalance" → verb: "update", noun: "account balance"

FORBIDDEN:
- Do not include variable names, class names, method names, technical identifiers
- Do not include programming syntax, camelCase terms, function calls
- Do not add information not present in the requirement

OUTPUT FORMAT (strict JSON, arrays must use brackets []):

  {{
    "req_id": "<requirement_id>",
    "natural_language_description": "<natural_language_description>",
    "actors": ["actor1", "actor2"],
    "targets": ["target1", "target2"],
    "verbs": ["verb1", "verb2"],
    "nouns": ["noun1", "noun2"]
  }}

EXAMPLE:

Input: "When HornActivationRequest is triggered and vehicleSpeed > 0, execute soundAlert() function"

Output:
  {{
    "req_id": "REQ-001",
    "natural_language_description": "When horn activation request is triggered and vehicle speed is greater than zero, execute the sound alert",
    "actors": ["driver", "system"],
    "targets": ["horn", "alert system"],
    "verbs": ["trigger", "activate", "execute", "alert"],
    "nouns": ["horn", "vehicle", "speed", "sound", "alert"]
  }}


NOW GENERATE STRICTLY JSON OUTPUT FOR ALL REQUIREMENTS ABOVE. DO NOT ADD ANYTHING ELSE.
"""

    return prompt_text


In [None]:
import json
import ollama

def load_requirements(file_path):
    """Return the JSON value decoded from the UTF-8 text file at ``file_path``."""
    with open(file_path, 'r', encoding='utf-8') as source:
        raw_text = source.read()
        return json.loads(raw_text)

def chunk_requirements(reqs, chunk_size=2):
    """Generate successive windows of ``reqs``, each at most ``chunk_size`` long."""
    for offset in range(0, len(reqs), chunk_size):
        window = slice(offset, offset + chunk_size)
        yield reqs[window]

def call_llm_model(chunk):
    """Ask the "mistral" model (via ``ollama.chat``) to process ``chunk``.

    The message content is the text produced by ``prompt(chunk)``; the
    request uses temperature 0. Returns the ``content`` of the response's
    ``message``.
    """
    request_text = prompt(chunk)
    conversation = [{"role": "user", "content": request_text}]
    reply = ollama.chat(
        model="mistral",  # Use any installed model like llama2, etc.
        messages=conversation,
        options={"temperature": 0},
    )
    return reply['message']['content']

def main(input_file, output_file, chunk_size=2):
    """Load ``input_file``, send it to the model chunk by chunk, and collect the replies.

    ``output_file`` is part of the signature but is not used here.
    Returns the list of raw reply strings, one per chunk, in order.
    """
    loaded = load_requirements(input_file)
    batches = chunk_requirements(loaded, chunk_size)
    return [call_llm_model(batch) for batch in batches]

if __name__ == "__main__":
    # Read the enriched requirements written by the enrichment parsing cell.
    # That cell saves "actor_pupose1.json" (the same file the inputs/outputs
    # stage reads); "actor_pupose.json" is never produced by this notebook.
    input_json_file = "actor_pupose1.json"  # your input file
    output_json_file = "processed_requirements.json"  # final output file
    # `y` holds the raw model replies consumed by the following cells.
    y = main(input_json_file, output_json_file, chunk_size=2)

In [None]:
# Show the first raw model reply (this cell's displayed value) before parsing.
y[0]

In [None]:
import re
# Parse every raw model reply and collect the decoded records in one flat list.
scenario_json1 = []
for raw_reply in y:
    # Prefer the payload of a ```json fenced block when the reply contains one;
    # otherwise treat the whole reply as JSON.
    fenced = re.search(r"```json\n(.*?)\n```", raw_reply, re.DOTALL)
    payload = fenced.group(1) if fenced else raw_reply
    parsed = json.loads(payload)
    if isinstance(parsed, dict):
        # A lone JSON object must be appended: list.extend() on a dict would
        # add its keys (plain strings) instead of the record itself.
        scenario_json1.append(parsed)
    else:
        scenario_json1.extend(parsed)

with open("actor_target_nouns_verbs1.json", "w") as json_file:
    json.dump(scenario_json1, json_file, indent=4)

### Inputs and outputs

In [1]:
def build_prmpt_input(requirements_chunk):
    """Build the input/output extraction prompt for one chunk of requirements.

    The chunk is serialised with ``json.dumps(..., indent=2)`` and embedded in
    an instruction prompt asking the model to list the inputs and outputs of
    every requirement.

    The prompt asks for a JSON *array* with one object per requirement,
    because several requirements are sent at once and the parsing cell
    flattens each reply with ``list.extend``.

    Args:
        requirements_chunk: JSON-serialisable collection of requirements.

    Returns:
        str: The complete prompt text to send to the model.
    """
    requirements_text = json.dumps(requirements_chunk, indent=2)
    prompt = f"""You are an expert requirements analyst. Your task is to carefully analyze the given requirement content and extract ALL inputs and outputs mentioned or implied in the requirement.
You are given multiple requirements in JSON format:
{requirements_text}
STRICT INSTRUCTIONS:
1. Read the requirement original content CAREFULLY and COMPLETELY
2. Extract ONLY inputs and outputs that are explicitly mentioned or directly implied in the provided content
3. Do NOT add any external information, assumptions, or related requirements
4. Process ONLY the given requirement ID and its content
5. Output must be in strict JSON format

DEFINITIONS:
- INPUTS: Any data, conditions, parameters, or triggers that go INTO the system/process described in the requirement
- OUTPUTS: Any data, results, actions, responses, or effects that come OUT OF the system/process described in the requirement

EXTRACTION RULES:
- If requirement mentions "when X happens" → X is an input
- If requirement mentions "system receives Y" → Y is an input  
- If requirement mentions "check Z condition" → Z is an input
- If requirement mentions "system produces A" → A is an output
- If requirement mentions "display B" → B is an output
- If requirement mentions "update C" → C is an output
- If requirement mentions "send notification" → "notification" is an output

OUTPUT FORMAT (strict JSON array, one object per requirement):
[
  {{
    "req_id": "<the provided requirement ID>",
    "inputs": ["list of all inputs found in the requirement"],
    "outputs": ["list of all outputs found in the requirement"]
  }}
]
NOW GENERATE STRICTLY JSON OUTPUT FOR ALL REQUIREMENTS ABOVE. DO NOT ADD ANYTHING ELSE.
"""
    return prompt


In [2]:
import json
import ollama

def load_requirements(file_path):
    """Decode and return the JSON stored in the UTF-8 file ``file_path``."""
    source = open(file_path, 'r', encoding='utf-8')
    try:
        return json.load(source)
    finally:
        # Mirrors the context-manager form: the file is closed on success or error.
        source.close()

def chunk_requirements(reqs, chunk_size=2):
    """Yield ``reqs`` piece by piece, ``chunk_size`` items at a time (last piece may be shorter)."""
    for begin in range(0, len(reqs), chunk_size):
        end = begin + chunk_size
        yield reqs[begin:end]

def call_llm_model(chunk):
    """Run the input/output extraction prompt for ``chunk`` through ``ollama.chat``.

    The prompt is built by ``build_prmpt_input``; the "mistral" model is
    called with temperature 0 and the ``content`` of the response's
    ``message`` is returned.
    """
    chat_kwargs = {
        "model": "mistral",  # Use any installed model like llama2, etc.
        "messages": [{"role": "user", "content": build_prmpt_input(chunk)}],
        "options": {"temperature": 0},
    }
    reply = ollama.chat(**chat_kwargs)
    return reply['message']['content']

def main(input_file, output_file, chunk_size=2):
    """Feed the requirements from ``input_file`` to the model in chunks.

    ``output_file`` is accepted but not used by this function. The return
    value is a list with one raw model reply per chunk, in order.
    """
    collected = []
    collected.extend(
        call_llm_model(batch)
        for batch in chunk_requirements(load_requirements(input_file), chunk_size)
    )
    return collected

if __name__ == "__main__":
    # Input/output extraction run: `y` keeps the raw model replies for the parsing cell.
    input_json_file = "actor_pupose1.json"  # your input file
    output_json_file = "processed_requirements.json"  # final output file
    y = main(
        input_file=input_json_file,
        output_file=output_json_file,
        chunk_size=2,
    )

In [3]:
import re
# Parse every raw model reply and collect the decoded records in one flat list.
scenario_json1 = []
for raw_reply in y:
    # Prefer the payload of a ```json fenced block when the reply contains one;
    # otherwise treat the whole reply as JSON.
    fenced = re.search(r"```json\n(.*?)\n```", raw_reply, re.DOTALL)
    payload = fenced.group(1) if fenced else raw_reply
    parsed = json.loads(payload)
    if isinstance(parsed, dict):
        # A lone JSON object must be appended: list.extend() on a dict would
        # add its keys (plain strings) instead of the record itself.
        scenario_json1.append(parsed)
    else:
        scenario_json1.extend(parsed)

with open("Inputs_outputs.json", "w") as json_file:
    json.dump(scenario_json1, json_file, indent=4)

In [None]:
## Merging content, inputs, outputs, verbs and nouns

In [41]:
import json

# Load the two intermediate JSON files that this cell merges.
with open("actor_target_nouns_verbs1.json", "r") as f1:
    # Expected shape: [{"req_id": ..., "natural_language_description": ...,
    #   "actors": [...], "targets": [...], "verbs": [...], "nouns": [...]}]
    semantic_data = json.load(f1)

with open("Inputs_outputs.json", "r") as f2:
    # Expected shape: [{"req_id": ..., "inputs": [...], "outputs": [...]}]
    io_data = json.load(f2)


def _normalize_req_id(raw_id):
    """Reduce a space-separated requirement ID to at most two parts.

    The ID is split on single spaces. When a non-blank third piece exists the
    result is "<first> <third>" (so "REQ  001", which splits into
    ["REQ", "", "001"], becomes "REQ 001"); otherwise it is
    "<first> <second>", or just "<first>" when the ID contains no space.
    """
    parts = raw_id.split(" ")
    if len(parts) > 2 and parts[2].strip():
        return parts[0] + " " + parts[2]
    if len(parts) > 1:
        return parts[0] + " " + parts[1]
    return parts[0]


def _dedupe(values):
    """Remove duplicates while keeping first-seen order.

    ``list(set(...))`` yields an arbitrary order, which makes the saved file
    differ from run to run; ``dict.fromkeys`` keeps insertion order. A missing
    or null list is treated as empty.
    """
    return list(dict.fromkeys(values or []))


# Apply the same ID rule to both datasets so the lookup below can match them.
for record in io_data:
    record["req_id"] = _normalize_req_id(record["req_id"])

for record in semantic_data:
    record["req_id"] = _normalize_req_id(record["req_id"])

# Lookup of semantic records by normalised ID (a later duplicate ID wins).
semantic_dict = {item["req_id"]: item for item in semantic_data}

# One merged record per input/output record; semantic fields fall back to
# None / empty lists when no semantic record shares the ID.
merged_data = []
for io_item in io_data:
    req_id = io_item["req_id"]
    semantic_item = semantic_dict.get(req_id, {})
    merged_data.append({
        "req_id": req_id,
        "inputs": io_item.get("inputs", []),
        "outputs": io_item.get("outputs", []),
        "Content": semantic_item.get("natural_language_description"),
        "actors": _dedupe(semantic_item.get("actors", [])),
        "targets": _dedupe(semantic_item.get("targets", [])),
        "verbs": _dedupe(semantic_item.get("verbs", [])),
        "nouns": _dedupe(semantic_item.get("nouns", [])),
    })

# Save merged JSON
with open("merged_requirements.json", "w") as fout:
    json.dump(merged_data, fout, indent=2)

# print("Merged JSON created successfully!")