In [1]:
from dotenv import load_dotenv
import os
import pandas as pd
from PyPDF2 import PdfReader
# Load variables from a local .env file into the process environment so the
# Groq API key never has to be written into the notebook itself.
load_dotenv()
key = os.getenv("GROQ_API_KEY")

# Report only whether a key is present. Never print the key itself: cell
# outputs are stored with the notebook and would expose the secret.
print("GROQ_API_KEY loaded:", bool(key))


GROQ_API_KEY loaded: True


In [2]:
from groq import Groq
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer


In [3]:

import numpy as np

def infer_text_columns(df, min_avg_len=30):
    """
    Detect columns that likely contain requirement text.

    A column qualifies when it holds text (``object`` dtype or a pandas
    ``StringDtype``) and the mean character length of its non-null values
    is greater than ``min_avg_len``.

    Args:
        df: DataFrame to inspect.
        min_avg_len: Mean length, in characters, that a column must exceed
            to be treated as free text rather than a short label or code.

    Returns:
        List of the qualifying column labels, in the DataFrame's column
        order. Empty when no column qualifies.
    """
    text_cols = []

    for col in df.columns:
        dtype = df[col].dtype
        # Text is stored as ``object`` in classic pandas but as a
        # StringDtype when a dedicated string dtype is in use; accept both
        # so genuine text columns are not silently skipped.
        if not (dtype == object or isinstance(dtype, pd.StringDtype)):
            continue

        avg_len = df[col].dropna().astype(str).str.len().mean()
        # A column with no non-null values yields NaN here, which fails the
        # comparison below and is therefore skipped.
        if avg_len and avg_len > min_avg_len:
            text_cols.append(col)

    return text_cols


def load_csv_semantic(file_path):
    """
    Flatten a CSV file into one line of text per row.

    Columns selected by ``infer_text_columns`` are treated as free text;
    every other column is treated as metadata and rendered as
    ``"<column>: <value>"``. Within a row the metadata parts come first,
    followed by the text parts, all joined with ``" | "``. Null cells are
    left out. The rows are joined with newlines.

    Raises:
        ValueError: if no column qualifies as a text column.
    """
    frame = pd.read_csv(file_path)

    # Split the columns into free-text columns and metadata columns.
    text_columns = infer_text_columns(frame)
    if not text_columns:
        raise ValueError("No suitable text columns found in CSV")
    meta_columns = [name for name in frame.columns if name not in text_columns]

    def render(record):
        """Build the single-line representation of one row."""
        parts = []
        for name in meta_columns:
            if pd.notna(record[name]):
                parts.append(f"{name}: {record[name]}")
        for name in text_columns:
            if pd.notna(record[name]):
                parts.append(str(record[name]))
        return " | ".join(parts)

    return "\n".join(render(record) for _, record in frame.iterrows())


In [4]:


def load_requirements(file_path):
    """
    Read a requirements document and return its content as plain text.

    The format is chosen from the file extension, matched
    case-insensitively:

    * ``.txt`` - returned verbatim, decoded as UTF-8.
    * ``.pdf`` - the text of every page that yields any, joined by newlines.
    * ``.csv`` - one line per row, as produced by ``load_csv_semantic``.

    Args:
        file_path: Path to the document, as a string or a path object.

    Returns:
        The document text as a single string.

    Raises:
        ValueError: if the extension is not one of the supported formats.
    """
    # Normalise only for the extension check; the path is passed on
    # unchanged, so "REQS.PDF" or a pathlib.Path work like "reqs.pdf".
    name = str(file_path).lower()

    if name.endswith(".txt"):
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    elif name.endswith(".pdf"):
        reader = PdfReader(file_path)
        # Extract each page's text once and reuse it for both the emptiness
        # check and the join, instead of extracting every page twice.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)

    elif name.endswith(".csv"):
        return load_csv_semantic(file_path)

    else:
        raise ValueError("Unsupported file format")

In [5]:
# Requirements document to ingest.
file_path = "software_requirements_extended.csv"  # or requirements.txt / requirements.csv
requirements_text = load_requirements(file_path)

# Show only the first 1000 characters as a sanity check of the loaded text.
preview = requirements_text[:1000]
print(preview)


Type: PE | The system shall refresh the display every 60 seconds.
Type: LF | The application shall match the color of the schema set forth by Department of Homeland Security
Type: US |  If projected  the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30
Type: A |  The product shall be available during normal business hours. As long as the user has access to the client PC  the system will be available 99% of the time during the first six months of operation.
Type: US |  If projected  the data must be understandable. On a 10x10 projection screen  90% of viewers must be able to determine that Events or Activities are occuring in current time from a viewing distance of 100
Type: SE | The product shall ensure that it can only be accessed by authorized users.  The product will be able to distinguish between authorized and unauthorized users in all access attempts
Type: US | The product shall be intuit

In [6]:
def chunk_requirements(
    text,
    max_tokens=350
):
    """
    Group the non-blank lines of ``text`` into chunks of bounded size.

    Lines are stripped, blank lines are dropped, and consecutive lines are
    packed into a chunk until adding the next line would push the chunk
    past ``max_tokens``. A "token" here is a whitespace-separated word, so
    the budget is only approximate with respect to any model tokenizer.
    Lines are never split: a single line longer than ``max_tokens`` becomes
    a chunk of its own.

    Args:
        text: Newline-separated input, one requirement per line.
        max_tokens: Word budget per chunk.

    Returns:
        List of chunk strings, each made of one or more lines joined with
        newlines. The list never contains an empty string and is empty when
        ``text`` has no non-blank lines.
    """
    chunks = []
    current_chunk = []
    token_count = 0

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        tokens = len(line.split())

        # Flush before overflowing, but only when something has been
        # collected: otherwise an oversized first line would emit an empty
        # chunk ahead of itself.
        if current_chunk and token_count + tokens > max_tokens:
            chunks.append("\n".join(current_chunk))
            current_chunk = []
            token_count = 0

        current_chunk.append(line)
        token_count += tokens

    # Emit whatever is left in the buffer after the last line.
    if current_chunk:
        chunks.append("\n".join(current_chunk))

    return chunks




In [7]:
# Split the loaded requirements text into chunks (default word budget per chunk).
chunks = chunk_requirements(requirements_text)


In [8]:
# Report how many chunks were produced, then show the first one as a sample.
chunk_total = len(chunks)
print("✅ Total chunks created:", chunk_total)

first_chunk = chunks[0]
print("\n--- First chunk ---\n")
print(first_chunk)


✅ Total chunks created: 71

--- First chunk ---

Type: PE | The system shall refresh the display every 60 seconds.
Type: LF | The application shall match the color of the schema set forth by Department of Homeland Security
Type: US |  If projected  the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30
Type: A |  The product shall be available during normal business hours. As long as the user has access to the client PC  the system will be available 99% of the time during the first six months of operation.
Type: US |  If projected  the data must be understandable. On a 10x10 projection screen  90% of viewers must be able to determine that Events or Activities are occuring in current time from a viewing distance of 100
Type: SE | The product shall ensure that it can only be accessed by authorized users.  The product will be able to distinguish between authorized and unauthorized users in all acces

In [9]:
# Sentence-embedding model; the same object also embeds queries in retrieve_context.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every chunk; the trailing expression displays the resulting array shape.
embeddings = embedder.encode(chunks)
embeddings.shape

(71, 384)

In [10]:

# Vector dimensionality is taken from the embeddings so the index always
# matches whatever model produced them.
DIM = embeddings.shape[1]

# Flat L2 index over the chunk embeddings.
index = faiss.IndexFlatL2(DIM)
# FAISS operates on C-contiguous float32 matrices. ascontiguousarray enforces
# that explicitly and, unlike np.array, makes no copy when the input already
# has that layout and dtype.
index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

print("Total chunks indexed:", index.ntotal)


Total chunks indexed: 71


In [11]:
def retrieve_context(query, k=3):
    """
    Return the chunks most similar to ``query``.

    The query is embedded with the notebook-level ``embedder`` and looked up
    in the notebook-level FAISS ``index``; the returned positions are mapped
    back to text through ``chunks``.

    Args:
        query: Free-text search string.
        k: Maximum number of chunks to return.

    Returns:
        List of up to ``k`` chunk strings, in the order the index returned
        them. Fewer than ``k`` are returned when the index cannot supply
        ``k`` neighbours.
    """
    # FAISS expects a C-contiguous float32 matrix of shape (n_queries, dim).
    q_emb = np.ascontiguousarray(embedder.encode([query]), dtype=np.float32)
    _, indices = index.search(q_emb, k)
    # FAISS pads the result with -1 when it finds fewer than k neighbours;
    # indexing ``chunks`` with -1 would silently return the last chunk.
    return [chunks[i] for i in indices[0] if i >= 0]


In [12]:
# Try retrieval with a sample query; the bare expression displays the returned chunks.
retrieve_context("password reset")


['Type: FR | Required information for registration Given the restaurant owner wants to create an account And the restaurant owner does not have an account When the restaurant owner registers on the web-portal by providing user-name And password And address And e-mail address And phone number Then the restaurant owner should be able to apply for verification\nType: FR | Full information for registration Given the restaurant owner wants to create an account And the restaurant owner does not have an account When the restaurant owner registers on the web-portal by providing user name And password And address And e-mail address And phone number And mobile number Then the restaurant owner should be able to apply for verification\nType: FR | Confirmed registration Given the restaurant owner has applied for verification And has not received a confirmation e-mail after registration When the restaurant owner receives a confirmation e-mail Then the restaurant owner should be able to log in\nType:

In [13]:


# Groq API client, authenticated with the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))


In [24]:
def generate_jira_story(
    context,
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=0.2,
):
    """
    Ask the LLM to draft Jira artifacts from requirement text.

    Sends a single user message through the notebook-level Groq ``client``
    asking for a user story, description, acceptance criteria, definition
    of done and epics based on ``context``.

    Args:
        context: Requirement text to base the artifacts on.
        model: Identifier of the chat model to call.
        temperature: Sampling temperature passed to the API.

    Returns:
        The message content of the first choice in the API response.
    """
    prompt = f"""
You are a Product Owner.

Using the context below, generate:
- Jira User Story
- Description
- Acceptance Criteria
- Definition of Done
- Epics
Context:
{context}
"""

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )

    return response.choices[0].message.content


In [None]:
# End-to-end run: retrieve the chunks matching the topic, then have the LLM
# draft Jira artifacts from them.
query = "user authentication and password reset"
retrieved_chunks = retrieve_context(query)

context_text = "\n".join(retrieved_chunks)
final_output = generate_jira_story(context_text)

print(final_output)


Here are the generated artifacts:

**Epic: Restaurant Owner Registration and Login**

* **User Story: Restaurant Owner Registration**
	+ **Description:** As a restaurant owner, I want to be able to register on the web-portal by providing required information so that I can apply for verification.
	+ **Acceptance Criteria:**
		- The system allows restaurant owners to register with a username, password, address, email address, and phone number.
		- The system sends a confirmation email to the restaurant owner after registration.
		- The restaurant owner can apply for verification after registration.
	+ **Definition of Done:** The restaurant owner can successfully register and apply for verification.

**Epic: Restaurant Owner Account Management**

* **User Story: Restaurant Owner Login**
	+ **Description:** As a restaurant owner, I want to be able to log in to the web-portal with my account so that I can manage my information.
	+ **Acceptance Criteria:**
		- The system allows restaurant ow