In [1]:
#  Install and import required packages

!pip install -q transformers sentence-transformers torch spacy nltk tqdm
!pip install -U -q google-generativeai

import nltk, spacy
nltk.download('punkt')
nltk.download('punkt_tab')
!python -m spacy download en_core_web_sm


# importing the required libraries
import os, re, glob, time, csv, math, random
import pandas as pd
from tqdm import tqdm
from nltk.tokenize import sent_tokenize
import google.generativeai as genai


# Load spaCy English model (for later optional use)
nlp = spacy.load("en_core_web_sm")

print("....Packages installed and models loaded successfully!....")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
....Packages installed and models loaded successfully!....


In [None]:
# helper functions

# Defining vague word list
VAGUE_LEXICON = [
    "fast", "quick", "efficient", "easy", "simple", "user-friendly", "user friendly", "intuitive",
    "robust", "reliable", "secure", "flexible", "scalable", "sufficient", "adequate",
    "as soon as possible", "soon", "minimize", "optimize", "better", "improve",
    "high performance", "low latency", "fastly"
]

# Precompiling regex patterns for all vague words 
vague_patterns = [re.compile(r"\b" + re.escape(x) + r"\b", flags=re.I) for x in VAGUE_LEXICON]

print(f"printing vague patterns: {vague_patterns}")


def clean_text(raw_text: str) -> str:
    """Cleans text: removes artifacts, newlines, and extra spaces."""
    text = raw_text.replace('\x00', ' ')                         # remove null chars
    text = re.sub(r'(\w)-\s*\n\s*(\w)', r'\1\2', text)          # fix hyphenation across lines
    text = re.sub(r'[\r\n]+', ' ', text)                        # remove newlines
    text = re.sub(r'\s+', ' ', text)                            # normalize spaces
    text = re.sub(r'Page\s*\d+\s*of\s*\d+', ' ', text, flags=re.I)
    text = re.sub(r'^\s*Confidential', '', text, flags=re.I)
    text = re.sub(r'[\u2022\u2023\-\*•◦]', ' ', text)           # remove bullets
    text = re.sub(r'\b\d+(\.\d+)+\b', ' ', text)                # remove section numbers like 3.2.1
    text = re.sub(r'\b\d+\.\b', ' ', text)
    return text.strip()


def is_valid_sentence(s: str) -> bool:
    """Filters out too-short or meaningless sentences."""
    s = s.strip()
    if len(s.split()) < 4:     # too short
        return False
    if len(s) > 600:           # too long (likely a paragraph)
        return False
    if not re.search(r'[a-zA-Z]', s):  # no text
        return False
    return True


def find_vague_terms(sentence: str):
    """Returns list of vague words found in a sentence."""
    found = []
    for pattern in vague_patterns:
        if pattern.search(sentence):
            found.append(pattern.pattern.strip("\\b").strip("\\b"))
    return list(set(found))  # unique only



printing vague patterns: [re.compile('\\bfast\\b', re.IGNORECASE), re.compile('\\bquick\\b', re.IGNORECASE), re.compile('\\befficient\\b', re.IGNORECASE), re.compile('\\beasy\\b', re.IGNORECASE), re.compile('\\bsimple\\b', re.IGNORECASE), re.compile('\\buser\\-friendly\\b', re.IGNORECASE), re.compile('\\buser\\ friendly\\b', re.IGNORECASE), re.compile('\\bintuitive\\b', re.IGNORECASE), re.compile('\\brobust\\b', re.IGNORECASE), re.compile('\\breliable\\b', re.IGNORECASE), re.compile('\\bsecure\\b', re.IGNORECASE), re.compile('\\bflexible\\b', re.IGNORECASE), re.compile('\\bscalable\\b', re.IGNORECASE), re.compile('\\bsufficient\\b', re.IGNORECASE), re.compile('\\badequate\\b', re.IGNORECASE), re.compile('\\bas\\ soon\\ as\\ possible\\b', re.IGNORECASE), re.compile('\\bsoon\\b', re.IGNORECASE), re.compile('\\bminimize\\b', re.IGNORECASE), re.compile('\\boptimize\\b', re.IGNORECASE), re.compile('\\bbetter\\b', re.IGNORECASE), re.compile('\\bimprove\\b', re.IGNORECASE), re.compile('\\bhig

In [None]:
#  Preprocessing: Read, clean, and save text files
RAW_TXT_DIR = "/content/drive/MyDrive/NLP_ASG/Data/raw_txt"
all_files = [f for f in os.listdir(RAW_TXT_DIR) if f.endswith(".txt")]
print(f"Found {len(all_files)} text files.")

CLEANED_DIR = "/content/drive/MyDrive//NLP_ASG/Data/Cleaned"
os.makedirs(CLEANED_DIR, exist_ok=True)

for file in tqdm(all_files, desc="Cleaning files"):
    in_path = os.path.join(RAW_TXT_DIR, file)
    out_path = os.path.join(CLEANED_DIR, file)

    try:
        with open(in_path, "r", encoding="utf-8", errors="ignore") as f:
            raw_text = f.read()
        cleaned_text = clean_text(raw_text)
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(cleaned_text)
    except Exception as e:
        print(f"Error processing {file}: {e}")

print("Cleaning complete!")
print(f"Cleaned files saved in: {CLEANED_DIR}")



Found 79 text files.


Cleaning files: 100%|██████████| 79/79 [01:06<00:00,  1.20it/s]

Cleaning complete!
Cleaned files saved in: /content/drive/MyDrive/Colab Notebooks/NLP_ASG/Data/Cleaned





In [None]:
# configuring gemini ai for rewriting sentences
from google.colab import userdata

# set this environment inside collab key's 
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

# make sure your key is set
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

model = genai.GenerativeModel("gemini-2.5-flash")



In [10]:

# Paths
CLEANED_DIR = "/content/drive/MyDrive/NLP_ASG/Data/Cleaned"
OUTPUT_PATH = "/content/drive/MyDrive/NLP_ASG/Outputs/Vague_Rewrites.csv"
columns = ["file", "vague_term(s)", "original_sentence", "suggested_rewrite"]

# Start fresh
if os.path.exists(OUTPUT_PATH):
    os.remove(OUTPUT_PATH)
pd.DataFrame(columns=columns).to_csv(OUTPUT_PATH, index=False, encoding="utf-8")

# Prompt template for full-file rewriting
PROMPT_TEMPLATE_FILE = """
You are an expert software requirements engineer.

Below are vague requirement sentences extracted from a software specification file.
Rewrite **each** sentence to be clear, specific, measurable, and testable.

Guidelines:
- Maintain IEEE-SRS tone ("The system shall...", "The software must...").
- Replace vague adjectives such as {all_vague_terms} with quantifiable or objective metrics.
- Return only the rewritten requirements, numbered 1., 2., 3.… (same count as input).
- Do **not** add commentary, introductions, or examples.

Vague requirement sentences:
{sentences}

Rewritten:
"""


cleaned_files = [f for f in os.listdir(CLEANED_DIR) if f.endswith(".txt")]
print(f"Found {len(cleaned_files)} cleaned files.")

for file in tqdm(cleaned_files, desc="Processing files"):
    file_path = os.path.join(CLEANED_DIR, file)
    with open(file_path, "r", encoding="utf-8") as f:
        text = f.read()

    sentences = sent_tokenize(text)
    vague_data = []
    for s in sentences:
        if not is_valid_sentence(s):
            continue
        found_terms = [term for term in VAGUE_LEXICON if re.search(rf"\b{re.escape(term)}\b", s, flags=re.I)]
        if found_terms:
            vague_data.append((found_terms, s))

    print(f"File '{file}' — vague sentences: {len(vague_data)}")
    if not vague_data:
        continue

    all_vague_terms = sorted(set(sum([terms for terms, _ in vague_data], [])))
    joined_sentences = "\n".join([f"{i+1}. {s}" for i, (_, s) in enumerate(vague_data)])
    prompt = PROMPT_TEMPLATE_FILE.format(
        all_vague_terms=", ".join(all_vague_terms), sentences=joined_sentences
    )

    for attempt in range(3):
        try:
            print(f"Sending prompt for file: {file}")
            response = model.generate_content(prompt)
            rewritten_block = response.text.strip()
            print("Gemini response preview:\n", rewritten_block[:400])
            break
        except Exception as e:
            print(f"Rewrite failed for {file}: {e}")
            rewritten_block = f"[Rewrite failed: {e}]"
            time.sleep(2 + random.random())
    else:
        rewritten_block = "[Rewrite failed after retries]"

    rewritten_lines = [re.sub(r"^\d+\.\s*", "", ln).strip() for ln in rewritten_block.splitlines() if ln.strip()]
    vague_rows = []
    for i, (terms, s) in enumerate(vague_data):
        rewrite = rewritten_lines[i] if i < len(rewritten_lines) else "[No rewrite returned]"
        vague_rows.append([file, ", ".join(terms), s.strip(), rewrite])

    if vague_rows:
        pd.DataFrame(vague_rows, columns=columns).to_csv(
            OUTPUT_PATH, mode="a", header=False, index=False, encoding="utf-8"
        )
        print(f"Added {len(vague_rows)} rows for {file}")

    time.sleep(0.5)

print("File-wise detection & rewriting complete!")
print(f"Results saved to: {os.path.abspath(OUTPUT_PATH)}")




Found 79 cleaned files.


Processing files:   0%|          | 0/79 [00:00<?, ?it/s]

File '2004 - philips.txt' — vague sentences: 1
Sending prompt for file: 2004 - philips.txt
Gemini response preview:
 1. The system shall ensure all user-facing text is grammatically correct, free of spelling errors, avoids undefined technical jargon, and achieves a Flesch-Kincaid Grade Level score of 8 or lower.
Added 1 rows for 2004 - philips.txt


Processing files:   1%|▏         | 1/79 [00:07<10:23,  8.00s/it]

File '2007 - get real 0.2.txt' — vague sentences: 5
Sending prompt for file: 2007 - get real 0.2.txt
Gemini response preview:
 1. The Get Real website shall present content tailored for high school teens (ages 14-18) and enable users to locate any career profile or informational article within a maximum of three clicks from the homepage. Each career profile shall include a minimum of 500 words detailing educational requirements, daily tasks, and salary expectations, covering at least 20 distinct computer science career pa
Added 5 rows for 2007 - get real 0.2.txt


Processing files:   3%|▎         | 2/79 [00:22<15:18, 11.93s/it]

File '2008 - keepass.txt' — vague sentences: 11
Sending prompt for file: 2008 - keepass.txt
Gemini response preview:
 1. The system shall store user passwords, data, email accounts, usernames, and URLs in an encrypted database. Access to this database shall be exclusively protected by a user-defined Master Key.
2. The system shall provide an integrated help system accessible from the user interface. This help system shall be structured into logically distinct chapters, each clearly titled, to facilitate user comp
Added 11 rows for 2008 - keepass.txt


Processing files:   4%|▍         | 3/79 [00:54<26:46, 21.14s/it]

File '2004 - colorcast.txt' — vague sentences: 14
Sending prompt for file: 2004 - colorcast.txt
Gemini response preview:
 1. The system shall facilitate customer transitions from discontinued products to alternative products within the product line. This process shall enable customers to identify and select replacement products for a single discontinued item within an average of 3 minutes, with a task completion success rate of 95%.
2. The system shall provide a conversion mechanism for customers to select and order 
Added 14 rows for 2004 - colorcast.txt


Processing files:   5%|▌         | 4/79 [01:35<36:10, 28.94s/it]

File '2003 - pnnl.txt' — vague sentences: 8
Sending prompt for file: 2003 - pnnl.txt
Gemini response preview:
 1. The system shall display its official designation as 'PNNL 14382 Task Report for the Energy Efficient and Affordable Small Commercial and Residential Buildings Research Program Project Pattern Recognition Based Fault Detection and Diagnostics Automated Diagnostics Software Version D.R.' within the 'About' section of the user interface and on all generated report headers.
2. The system shall dis
Added 8 rows for 2003 - pnnl.txt


Processing files:   6%|▋         | 5/79 [02:00<33:56, 27.52s/it]

File '2001 - ctc network.txt' — vague sentences: 0
File '2005 - grid 3D.txt' — vague sentences: 2
Sending prompt for file: 2005 - grid 3D.txt
Gemini response preview:
 1. The system shall enable a user with basic computer proficiency to successfully complete 80% of defined critical tasks within 10 minutes of initial interaction, without prior training or reference to external documentation.
2. The system shall detect and visually update the display orientation within 150 milliseconds of a physical device orientation change.
Added 2 rows for 2005 - grid 3D.txt


Processing files:   9%|▉         | 7/79 [02:09<18:49, 15.68s/it]

File '2004 - watcom.txt' — vague sentences: 6
Sending prompt for file: 2004 - watcom.txt
Gemini response preview:
 1. The software shall complete the specified internal processing task within 100 milliseconds and consume no more than 8 MB of heap memory.
2. The software shall resolve all 'name@GOT' references to their correct runtime memory addresses with 100% accuracy within 50 microseconds per reference during dynamic loading.
3. The LoadELF module shall correctly parse and load ELF executables specifying '/
Added 6 rows for 2004 - watcom.txt


Processing files:  10%|█         | 8/79 [02:37<22:28, 19.00s/it]

File '2002 - evla back.txt' — vague sentences: 8
Sending prompt for file: 2002 - evla back.txt
Gemini response preview:
 1.  The Backend System shall achieve and maintain real-time processing such that its Mean Time To Recovery (MTTR) from hardware or software faults does not exceed [TBD] minutes, and automated fault detection shall report critical issues within [TBD] seconds.
2.  The system shall implement a Fast Fourier Transform (FFT) that operates on complex data, supports input sizes that are a power of two, an
Added 8 rows for 2002 - evla back.txt


Processing files:  11%|█▏        | 9/79 [02:48<19:34, 16.78s/it]

File '2005 - clarus high.txt' — vague sentences: 8
Sending prompt for file: 2005 - clarus high.txt
Gemini response preview:
 1. The Clarus system shall provide integrated surface transportation weather observations with existing observation data such that models utilizing this integrated data reduce the mean absolute error of atmospheric boundary layer and near-surface predictions by at least 15% compared to models utilizing existing observation data alone.
2. The Clarus system shall be capable of ingesting and processi
Added 8 rows for 2005 - clarus high.txt


Processing files:  13%|█▎        | 10/79 [03:10<20:57, 18.22s/it]

File '2009 - warc III.txt' — vague sentences: 7
Sending prompt for file: 2009 - warc III.txt
Gemini response preview:
 1.  The WARC Tools software shall comprise a core software library named "libwarc", a set of command-line tools for end-users, extensions to existing tools, and web applications that provide read-only access to WARC content.
2.  The WARC Tools and Search Tools software shall provide a WARC standard (ISO 28500:2017) compliant implementation, be released under an OSI-approved free software license, 
Added 7 rows for 2009 - warc III.txt


Processing files:  14%|█▍        | 11/79 [03:43<25:41, 22.67s/it]

File '2009 - video search.txt' — vague sentences: 5
Sending prompt for file: 2009 - video search.txt
Gemini response preview:
 1. The software shall incorporate a user feedback submission module, accessible to all authenticated users, enabling them to submit comments and suggestions on software functionality and usability. All submitted feedback shall be stored in a central database for review by authorized development personnel to inform product improvements.
2. The system shall provide a 'System Overview' module contain
Added 5 rows for 2009 - video search.txt


Processing files:  15%|█▌        | 12/79 [04:11<27:05, 24.26s/it]

File '2001 - hats.txt' — vague sentences: 6
Sending prompt for file: 2001 - hats.txt
Gemini response preview:
 1. The HATS GUI shall achieve a System Usability Scale (SUS) score of at least 75 for 90% of a representative user group. The HATS GUI shall be fully functional and render consistently across Windows 10 (Chrome 90+, Firefox 80+, Edge 90+), macOS 11+ (Safari 14+, Chrome 90+, Firefox 80+), and Ubuntu 22.04 (Firefox 80+, Chrome 90+). The HATS GUI shall provide features that enable experienced users t
Added 6 rows for 2001 - hats.txt


Processing files:  16%|█▋        | 13/79 [04:31<25:13, 22.94s/it]

File '2010 - gparted.txt' — vague sentences: 4
Sending prompt for file: 2010 - gparted.txt
Gemini response preview:
 1. The GParted GUI shall enable a user to successfully complete a partition resizing operation with no more than 5 distinct user interactions (e.g., clicks, drag-and-drop, text input) from the main application window.
2. The GParted software shall be developed using a modular architecture with well-defined APIs, enabling developers to modify, extend, and correct defects in specific components with
Added 4 rows for 2010 - gparted.txt


Processing files:  18%|█▊        | 14/79 [04:53<24:34, 22.69s/it]

File '2003 - agentmom.txt' — vague sentences: 2
Sending prompt for file: 2003 - agentmom.txt
Gemini response preview:
 1. The system shall provide functionality for authorized administrators to initiate and implement changes to the organizational structure. These reorganization activities, encompassing the addition, modification, or removal of organizational units and reporting lines, shall be fully applied and visible to all affected system users within [X] hours for a change involving up to [Y] organizational un
Added 2 rows for 2003 - agentmom.txt


Processing files:  19%|█▉        | 15/79 [05:15<23:48, 22.32s/it]

File '0000 - gamma j.txt' — vague sentences: 11
Sending prompt for file: 0000 - gamma j.txt
Gemini response preview:
 1.  The system shall allow a new store owner to complete the initial store setup, including product catalog import and payment gateway configuration, within 30 minutes, requiring no more than 10 manual configuration steps. The system shall enable a new store owner, after 1 hour of training, to process customer orders and manage inventory with an error rate of less than 2% for typical operations.
2
Added 11 rows for 0000 - gamma j.txt


Processing files:  20%|██        | 16/79 [05:48<26:56, 25.66s/it]

File '1995 - gemini.txt' — vague sentences: 74
Sending prompt for file: 1995 - gemini.txt
Gemini response preview:
 1.  The system shall implement a software protocol for command acknowledgements, where subsystems confirm receipt and successful processing of commands within [N] milliseconds.
2.  The Gemini Control System shall be designed to ensure that each subsystem operates with a maximum of [N] direct dependencies on other subsystems, and that system-level operations access subsystem functionality only thro
Added 74 rows for 1995 - gemini.txt


Processing files:  22%|██▏       | 17/79 [07:20<47:02, 45.52s/it]

File '2009 - peppol.txt' — vague sentences: 92
Sending prompt for file: 2009 - peppol.txt
Gemini response preview:
 1. For each of the four stages (pre VCDmapping, VCD simple package, VCD advanced package, and VCD network package), the system documentation shall include a vision statement, a use case description, major features, functional requirements, non-functional quality attributes, a statement of expected benefits for stakeholders, and a definition of scope, limitations, assumptions, and dependencies.
2. 
Added 92 rows for 2009 - peppol.txt


Processing files:  23%|██▎       | 18/79 [08:48<59:05, 58.12s/it]

File '2001 - esa.txt' — vague sentences: 1
Sending prompt for file: 2001 - esa.txt
Gemini response preview:
 1. The system shall ensure all generated notifications communicate a single, specific condition, contain no more than 15 words, and are comprehensible to a user with an 8th-grade reading level as measured by the Flesch-Kincaid Grade Level score.
Added 1 rows for 2001 - esa.txt


Processing files:  24%|██▍       | 19/79 [08:59<43:58, 43.97s/it]

File '2006 - stewards.txt' — vague sentences: 19
Sending prompt for file: 2006 - stewards.txt
Gemini response preview:
 1. The system shall provide technical and operational capabilities that fully satisfy all specified requirements for the CEAP Watershed Assessment Studies, the researchers at watershed sites, and external users, as defined in this specification document.
2. The system requirements, as specified in this document, shall be sufficiently detailed to enable a qualified system developer to define a comp
Added 19 rows for 2006 - stewards.txt


Processing files:  25%|██▌       | 20/79 [09:44<43:36, 44.35s/it]

File '0000 - inventory.txt' — vague sentences: 4
Sending prompt for file: 0000 - inventory.txt
Gemini response preview:
 1. The IUFA system shall require multi-factor authentication for all external access. All data transmitted between external users and the IUFA system shall be encrypted using TLS 1.2 or a higher version. The IUFA system shall maintain 99.9% availability for external users during working hours (08:00 to 17:00 local time, Monday to Friday).
2. The web interface shall utilize terminology consistent w
Added 4 rows for 0000 - inventory.txt


Processing files:  27%|██▋       | 21/79 [09:57<33:39, 34.82s/it]

File '2005 - znix.txt' — vague sentences: 3
Sending prompt for file: 2005 - znix.txt
Gemini response preview:
 1.  The specific requirements shall be documented at a level of detail that permits the unambiguous creation of detailed design specifications and enables the derivation of comprehensive test cases covering all stated functional and non-functional requirements.
2.  The software shall provide an Application Programming Interface (API) that enables a developer to integrate basic ZNIX awareness into 
Added 3 rows for 2005 - znix.txt


Processing files:  28%|██▊       | 22/79 [10:08<26:15, 27.65s/it]

File '2009 - inventory 2.0.txt' — vague sentences: 8
Sending prompt for file: 2009 - inventory 2.0.txt
Gemini response preview:
 1.  The system shall present, in a dedicated 'Use Case Overview' section, a high-level summary for each major functional workflow. This summary shall enable a project manager or business representative to correctly answer three pre-defined questions regarding the use case's primary goal, main actors, and main outcome within 5 minutes of review.
2.  The system shall provide a dedicated link from th
Added 8 rows for 2009 - inventory 2.0.txt


Processing files:  29%|██▉       | 23/79 [10:33<25:05, 26.89s/it]

File '2009 - library - 2.txt' — vague sentences: 6
Sending prompt for file: 2009 - library - 2.txt
Gemini response preview:
 1. The system shall enable system administrators to create report templates. These templates shall consistently generate reports that accurately reflect the specified criteria and data upon execution. The system shall make all created report templates accessible to authorized staff members. Authorized staff members shall be able to execute any available report template without modification, or mod
Added 6 rows for 2009 - library - 2.txt


Processing files:  30%|███       | 24/79 [10:50<21:58, 23.98s/it]

File '2009 - email.txt' — vague sentences: 4
Sending prompt for file: 2009 - email.txt
Gemini response preview:
 1. The system shall be fully operational for all state agencies by June 30, 2013, following a phased migration commencing July 1, 2010, which includes the complete decommissioning of all legacy state agency email systems.
2. The system shall provide disaster recovery capabilities ensuring that critical email services are restored within a Recovery Time Objective (RTO) of 4 hours and data loss is l
Added 4 rows for 2009 - email.txt


Processing files:  32%|███▏      | 25/79 [11:01<18:00, 20.01s/it]

File '2002 - sce api.txt' — vague sentences: 21
Sending prompt for file: 2002 - sce api.txt
Gemini response preview:
 1. The API shall support data transfer rates between simulation and emulation environments exceeding 10 Gigabits per second (Gbps). The API shall adhere to an industry-standard interface specification to ensure interoperability across diverse EDA customer and supplier toolchains.
2. The standard defined by this document shall be finalized within 12 months of its initial publication. The standard s
Added 21 rows for 2002 - sce api.txt


Processing files:  33%|███▎      | 26/79 [11:37<21:53, 24.78s/it]

File '2004 - watcom gui.txt' — vague sentences: 3
Sending prompt for file: 2004 - watcom gui.txt
Gemini response preview:
 1. The system shall: (a) maintain continuous operation for a minimum of 72 hours without unscheduled restarts; (b) be available under a license that permits usage without direct monetary cost; (c) respond to 95% of user interface interactions within 200 milliseconds; (d) include comprehensive documentation for installation, user operations, and API usage; (e) support installation and full function
Added 3 rows for 2004 - watcom gui.txt


Processing files:  34%|███▍      | 27/79 [11:53<19:19, 22.30s/it]

File '2000 - nasa x38.txt' — vague sentences: 5
Sending prompt for file: 2000 - nasa x38.txt
Gemini response preview:
 1. Pipes shall provide a data throughput of at least 1 Gbps and a data latency of no more than 100 microseconds for data transfer between or within virtual groups.
2. The utility timer shall [SRS247] have an accuracy of no more than 50 PPM.
3. The utility timer shall [SRS256] have a resolution of 1 nanosecond or finer.
4. Pipes shall provide a data throughput of at least 1 Gbps and a data latency 
Added 5 rows for 2000 - nasa x38.txt


Processing files:  35%|███▌      | 28/79 [12:03<15:49, 18.61s/it]

File '2004 - jse.txt' — vague sentences: 4
Sending prompt for file: 2004 - jse.txt
Gemini response preview:
 1.  The system shall reduce the average bid-ask spread for the top 20 most traded Money Market instruments by 15% within 12 months of deployment.
2.  The system shall enable the execution of secondary market trades for Money Market instruments with an average latency of less than 200 milliseconds from order submission to trade confirmation.
3.  The system shall reduce the average all-in transactio
Added 4 rows for 2004 - jse.txt


Processing files:  37%|███▋      | 29/79 [12:22<15:42, 18.85s/it]

File '2010 - home 1.3.txt' — vague sentences: 4
Sending prompt for file: 2010 - home 1.3.txt
Gemini response preview:
 1. The DigitalHome system's web interface shall enable a user with prior experience in general web operations (e.g., logging in, logging out, browsing web pages, and submitting information and requests) to successfully complete each of these tasks within 30 seconds per task on first attempt, without requiring system-specific training.
2. The DigitalHome system shall be designed and implemented to 
Added 4 rows for 2010 - home 1.3.txt


Processing files:  38%|███▊      | 30/79 [12:40<15:10, 18.59s/it]

File '1999 - dii.txt' — vague sentences: 4
Sending prompt for file: 1999 - dii.txt
Gemini response preview:
 1. The system shall consume XML 1.0 and XML Schema 1.0 documents, utilizing both DOM and SAX parsing models, and shall validate these documents against W3C XML 1.0/1.1 Schema specifications and well-formedness rules. The system shall enable programmatic interrogation of document content via XPath 1.0, extract data views based on query results, and render these into user-defined tree structures or 
Added 4 rows for 1999 - dii.txt


Processing files:  39%|███▉      | 31/79 [13:05<16:22, 20.48s/it]

File '2005 - microcare.txt' — vague sentences: 10
Sending prompt for file: 2005 - microcare.txt
Gemini response preview:
 1. The system shall be developed on the Oracle 9i platform, utilizing a Visual Basic (VB) front-end and Crystal Reports 9 for reporting. The system's transaction screens shall incorporate drop-down selection menus for all fields populated from master tables, enabling users to select predefined values and reducing data entry errors for these fields to a rate of less than 1% during standard operatio
Added 10 rows for 2005 - microcare.txt


Processing files:  41%|████      | 32/79 [13:33<17:48, 22.74s/it]

File '2010 - mashboot.txt' — vague sentences: 3
Sending prompt for file: 2010 - mashboot.txt
Gemini response preview:
 1.  The system shall centralize customer interaction management and reputation monitoring across a minimum of 5 specified communication channels (e.g., email, social media platforms, chat applications), enabling the consistent application of customer service policies and reputation management strategies, and ensuring that 90% of customer inquiries receive a response within 240 minutes.
2.  The sof
Added 3 rows for 2010 - mashboot.txt


Processing files:  42%|████▏     | 33/79 [13:53<16:48, 21.92s/it]

File '2007 - ertms.txt' — vague sentences: 0
File '2008 - virtual ed.txt' — vague sentences: 9
Sending prompt for file: 2008 - virtual ed.txt
Gemini response preview:
 1. The System shall provide a communication platform for professors and students, encompassing email, group chat, and bulletin board functionalities. The System shall incorporate real-time audio and video communication tools. All transmitted and stored data within the platform shall be secured in accordance with the university's IT security policy (e.g., NIST SP 800-53).
2. The System shall provid
Added 9 rows for 2008 - virtual ed.txt


Processing files:  44%|████▍     | 35/79 [14:19<13:02, 17.78s/it]

File '2007 - central trading system.txt' — vague sentences: 1
Sending prompt for file: 2007 - central trading system.txt
Gemini response preview:
 1. The CTS shall enable terminal users to complete any standard operational task requiring user interaction within a maximum of three (3) distinct mouse clicks or five (5) distinct keyboard inputs, measured from the task's initial interface screen to completion.
Added 1 rows for 2007 - central trading system.txt


Processing files:  46%|████▌     | 36/79 [14:30<11:24, 15.92s/it]

File '2010 - split merge.txt' — vague sentences: 10
Sending prompt for file: 2010 - split merge.txt
Gemini response preview:
 1. The system shall support the processing of Portable Document Format (PDF) files utilizing industry-standard security features, such as AES 256-bit encryption, to ensure data confidentiality. The system shall maintain the structural integrity and fidelity of processed PDF files, ensuring compatibility with ISO 32000-1 compliant readers.
2. The software must be distributed under the GNU General P
Added 10 rows for 2010 - split merge.txt


Processing files:  47%|████▋     | 37/79 [15:00<13:53, 19.83s/it]

File '2009 - peazip.txt' — vague sentences: 12
Sending prompt for file: 2009 - peazip.txt
Gemini response preview:
 1. The system shall provide the following core functions: creating, updating, and extracting content from compressed archives; file and archive management tools including file copy with data integrity verification; file splitting and joining; both a fast file deletion (unlinking file entries) and a secure file deletion (overwriting file data at least 3 times with specified patterns to prevent reco
Added 12 rows for 2009 - peazip.txt


Processing files:  48%|████▊     | 38/79 [15:46<18:21, 26.87s/it]

File '2007 - mdot.txt' — vague sentences: 12
Sending prompt for file: 2007 - mdot.txt
Gemini response preview:
 1.  The system shall generate summary reports and interactive visualizations of Vehicle-Infrastructure Integration (VII) infrastructure and test bed data. These materials shall be presented on a publicly accessible web portal, designed to support outreach programs, and shall increase the documented engagement with VII information (e.g., unique page views or download counts) by a minimum of 10% qua
Added 12 rows for 2007 - mdot.txt


Processing files:  49%|████▉     | 39/79 [16:13<17:55, 26.89s/it]

File '1998 - themas.txt' — vague sentences: 1
Sending prompt for file: 1998 - themas.txt
Gemini response preview:
 1. The system shall provide THEMAS System Supervisors with capabilities to configure heating and cooling system parameters, monitor real-time operational status, and access historical energy consumption data. These capabilities shall enable supervisors to ensure that indoor temperatures are maintained within ±1.0°C of their configured setpoints and that the system's average daily energy consumptio
Added 1 rows for 1998 - themas.txt


Processing files:  51%|█████     | 40/79 [16:22<14:10, 21.82s/it]

File '2006 - eirene sys 15.txt' — vague sentences: 6
Sending prompt for file: 2006 - eirene sys 15.txt
Gemini response preview:
 1. The system shall implement the GSM eMLPP (Enhanced Multi-Level Precedence and Pre-emption) specification. This implementation shall ensure that emergency group calls achieve a call setup time of less than 2 seconds and pre-emption of lower priority calls is completed within 500 milliseconds.
2. The system shall ensure that the alerting duration for Party A, when receiving an incoming call from 
Added 6 rows for 2006 - eirene sys 15.txt


Processing files:  52%|█████▏    | 41/79 [16:35<12:07, 19.14s/it]

File '2001 - space fractions.txt' — vague sentences: 7
Sending prompt for file: 2001 - space fractions.txt
Gemini response preview:
 1.  The Space Fractions system shall provide interactive exercises and educational modules designed to enable sixth-grade students to achieve a minimum 10% increase in their average score on a standardized fraction proficiency assessment after 10 hours of system usage.
2.  The system shall present a multi-chapter narrative storyline that unfolds as the user completes fraction-solving challenges, a
Added 7 rows for 2001 - space fractions.txt


Processing files:  53%|█████▎    | 42/79 [16:53<11:37, 18.86s/it]

File '2005 - phin.txt' — vague sentences: 4
Sending prompt for file: 2005 - phin.txt
Gemini response preview:
 1. The system shall allow authorized administrators to configure the parameters for outbreak investigation workflows, including data fields, notification triggers, and reporting templates, without requiring software redeployment or code modification.
2. The system shall allow users to capture specimen/sample identifier data by scanning 1D or 2D barcodes. The system shall automatically populate rel
Added 4 rows for 2005 - phin.txt


Processing files:  54%|█████▍    | 43/79 [17:07<10:23, 17.33s/it]

File '2004 - e-procurement.txt' — vague sentences: 0
File '0000 - cctns.txt' — vague sentences: 14
Sending prompt for file: 0000 - cctns.txt
Gemini response preview:
 1. CCTNS V1.0 shall include dedicated functional modules to support police personnel in the execution of tasks relevant to 'Investigation of Crime' and 'Detection of Criminals'.
2. The System shall provide a reporting interface that enables police personnel to generate standard monthly operational reports and Right to Information (RTI) related reports. Each report generation shall require a maximu
Added 14 rows for 0000 - cctns.txt


Processing files:  57%|█████▋    | 45/79 [17:48<10:44, 18.96s/it]

File '1999 - multi-mahjong.txt' — vague sentences: 1
Sending prompt for file: 1999 - multi-mahjong.txt
Gemini response preview:
 1. The documentation for the MultiMahjongClient and MultiMahjongServer applications shall enable 90% of target users (as defined in Section X.Y of this SRS) to successfully perform all primary application functions without additional verbal or written instruction, following an initial reading period of no more than 30 minutes.
Added 1 rows for 1999 - multi-mahjong.txt


Processing files:  58%|█████▊    | 46/79 [17:55<08:45, 15.93s/it]

File '2004 - ijis.txt' — vague sentences: 2
Sending prompt for file: 2004 - ijis.txt
Gemini response preview:
 1. The IJIS system shall provide capabilities that measurably improve the effectiveness of justice administration in Tarrant County by achieving a quantifiable reduction in average end-to-end processing times for common justice workflows by [X]% and enhancing data accuracy across all participating justice entities to [Y]%.
2. The system shall provide the Office of Attorney Appointments with capabi
Added 2 rows for 2004 - ijis.txt


Processing files:  59%|█████▉    | 47/79 [18:15<09:06, 17.08s/it]

File '2009 - gaia.txt' — vague sentences: 1
Sending prompt for file: 2009 - gaia.txt
Gemini response preview:
 1. The module shall provide a common Application Programming Interface (API) for retrieving configuration values. This API shall allow retrieval of any configuration value using a single function call and shall complete retrieval within 50 milliseconds for 95% of requests.
Added 1 rows for 2009 - gaia.txt


Processing files:  61%|██████    | 48/79 [18:23<07:32, 14.59s/it]

File '2002 - evla corr.txt' — vague sentences: 11
Sending prompt for file: 2002 - evla corr.txt
Gemini response preview:
 1.  The system shall provide a dedicated diagnostic mode or interface, accessible to authorized personnel, enabling full read/write access to system parameters and logs within 10 seconds of login, for the purpose of testing and debugging.
2.  The system shall provide the specified data sets (e.g., state counts, auto correlations) to the Backend Data Processing System over a secondary virtual netwo
Added 11 rows for 2002 - evla corr.txt


Processing files:  62%|██████▏   | 49/79 [18:54<09:33, 19.12s/it]

File '2007 - puget sound.txt' — vague sentences: 11
Sending prompt for file: 2007 - puget sound.txt
Gemini response preview:
 1. The system shall allow actors to download their voice clips in at least three industry-standard audio formats, including MP3, WAV, and FLAC.
2. The system shall provide an interface such that a new user can complete a primary task within 10 minutes without prior training. The system shall render all user interface screens within 2 seconds under peak load conditions.
3. The system shall allow ac
Added 11 rows for 2007 - puget sound.txt


Processing files:  63%|██████▎   | 50/79 [19:06<08:14, 17.04s/it]

File '2008 - peering.txt' — vague sentences: 8
Sending prompt for file: 2008 - peering.txt
Gemini response preview:
 1. The CDN system shall dynamically adjust its resource allocation to support a peak load of 1.5 Tbps while ensuring hardware acquisition costs do not exceed $0.003 per GB-hour of data delivered, and demonstrate the ability to scale its content delivery capacity by 25% within 15 minutes to meet demands for increased geographical coverage, ensuring 98% of users experience less than 75ms latency.
2.
Added 8 rows for 2008 - peering.txt


Processing files:  65%|██████▍   | 51/79 [19:39<10:08, 21.74s/it]

File '2011 - opensg 0.1.txt' — vague sentences: 0
File '2008 - viper.txt' — vague sentences: 4
Sending prompt for file: 2008 - viper.txt
Gemini response preview:
 1. The system shall modify the format of all system-generated reports to align with the Organization's Document Style Guide v2.0.
2. The system shall facilitate the delivery of Ejada company products (IT products, Business Consultation, and IT services) such that the average time from customer request submission to fulfillment completion does not exceed 48 business hours.
3. The system shall provi
Added 4 rows for 2008 - viper.txt


Processing files:  67%|██████▋   | 53/79 [19:58<07:02, 16.24s/it]

File '2001 - npac.txt' — vague sentences: 22
Sending prompt for file: 2001 - npac.txt
Gemini response preview:
 1. The NPAC shall represent Efficient Data Representation (EDR) using 1K Blocks for data exchange within the NPAC, over the SOA to NPAC interface, and over the NPAC to LSMS interface.
2. The NPAC Customer Data Model shall include an 'NPAC Customer LSMS EDR Indicator' field, defined as a Boolean, to specify if the Service Provider utilizes Efficient Data Representation on their Local SMS (TRUE for 
Added 22 rows for 2001 - npac.txt


Processing files:  68%|██████▊   | 54/79 [20:29<08:13, 19.74s/it]

File '2007 - nde.txt' — vague sentences: 10
Sending prompt for file: 2007 - nde.txt
Gemini response preview:
 1. The system shall ensure that unscheduled service interruptions do not exceed 5 minutes per incident and total no more than 30 minutes per calendar month.
2. The Contractor shall, within 6 months of contract award and prior to the commencement of coding activities, provide a cost estimate for a segregated test environment that replicates 95% of the production system's functional and data interfa
Added 10 rows for 2007 - nde.txt


Processing files:  70%|██████▉   | 55/79 [21:00<09:01, 22.57s/it]

File '2010 - blit draft.txt' — vague sentences: 3
Sending prompt for file: 2010 - blit draft.txt
Gemini response preview:
 1.  The new Laboratory Information System (LIS) shall achieve an average transaction response time of less than 2 seconds for 95% of critical user interactions (e.g., order entry, result retrieval, report generation) under typical load conditions.
2.  The system shall maintain an operational availability of 99.9% over any 30-day period, and shall implement security and confidentiality controls for
Added 3 rows for 2010 - blit draft.txt


Processing files:  71%|███████   | 56/79 [21:22<08:36, 22.48s/it]

File '2007 - water use.txt' — vague sentences: 16
Sending prompt for file: 2007 - water use.txt
Gemini response preview:
 1. The IBM DB2 data server shall support a minimum of 200 concurrent users and 500 transactions per second, with an average response time not exceeding 2 seconds for 90% of requests, and maintain CPU utilization below 70% and memory utilization below 80% under peak load.
2. The system shall support a minimum of 250 concurrent users and 750 transactions per second, while maintaining an average resp
Added 16 rows for 2007 - water use.txt


Processing files:  72%|███████▏  | 57/79 [21:48<08:39, 23.59s/it]

File '2001 - elsfork.txt' — vague sentences: 11
Sending prompt for file: 2001 - elsfork.txt
Gemini response preview:
 1. The system shall perform CRC check calculations for supported protocols. For protocols requiring CRC-16, the system shall complete the calculation for a 1 KB data block within 5 milliseconds. For protocols requiring CRC-32, the system shall complete the calculation for a 1 KB data block within 10 milliseconds.
2. The system shall ensure that the integration of a single wind turbine or a wind tu
Added 11 rows for 2001 - elsfork.txt


Processing files:  73%|███████▎  | 58/79 [22:09<07:57, 22.76s/it]

File '2007 - e-store.txt' — vague sentences: 3
Sending prompt for file: 2007 - e-store.txt
Gemini response preview:
 1. The system shall align its user interface and workflows with documented primary usage scenarios, enabling first-time users to successfully complete a typical core task (e.g., 'registering an account') within a maximum of 5 steps. The system's architecture shall support the integration of future feature enhancements, equivalent in complexity to adding a new search filter, with an estimated devel
Added 3 rows for 2007 - e-store.txt


Processing files:  75%|███████▍  | 59/79 [22:40<08:24, 25.21s/it]

File '2003 - qheadache.txt' — vague sentences: 4
Sending prompt for file: 2003 - qheadache.txt
Gemini response preview:
 1. The system shall be usable by individuals aged 8 and older who possess basic computer literacy, including the ability to operate a mouse for point-and-click actions and to use a keyboard for minimal text input.
2. The system shall not display the 'Simple Finish Window' unless the game's winning condition has been achieved.
3. The 'Simple Finish Window' shall display the text string "You win !" 
Added 4 rows for 2003 - qheadache.txt


Processing files:  76%|███████▌  | 60/79 [23:08<08:11, 25.85s/it]

File '2004 - grid bgc.txt' — vague sentences: 2
Sending prompt for file: 2004 - grid bgc.txt
Gemini response preview:
 1. The system shall encrypt all communications with remote computing resources using TLS 1.2 or higher with a minimum of AES-256 encryption. The system shall authenticate remote computing resources using X.509 certificates. The system shall maintain a communication availability of 99.9% for remote computing resource interactions over any 30-day period.
2. The software must transmit all data exchan
Added 2 rows for 2004 - grid bgc.txt


Processing files:  77%|███████▋  | 61/79 [23:16<06:14, 20.81s/it]

File '2009 - library.txt' — vague sentences: 4
Sending prompt for file: 2009 - library.txt
Gemini response preview:
 1.  The system shall provide authenticated administrators with an interface to view and modify all email configuration parameters, including masquerading settings, log retention periods, and bounce management rules. The system shall display a menu of commonly modified email configuration settings, enabling an authenticated administrator to navigate to and initiate modification of any listed settin
Added 4 rows for 2009 - library.txt


Processing files:  78%|███████▊  | 62/79 [23:26<04:56, 17.41s/it]

File '2009 - model manager.txt' — vague sentences: 1
Sending prompt for file: 2009 - model manager.txt
Gemini response preview:
 1. The system shall provide MM users at the ranges with a minimum of five distinct pre-configured post-processing options. Each option shall define specific settings for data filtering, data transformation, and visualization.
Added 1 rows for 2009 - model manager.txt


Processing files:  80%|███████▉  | 63/79 [23:37<04:07, 15.44s/it]

File '2008 - caiso.txt' — vague sentences: 5
Sending prompt for file: 2008 - caiso.txt
Gemini response preview:
 1.  The Black Start generator shall be capable of self-starting and reaching stable output within [X] minutes without reliance on external electrical power sources. During operation, the generator shall maintain voltage within [Y]% of its nominal value and frequency within [Z] Hz of its nominal value while energizing isolated transmission facilities and auxiliary loads of other generators.
2.  The
Added 5 rows for 2008 - caiso.txt


Processing files:  81%|████████  | 64/79 [23:57<04:13, 16.90s/it]

File '2007 - nlm.txt' — vague sentences: 15
Sending prompt for file: 2007 - nlm.txt
Gemini response preview:
 1.  The system shall provide an infrastructure that ensures 99.999% data integrity, supports a minimum of 5 petabytes of digital content, and manages 20 distinct digital formats. The system shall maintain 99.9% availability for content management and preservation services.
2.  The repository shall ensure 99.9999% data integrity for all preserved digital content not covered by PMC and the videocast
Added 15 rows for 2007 - nlm.txt


Processing files:  82%|████████▏ | 65/79 [24:22<04:29, 19.28s/it]

File '2005 - nenios.txt' — vague sentences: 12
Sending prompt for file: 2005 - nenios.txt
Gemini response preview:
 1. The system shall provide role-based filters for requirements, allowing stakeholders to display only requirements relevant to their assigned responsibilities and reducing the time to find specific requirements by 50%.
2. The software shall automate at least 70% of the manual tasks for class scheduling, resource allocation, and attendance tracking, thereby reducing the average operational workflo
Added 12 rows for 2005 - nenios.txt


Processing files:  84%|████████▎ | 66/79 [24:37<03:53, 17.94s/it]

File '2005 - triangle.txt' — vague sentences: 7
Sending prompt for file: 2005 - triangle.txt
Gemini response preview:
 1. The system shall not require external databases for installation. The installation process shall complete within 5 minutes on a target machine without requiring user interaction after initial execution.
2. The system shall provide a graphical user interface that supports both mouse and keyboard input. 90% of new users shall be able to complete a predefined set of five core tasks within 5 minute
Added 7 rows for 2005 - triangle.txt


Processing files:  85%|████████▍ | 67/79 [24:58<03:46, 18.87s/it]

File '1999 - tcs.txt' — vague sentences: 34
Sending prompt for file: 1999 - tcs.txt
Gemini response preview:
 1.  The TCS shall present visual and auditory cues to the operator enabling the accurate implementation and monitoring of automatic launch and recovery procedures within 5 seconds of a status change. The TCS shall present distinct visual and auditory cues for initiating abort procedures within 2 seconds of detecting a critical anomaly.
2.  The TCS shall provide real-time visual and auditory flight
Added 34 rows for 1999 - tcs.txt


Processing files:  86%|████████▌ | 68/79 [25:43<04:56, 26.94s/it]

File '2007 - eirene fun 7.txt' — vague sentences: 9
Sending prompt for file: 2007 - eirene fun 7.txt
Gemini response preview:
 1. The batteries used in the mobile equipment shall provide a minimum of 8 hours of continuous operation under a duty cycle of 10% transmit, 10% receive, and 80% standby.
2. Using no more than three MMI actions, the system shall allow the driver to view a prioritised list of all authorised mobile radio networks.
3. When presented with the network list, the system shall allow the driver to select t
Added 9 rows for 2007 - eirene fun 7.txt


Processing files:  87%|████████▋ | 69/79 [26:03<04:07, 24.71s/it]

File '2005 - clarus low.txt' — vague sentences: 15
Sending prompt for file: 2005 - clarus low.txt
Gemini response preview:
 1. The system shall integrate surface transportation weather observations with existing observation data. The integrated data shall enable atmospheric models to achieve a quantifiable reduction in prediction error for the atmospheric boundary layer and near the earth's surface (e.g., a X% reduction in mean absolute error for Y-hour forecasts) and shall provide the necessary data for the creation o
Added 15 rows for 2005 - clarus low.txt


Processing files:  89%|████████▊ | 70/79 [26:30<03:48, 25.38s/it]

File '2001 - beyond.txt' — vague sentences: 29
Sending prompt for file: 2001 - beyond.txt
Gemini response preview:
 1.  The system shall provide development tools that reduce the average time required to add a new rule to the speech system's rule database by 30% compared to the existing manual process. The tools shall enable developers to define new rules using a domain-specific language at an abstraction level that requires no direct modification of underlying code.
2.  The system shall implement context engin
Added 29 rows for 2001 - beyond.txt


Processing files:  90%|████████▉ | 71/79 [27:05<03:46, 28.35s/it]

File '2004 - sprat.txt' — vague sentences: 8
Sending prompt for file: 2004 - sprat.txt
Gemini response preview:
 1.  The system shall enable users to define, modify, and apply conditions based on attributes such as ID, keywords, taxonomy, subject, actor, and occurrences.
2.  The system shall store user passwords in the database using a one-way cryptographic hash function (e.g., PBKDF2 with SHA-256) with a unique salt for each password.
3.  The system shall store all user passwords in the database using a one
Added 8 rows for 2004 - sprat.txt


Processing files:  91%|█████████ | 72/79 [27:21<02:51, 24.47s/it]

File '2004 - rlcs.txt' — vague sentences: 10
Sending prompt for file: 2004 - rlcs.txt
Gemini response preview:
 1.  The requirements in this section shall be complete, unambiguous, and verifiable, enabling designers to develop the system without further clarification and acceptance testers to create comprehensive test cases for system validation.
2.  The controllers at the FCU and DCU shall be replaced with units that meet or exceed the functional and performance specifications of the Department of Transpor
Added 10 rows for 2004 - rlcs.txt


Processing files:  92%|█████████▏| 73/79 [27:51<02:37, 26.28s/it]

File '2010 - fishing.txt' — vague sentences: 0
File '2005 - pontis.txt' — vague sentences: 21
Sending prompt for file: 2005 - pontis.txt
Gemini response preview:
 1. The system shall provide a web application, an optimized database architecture for transactional and analytical processing, and mechanisms for users to duplicate and modify existing reports and forms. The user interface shall provide customization options for display elements (e.g., column order, sorting, filtering) and shall support the integration of new modules and UI extensions without requ
Added 21 rows for 2005 - pontis.txt


Processing files:  95%|█████████▍| 75/79 [28:43<01:44, 26.14s/it]

File '2001 - libra.txt' — vague sentences: 7
Sending prompt for file: 2001 - libra.txt
Gemini response preview:
 1. The system shall provide job management and scheduling capabilities for high-performance computing clusters, processing at least 1,000 jobs per hour with an average job submission latency under 500ms, and shall integrate with external cluster management software and job schedulers through documented APIs.
2. The system shall transparently distribute computational jobs across available cluster n
Added 7 rows for 2001 - libra.txt


Processing files:  96%|█████████▌| 76/79 [29:16<01:23, 27.82s/it]

File '2008 - vub.txt' — vague sentences: 6
Sending prompt for file: 2008 - vub.txt
Gemini response preview:
 1. The system design shall ensure that 100% of its components are directly traceable to one or more specified software requirements. The system shall be implemented such that its compliance with all specified software requirements is objectively verifiable through testing.
2. The PMS software shall be fully installable within 15 minutes by an experienced administrator. The installation process sha
Added 6 rows for 2008 - vub.txt


Processing files:  97%|█████████▋| 77/79 [29:51<00:59, 29.69s/it]

File '2001 - telescope.txt' — vague sentences: 11
Sending prompt for file: 2001 - telescope.txt
Gemini response preview:
 1. The system shall achieve a spatial resolution of 0.25 pixels through the utilization of a centroid algorithm.
2. The system shall implement an on/off control algorithm with a configurable hysteresis value, settable in increments of 0.1 units from 0.5 to 5.0 units.
3. The software shall transition to fast timing mode when one of the following conditions is met: (a) the spacecraft's pointing erro
Added 11 rows for 2001 - telescope.txt


Processing files:  99%|█████████▊| 78/79 [30:19<00:29, 29.20s/it]

File '2003 - tachonet.txt' — vague sentences: 8
Sending prompt for file: 2003 - tachonet.txt
Gemini response preview:
 1. The interface specification document shall define all necessary protocols, port numbers, logical addresses, data formats, and communication sequences required for software development and verification against interface requirements.
2. The system shall allow an authorized administrator to add a new network member within 5 minutes, requiring no more than 3 distinct user interface interactions, a
Added 8 rows for 2003 - tachonet.txt


Processing files: 100%|██████████| 79/79 [30:40<00:00, 23.30s/it]

File-wise detection & rewriting complete!
Results saved to: /content/drive/MyDrive/NLP_ASG/Outputs/Vague_Rewrites.csv





In [None]:
from itertools import chain

# Load generated output
CSV_PATH = "/content/drive/MyDrive/NLP_ASG/Outputs/Vague_Rewrites.csv"
df = pd.read_csv(CSV_PATH)

terms_split = df["vague_term(s)"].dropna().apply(lambda x: [t.strip().lower() for t in x.split(",")])

# Flatten list of lists → single list of all vague terms
all_terms = list(chain.from_iterable(terms_split))

# Count frequency of each unique vague term
term_freq = pd.Series(all_terms).value_counts().reset_index()
term_freq.columns = ["Type of Bad Smell(Vague Term)", "Frequency"]

# printing first 15 rows of output
print(term_freq.head(15))

Summary_CSV = "/content/drive/MyDrive/NLP_ASG/Outputs/Summary.csv"
term_freq.to_csv(Summary_CSV, index=False, encoding="utf-8")

print(f"Per-term frequency table saved to {Summary_CSV}")