Gemini Prompt 1


In [None]:
!pip install git+https://github.com/microsoft/autogen.git@v0.2.25

Collecting git+https://github.com/microsoft/autogen.git@v0.2.25
  Cloning https://github.com/microsoft/autogen.git (to revision v0.2.25) to /tmp/pip-req-build-c7_kq7rt
  Running command git clone --filter=blob:none --quiet https://github.com/microsoft/autogen.git /tmp/pip-req-build-c7_kq7rt
  Running command git checkout -q 4ab8a884870f4aeafe3587c56169bb094061af5b
  Encountered 1 file(s) that should have been pointers, but weren't:
        website/static/img/gallery/autotx.png
  Resolved https://github.com/microsoft/autogen.git to commit 4ab8a884870f4aeafe3587c56169bb094061af5b
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting diskcache (from pyautogen==0.2.25)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting flaml (from pyautogen==0.2.25)
  Downloading FLAML-2.3.3-py3-none-any.whl.metadata (16 kB)
Collecting python-dotenv (from p

In [None]:
import os

# Read the Gemini key from the process environment (None when the variable is unset).
# NOTE(review): later cells reassign api_key from Colab's userdata store.
api_key = os.getenv("GEMINI_API_KEY")


In [None]:
import time
import pandas as pd
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# The Gemini key lives in Colab's secret store; stop before any request is made
# when the lookup yields nothing usable.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# Input CSV and the checkpoint file that receives the responses.
csv_path = '/content/Requirement_data_part1.csv'
output_file = '/content/Requirement_data_part1_gemini_p1.csv'

# Conversation and throttling limits (delays are in seconds).
MAX_USER_REPLIES = 5   # cap on the user proxy's consecutive automatic replies
MAX_RETRIES = 5        # rate-limit retries allowed per statement
RETRY_DELAY = 30       # pause before retrying after a rate-limit error
PROCESS_DELAY = 15     # pause after each successfully processed statement

# Read the requirements and give every row an empty "Response" cell to fill in.
df = pd.read_csv(csv_path).assign(Response="")
statements = df['Requirement'].to_list()

# Single-entry config list selecting the Gemini model via the "google" API type.
config_list_gemini = [
    dict(model="gemini-1.5-pro-exp-0827", api_key=api_key, api_type="google"),
]

# Assistant agent backed by the Gemini configuration defined above.
# NOTE(review): both "cache_seed" and "seed" are passed through unchanged;
# confirm the Gemini client actually honours "seed".
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42,
    },
)

# User proxy that drives the chat without human input and stops once a reply
# ends with "TERMINATE" or MAX_USER_REPLIES automatic replies have been sent.
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # "content" can be present but None; fall back to "" so .rstrip()
        # cannot raise AttributeError inside the termination check.
        is_termination_msg=lambda msg: (msg.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False,
        },
    )
except Exception as e:
    print(f"The following error happened: {str(e)}")
    # Re-raise so the cell stops with a full traceback. Calling exit() here
    # would also shut down the notebook kernel and discard its state.
    raise

# Prompt sent for every requirement; the single `{}` is filled with the statement.
BASE_PROMPT_TEMPLATE = (
    "Identify the type of requirement for the following statement:\n\n"
    "'{}'\n\n"
    "Provide only the best option from the following categories:\n"
    "Functional (F), Availability (A), Fault Tolerance (FT), Legal (L), Look & Feel (LF), "
    "Maintainability (MN), Operational (O), Performance (PE), Portability (PO), Scalability (SC), "
    "Security (SE), Usability (US), Non-Functional Requirement (NFR)"
)

# Classify each statement in turn. Rate-limit errors (429) are retried up to
# MAX_RETRIES times; any other RuntimeError skips the statement and leaves its
# "Response" cell empty. The CSV is rewritten after every success.
for idx, statement in enumerate(statements):
    retries = 0
    prompt = BASE_PROMPT_TEMPLATE.format(statement)

    while retries < MAX_RETRIES:
        try:
            # Start the chat between the user proxy and the assistant agent
            chat_response = user_proxy.initiate_chat(
                assistant,
                message=prompt,
            )

            # Keep the newest reply that carries real text. The final turn can
            # be empty or just the "TERMINATE" marker, so scan backwards, strip
            # the marker and skip blanks. Entry 0 is excluded because it is
            # presumably the prompt sent above — TODO confirm.
            response_text = ""
            for message in reversed(chat_response.chat_history[1:]):
                text = (message.get('content') or "").strip()
                if text.endswith("TERMINATE"):
                    text = text[:-len("TERMINATE")].rstrip()
                if text:
                    response_text = text
                    break

            # Store the response in the "Response" column
            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save progress to CSV after each statement
            df.to_csv(output_file, index=False)

            # Wait PROCESS_DELAY seconds before processing the next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit loop if successful

        except RuntimeError as e:
            if '429' in str(e):
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                print(f"An unexpected error occurred for statement '{statement}': {str(e)}")
                break
    else:
        # Reached only when the retry budget ran out (no break happened).
        print(f"Giving up on statement {idx + 1}/{len(statements)} after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")


user_proxy (to assistant):

Identify the type of requirement for the following statement:

'The system shall refresh the display every 60 seconds.'

Provide only the best option from the following categories:
Functional (F), Availability (A), Fault Tolerance (FT), Legal (L), Look & Feel (LF), Maintainability (MN), Operational (O), Performance (PE), Portability (PO), Scalability (SC), Security (SE), Usability (US).

--------------------------------------------------------------------------------
assistant (to user_proxy):

This requirement specifies a behavior of the system, namely refreshing the display. This behavior directly relates to the functionality of the system. 

Therefore, the best option is **Functional (F)**.

**Explanation:**

* **Functional requirements** describe what a system should do. In this case, the system "shall refresh the display". 
* Other categories like Performance (PE) might seem relevant (e.g., how fast the refresh should be), but the core requirement is ab

**Prompt 2**

In [None]:
import time
import pandas as pd
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# The Gemini key lives in Colab's secret store; stop before any request is made
# when the lookup yields nothing usable.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# Input CSV and the checkpoint file that receives the responses.
# NOTE(review): later cells in this notebook write to this same output path, so
# running them overwrites this cell's results — confirm that is intended.
csv_path = '/content/Requirement_data_part2.csv'
output_file = '/content/Requirement_data_part2_gemini_p1.csv'

# Conversation and throttling limits (delays are in seconds).
MAX_USER_REPLIES = 5   # cap on the user proxy's consecutive automatic replies
MAX_RETRIES = 5        # rate-limit retries allowed per statement
RETRY_DELAY = 30       # pause before retrying after a rate-limit error
PROCESS_DELAY = 15     # pause after each successfully processed statement

# Read the requirements and give every row an empty "Response" cell to fill in.
df = pd.read_csv(csv_path).assign(Response="")
statements = df['Requirement'].to_list()

# Single-entry config list selecting the Gemini model via the "google" API type.
config_list_gemini = [
    dict(model="gemini-1.5-pro-exp-0827", api_key=api_key, api_type="google"),
]

# Assistant agent backed by the Gemini configuration defined above.
# NOTE(review): both "cache_seed" and "seed" are passed through unchanged;
# confirm the Gemini client actually honours "seed".
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42,
    },
)

# User proxy that drives the chat without human input and stops once a reply
# ends with "TERMINATE" or MAX_USER_REPLIES automatic replies have been sent.
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # "content" can be present but None; fall back to "" so .rstrip()
        # cannot raise AttributeError inside the termination check.
        is_termination_msg=lambda msg: (msg.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False,
        },
    )
except Exception as e:
    print(f"The following error happened: {str(e)}")
    # Re-raise so the cell stops with a full traceback. Calling exit() here
    # would also shut down the notebook kernel and discard its state.
    raise

# Prompt sent for every requirement; the single `{}` is filled with the statement.
BASE_PROMPT_TEMPLATE = (
    "Identify the type of requirement based on the provided statement:\n\n"
    "'{}'\n\n"
    "Evaluate the nature of the statement and determine the most relevant requirement type "
    "from the following options:\nFunctional (F), Availability (A), Fault Tolerance (FT), "
    "Legal (L), Look & Feel (LF), Maintainability (MN), Operational (O), Performance (PE), "
    "Portability (PO), Scalability (SC), Security (SE), Usability (US), Non-Functional Requirement (NFR).\n\n"
    "Select the best option that aligns with the characteristics of the statement and justify your choice briefly."
)

# Classify each statement in turn. Rate-limit errors (429) are retried up to
# MAX_RETRIES times; any other RuntimeError skips the statement and leaves its
# "Response" cell empty. The CSV is rewritten after every success.
for idx, statement in enumerate(statements):
    retries = 0
    prompt = BASE_PROMPT_TEMPLATE.format(statement)

    while retries < MAX_RETRIES:
        try:
            # Start the chat between the user proxy and the assistant agent
            chat_response = user_proxy.initiate_chat(
                assistant,
                message=prompt,
            )

            # Keep the newest reply that carries real text. The final turn can
            # be empty or just the "TERMINATE" marker, so scan backwards, strip
            # the marker and skip blanks. Entry 0 is excluded because it is
            # presumably the prompt sent above — TODO confirm.
            response_text = ""
            for message in reversed(chat_response.chat_history[1:]):
                text = (message.get('content') or "").strip()
                if text.endswith("TERMINATE"):
                    text = text[:-len("TERMINATE")].rstrip()
                if text:
                    response_text = text
                    break

            # Store the response in the "Response" column
            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save progress to CSV after each statement
            df.to_csv(output_file, index=False)

            # Wait PROCESS_DELAY seconds before processing the next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit loop if successful

        except RuntimeError as e:
            if '429' in str(e):
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                print(f"An unexpected error occurred for statement '{statement}': {str(e)}")
                break
    else:
        # Reached only when the retry budget ran out (no break happened).
        print(f"Giving up on statement {idx + 1}/{len(statements)} after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")


user_proxy (to assistant):

Identify the type of requirement based on the provided statement:

' The Disputes System must provide a confirmation to the user upon the creation of ticket retrieval request that contains the following information; the dispute case number  the type of retrieval requested (copy  original or portfolio)  and the date that the merchant response is due.'

Evaluate the nature of the statement and determine the most relevant requirement type from the following options:
Functional (F), Availability (A), Fault Tolerance (FT), Legal (L), Look & Feel (LF), Maintainability (MN), Operational (O), Performance (PE), Portability (PO), Scalability (SC), Security (SE), Usability (US).

Select the best option that aligns with the characteristics of the statement and justify your choice briefly.

--------------------------------------------------------------------------------
assistant (to user_proxy):

The requirement type is **Functional (F)**.

**Justification:**

The state

**Prompt 3**

In [None]:
import time
import pandas as pd
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# The Gemini key lives in Colab's secret store; stop before any request is made
# when the lookup yields nothing usable.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# Input CSV and the checkpoint file that receives the responses.
# NOTE(review): other cells in this notebook write to this same output path, so
# each run overwrites the results of the others — confirm that is intended.
csv_path = '/content/Requirement_data_part2.csv'
output_file = '/content/Requirement_data_part2_gemini_p1.csv'

# Conversation and throttling limits (delays are in seconds).
MAX_USER_REPLIES = 5   # cap on the user proxy's consecutive automatic replies
MAX_RETRIES = 5        # rate-limit retries allowed per statement
RETRY_DELAY = 30       # pause before retrying after a rate-limit error
PROCESS_DELAY = 15     # pause after each successfully processed statement

# Read the requirements and give every row an empty "Response" cell to fill in.
df = pd.read_csv(csv_path).assign(Response="")
statements = df['Requirement'].to_list()

# Single-entry config list selecting the Gemini model via the "google" API type.
config_list_gemini = [
    dict(model="gemini-1.5-pro-exp-0827", api_key=api_key, api_type="google"),
]

# Assistant agent backed by the Gemini configuration defined above.
# NOTE(review): both "cache_seed" and "seed" are passed through unchanged;
# confirm the Gemini client actually honours "seed".
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42,
    },
)

# User proxy that drives the chat without human input and stops once a reply
# ends with "TERMINATE" or MAX_USER_REPLIES automatic replies have been sent.
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # "content" can be present but None; fall back to "" so .rstrip()
        # cannot raise AttributeError inside the termination check.
        is_termination_msg=lambda msg: (msg.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False,
        },
    )
except Exception as e:
    print(f"Initialization error: {str(e)}")
    # Re-raise so the cell stops with a full traceback. Calling exit() here
    # would also shut down the notebook kernel and discard its state.
    raise

# Prompt sent for every requirement; the single `{}` is filled with the statement.
BASE_PROMPT_TEMPLATE = (
    "Classify the statement '{}' into the following categories:\n\n"
    "* Functional (F)\n* Availability (A)\n* Fault Tolerance (FT)\n"
    "* Legal (L)\n* Look & Feel (LF)\n* Maintainability (MN)\n"
    "* Operational (O)\n* Performance (PE)\n* Portability (PO)\n"
    "* Scalability (SC)\n* Security (SE)\n* Usability (US)\n* Non-Functional Requirement (NFR)\n\n"
    "Please provide the most relevant category for the given statement."
)

# Classify each statement in turn. Rate-limit errors (429) are retried up to
# MAX_RETRIES times; any other RuntimeError skips the statement and leaves its
# "Response" cell empty. The CSV is rewritten after every success.
for idx, statement in enumerate(statements):
    retries = 0
    prompt = BASE_PROMPT_TEMPLATE.format(statement)

    while retries < MAX_RETRIES:
        try:
            # Initiate chat with the assistant agent
            chat_response = user_proxy.initiate_chat(
                assistant,
                message=prompt,
            )

            # Keep the newest reply that carries real text. The final turn can
            # be empty or just the "TERMINATE" marker, so scan backwards, strip
            # the marker and skip blanks. Entry 0 is excluded because it is
            # presumably the prompt sent above — TODO confirm.
            response_text = ""
            for message in reversed(chat_response.chat_history[1:]):
                text = (message.get('content') or "").strip()
                if text.endswith("TERMINATE"):
                    text = text[:-len("TERMINATE")].rstrip()
                if text:
                    response_text = text
                    break

            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save the progress to CSV
            df.to_csv(output_file, index=False)

            # Delay before next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit retry loop upon successful processing

        except RuntimeError as e:
            if '429' in str(e):  # Rate limit error
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                print(f"Unexpected error on statement '{statement}': {str(e)}")
                break
    else:
        # Reached only when the retry budget ran out (no break happened).
        print(f"Giving up on statement {idx + 1}/{len(statements)} after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")


user_proxy (to assistant):

Identify the type of requirement based on the provided statement:

' The Disputes System must provide a confirmation to the user upon the creation of ticket retrieval request that contains the following information; the dispute case number  the type of retrieval requested (copy  original or portfolio)  and the date that the merchant response is due.'

Evaluate the nature of the statement and determine the most relevant requirement type from the following options:
Functional (F), Availability (A), Fault Tolerance (FT), Legal (L), Look & Feel (LF), Maintainability (MN), Operational (O), Performance (PE), Portability (PO), Scalability (SC), Security (SE), Usability (US).

Select the best option that aligns with the characteristics of the statement and justify your choice briefly.

--------------------------------------------------------------------------------
assistant (to user_proxy):

The requirement type is **Functional (F)**.

**Justification:**

The state

KeyboardInterrupt: 

**Prompt 4**

In [None]:
import time
import pandas as pd
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# The Gemini key lives in Colab's secret store; stop before any request is made
# when the lookup yields nothing usable.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# Input CSV and the checkpoint file that receives the responses.
# NOTE(review): other cells in this notebook write to this same output path, so
# each run overwrites the results of the others — confirm that is intended.
csv_path = '/content/Requirement_data_part2.csv'
output_file = '/content/Requirement_data_part2_gemini_p1.csv'

# Conversation and throttling limits (delays are in seconds).
MAX_USER_REPLIES = 5   # cap on the user proxy's consecutive automatic replies
MAX_RETRIES = 5        # rate-limit retries allowed per statement
RETRY_DELAY = 30       # pause before retrying after a rate-limit error
PROCESS_DELAY = 15     # pause after each successfully processed statement

# Read the requirements and give every row an empty "Response" cell to fill in.
df = pd.read_csv(csv_path).assign(Response="")
statements = df['Requirement'].to_list()

# Single-entry config list selecting the Gemini model via the "google" API type.
config_list_gemini = [
    dict(model="gemini-1.5-pro-exp-0827", api_key=api_key, api_type="google"),
]

# Assistant agent backed by the Gemini configuration defined above.
# NOTE(review): both "cache_seed" and "seed" are passed through unchanged;
# confirm the Gemini client actually honours "seed".
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42,
    },
)

# User proxy that drives the chat without human input and stops once a reply
# ends with "TERMINATE" or MAX_USER_REPLIES automatic replies have been sent.
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # "content" can be present but None; fall back to "" so .rstrip()
        # cannot raise AttributeError inside the termination check.
        is_termination_msg=lambda msg: (msg.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False,
        },
    )
except Exception as e:
    print(f"Initialization error: {str(e)}")
    # Re-raise so the cell stops with a full traceback. Calling exit() here
    # would also shut down the notebook kernel and discard its state.
    raise

# Simplified prompt without a category list; the single `{}` is filled with the statement.
BASE_PROMPT_TEMPLATE = (
    "Please analyze the following statement and identify the type of software requirement it represents:\n\n"
    "'{}'\n\n"
    "Provide only the type of requirement."
)

# Classify each statement in turn. Rate-limit errors (429) are retried up to
# MAX_RETRIES times; any other RuntimeError skips the statement and leaves its
# "Response" cell empty. The CSV is rewritten after every success.
for idx, statement in enumerate(statements):
    retries = 0
    prompt = BASE_PROMPT_TEMPLATE.format(statement)

    while retries < MAX_RETRIES:
        try:
            # Initiate chat with the assistant agent
            chat_response = user_proxy.initiate_chat(
                assistant,
                message=prompt,
            )

            # Keep the newest reply that carries real text. The final turn can
            # be empty or just the "TERMINATE" marker, so scan backwards, strip
            # the marker and skip blanks. Entry 0 is excluded because it is
            # presumably the prompt sent above — TODO confirm.
            response_text = ""
            for message in reversed(chat_response.chat_history[1:]):
                text = (message.get('content') or "").strip()
                if text.endswith("TERMINATE"):
                    text = text[:-len("TERMINATE")].rstrip()
                if text:
                    response_text = text
                    break

            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save the progress to CSV
            df.to_csv(output_file, index=False)

            # Delay before next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit retry loop upon successful processing

        except RuntimeError as e:
            if '429' in str(e):  # Rate limit error
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                print(f"Unexpected error on statement '{statement}': {str(e)}")
                break
    else:
        # Reached only when the retry budget ran out (no break happened).
        print(f"Giving up on statement {idx + 1}/{len(statements)} after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")




user_proxy (to assistant):

Please analyze the following preprocessed statement and identify the type of software requirement it represents:

'the system shall refresh the display every 60 second'

Provide only the type of requirement.

--------------------------------------------------------------------------------
assistant (to user_proxy):

Functional requirement
TERMINATE


--------------------------------------------------------------------------------
Processed Statement 1/25: The system shall refresh the display every 60 seconds.
Preprocessed: the system shall refresh the display every 60 second
Response: Functional requirement
TERMINATE
----------------------------------------
user_proxy (to assistant):

Please analyze the following preprocessed statement and identify the type of software requirement it represents:

'the application shall match the color of the schema set forth by Department of Homeland Security'

Provide only the type of requirement.

-------------------------

KeyboardInterrupt: 

**Human prompt 1**

In [None]:
import time
import pandas as pd
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# The Gemini key lives in Colab's secret store; stop before any request is made
# when the lookup yields nothing usable.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# Input CSV and the checkpoint file that receives the responses.
# NOTE(review): other cells in this notebook write to this same output path, so
# each run overwrites the results of the others — confirm that is intended.
csv_path = '/content/Requirement_data_part2.csv'
output_file = '/content/Requirement_data_part2_gemini_p1.csv'

# Conversation and throttling limits (delays are in seconds).
MAX_USER_REPLIES = 5   # cap on the user proxy's consecutive automatic replies
MAX_RETRIES = 5        # rate-limit retries allowed per statement
RETRY_DELAY = 30       # pause before retrying after a rate-limit error
PROCESS_DELAY = 15     # pause after each successfully processed statement

# Read the requirements and give every row an empty "Response" cell to fill in.
df = pd.read_csv(csv_path).assign(Response="")
statements = df['Requirement'].to_list()

# Single-entry config list selecting the Gemini model via the "google" API type.
config_list_gemini = [
    dict(model="gemini-1.5-pro-exp-0827", api_key=api_key, api_type="google"),
]

# Assistant agent backed by the Gemini configuration defined above.
# NOTE(review): both "cache_seed" and "seed" are passed through unchanged;
# confirm the Gemini client actually honours "seed".
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42,
    },
)

# User proxy that drives the chat without human input and stops once a reply
# ends with "TERMINATE" or MAX_USER_REPLIES automatic replies have been sent.
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # "content" can be present but None; fall back to "" so .rstrip()
        # cannot raise AttributeError inside the termination check.
        is_termination_msg=lambda msg: (msg.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False,
        },
    )
except Exception as e:
    print(f"Initialization error: {str(e)}")
    # Re-raise so the cell stops with a full traceback. Calling exit() here
    # would also shut down the notebook kernel and discard its state.
    raise

# Prompt sent for every requirement; the single `{}` is filled with the statement.
BASE_PROMPT_TEMPLATE = (
    "I want to know which software requirements this statement fulfills: '{}'.\n"
    "Classify it into one of the following categories:\n"
    "* Functional (F)\n"
    "* Availability (A)\n"
    "* Fault Tolerance (FT)\n"
    "* Legal (L)\n"
    "* Look & Feel (LF)\n"
    "* Maintainability (MN)\n"
    "* Operational (O)\n"
    "* Performance (PE)\n"
    "* Portability (PO)\n"
    "* Scalability (SC)\n"
    "* Security (SE)\n"
    "* Usability (US)\n"
    "* Non-Functional Requirement (NFR)\n"
    "Provide only the category code."
)

# Classify each statement in turn. Rate-limit errors (429) are retried up to
# MAX_RETRIES times; any other RuntimeError skips the statement and leaves its
# "Response" cell empty. The CSV is rewritten after every success.
for idx, statement in enumerate(statements):
    retries = 0
    prompt = BASE_PROMPT_TEMPLATE.format(statement)

    while retries < MAX_RETRIES:
        try:
            # Initiate chat with the assistant agent
            chat_response = user_proxy.initiate_chat(
                assistant,
                message=prompt,
            )

            # Keep the newest reply that carries real text. The final turn can
            # be empty or just the "TERMINATE" marker, so scan backwards, strip
            # the marker and skip blanks. Entry 0 is excluded because it is
            # presumably the prompt sent above — TODO confirm.
            response_text = ""
            for message in reversed(chat_response.chat_history[1:]):
                text = (message.get('content') or "").strip()
                if text.endswith("TERMINATE"):
                    text = text[:-len("TERMINATE")].rstrip()
                if text:
                    response_text = text
                    break

            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save the progress to CSV
            df.to_csv(output_file, index=False)

            # Delay before next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit retry loop upon successful processing

        except RuntimeError as e:
            if '429' in str(e):  # Rate limit error
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                print(f"Unexpected error on statement '{statement}': {str(e)}")
                break
    else:
        # Reached only when the retry budget ran out (no break happened).
        print(f"Giving up on statement {idx + 1}/{len(statements)} after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")


**Human prompt 2**

In [None]:
import time
import pandas as pd
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# The Gemini key lives in Colab's secret store; stop before any request is made
# when the lookup yields nothing usable.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# Input CSV and the checkpoint file that receives the responses.
# NOTE(review): the input is the part1 file but the output is named part2, and
# other cells in this notebook write to this same output path — confirm the
# intended file name before relying on the saved results.
csv_path = '/content/Requirement_data_part1.csv'
output_file = '/content/Requirement_data_part2_gemini_p1.csv'

# Conversation and throttling limits (delays are in seconds).
MAX_USER_REPLIES = 5   # cap on the user proxy's consecutive automatic replies
MAX_RETRIES = 5        # rate-limit retries allowed per statement
RETRY_DELAY = 30       # pause before retrying after a rate-limit error
PROCESS_DELAY = 15     # pause after each successfully processed statement

# Read the requirements and give every row an empty "Response" cell to fill in.
df = pd.read_csv(csv_path).assign(Response="")
statements = df['Requirement'].to_list()

# Single-entry config list selecting the Gemini model via the "google" API type.
config_list_gemini = [
    dict(model="gemini-1.5-pro-exp-0827", api_key=api_key, api_type="google"),
]

# Assistant agent backed by the Gemini configuration defined above.
# NOTE(review): both "cache_seed" and "seed" are passed through unchanged;
# confirm the Gemini client actually honours "seed".
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42,
    },
)

# User proxy that drives the chat without human input and stops once a reply
# ends with "TERMINATE" or MAX_USER_REPLIES automatic replies have been sent.
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # "content" can be present but None; fall back to "" so .rstrip()
        # cannot raise AttributeError inside the termination check.
        is_termination_msg=lambda msg: (msg.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False,
        },
    )
except Exception as e:
    print(f"Initialization error: {str(e)}")
    # Re-raise so the cell stops with a full traceback. Calling exit() here
    # would also shut down the notebook kernel and discard its state.
    raise

# Prompt sent for every requirement; the single `{}` is filled with the statement.
# NOTE(review): the first line contains a stray "'" and reads awkwardly; it is
# kept verbatim because changing the prompt changes the experiment.
BASE_PROMPT_TEMPLATE = (
    "“{}” Now write which software requirement category is this.' into the following categories:\n"
    "* Functional (F)\n"
    "* Availability (A)\n"
    "* Fault Tolerance (FT)\n"
    "* Legal (L)\n"
    "* Look & Feel (LF)\n"
    "* Maintainability (MN)\n"
    "* Operational (O)\n"
    "* Performance (PE)\n"
    "* Portability (PO)\n"
    "* Scalability (SC)\n"
    "* Security (SE)\n"
    "* Usability (US)\n"
    "* Non-Functional Requirement (NFR)\n"
    "Respond with only the category code."
)

# Classify each statement in turn. Rate-limit errors (429) are retried up to
# MAX_RETRIES times; any other RuntimeError skips the statement and leaves its
# "Response" cell empty. The CSV is rewritten after every success.
for idx, statement in enumerate(statements):
    retries = 0
    prompt = BASE_PROMPT_TEMPLATE.format(statement)  # Insert each statement from dataset

    while retries < MAX_RETRIES:
        try:
            # Initiate chat with the assistant agent
            chat_response = user_proxy.initiate_chat(
                assistant,
                message=prompt,
            )

            # Keep the newest reply that carries real text. The final turn can
            # be empty or just the "TERMINATE" marker, so scan backwards, strip
            # the marker and skip blanks. Entry 0 is excluded because it is
            # presumably the prompt sent above — TODO confirm.
            response_text = ""
            for message in reversed(chat_response.chat_history[1:]):
                text = (message.get('content') or "").strip()
                if text.endswith("TERMINATE"):
                    text = text[:-len("TERMINATE")].rstrip()
                if text:
                    response_text = text
                    break

            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save the progress to CSV
            df.to_csv(output_file, index=False)

            # Delay before next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit retry loop upon successful processing

        except RuntimeError as e:
            if '429' in str(e):  # Rate limit error
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                print(f"Unexpected error on statement '{statement}': {str(e)}")
                break
    else:
        # Reached only when the retry budget ran out (no break happened).
        print(f"Giving up on statement {idx + 1}/{len(statements)} after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")


user_proxy (to assistant):

“The system shall refresh the display every 60 seconds.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

--------------------------------------------------------------------------------




Unexpected error on statement 'The system shall refresh the display every 60 seconds.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The application shall match the color of the schema set forth by Department of Homeland Security” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

----------------



Unexpected error on statement 'The application shall match the color of the schema set forth by Department of Homeland Security': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“ If projected  the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)




Unexpected error on statement ' If projected  the data must be readable.  On a 10x10 projection screen  90% of viewers must be able to read Event / Activity data from a viewing distance of 30': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“ The product shall be available during normal business hours. As long as the user has access to the client PC  the system will be available 99% of the time during the first six months of operation.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainabi



Unexpected error on statement ' The product shall be available during normal business hours. As long as the user has access to the client PC  the system will be available 99% of the time during the first six months of operation.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“ If projected  the data must be understandable. On a 10x10 projection screen  90% of viewers must be able to determine that Events or Activities are occuring in current time from a viewing distance of 100” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)




Unexpected error on statement ' If projected  the data must be understandable. On a 10x10 projection screen  90% of viewers must be able to determine that Events or Activities are occuring in current time from a viewing distance of 100': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The product shall ensure that it can only be accessed by authorized users.  The product will be able to distinguish between authorized and unauthorized users in all access attempts” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Loo



Unexpected error on statement 'The product shall ensure that it can only be accessed by authorized users.  The product will be able to distinguish between authorized and unauthorized users in all access attempts': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The product shall be intuitive and self-explanatory.  ” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non



Unexpected error on statement 'The product shall be intuitive and self-explanatory.  ': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The product shall respond fast to keep up-to-date data in the display.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

------------------------------------------



Unexpected error on statement 'The product shall respond fast to keep up-to-date data in the display.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The system shall have a MDI form that allows for the viewing of the graph and the data table.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

---



Unexpected error on statement 'The system shall have a MDI form that allows for the viewing of the graph and the data table.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The system shall display Events in a vertical table by time.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

-------------



Unexpected error on statement 'The system shall display Events in a vertical table by time.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The system shall display the Events in a graph by time.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

---------------------------------------------------



Unexpected error on statement 'The system shall display the Events in a graph by time.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“All business rules specified in the Disputes System shall be in compliance to the guidelines of Regulation E and Regulation Z.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the c



Unexpected error on statement 'All business rules specified in the Disputes System shall be in compliance to the guidelines of Regulation E and Regulation Z.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes application must maintain a detailed history of every action that a user takes on a dispute case.  This ensures a complete audit trail if questions arise later on with regard to a particular dispute case.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Op



Unexpected error on statement 'The Disputes application must maintain a detailed history of every action that a user takes on a dispute case.  This ensures a complete audit trail if questions arise later on with regard to a particular dispute case.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“All actions that modify an existing dispute case must be recorded in the case history.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Porta



Unexpected error on statement 'All actions that modify an existing dispute case must be recorded in the case history.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System must be accessible by both internal and external users.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

-----



Unexpected error on statement 'The Disputes System must be accessible by both internal and external users.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System must prevent users from accessing any dispute cases that do not belong to their cardholder base.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with 



Unexpected error on statement 'The Disputes System must prevent users from accessing any dispute cases that do not belong to their cardholder base.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System will facilitate direct data entry of a dispute case via a user interface that supports real time responses to the users.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)




Unexpected error on statement 'The Disputes System will facilitate direct data entry of a dispute case via a user interface that supports real time responses to the users.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System must provide different levels of access with regard to disputes case initiation and follow-up actions.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security



Unexpected error on statement 'The Disputes System must provide different levels of access with regard to disputes case initiation and follow-up actions.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System shall provide view access capability for authorized users of the application.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Req



Unexpected error on statement 'The Disputes System shall provide view access capability for authorized users of the application.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System shall provide update access capability for authorized users of the application.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond 



Unexpected error on statement 'The Disputes System shall provide update access capability for authorized users of the application.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System must allow the users to select disputable transactions (based on the age of the transaction) from a user interface and initiate a dispute (ticket retrieval request or chargeback notification) on the selected transaction.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operat



Unexpected error on statement 'The Disputes System must allow the users to select disputable transactions (based on the age of the transaction) from a user interface and initiate a dispute (ticket retrieval request or chargeback notification) on the selected transaction.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System must provide the user the ability to initiate a single dispute case on multiple transactions that belong to a single merchant.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* L



Unexpected error on statement 'The Disputes System must provide the user the ability to initiate a single dispute case on multiple transactions that belong to a single merchant.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
user_proxy (to assistant):

“The Disputes System will provide the user the ability to create or initiate a ticket retrieval request.   As part of ticket retrieval creation process the system must prompt the user to enter all the required information to create the ticket retrieval request.  The ticket retrieval request is a document that is sent to merchant inquiring the validity of a transaction.” Now write which software requir



“The Disputes System must allow the user to create three unique types of ticket retrieval requests.  The three types of ticket retrieval requests are (1) Request for original receipt (2) Request for a copy of the receipt or (3) Request for a portfolio. A portfolio consists of documentation that would provide proof of a purchase such as the documentation that is received from a car rental agency that is more than a sales receipt.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

--------------------------------------------------------------------------------
Unexpected error on statement 'The Disputes System must allow the user to create three unique types of t



“The Disputes System must prevent external users from requesting original receipts. Requests for original receipts are restricted to internal users.” Now write which software requirement category is this.' into the following categories:
* Functional (F)
* Availability (A)
* Fault Tolerance (FT)
* Legal (L)
* Look & Feel (LF)
* Maintainability (MN)
* Operational (O)
* Performance (PE)
* Portability (PO)
* Scalability (SC)
* Security (SE)
* Usability (US)
* Non-Functional Requirement (NFR)
Respond with only the category code.

--------------------------------------------------------------------------------




Unexpected error on statement 'The Disputes System must prevent external users from requesting original receipts. Requests for original receipts are restricted to internal users.': Google GenAI exception occurred while calling Gemini API: 404 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-exp-0827:generateContent?%24alt=json%3Benum-encoding%3Dint: models/gemini-1.5-pro-exp-0827 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.
Processing COMPLETE


**New check**


In [None]:
import time
import pandas as pd
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# Download the NLTK resources used by the preprocessing step below.
# 'punkt_tab' is the tokenizer resource that word_tokenize looks up in recent
# NLTK releases (without it the call fails with "Resource punkt_tab not found");
# 'punkt' is kept so older NLTK releases keep working.
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('punkt_tab')

# --- Credentials ---------------------------------------------------------
# The Gemini key is read from Colab's secret store; abort early if it is empty.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# --- File locations ------------------------------------------------------
# NOTE(review): the input is "part1" while the output is named "part2" —
# confirm this is intentional and not a copy/paste leftover.
csv_path = '/content/Requirement_data_part1.csv'
output_file = '/content/Requirement_data_part2_gemini_p1.csv'

# --- Limits and pacing ---------------------------------------------------
MAX_USER_REPLIES = MAX_RETRIES = 5  # auto-reply cap for user_proxy / rate-limit retries per statement
RETRY_DELAY = 30    # seconds to wait after a rate-limit (429) error
PROCESS_DELAY = 15  # seconds to wait between two successfully processed prompts

# --- Data ----------------------------------------------------------------
# One requirement statement per row; 'Response' is filled in by the loop.
df = pd.read_csv(csv_path)
df['Response'] = ""
statements = df['Requirement'].tolist()

# --- Preprocessing tools -------------------------------------------------
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize a requirement statement before it is placed in a prompt.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is deleted (digits and punctuation are removed, not replaced
    by a space), the remainder is split with ``word_tokenize`` and each token
    is passed through the module-level ``lemmatizer``.

    Args:
        text: The raw statement (a ``str``).

    Returns:
        The lemmatized tokens joined by single spaces.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    lemmas = map(lemmatizer.lemmatize, word_tokenize(letters_only))
    return ' '.join(lemmas)

# Define model configuration for the assistant agent.
# NOTE(review): the run recorded earlier in this notebook received
# "404 ... models/gemini-1.5-pro-exp-0827 is not found" for this model id —
# confirm the model is still served before re-running this cell.
config_list_gemini = [
    {
        "model": "gemini-1.5-pro-exp-0827",
        "api_key": api_key,
        "api_type": "google"
    }
]

# Initialize the assistant agent
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42
    },
)

# Initialize the user proxy agent (no human input, bounded number of auto-replies).
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # `or ""` guards against a message whose "content" key is present but None,
        # which would otherwise raise AttributeError on .rstrip().
        is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False
        },
    )
except Exception as e:
    print(f"Initialization error: {str(e)}")
    # Re-raise instead of calling exit(): in a notebook exit() requests a kernel
    # shutdown and hides the traceback, whereas raising stops this cell at the
    # failure so nothing below runs with a missing (or stale) user_proxy.
    raise

# Step 1: send the whole preprocessed dataset to the assistant in one message.
# NOTE(review): this is a single chat turn, not model training. initiate_chat
# presumably clears the previous conversation by default, so the later
# per-statement chats may not see this context — confirm against the AutoGen docs.
preprocessed_corpus = "\n".join(map(preprocess_text, statements))
training_prompt = (
    "Learn from the following software requirement statements:\n\n"
    + preprocessed_corpus
    + "\n\nUnderstand the context and structure of these statements."
)

user_proxy.initiate_chat(assistant, message=training_prompt)
print("Training COMPLETE. Now processing new statements...")

# Prompt wrapped around each preprocessed statement; '{}' receives the statement.
BASE_PROMPT_TEMPLATE = "".join([
    "Based on your learned knowledge, analyze the following preprocessed requirement statement and determine its category:\n\n",
    "'{}'\n\n",
    "Provide only the type of requirement.",
])

# Step 2: Process each statement with preprocessing and model inference.
# For every statement: build the prompt, ask the assistant, store the reply in
# df['Response'] and write the CSV so progress survives an interrupted run.
for idx, statement in enumerate(statements):
    retries = 0
    preprocessed_statement = preprocess_text(statement)
    prompt = BASE_PROMPT_TEMPLATE.format(preprocessed_statement)

    while retries < MAX_RETRIES:
        try:
            chat_response = user_proxy.initiate_chat(assistant, message=prompt)
            # Last message of the exchange.
            # NOTE(review): presumably the assistant's reply — confirm, since
            # user_proxy may auto-reply up to MAX_USER_REPLIES times.
            response_text = chat_response.chat_history[-1]['content'].strip()
            # idx is the enumerate position; assumes df keeps the default 0..n-1 index.
            df.at[idx, 'Response'] = response_text

            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Preprocessed: {preprocessed_statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save progress
            df.to_csv(output_file, index=False)
            time.sleep(PROCESS_DELAY)
            break
        except RuntimeError as e:
            if '429' in str(e):
                # Rate limited: wait and try the same statement again.
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                # Any other RuntimeError is not retried; move on to the next statement.
                print(f"Unexpected error on statement '{statement}': {str(e)}")
                break
    else:
        # Runs only when the while-condition became false, i.e. every retry was
        # consumed by rate-limit errors. Previously the statement was skipped
        # without any trace; its 'Response' cell stays empty.
        print(f"Giving up on statement '{statement}' after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************


**New one**

In [None]:
import nltk
nltk.download('punkt')
# The LookupError recorded above asks for 'punkt_tab', the tokenizer resource
# that recent NLTK releases load instead of 'punkt'; fetch it as well.
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import time
import pandas as pd
import spacy
from textblob import TextBlob
from autogen import AssistantAgent, UserProxyAgent
from google.colab import userdata

# --- NLP pipeline --------------------------------------------------------
# Small English spaCy model, used by preprocess_text for lemmatization.
nlp = spacy.load("en_core_web_sm")

# --- Credentials ---------------------------------------------------------
# The Gemini key is read from Colab's secret store; abort early if it is empty.
api_key = userdata.get('GEMINI_API_KEY')
if not api_key:
    raise ValueError("API key is invalid.")

# --- File locations ------------------------------------------------------
csv_path = '/content/Requirement_data_part1.csv'
output_file = '/content/Requirement_data_part2_gemini_p1.csv'

# --- Limits and pacing ---------------------------------------------------
MAX_USER_REPLIES = MAX_RETRIES = 5  # auto-reply cap for user_proxy / rate-limit retries per statement
RETRY_DELAY = 30    # seconds to wait after a rate-limit (429) error
PROCESS_DELAY = 15  # seconds to wait between two successfully processed prompts

# --- Data ----------------------------------------------------------------
# One requirement statement per row; 'Response' will hold the model's answer.
df = pd.read_csv(csv_path)
df['Response'] = ""
statements = df['Requirement'].tolist()

# Define preprocessing function
def preprocess_text(text):
    """Lemmatize a requirement statement with spaCy, dropping punctuation.

    The statement is parsed with the module-level ``nlp`` pipeline and the
    lemma of every token that is not punctuation is kept.

    The previous version also ran ``TextBlob(text).correct()`` but never used
    the result, so the spelling pass only cost time; it has been removed and
    the returned string is unchanged.

    Args:
        text: The raw statement (a ``str``).

    Returns:
        The lemmas of the non-punctuation tokens joined by single spaces.
    """
    doc = nlp(text)
    return " ".join(token.lemma_ for token in doc if not token.is_punct)

# Define model configuration for the assistant agent.
# NOTE(review): the run recorded earlier in this notebook received
# "404 ... models/gemini-1.5-pro-exp-0827 is not found" for this model id —
# confirm the model is still served before re-running this cell.
config_list_gemini = [
    {
        "model": "gemini-1.5-pro-exp-0827",
        "api_key": api_key,
        "api_type": "google"
    }
]

# Initialize the assistant agent with the Gemini model configuration
assistant = AssistantAgent(
    name="assistant",
    llm_config={
        "cache_seed": 41,
        "config_list": config_list_gemini,
        "seed": 42
    },
)

# Initialize the user proxy agent (no human input, bounded number of auto-replies).
try:
    user_proxy = UserProxyAgent(
        name="user_proxy",
        human_input_mode="NEVER",
        max_consecutive_auto_reply=MAX_USER_REPLIES,
        # `or ""` guards against a message whose "content" key is present but None,
        # which would otherwise raise AttributeError on .rstrip().
        is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
        code_execution_config={
            "work_dir": "coding",
            "use_docker": False
        },
    )
except Exception as e:
    print(f"Initialization error: {str(e)}")
    # Re-raise instead of calling exit(): in a notebook exit() requests a kernel
    # shutdown and hides the traceback, whereas raising stops this cell at the
    # failure so nothing below runs with a missing (or stale) user_proxy.
    raise

# Define the new simplified prompt format; '{}' receives the preprocessed statement.
BASE_PROMPT_TEMPLATE = (
    "Please analyze the following preprocessed statement and identify the type of software requirement it represents:\n\n"
    "'{}'\n\n"
    "Provide only the type of requirement."
)

# Process each statement from the CSV file: build the prompt, ask the assistant,
# store the reply in df['Response'] and write the CSV so progress survives an
# interrupted run.
for idx, statement in enumerate(statements):
    retries = 0
    preprocessed_statement = preprocess_text(statement)
    prompt = BASE_PROMPT_TEMPLATE.format(preprocessed_statement)

    while retries < MAX_RETRIES:
        try:
            # Initiate chat with the assistant agent
            chat_response = user_proxy.initiate_chat(
                assistant,
                message=prompt,
            )

            # Last message of the exchange.
            # NOTE(review): presumably the assistant's reply — confirm, since
            # user_proxy may auto-reply up to MAX_USER_REPLIES times.
            response_text = chat_response.chat_history[-1]['content'].strip()
            # idx is the enumerate position; assumes df keeps the default 0..n-1 index.
            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Preprocessed: {preprocessed_statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save the progress to CSV
            df.to_csv(output_file, index=False)

            # Delay before next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit retry loop upon successful processing

        except RuntimeError as e:
            if '429' in str(e):  # Rate limit error
                retries += 1
                print(f"Rate limit hit. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)
            else:
                # Any other RuntimeError is not retried; move on to the next statement.
                print(f"Unexpected error on statement '{statement}': {str(e)}")
                break
    else:
        # Runs only when the while-condition became false, i.e. every retry was
        # consumed by rate-limit errors. Previously the statement was skipped
        # without any trace; its 'Response' cell stays empty.
        print(f"Giving up on statement '{statement}' after {MAX_RETRIES} rate-limit retries.")

print("Processing COMPLETE")




In [None]:
!pip install google-generativeai --upgrade



Gemini PP prompt 4

In [None]:
import time
import pandas as pd
import spacy
from textblob import TextBlob
import google.generativeai as genai
from google.colab import userdata

# --- NLP pipeline --------------------------------------------------------
# Small English spaCy model, used by preprocess_text for lemmatization.
nlp = spacy.load("en_core_web_sm")

# --- Credentials ---------------------------------------------------------
# Hand the key from Colab's secret store to the Gemini SDK.
gemini_api_key = userdata.get("GEMINI_API_KEY")
genai.configure(api_key=gemini_api_key)

# --- File locations ------------------------------------------------------
csv_path = '/content/Requirement_data_part10.csv'
output_file = '/content/Requirement_data_part10_gemini_new.csv'

# --- Limits and pacing ---------------------------------------------------
MAX_RETRIES = 5     # attempts per statement before moving on
RETRY_DELAY = 30    # seconds to wait after a failed attempt
PROCESS_DELAY = 15  # seconds to wait between two successfully processed prompts

# --- Data ----------------------------------------------------------------
# One requirement statement per row; 'Response' will hold the model's answer.
df = pd.read_csv(csv_path)
df['Response'] = ""
statements = df['Requirement'].tolist()

# Define preprocessing function
def preprocess_text(text):
    """Lemmatize a requirement statement with spaCy, dropping punctuation.

    The statement is parsed with the module-level ``nlp`` pipeline and the
    lemma of every token that is not punctuation is kept.

    The previous version also ran ``TextBlob(text).correct()`` but never used
    the result, so the spelling pass only cost time; it has been removed and
    the returned string is unchanged.

    Args:
        text: The raw statement (a ``str``).

    Returns:
        The lemmas of the non-punctuation tokens joined by single spaces.
    """
    doc = nlp(text)
    return " ".join(token.lemma_ for token in doc if not token.is_punct)

# Sampling / output settings handed to the Gemini model.
# NOTE(review): a temperature of 0.9 presumably lets labels vary between runs —
# confirm that is intended for a classification task.
generation_settings = {
    "temperature": 0.9,
    "top_p": 1,
    "max_output_tokens": 2048,
    "response_mime_type": "text/plain",
}

# Gemini model used for every statement.
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash",
    generation_config=generation_settings,
)

# Prompt wrapped around each preprocessed statement; '{}' receives the statement.
BASE_PROMPT_TEMPLATE = "".join([
    "Please analyze the following preprocessed statement and identify the type of software requirement it represents:\n\n",
    "'{}'\n\n",
    "Provide only the type of requirement.",
])

# Process each statement from the CSV file: build the prompt, ask Gemini in a
# fresh chat session, store the reply in df['Response'] and write the CSV so
# progress survives an interrupted run.
for idx, statement in enumerate(statements):
    retries = 0
    preprocessed_statement = preprocess_text(statement)
    prompt = BASE_PROMPT_TEMPLATE.format(preprocessed_statement)

    while retries < MAX_RETRIES:
        try:
            # Start chat session
            chat_session = model.start_chat()
            response = chat_session.send_message(prompt)
            response_text = response.text.strip()

            # idx is the enumerate position; assumes df keeps the default 0..n-1 index.
            df.at[idx, 'Response'] = response_text
            print(f"Processed Statement {idx + 1}/{len(statements)}: {statement}")
            print(f"Preprocessed: {preprocessed_statement}")
            print(f"Response: {response_text}")
            print("-" * 40)

            # Save the progress to CSV
            df.to_csv(output_file, index=False)

            # Delay before next prompt
            time.sleep(PROCESS_DELAY)
            break  # Exit retry loop upon successful processing

        except Exception as e:
            # Every failure (not only rate limits) is retried after a pause.
            retries += 1
            print(f"Error processing statement '{statement}': {str(e)}. Retrying in {RETRY_DELAY} seconds... ({retries}/{MAX_RETRIES})")
            time.sleep(RETRY_DELAY)
    else:
        # Runs only when all MAX_RETRIES attempts failed (no `break`).
        # Previously the statement was skipped without any trace; its
        # 'Response' cell stays empty.
        print(f"Giving up on statement '{statement}' after {MAX_RETRIES} failed attempts.")

print("Processing COMPLETE")




Processed Statement 1/25:  Users should only have to navigate through a maximum of 4 pages  to create an account with the website.
Preprocessed:   user should only have to navigate through a maximum of 4 page   to create an account with the website
Response: Usability requirement
----------------------------------------
Processed Statement 2/25: Users should be able to access their streaming movies in under 2 clicks after logging into the website.
Preprocessed: user should be able to access their stream movie in under 2 click after log into the website
Response: Usability Requirement
----------------------------------------
Processed Statement 3/25: An anonymous survey will show that 70% of customers are comfortable purchasing streaming movies after 2 weeks using the website.
Preprocessed: an anonymous survey will show that 70 of customer be comfortable purchase streaming movie after 2 week use the website
Response: Usability Requirement
----------------------------------------
Process

Gemini_preprocessed_prompt1

In [None]:
import time
import pandas as pd
import spacy
import google.generativeai as genai
from google.colab import userdata

# spaCy English pipeline used by the preprocessing step
nlp = spacy.load("en_core_web_sm")

# Gemini client: authenticate with the key kept as a Colab secret,
# then bind the model that answers every prompt in this cell
genai.configure(api_key=userdata.get("GEMINI_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")

# Requirement statements, plus an empty 'Response' column for the model's answers
csv_path = '/content/Requirement_data_part10.csv'
df = pd.read_csv(csv_path).assign(Response="")

# Define preprocessing function
def preprocess_text(text):
    """Lemmatise a requirement statement, dropping punctuation and whitespace tokens.

    Args:
        text: The raw statement. Missing values (``None``/NaN, as pandas yields
            for empty CSV cells) are treated as an empty statement; any other
            non-string value is converted with ``str()``.

    Returns:
        The lemmas of the remaining tokens joined by single spaces, or ``""``
        when the input is missing.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            # Nothing to lemmatise; the spaCy pipeline expects a string.
            return ""
        text = str(text)

    doc = nlp(text)
    # Runs of whitespace come back as tokens of their own; keeping them would put
    # doubled or leading spaces into the joined result.
    return " ".join(
        token.lemma_ for token in doc if not (token.is_punct or token.is_space)
    )

# Prompt template; '{}' receives the preprocessed requirement statement.
PROMPT_TEMPLATE = (
    "Identify the best category for the following requirement statement: '{}'\n"
    "Choose one from: F, A, FT, L, LF, MN, O, PE, PO, SC, SE, US, NFR."
)

# Retry and pacing settings (the values this loop previously hard-coded).
MAX_RETRIES = 5      # attempts per statement before it is given up on
RETRY_DELAY = 30     # seconds to pause after a failed attempt
PROCESS_DELAY = 15   # seconds to pause after a successful request

# Process each statement
# NOTE(review): every preprocessed-prompt cell in this notebook saves to this same
# path, so each run overwrites the previous prompt's results -- confirm intended.
output_file = '/content/Requirement_data_part10_gemini_new.csv'
total = len(df)
unanswered = []  # index labels of rows left without a response

# Series.items() yields the real index labels, which is what df.at expects;
# the separate counter is only used for the progress messages.
for position, (idx, statement) in enumerate(df['Requirement'].items(), start=1):
    # pandas reads empty CSV cells as NaN; skip those rows rather than letting the
    # preprocessing step raise outside the retry loop and abort the whole run.
    if not isinstance(statement, str) or not statement.strip():
        print(f"Skipped {position}/{total}: empty or non-text requirement")
        unanswered.append(idx)
        continue

    preprocessed_statement = preprocess_text(statement)
    prompt = PROMPT_TEMPLATE.format(preprocessed_statement)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = model.start_chat().send_message(prompt).text.strip()
            df.at[idx, 'Response'] = response
            # Save after every answer so an interrupted run keeps its progress.
            df.to_csv(output_file, index=False)
        except Exception as e:
            if attempt < MAX_RETRIES:
                print(f"Error: {e}. Retrying {attempt}/{MAX_RETRIES} in {RETRY_DELAY}s...")
            else:
                print(f"Error: {e}. Giving up on statement {position}/{total} after {MAX_RETRIES} attempts.")
            # Pause even after the last attempt so the next statement does not
            # immediately run into the same rate limit.
            time.sleep(RETRY_DELAY)
        else:
            print(f"Processed {position}/{total}: {response}")
            time.sleep(PROCESS_DELAY)
            break
    else:
        # Every attempt failed: record the row instead of dropping it silently.
        unanswered.append(idx)

# Write once more so the output file exists even when no request succeeded.
df.to_csv(output_file, index=False)

if unanswered:
    print(f"Processing COMPLETE with {len(unanswered)} unanswered row(s): {unanswered}")
else:
    print("Processing COMPLETE")


Gemini PP Prompt 2

In [None]:
import time
import pandas as pd
import spacy
import google.generativeai as genai
from google.colab import userdata

# spaCy English pipeline used by the preprocessing step
nlp = spacy.load("en_core_web_sm")

# Gemini client: authenticate with the key kept as a Colab secret,
# then bind the model that answers every prompt in this cell
genai.configure(api_key=userdata.get("GEMINI_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")

# Requirement statements, plus an empty 'Response' column for the model's answers
csv_path = '/content/Requirement_data_part10.csv'
df = pd.read_csv(csv_path).assign(Response="")

# Define preprocessing function
def preprocess_text(text):
    """Lemmatise a requirement statement, dropping punctuation and whitespace tokens.

    Args:
        text: The raw statement. Missing values (``None``/NaN, as pandas yields
            for empty CSV cells) are treated as an empty statement; any other
            non-string value is converted with ``str()``.

    Returns:
        The lemmas of the remaining tokens joined by single spaces, or ``""``
        when the input is missing.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            # Nothing to lemmatise; the spaCy pipeline expects a string.
            return ""
        text = str(text)

    doc = nlp(text)
    # Runs of whitespace come back as tokens of their own; keeping them would put
    # doubled or leading spaces into the joined result.
    return " ".join(
        token.lemma_ for token in doc if not (token.is_punct or token.is_space)
    )

# Prompt template; '{}' receives the preprocessed requirement statement.
PROMPT_TEMPLATE = (
    "Identify the type of requirement for the following statement: '{}'\n"
    "Evaluate its nature and determine the most relevant requirement type from the following options: F, A, FT, L, LF, MN, O, PE, PO, SC, SE, US, NFR.\n"
    "Select the best option that aligns with the characteristics of the statement and briefly justify your choice."
)

# Retry and pacing settings (the values this loop previously hard-coded).
MAX_RETRIES = 5      # attempts per statement before it is given up on
RETRY_DELAY = 30     # seconds to pause after a failed attempt
PROCESS_DELAY = 15   # seconds to pause after a successful request

# Process each statement
# NOTE(review): every preprocessed-prompt cell in this notebook saves to this same
# path, so each run overwrites the previous prompt's results -- confirm intended.
output_file = '/content/Requirement_data_part10_gemini_new.csv'
total = len(df)
unanswered = []  # index labels of rows left without a response

# Series.items() yields the real index labels, which is what df.at expects;
# the separate counter is only used for the progress messages.
for position, (idx, statement) in enumerate(df['Requirement'].items(), start=1):
    # pandas reads empty CSV cells as NaN; skip those rows rather than letting the
    # preprocessing step raise outside the retry loop and abort the whole run.
    if not isinstance(statement, str) or not statement.strip():
        print(f"Skipped {position}/{total}: empty or non-text requirement")
        unanswered.append(idx)
        continue

    preprocessed_statement = preprocess_text(statement)
    prompt = PROMPT_TEMPLATE.format(preprocessed_statement)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = model.start_chat().send_message(prompt).text.strip()
            df.at[idx, 'Response'] = response
            # Save after every answer so an interrupted run keeps its progress.
            df.to_csv(output_file, index=False)
        except Exception as e:
            if attempt < MAX_RETRIES:
                print(f"Error: {e}. Retrying {attempt}/{MAX_RETRIES} in {RETRY_DELAY}s...")
            else:
                print(f"Error: {e}. Giving up on statement {position}/{total} after {MAX_RETRIES} attempts.")
            # Pause even after the last attempt so the next statement does not
            # immediately run into the same rate limit.
            time.sleep(RETRY_DELAY)
        else:
            print(f"Processed {position}/{total}: {response}")
            time.sleep(PROCESS_DELAY)
            break
    else:
        # Every attempt failed: record the row instead of dropping it silently.
        unanswered.append(idx)

# Write once more so the output file exists even when no request succeeded.
df.to_csv(output_file, index=False)

if unanswered:
    print(f"Processing COMPLETE with {len(unanswered)} unanswered row(s): {unanswered}")
else:
    print("Processing COMPLETE")


Gemini PP Prompt 3

In [None]:
import time
import pandas as pd
import spacy
import google.generativeai as genai
from google.colab import userdata

# spaCy English pipeline used by the preprocessing step
nlp = spacy.load("en_core_web_sm")

# Gemini client: authenticate with the key kept as a Colab secret,
# then bind the model that answers every prompt in this cell
genai.configure(api_key=userdata.get("GEMINI_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")

# Requirement statements, plus an empty 'Response' column for the model's answers
csv_path = '/content/Requirement_data_part10.csv'
df = pd.read_csv(csv_path).assign(Response="")

# Define preprocessing function
def preprocess_text(text):
    """Lemmatise a requirement statement, dropping punctuation and whitespace tokens.

    Args:
        text: The raw statement. Missing values (``None``/NaN, as pandas yields
            for empty CSV cells) are treated as an empty statement; any other
            non-string value is converted with ``str()``.

    Returns:
        The lemmas of the remaining tokens joined by single spaces, or ``""``
        when the input is missing.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            # Nothing to lemmatise; the spaCy pipeline expects a string.
            return ""
        text = str(text)

    doc = nlp(text)
    # Runs of whitespace come back as tokens of their own; keeping them would put
    # doubled or leading spaces into the joined result.
    return " ".join(
        token.lemma_ for token in doc if not (token.is_punct or token.is_space)
    )

# Prompt template; '{}' receives the preprocessed requirement statement.
PROMPT_TEMPLATE = (
    "Classify the statement '{}' into the following categories:\n"
    "* Functional (F)\n"
    "* Availability (A)\n"
    "* Fault Tolerance (FT)\n"
    "* Legal (L)\n"
    "* Look & Feel (LF)\n"
    "* Maintainability (MN)\n"
    "* Operational (O)\n"
    "* Performance (PE)\n"
    "* Portability (PO)\n"
    "* Scalability (SC)\n"
    "* Security (SE)\n"
    "* Usability (US)\n"
    "* Non-Functional Requirement (NFR)\n"
    "Please provide the most relevant category for the given statement."
)

# Retry and pacing settings (the values this loop previously hard-coded).
MAX_RETRIES = 5      # attempts per statement before it is given up on
RETRY_DELAY = 30     # seconds to pause after a failed attempt
PROCESS_DELAY = 15   # seconds to pause after a successful request

# Process each statement
# NOTE(review): every preprocessed-prompt cell in this notebook saves to this same
# path, so each run overwrites the previous prompt's results -- confirm intended.
output_file = '/content/Requirement_data_part10_gemini_new.csv'
total = len(df)
unanswered = []  # index labels of rows left without a response

# Series.items() yields the real index labels, which is what df.at expects;
# the separate counter is only used for the progress messages.
for position, (idx, statement) in enumerate(df['Requirement'].items(), start=1):
    # pandas reads empty CSV cells as NaN; skip those rows rather than letting the
    # preprocessing step raise outside the retry loop and abort the whole run.
    if not isinstance(statement, str) or not statement.strip():
        print(f"Skipped {position}/{total}: empty or non-text requirement")
        unanswered.append(idx)
        continue

    preprocessed_statement = preprocess_text(statement)
    prompt = PROMPT_TEMPLATE.format(preprocessed_statement)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = model.start_chat().send_message(prompt).text.strip()
            df.at[idx, 'Response'] = response
            # Save after every answer so an interrupted run keeps its progress.
            df.to_csv(output_file, index=False)
        except Exception as e:
            if attempt < MAX_RETRIES:
                print(f"Error: {e}. Retrying {attempt}/{MAX_RETRIES} in {RETRY_DELAY}s...")
            else:
                print(f"Error: {e}. Giving up on statement {position}/{total} after {MAX_RETRIES} attempts.")
            # Pause even after the last attempt so the next statement does not
            # immediately run into the same rate limit.
            time.sleep(RETRY_DELAY)
        else:
            print(f"Processed {position}/{total}: {response}")
            time.sleep(PROCESS_DELAY)
            break
    else:
        # Every attempt failed: record the row instead of dropping it silently.
        unanswered.append(idx)

# Write once more so the output file exists even when no request succeeded.
df.to_csv(output_file, index=False)

if unanswered:
    print(f"Processing COMPLETE with {len(unanswered)} unanswered row(s): {unanswered}")
else:
    print("Processing COMPLETE")


Gemini PP Human 1

In [None]:
import time
import pandas as pd
import spacy
import google.generativeai as genai
from google.colab import userdata

# spaCy English pipeline used by the preprocessing step
nlp = spacy.load("en_core_web_sm")

# Gemini client: authenticate with the key kept as a Colab secret,
# then bind the model that answers every prompt in this cell
genai.configure(api_key=userdata.get("GEMINI_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")

# Requirement statements, plus an empty 'Response' column for the model's answers
csv_path = '/content/Requirement_data_part10.csv'
df = pd.read_csv(csv_path).assign(Response="")

# Define preprocessing function
def preprocess_text(text):
    """Lemmatise a requirement statement, dropping punctuation and whitespace tokens.

    Args:
        text: The raw statement. Missing values (``None``/NaN, as pandas yields
            for empty CSV cells) are treated as an empty statement; any other
            non-string value is converted with ``str()``.

    Returns:
        The lemmas of the remaining tokens joined by single spaces, or ``""``
        when the input is missing.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            # Nothing to lemmatise; the spaCy pipeline expects a string.
            return ""
        text = str(text)

    doc = nlp(text)
    # Runs of whitespace come back as tokens of their own; keeping them would put
    # doubled or leading spaces into the joined result.
    return " ".join(
        token.lemma_ for token in doc if not (token.is_punct or token.is_space)
    )

# Prompt template; '{}' receives the preprocessed requirement statement.
PROMPT_TEMPLATE = (
    "I want to know which software requirements this statement fulfills: '{}'\n"
    "Classify it into the following categories:\n"
    "* Functional (F)\n"
    "* Availability (A)\n"
    "* Fault Tolerance (FT)\n"
    "* Legal (L)\n"
    "* Look & Feel (LF)\n"
    "* Maintainability (MN)\n"
    "* Operational (O)\n"
    "* Performance (PE)\n"
    "* Portability (PO)\n"
    "* Scalability (SC)\n"
    "* Security (SE)\n"
    "* Usability (US)\n"
    "* Non-Functional Requirement (NFR)\n"
    "Please provide the most relevant category for the given statement."
)

# Retry and pacing settings (the values this loop previously hard-coded).
MAX_RETRIES = 5      # attempts per statement before it is given up on
RETRY_DELAY = 30     # seconds to pause after a failed attempt
PROCESS_DELAY = 15   # seconds to pause after a successful request

# Process each statement
# NOTE(review): every preprocessed-prompt cell in this notebook saves to this same
# path, so each run overwrites the previous prompt's results -- confirm intended.
output_file = '/content/Requirement_data_part10_gemini_new.csv'
total = len(df)
unanswered = []  # index labels of rows left without a response

# Series.items() yields the real index labels, which is what df.at expects;
# the separate counter is only used for the progress messages.
for position, (idx, statement) in enumerate(df['Requirement'].items(), start=1):
    # pandas reads empty CSV cells as NaN; skip those rows rather than letting the
    # preprocessing step raise outside the retry loop and abort the whole run.
    if not isinstance(statement, str) or not statement.strip():
        print(f"Skipped {position}/{total}: empty or non-text requirement")
        unanswered.append(idx)
        continue

    preprocessed_statement = preprocess_text(statement)
    prompt = PROMPT_TEMPLATE.format(preprocessed_statement)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = model.start_chat().send_message(prompt).text.strip()
            df.at[idx, 'Response'] = response
            # Save after every answer so an interrupted run keeps its progress.
            df.to_csv(output_file, index=False)
        except Exception as e:
            if attempt < MAX_RETRIES:
                print(f"Error: {e}. Retrying {attempt}/{MAX_RETRIES} in {RETRY_DELAY}s...")
            else:
                print(f"Error: {e}. Giving up on statement {position}/{total} after {MAX_RETRIES} attempts.")
            # Pause even after the last attempt so the next statement does not
            # immediately run into the same rate limit.
            time.sleep(RETRY_DELAY)
        else:
            print(f"Processed {position}/{total}: {response}")
            time.sleep(PROCESS_DELAY)
            break
    else:
        # Every attempt failed: record the row instead of dropping it silently.
        unanswered.append(idx)

# Write once more so the output file exists even when no request succeeded.
df.to_csv(output_file, index=False)

if unanswered:
    print(f"Processing COMPLETE with {len(unanswered)} unanswered row(s): {unanswered}")
else:
    print("Processing COMPLETE")


Gemini PP Human 2

In [None]:
import time
import pandas as pd
import spacy
import google.generativeai as genai
from google.colab import userdata

# spaCy English pipeline used by the preprocessing step
nlp = spacy.load("en_core_web_sm")

# Gemini client: authenticate with the key kept as a Colab secret,
# then bind the model that answers every prompt in this cell
genai.configure(api_key=userdata.get("GEMINI_API_KEY"))
model = genai.GenerativeModel("gemini-2.0-flash")

# Requirement statements, plus an empty 'Response' column for the model's answers
csv_path = '/content/Requirement_data_part10.csv'
df = pd.read_csv(csv_path).assign(Response="")

# Define preprocessing function
def preprocess_text(text):
    """Lemmatise a requirement statement, dropping punctuation and whitespace tokens.

    Args:
        text: The raw statement. Missing values (``None``/NaN, as pandas yields
            for empty CSV cells) are treated as an empty statement; any other
            non-string value is converted with ``str()``.

    Returns:
        The lemmas of the remaining tokens joined by single spaces, or ``""``
        when the input is missing.
    """
    if not isinstance(text, str):
        if pd.isna(text):
            # Nothing to lemmatise; the spaCy pipeline expects a string.
            return ""
        text = str(text)

    doc = nlp(text)
    # Runs of whitespace come back as tokens of their own; keeping them would put
    # doubled or leading spaces into the joined result.
    return " ".join(
        token.lemma_ for token in doc if not (token.is_punct or token.is_space)
    )

# Prompt template; '{}' receives the preprocessed requirement statement.
PROMPT_TEMPLATE = (
    "'{}' Now write which software requirement category this belongs to:\n"
    "* Functional (F)\n"
    "* Availability (A)\n"
    "* Fault Tolerance (FT)\n"
    "* Legal (L)\n"
    "* Look & Feel (LF)\n"
    "* Maintainability (MN)\n"
    "* Operational (O)\n"
    "* Performance (PE)\n"
    "* Portability (PO)\n"
    "* Scalability (SC)\n"
    "* Security (SE)\n"
    "* Usability (US)\n"
    "* Non-Functional Requirement (NFR)\n"
    "Please provide the most relevant category for the given statement."
)

# Retry and pacing settings (the values this loop previously hard-coded).
MAX_RETRIES = 5      # attempts per statement before it is given up on
RETRY_DELAY = 30     # seconds to pause after a failed attempt
PROCESS_DELAY = 15   # seconds to pause after a successful request

# Process each statement
# NOTE(review): every preprocessed-prompt cell in this notebook saves to this same
# path, so each run overwrites the previous prompt's results -- confirm intended.
output_file = '/content/Requirement_data_part10_gemini_new.csv'
total = len(df)
unanswered = []  # index labels of rows left without a response

# Series.items() yields the real index labels, which is what df.at expects;
# the separate counter is only used for the progress messages.
for position, (idx, statement) in enumerate(df['Requirement'].items(), start=1):
    # pandas reads empty CSV cells as NaN; skip those rows rather than letting the
    # preprocessing step raise outside the retry loop and abort the whole run.
    if not isinstance(statement, str) or not statement.strip():
        print(f"Skipped {position}/{total}: empty or non-text requirement")
        unanswered.append(idx)
        continue

    preprocessed_statement = preprocess_text(statement)
    prompt = PROMPT_TEMPLATE.format(preprocessed_statement)

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = model.start_chat().send_message(prompt).text.strip()
            df.at[idx, 'Response'] = response
            # Save after every answer so an interrupted run keeps its progress.
            df.to_csv(output_file, index=False)
        except Exception as e:
            if attempt < MAX_RETRIES:
                print(f"Error: {e}. Retrying {attempt}/{MAX_RETRIES} in {RETRY_DELAY}s...")
            else:
                print(f"Error: {e}. Giving up on statement {position}/{total} after {MAX_RETRIES} attempts.")
            # Pause even after the last attempt so the next statement does not
            # immediately run into the same rate limit.
            time.sleep(RETRY_DELAY)
        else:
            print(f"Processed {position}/{total}: {response}")
            time.sleep(PROCESS_DELAY)
            break
    else:
        # Every attempt failed: record the row instead of dropping it silently.
        unanswered.append(idx)

# Write once more so the output file exists even when no request succeeded.
df.to_csv(output_file, index=False)

if unanswered:
    print(f"Processing COMPLETE with {len(unanswered)} unanswered row(s): {unanswered}")
else:
    print("Processing COMPLETE")
