# Analysing the redundancies of User Stories benefits

In [17]:
from support_functions.load_data import load_datasets_with_annotations as loading
from support_functions.request_handler import send_requierment_to_chatgpt, StoppedAnswerException
from support_functions.TimeRecorder import TimeRecorder
from support_functions.save_data import save_to_json_persistent
from itertools import combinations
import pandas as pd
import json
import tiktoken
import os

In [5]:
# Name of the model under test, read from the MODEL_VERSION environment variable
# (None when the variable is not set). Printed so the run documents which model was used.
MODEL_VERSION_NAME = os.environ.get('MODEL_VERSION')
print(MODEL_VERSION_NAME)

gpt-4-turbo


### Load all User Stories from the different packages

In [6]:
# Load all annotated user-story datasets via load_datasets_with_annotations.
# The cells below look datasets up by package key (e.g. "#G02#", "#G03#").
datasets: dict[str, list] = loading()

### Data transformation to pandas DataFrames and NumPy arrays

In [7]:
def transform_data_id_text(data: list) -> pd.DataFrame:
    """Convert a list of user-story entries into a two-column DataFrame.

    Args:
        data: Iterable of mappings; each entry must provide the keys
            'id' and 'text'. Any other keys are ignored.

    Returns:
        A DataFrame with the columns 'ID' and 'User Story', one row per
        entry, in input order. The columns are present even when `data`
        is empty.

    Raises:
        KeyError: If an entry lacks the 'id' or 'text' key.
    """
    rows = [{'ID': entry['id'], 'User Story': entry['text']} for entry in data]
    # Naming the columns explicitly keeps the schema stable for empty input;
    # without it an empty list yields a DataFrame that has no columns at all.
    return pd.DataFrame(rows, columns=['ID', 'User Story'])

In [8]:
# n(n-1) / 2
def transform_pairwise(df: pd.DataFrame) -> pd.DataFrame:
    """Build every unordered pair of user stories from an ID/text DataFrame.

    For n input rows the result has n(n-1)/2 rows, ordered like
    itertools.combinations over the row positions.

    Args:
        df: DataFrame whose first column holds the story id and whose
            second column holds the story text (columns are read by position).

    Returns:
        A DataFrame with the columns 'First ID', 'First User Story',
        'Second ID' and 'Second User Story'. The columns are present even
        when `df` has fewer than two rows and therefore yields no pairs.
    """
    pair_columns = ['First ID', 'First User Story', 'Second ID', 'Second User Story']
    # Fewer than two rows means no pairs; return early so a column-less
    # empty frame is never indexed by position.
    if df.shape[0] < 2:
        return pd.DataFrame(columns=pair_columns)
    # Read both columns once instead of doing four scalar .iloc lookups per pair.
    stories = list(zip(df.iloc[:, 0], df.iloc[:, 1]))
    rows = [(*first, *second) for first, second in combinations(stories, 2)]
    return pd.DataFrame(rows, columns=pair_columns)

## Request definition for User Stories

In [9]:
# Natural-language definitions that are interpolated into the prompt messages below.
# They are prompt text sent to the model, so any wording change alters the request.

# What the "main part" of a User Story is.
definition_main_part: str = "The main part of a User Story describes the core action that a persona wishes to accomplish, such as 'As an Applicant, I want to submit an application.' This part specifies the main functionality or feature the story aims to deliver."
# What the "benefit" of a User Story is.
definition_benefit: str = "A benefit in a User Story refers to the positive outcome or advantage that a user or stakeholder gains from the functionality described in the User Story. It directly addresses user needs and enhances the value of the software product, providing a clear motive for the user action, like easing a task or improving efficiency."
# General definition of redundancy.
definition_general_redundancy: str = "Redundancy in a User Story refers to the unnecessary repetition or duplication of information within the story's description. Such redundancies can cloud the core purpose or value of the feature, leading to potential confusion or misinterpretation by the development team. It's crucial to remove redundancies to maintain clarity and focus in development."
# Redundancy restricted to the main part (persona repetitions and benefits excluded).
definition_main_part_redundancy: str = "A main part redundancy in User Stories refers to unnecessary duplication in the description of the action or functionality that the persona aims to achieve, excluding persona repetitions and benefits. This type of redundancy occurs when similar actions or functionalities are described in multiple stories without adding distinct value or clarity, potentially leading to confusion or inefficiencies in the development process."
# Redundancy restricted to the benefit.
definition_benefit_redundancy: str = "Benefit redundancy in User Stories occurs when multiple stories repeatedly emphasize the same positive outcomes of similar software features. This duplication can lead to inefficiencies as the same advantages are stated multiple times, potentially diluting the impact and clarity of user benefits in the product documentation."

In [10]:
# TODO: maybe add a field for an explanation.
# TODO: define that the first entry of referenceToOriginalText has to come from the
#       first user story and the second entry from the second user story.
# TODO: add a description of why it is redundant.

# Example of the JSON answer expected from the model for one pair of user stories.
# The text is embedded verbatim in the prompt. Apart from the two '//' hint lines it is
# valid JSON: quotations inside string values use single quotes so that the example
# does not teach the model to emit unescaped double quotes, which json.loads rejects.
# The key spellings ('redundancyTitel', 'reasonDescribtion') are part of the output
# format and are kept unchanged.
definition_json_format_pair_user_stories: str = '''
{
    "relatedStories": [1, 3],
    "redundantMainPart": true, 
    "mainPartRedundancies": [
        {
            "redundancyTitel": "Improved Security through Authentication",
            "reasonDescribtion": "The redundancy arises from the repetition of the concept of user verification expressed in slightly different ways for emphasis or to cater to different audiences.",
            "referenceToOriginalText": ["As a User, I want to verify myself", "As an application User, I want to verify that I am who I am"]
        },
        {
            "redundancyTitel": "Testing possibilities",
            "reasonDescribtion": "The redundancy here stems from the repetition of the testing action and the tested entity, expressed in slightly different ways, likely for emphasis or to accommodate different perspectives",
            "referenceToOriginalText": ["As a software tester, I want to begin user testing", "As a tester, I want to begin testing the software"]
        }
        // Additional main part redundancies can be added here
    ],
    "redundantBenefit": true, 
    "benefitRedundancies": [
        {
            "redundancyTitel": "User login button",
            "reasonDescribtion": "The redundancy in these sentences lies in the repetition of the action ('login') and the destination ('webpage' and 'website'), which convey the same idea using slightly different wording.",
            "referenceToOriginalText": ["I can login into the webpage.", "I could login into the website"]
        },
        {
            "redundancyTitel": "Printing a document",
            "reasonDescribtion": "The redundancy occurs due to the repetition of the action ('print') expressed in slightly different ways, conveying the same meaning of initiating the printing process.",
            "referenceToOriginalText": ["I can print a document", "I can give the order to print"]
        }
        // Additional benefit redundancies can be added here
    ]
}
'''

### Request definition to ChatGPT

In [11]:
# Ordered list of chat messages that forms the fixed prompt prefix for every request.
# In the future it could be extended into a context-aware conversation.
message: list[dict] = []

# Initial user request: sets the persona and the task.
user_inital_message: dict = {
  "role": "user",
  "content": "Act as a Requirements Engineer focused on identifying redundancies. Please review the User Stories and pinpoint any unnecessary duplications that obscure clarity or add no distinct value."
}
message.append(user_inital_message)

# This reply was generated by ChatGPT.
# NOTE(review): it is sent with role "system", not "assistant" — confirm this is intended.
system_is_r_eng = '''As a requirements engineer in agile development, it is my responsibility to review user stories for redundancies. My goal is to identify and report any overlapping or duplicate requirements. By carefully analysing the user stories in depth, I ensure that each requirement is necessary and contributes uniquely, increasing the coherence of the product.'''
assistent_initial_message: dict = {"role": "system", "content": system_is_r_eng}
message.append(assistent_initial_message)

# User turn that supplies the definitions of the main part and of the benefit.
user_defining_benefit = {"role": "user", "content": "Please analyze redundancies in the main part of two given User Stories as well as the benefit. "
    "Note that a User Story may include multiple redundancies. The definition of the main part is "
    f"{definition_main_part} and the definition of a benefit is as follows: {definition_benefit}"}
message.append(user_defining_benefit)

# This reply was generated by ChatGPT (also sent with role "system", see the note above).
system_is_r_eng_defintion_aware: str ="I will analyze redundancies in the main parts and benefits of two User Stories. Each story might include multiple redundancies. The main part typically describes the desired functionality by the persona, while the benefit details the positive outcomes from the functionality."
message.append({"role":"system","content":system_is_r_eng_defintion_aware})

# User turn that supplies the general, main-part and benefit redundancy definitions.
user_defining_redundancy: dict = {"role": "user", "content":f"Please analyze the User Stories considering the general definition of redundancy: {definition_general_redundancy} "
    f"Additionally, take into account the specific redundancy in the main part: {definition_main_part_redundancy} "
    f"and the redundancy in benefits: {definition_benefit_redundancy}. Can you do that?"}
message.append(user_defining_redundancy)

# This reply was generated by ChatGPT (also sent with role "system", see the note above).
system_is_r_eng_defintion_aware_redundancy: str = "I'll review the User Stories for redundancies in main parts and benefits, using the specified definitions. Ready to proceed?"
message.append({"role":"system", "content": system_is_r_eng_defintion_aware_redundancy})

# User turn that explains the expected JSON output format field by field.
# The field names in the explanation must match the keys of the example in
# definition_json_format_pair_user_stories ('redundantMainPart', 'redundantBenefit',
# 'redundancyTitel', 'reasonDescribtion', 'referenceToOriginalText'); describing keys
# that the example does not contain gives the model contradictory instructions.
user_defining_return_json_format = {"role":"user", "content": f"Before we proceed, consider the following JSON output format: {definition_json_format_pair_user_stories} This JSON structure organizes information about redundancies in User Stories, focusing on both the main parts and the benefits. Each section includes descriptions of the redundancies and specific text references to illustrate where these redundancies occur within the stories. \n"
                                    + "1.) The fields 'redundantMainPart' and 'redundantBenefit' can be true or false. true indicates that the main parts, respectively the benefits, of the pair of User Stories are redundant. Both are mandatory fields. \n"
                                    + "2.) The field 'relatedStories' is an array of two integer values. The values are the ids of the two user stories and this field is also mandatory. \n"
                                    + "3.) The array 'mainPartRedundancies' contains objects, each representing a specific redundancy found within the main parts of User Stories. This array is mandatory to be filled when the field 'redundantMainPart' is true. \n"
                                    + "3.1) The field 'reasonDescribtion' is a sentence or two where a reason describtion for the redundancy is inserted. \n"
                                    + "3.2.) The field 'redundancyTitel' is a brief title of what the redundancy is about. \n"
                                    + "3.3.) The array 'referenceToOriginalText' holds the exact text from the User Stories that demonstrates the redundancy.  Thus, the array has two entries. The first entry is from the first user story and the second entry is from the second user story. \n"
                                    + "4.) The array 'benefitRedundancies' is similar to 'mainPartRedundancies' but contain not the same content. This array details redundancies found in the benefits described by the User Stories. This array is mandatory to be filled when the field 'redundantBenefit' is true.  \n"
                                    + "4.1) The field 'reasonDescribtion' is a sentence or two where a reason describtion for the redundancy is inserted. \n"
                                    + "4.2.) The field 'redundancyTitel' is a brief title of what the redundancy is about. \n"
                                    + "4.3.) The array 'referenceToOriginalText' lists the text from the stories that highlight the redundant benefits, helping to pinpoint where exactly these duplications occur. Thus, the array has two entries. The first entry is from the first user story and the second entry is from the second user story. \n"
                                    + "Are you ready to analyze redundancies in User Stories, focusing on main parts and benefits, and return the findings in JSON format?"}
message.append(user_defining_return_json_format)

# Canned confirmation that the format was understood; it closes the fixed prompt prefix.
message.append({"role":"system", "content": "I've noted the JSON output format specified. I am ready to analyze redundancies in User Stories, focusing on the main parts and benefits. Shall we begin?"})


In [12]:
# Print every prompt message, then the token ids and the token count of the
# concatenated prompt, using the tokenizer that belongs to the configured model.
for entry in message:
    print(entry["content"])
message_text: str = "".join(entry["content"] for entry in message)

enc = tiktoken.encoding_for_model(os.getenv('MODEL_VERSION'))
token_size = enc.encode(message_text)  # list of token ids; its length is the prompt size

separator = "-" * 3
print(separator + "Token" + separator)
print(token_size)
print(separator + "Tokensize" + separator)
print(len(token_size))
    

Act as a Requirements Engineer focused on identifying redundancies. Please review the User Stories and pinpoint any unnecessary duplications that obscure clarity or add no distinct value.
As a requirements engineer in agile development, it is my responsibility to review user stories for redundancies. My goal is to identify and report any overlapping or duplicate requirements. By carefully analysing the user stories in depth, I ensure that each requirement is necessary and contributes uniquely, increasing the coherence of the product.
Please analyze redundancies in the main part of two given User Stories as well as the benefit. Note that a User Story may include multiple redundancies. The definition of the main part is The main part of a User Story describes the core action that a persona wishes to accomplish, such as 'As an Applicant, I want to submit an application.' This part specifies the main functionality or feature the story aims to deliver. and the definition of a benefit is as 

## Data Processing Definition Pipeline for User Stories

In [13]:
def templat_request_two_user_stories(current_message: list[dict], user_story_one: str, user_story_two: str, user_story_one_id: str, user_story_two_id: str,):
    """Append the user request for one pair of user stories to a conversation.

    Args:
        current_message: Conversation (list of chat messages) that is extended in place.
        user_story_one: Text of the first user story.
        user_story_two: Text of the second user story.
        user_story_one_id: Id of the first user story.
        user_story_two_id: Id of the second user story.

    Returns:
        None. The new message is appended to `current_message`.
    """
    content = (
        "Yes. Please process the following pair of user stories:\n"
        f"id: {user_story_one_id}, describtion:  {user_story_one}\n"
        f"id: {user_story_two_id}, describtion: {user_story_two}"
    )
    current_message.append({"role": "user", "content": content})

In [16]:
def process_user_stories(pairs: pd.DataFrame, results: list, exceptions_during_processing: list) -> None:
    """Send every pair of user stories to ChatGPT and collect the parsed answers.

    For each row a copy of the fixed prompt prefix `message` is extended with the
    pair-specific request, sent to the model, and the answer is parsed as JSON.

    Args:
        pairs: DataFrame read by position: column 0 = first id, 1 = first story
            text, 2 = second id, 3 = second story text.
        results: Output list; for every successful request the parsed answer is
            appended with an additional leading key 'elipsedTimeNs'.
        exceptions_during_processing: Output list; for every pair that raised
            StoppedAnswerException or ValueError (which includes answers that are
            not valid JSON) a dict with 'Reason', 'UID1' and 'UID2' is appended.

    Returns:
        None. Both output lists are filled in place, so partial results survive
        if the loop is aborted by any other exception.
    """
    for idx in range(len(pairs)):
        first_id = str(pairs.iat[idx, 0])
        second_id = str(pairs.iat[idx, 2])
        # Shallow copy: each request starts from the same prefix and only appends to it.
        current_message: list[dict] = message.copy()
        templat_request_two_user_stories(current_message, pairs.iat[idx, 1], pairs.iat[idx, 3], first_id, second_id)
        try:
            time_recorder: TimeRecorder = TimeRecorder()
            response = send_requierment_to_chatgpt(current_message, time_recorder)
            json_object = json.loads(response)
            results.append({'elipsedTimeNs': time_recorder.nanoseconds, **json_object})
        except StoppedAnswerException:
            # The failure record is already a dict and is stored as is. Passing it
            # through json.loads() raised TypeError, which aborted the whole run
            # instead of recording the failed pair.
            exceptions_during_processing.append(
                {"Reason": "Not stopped exception from ChatGPT Endpoint", "UID1": first_id, "UID2": second_id}
            )
        except ValueError:
            # json.JSONDecodeError is a ValueError, so unparsable answers end up here.
            exceptions_during_processing.append(
                {"Reason": "ValueError", "UID1": first_id, "UID2": second_id}
            )

## Data processing for: G02 federal funding

In [None]:
# Select the G02 dataset, convert it to an ID/text DataFrame and build all
# unordered pairs of its user stories.
id_g02: str = "#G02#"
data_g02: list = datasets[id_g02]
df_g02: pd.DataFrame = transform_data_id_text(data_g02)
df_g02_pairs = transform_pairwise(df_g02)

In [None]:
# Show the G02 user stories (ID and text) as a sanity check.
display(df_g02)

Unnamed: 0,ID,User Story
0,215,"As a Data user, I want to have the 12-19-2017 ..."
1,216,"As a UI designer, I want to redesign the Resou..."
2,217,"As a UI designer, I want to report to the Agen..."
3,218,"As a UI designer, I want to move on to round 2..."
4,220,"As a UI designer, I want to move on to round 3..."
...,...,...
90,308,"As a FABS user, I want to have my validations ..."
91,309,"As a FABS user, I want to see correct status l..."
92,310,"As an agency user, I want to know when the sub..."
93,311,"As an agency user, I want a landing page to na..."


In [None]:
# Show the G02 story pairs; the row count should equal n(n-1)/2 for n stories.
display(df_g02_pairs)

Unnamed: 0,First ID,First User Story,Second ID,Second User Story
0,215,"As a Data user, I want to have the 12-19-2017 ...",216,"As a UI designer, I want to redesign the Resou..."
1,215,"As a Data user, I want to have the 12-19-2017 ...",217,"As a UI designer, I want to report to the Agen..."
2,215,"As a Data user, I want to have the 12-19-2017 ...",218,"As a UI designer, I want to move on to round 2..."
3,215,"As a Data user, I want to have the 12-19-2017 ...",220,"As a UI designer, I want to move on to round 3..."
4,215,"As a Data user, I want to have the 12-19-2017 ...",221,"As a Developer , I want to be able to log bett..."
...,...,...,...,...
4460,309,"As a FABS user, I want to see correct status l...",311,"As an agency user, I want a landing page to na..."
4461,309,"As a FABS user, I want to see correct status l...",312,"As an agency user, I want to submit my data el..."
4462,310,"As an agency user, I want to know when the sub...",311,"As an agency user, I want a landing page to na..."
4463,310,"As an agency user, I want to know when the sub...",312,"As an agency user, I want to submit my data el..."


### Process the User Stories

In [None]:
# Output containers are created outside the call so that answers collected so far
# remain available if the run is interrupted.
results_g02: list = []
exceptions_g02: list = []

# Sends one request per story pair; fills both lists in place.
process_user_stories(df_g02_pairs, results_g02, exceptions_g02)

KeyboardInterrupt: 

### Store the results persistently

In [None]:
# Bundle the G02 answers (and the recorded failures, if there are any) and persist them.
results_collection: dict = {'#G02#': results_g02}
if exceptions_g02:
    results_collection['#G02#Exceptions'] = exceptions_g02

# NOTE(review): this cell saves under the fixed name "benefit", whereas the G03 cell
# saves under f"redundancy-model-{MODEL_VERSION_NAME}" — confirm which name is intended.
save_to_json_persistent("benefit", results_collection)

## Data processing for: G03 loudoun

In [15]:
# Select the G03 dataset, convert it to an ID/text DataFrame and build all
# unordered pairs of its user stories.
id_g03: str = "#G03#"
data_g03: list = datasets[id_g03]
df_g03: pd.DataFrame = transform_data_id_text(data_g03)
df_g03_pairs = transform_pairwise(df_g03)

In [16]:
# Preview the first G03 user stories (ID and text).
df_g03.head()

Unnamed: 0,ID,User Story
0,315,"As a Public User, I want to Search for Informa..."
1,316,"As a ProspectiveApplicant, I want to research ..."
2,317,"As an Applicant, I want to Request PreApplicat..."
3,318,"As a Customer, I want to Create a Customer Por..."
4,319,"As an Applicant, I want to Submit Application,..."


In [17]:
# Show the G03 story pairs; the row count should equal n(n-1)/2 for n stories.
display(df_g03_pairs)

Unnamed: 0,First ID,First User Story,Second ID,Second User Story
0,315,"As a Public User, I want to Search for Informa...",316,"As a ProspectiveApplicant, I want to research ..."
1,315,"As a Public User, I want to Search for Informa...",317,"As an Applicant, I want to Request PreApplicat..."
2,315,"As a Public User, I want to Search for Informa...",318,"As a Customer, I want to Create a Customer Por..."
3,315,"As a Public User, I want to Search for Informa...",319,"As an Applicant, I want to Submit Application,..."
4,315,"As a Public User, I want to Search for Informa...",320,"As an Applicant, I want to Submit Supporting D..."
...,...,...,...,...
1591,369,"As a County Staff Member, I want to create a c...",371,"As a Technical Staff member, I want to update ..."
1592,369,"As a County Staff Member, I want to create a c...",372,"As a Staff member, I want to Send or Post Cita..."
1593,370,"As a Staff member, I want to create each condo...",371,"As a Technical Staff member, I want to update ..."
1594,370,"As a Staff member, I want to create each condo...",372,"As a Staff member, I want to Send or Post Cita..."


### Process the User Stories

In [18]:
# Output containers are created outside the call so that answers collected so far
# remain available if the run is aborted by an error.
results_g03: list = []
exceptions_g03: list = []

# Sends one request per story pair; fills both lists in place.
process_user_stories(df_g03_pairs, results_g03, exceptions_g03)

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

### Store the results persistently

In [19]:
# Bundle the G03 answers (and the recorded failures, if there are any) and persist
# them under a name that contains the model version.
results_collection: dict = {'#G03#': results_g03}
if exceptions_g03:
    results_collection['#G03#Exceptions'] = exceptions_g03

save_to_json_persistent(f"redundancy-model-{MODEL_VERSION_NAME}", results_collection)