# Analysing the redundancies of User Stories benefits

In [32]:
from load_data import load_datasets_with_annotations as loading
from itertools import combinations
import pandas as pd
from save_data import save_to_json_persistent
import json
from request_handler import send_requierment_to_chatgpt, StoppedAnswerException

### Load all User Stories from the different packages

In [18]:
datasets: dict[str, list] = loading()

### Data transformation to pandas DataFrames and NumPy arrays

In [19]:
### Storing lists of dicts inside DataFrame cells is not the recommended way of working with pandas.
### It is kept here so that every text carries its own entities and relations in a single row.

def transform_data(data):
    """Build a DataFrame with one row per annotated user story.

    Args:
        data: Iterable of dicts. Each dict must provide the keys ``id``,
            ``text``, ``entities`` (dicts with ``id``, ``label``,
            ``start_offset``, ``end_offset``) and ``relations`` (dicts with
            ``id``, ``type``, ``from_id``, ``to_id``).

    Returns:
        pd.DataFrame with the columns ``ID``, ``User Story``, ``Enteties`` and
        ``Relations``. The two annotation columns hold, per row, the list of
        entity / relation dicts belonging to that story only.

    Raises:
        KeyError: If one of the expected keys is missing.
    """
    rows = []
    for line in data:
        # Fresh lists per story: sharing one list across iterations would
        # attach the annotations of every story to every row.
        entities = [
            {
                "id": entity["id"],
                "label": entity["label"],
                "start_offset": entity["start_offset"],
                "end_offset": entity["end_offset"],
            }
            for entity in line['entities']
        ]
        relations = [
            {
                "id": relation["id"],
                "type": relation["type"],
                "from id": relation["from_id"],
                "to id": relation["to_id"],
            }
            for relation in line['relations']
        ]
        rows.append({
            'ID': line['id'],
            'User Story': line['text'],
            'Enteties': entities,
            'Relations': relations,
        })
    return pd.DataFrame(rows)

In [20]:
def transform_data_id_text(data: list):
    """Reduce the raw entries to a two-column frame of story id and story text.

    Args:
        data: List of dicts, each providing at least the keys ``id`` and ``text``.

    Returns:
        pd.DataFrame with the columns ``ID`` and ``User Story``, one row per
        entry, in input order.
    """
    records = [{'ID': item['id'], 'User Story': item['text']} for item in data]
    return pd.DataFrame(records)

In [21]:
def transform_pairwise(df: pd.DataFrame):
    """Expand a frame of stories into every unordered pair of its rows.

    The first two columns of ``df`` are read by position (column 0 as the id,
    column 1 as the story text).

    Args:
        df: Frame whose first column holds the story id and whose second
            column holds the story text.

    Returns:
        pd.DataFrame with the columns ``First ID``, ``First User Story``,
        ``Second ID`` and ``Second User Story``; one row per combination of
        two distinct row positions, the earlier row always first.
    """
    column_names = ('First ID', 'First User Story', 'Second ID', 'Second User Story')
    pair_records = []
    for first, second in combinations(range(len(df)), 2):
        values = (
            df.iloc[first, 0],
            df.iloc[first, 1],
            df.iloc[second, 0],
            df.iloc[second, 1],
        )
        pair_records.append(dict(zip(column_names, values)))
    return pd.DataFrame(pair_records)

## Request definition for User Stories

In [22]:
# Natural-language definitions that are concatenated into the priming prompts below.
# NOTE: the identifier spellings ("defintion", "benifit") are referenced by later cells and are therefore kept.
# Definition of what counts as a "benefit" of a User Story.
defintion_benefit: str = "A benefit of a User Story is the positive outcome or advantage that a user or stakeholder derives from the action or functionality described in a User Story. Benefits typically address specific user needs, goals, or requirements and contribute to enhancing the overall value proposition of the software product. In short a benefit is: the outcome of the action."
# General definition of redundancy inside a User Story.
defintion_general_redundancy: str = "Redundancy in a User Story: Unnecessary repetition or duplication of information, actions, or goals within the description of a User Story. Redundancy in a User Story can obscure the core purpose or value of the feature, leading to confusion, inefficiency, and potential misinterpretation by the development team. Removing redundancy from User Stories is crucial for maintaining clarity, focus, and effectiveness in Agile software development processes." 
# Specific definition of a redundancy between the benefits of several User Stories.
defintion_benifit_redundancy: str = "Redundancy of a benefit occurs when the same positive outcome resulting from a software feature is repeatedly emphasized across multiple user stories or product functionalities. This duplication leads to unnecessary repetition in development efforts, where similar advantages are stated multiple times. In short a benefit redundancy is when two or more User Stories contain the same outcome of an action."


In [23]:
# Example of the JSON answer structure expected for one compared pair of user stories.
# It is embedded verbatim into the prompt that describes the return format.
# NOTE(review): the example uses "Yes" while the accompanying prompt text asks for "yes"/"no" -
# confirm which casing downstream consumers expect.
definition_json_format_pair_user_stories: str = '''
    {
        "redundant": "Yes",
        "related_stories": ["related_story_id_1", "related_story_id_3"],
        "redundancies": [
            {
                "benefit": "Improved Security through Authentication",
                "reference_to_original_text": ["Users should securely access their accounts", "User authentication is crucial for account security"]
            },
            {
                "benefit": "Enhanced User Experience with Secure Access",
                "reference_to_original_text": ["Smooth login experience enhances user satisfaction", "Providing a secure login process is essential for user trust"]
            }
        ]
    }
'''

### Request definition to ChatGPT

In [24]:
# Ordered list of chat messages (dicts with "role" and "content") that primes the model.
# A copy of this list is extended with the concrete request for every pair of user stories.
# In the future it could be used to build a context-aware system.
message: list[dict] = []

# Initial user request: put the model into the role of a Requirements Engineer.
user_inital_message: dict = {"role": "user", "content":"I want you to be a Requirements Engineer who looks for redundancies."}
message.append(user_inital_message)

# Scripted reply to the initial request. The text was generated by ChatGPT,
# so it is replayed with the "assistant" role.
system_is_r_eng = '''As a Requirements Engineer, my primary role is to analyze and document the needs and expectations of stakeholders for a particular system or product. In this capacity, I am responsible for identifying redundancies in requirements to ensure that the final product is efficient, cost-effective, and meets the needs of the end-users.
To identify redundancies, I will carefully review all requirements documentation, including user stories, use cases, and functional specifications. I will look for duplicate or overlapping requirements that can be consolidated or eliminated to streamline the development process. I will also work closely with stakeholders to clarify their needs and priorities, ensuring that all requirements are necessary and relevant to the project.
By eliminating redundancies in requirements, I can help to reduce development time and costs, improve the overall quality of the product, and ensure that the final solution meets the needs of the end-users. My attention to detail and thorough analysis skills make me well-suited for this role, and I am committed to delivering high-quality, efficient requirements that drive successful project outcomes.'''
# The value is a message dict, so it is annotated as dict.
assistent_initial_message: dict = {"role": "assistant", "content": system_is_r_eng}
message.append(assistent_initial_message)

# User turn that introduces the task (benefit redundancies) and appends the benefit definition.
# The line breaks and indentation inside the triple-quoted string are part of the prompt text.
user_defining_benefit = {"role": "user", "content": '''I want you to analyse redundancies of the User Stories benefit. 
                         Therefor you should consider that multiple benefits could be included in a User Story. 
                         Moreover the definition of a benefit is the following: ''' + defintion_benefit}
message.append(user_defining_benefit)

# Scripted reply acknowledging the benefit definition. The text was generated by ChatGPT,
# so it is replayed with the "assistant" role.
system_is_r_eng_defintion_aware: str = '''
    As a Requirements Engineer tasked with analyzing redundancies in User Stories benefits, I will carefully review each User Story to identify any overlapping or duplicate benefits. Given that multiple benefits could be included in a single User Story, it is important to ensure that each benefit is unique and contributes to the overall value proposition of the software product.
    To begin the analysis, I will first familiarize myself with the definition of a benefit in the context of a User Story. A benefit is the positive outcome or advantage that a user or stakeholder derives from the action or functionality described in the User Story. These benefits should address specific user needs, goals, or requirements and enhance the overall value proposition of the software product.
    During the analysis, I will look for User Stories that contain similar or redundant benefits. For example, if two User Stories both mention the benefit of increased efficiency for the user, I will consider consolidating these benefits into a single User Story to avoid duplication. Additionally, I will ensure that each benefit is distinct and contributes to the overall value proposition of the software product.
    By identifying and eliminating redundancies in User Stories benefits, I can help streamline the requirements documentation process, improve the clarity and effectiveness of the User Stories, and ultimately contribute to the successful delivery of a high-quality software product that meets the needs and expectations of the end-users.
'''
message.append({"role":"assistant","content":system_is_r_eng_defintion_aware})

# User turn that supplies both redundancy definitions (general and benefit-specific)
# and asks the model to confirm that it can apply them.
user_defining_redundancy: dict = {"role": "user", "content":"Besides of the definition of a benefit conder the general definition of a redundancy: "
                                  + defintion_general_redundancy
                                  + "\n Additionally consider the specific definition of a redundacy of a benefit: "
                                  + defintion_benifit_redundancy
                                  + "Can you do that?"}
message.append(user_defining_redundancy)

# Scripted reply acknowledging both redundancy definitions. The text was generated by ChatGPT,
# so it is replayed with the "assistant" role.
system_is_r_eng_defintion_aware_redundancy: str = '''
    As a Requirements Engineer focusing on analyzing redundancies in User Stories benefits, I will take into consideration both the general definition of redundancy in a User Story and the specific definition of redundancy of a benefit. I will carefully review each User Story to identify any unnecessary repetition or duplication of information, actions, or goals within the description of the benefits.
    In line with the general definition of redundancy in a User Story, I will look for instances where the same information, actions, or goals are repeated across multiple User Stories. This unnecessary repetition can obscure the core purpose or value of the feature, leading to confusion, inefficiency, and potential misinterpretation by the development team. By removing such redundancies, we can maintain clarity, focus, and effectiveness in Agile software development processes.
    Furthermore, considering the specific definition of redundancy of a benefit, I will pay close attention to instances where the same positive outcome resulting from a software feature is repeatedly emphasized across multiple User Stories or product functionalities. This duplication of benefits leads to unnecessary repetition in development efforts, where similar advantages are stated multiple times. By identifying and addressing benefit redundancies, we can streamline the requirements documentation process, avoid duplication of efforts, and ensure that each User Story contributes unique value to the software product.
    By conducting a thorough analysis of redundancies in User Stories benefits, I aim to enhance the quality and effectiveness of the requirements documentation, improve communication and understanding within the development team, and ultimately contribute to the successful delivery of a high-quality software product that meets the needs and expectations of the stakeholders.
'''
message.append({"role":"assistant", "content": system_is_r_eng_defintion_aware_redundancy})

# User turn that fixes the output format: the JSON example is embedded,
# followed by one instruction per field of the expected answer.
# NOTE(review): the instructions ask for "yes"/"no" while the embedded example uses "Yes" - confirm the intended casing.
user_defining_return_json_format = {"role":"user", "content": "You shall return a json format. An example is: " 
                                    + definition_json_format_pair_user_stories 
                                    + "If the user stories are redundant place in the field \"redundant\" the value \"yes\" and if not then \"no\". \n"
                                    + "For the field \"related_stories\" you always insert the id's of the compared user stories.\n"
                                    + "Insert into the field \"benefit\" your describtion of the benefit.\n"
                                    + "Insert into the field \"reference_to_original_text\" the reference where you found the redundancy.\n"
                                    + "If the user stories are not redundant the you the \"redundancies\" array shall be empty."
                                    + "Do you know how to create such a return structure?"}
message.append(user_defining_return_json_format)

# Scripted confirmation that the model can produce the requested JSON structure.
# NOTE(review): this first-person reply is sent with role "system", whereas the other scripted
# replies in this cell use "assistant" - confirm whether "system" is intended here.
message.append({"role":"system", "content": "Yes, I can create a return structure in the JSON format as requested and just return the json format."})


In [25]:
def templat_request_two_user_stories(current_message: list[dict], user_story_one: str, user_story_two: str, user_story_one_id: str, user_story_two_id: str,):
    """Append the comparison request for two user stories to a conversation.

    The prompt segments are concatenated without any separator, so each story
    text directly follows its ``User Story N (id: ...):`` label.

    Args:
        current_message: Conversation to extend; mutated in place.
        user_story_one: Text of the first user story.
        user_story_two: Text of the second user story.
        user_story_one_id: Identifier reported for the first story.
        user_story_two_id: Identifier reported for the second story.

    Returns:
        None. The new user message is appended to ``current_message``.
    """
    prompt_parts = (
        "Please process the two following user stories:",
        f"User Story 1 (id: {user_story_one_id}):",
        user_story_one,
        f"User Story 2 (id: {user_story_two_id}):",
        user_story_two,
    )
    current_message.append({"role": "user", "content": "".join(prompt_parts)})

## Data Processing Pipeline Definition for User Stories

In [46]:
def process_user_stories(pairs: pd.DataFrame, results: list, exceptions_during_processing: list, max_pairs: "int | None" = 16) -> None:
    """Send each pair of user stories to the model and collect the parsed answers.

    For every processed row a copy of the module-level ``message`` priming
    conversation is extended with the pair-specific request and sent via
    ``send_requierment_to_chatgpt``. The answer is parsed as JSON and appended
    to ``results``. Failures are recorded in ``exceptions_during_processing``
    as dicts with the keys ``Reason``, ``UID1`` and ``UID2``, and processing
    continues with the next pair.

    Args:
        pairs: Frame read by position: column 0 = first id, column 1 = first
            story text, column 2 = second id, column 3 = second story text.
        results: Output list; receives one parsed JSON object per successful
            request. Mutated in place.
        exceptions_during_processing: Output list; receives one error record
            per failed request. Mutated in place.
        max_pairs: Upper bound on the number of rows processed, counted from
            the top of ``pairs``. Defaults to 16, the previous hard-coded
            debugging cap. Pass ``None`` to process every row.

    Returns:
        None.
    """
    pair_count = len(pairs) if max_pairs is None else min(len(pairs), max_pairs)

    for idx in range(pair_count):
        first_id = str(pairs.iat[idx, 0])
        second_id = str(pairs.iat[idx, 2])

        # Shallow copy: the priming messages are shared, only the list is extended.
        current_message: list[dict] = message.copy()
        templat_request_two_user_stories(current_message, pairs.iat[idx, 1], pairs.iat[idx, 3], first_id, second_id)
        try:
            response = send_requierment_to_chatgpt(current_message)
            results.append(json.loads(response))
        except StoppedAnswerException:
            # The record is already a dict; passing it through json.loads would raise TypeError.
            exceptions_during_processing.append(
                {"Reason": "Not stopped exception from ChatGPT Endpoint", "UID1": first_id, "UID2": second_id}
            )
        except ValueError:
            # json.JSONDecodeError is a ValueError, so unparsable answers are recorded here.
            exceptions_during_processing.append(
                {"Reason": "ValueError", "UID1": first_id, "UID2": second_id}
            )
        

## Data processing for: G02 federal funding

In [33]:
# Select the G02 dataset by its key and derive the two frames used below:
# df_g02 (ID / User Story) and df_g02_pairs (every unordered pair of stories).
id_g02: str = "#G02#"
data_g02: list = datasets[id_g02]
df_g02: pd.DataFrame = transform_data_id_text(data_g02)
df_g02_pairs = transform_pairwise(df_g02)

In [27]:
# Show the G02 story frame. `display` is not imported in the visible source;
# presumably the IPython/Jupyter built-in - confirm when running outside a notebook.
display(df_g02)

Unnamed: 0,ID,User Story
0,215,"As a Data user, I want to have the 12-19-2017 ..."
1,216,"As a UI designer, I want to redesign the Resou..."
2,217,"As a UI designer, I want to report to the Agen..."
3,218,"As a UI designer, I want to move on to round 2..."
4,220,"As a UI designer, I want to move on to round 3..."
...,...,...
90,308,"As a FABS user, I want to have my validations ..."
91,309,"As a FABS user, I want to see correct status l..."
92,310,"As an agency user, I want to know when the sub..."
93,311,"As an agency user, I want a landing page to na..."


In [28]:
# Show the pairwise G02 frame. `display` is not imported in the visible source;
# presumably the IPython/Jupyter built-in - confirm when running outside a notebook.
display(df_g02_pairs)

Unnamed: 0,First ID,First User Story,Second ID,Second User Story
0,215,"As a Data user, I want to have the 12-19-2017 ...",216,"As a UI designer, I want to redesign the Resou..."
1,215,"As a Data user, I want to have the 12-19-2017 ...",217,"As a UI designer, I want to report to the Agen..."
2,215,"As a Data user, I want to have the 12-19-2017 ...",218,"As a UI designer, I want to move on to round 2..."
3,215,"As a Data user, I want to have the 12-19-2017 ...",220,"As a UI designer, I want to move on to round 3..."
4,215,"As a Data user, I want to have the 12-19-2017 ...",221,"As a Developer , I want to be able to log bett..."
...,...,...,...,...
4460,309,"As a FABS user, I want to see correct status l...",311,"As an agency user, I want a landing page to na..."
4461,309,"As a FABS user, I want to see correct status l...",312,"As an agency user, I want to submit my data el..."
4462,310,"As an agency user, I want to know when the sub...",311,"As an agency user, I want a landing page to na..."
4463,310,"As an agency user, I want to know when the sub...",312,"As an agency user, I want to submit my data el..."


### Process the User Stories

In [29]:
# Output containers filled in place by process_user_stories:
# parsed model answers and error records for G02.
results_g02: list = []
exceptions_g02: list = []

process_user_stories(df_g02_pairs, results_g02, exceptions_g02)

### Store the results persistently

In [30]:
# Bundle the G02 answers (and the error records, only when there are any) and persist them.
# NOTE(review): the G03 cell saves under the same "benefit" name - confirm that
# save_to_json_persistent merges rather than overwrites.
results_collection: dict = {'#G02#': results_g02}
if exceptions_g02:
    results_collection['#G02#Exceptions'] = exceptions_g02

save_to_json_persistent("benefit", results_collection)

## Data processing for: G03 loudoun

In [40]:
# Select the G03 dataset by its key and derive the two frames used below:
# df_g03 (ID / User Story) and df_g03_pairs (every unordered pair of stories).
id_g03: str = "#G03#"
data_g03: list = datasets[id_g03]
df_g03: pd.DataFrame = transform_data_id_text(data_g03)
df_g03_pairs = transform_pairwise(df_g03)

In [41]:
# Preview the first rows of the G03 story frame.
df_g03.head()

Unnamed: 0,ID,User Story
0,315,"As a Public User, I want to Search for Informa..."
1,316,"As a ProspectiveApplicant, I want to research ..."
2,317,"As an Applicant, I want to Request PreApplicat..."
3,318,"As a Customer, I want to Create a Customer Por..."
4,319,"As an Applicant, I want to Submit Application,..."


In [42]:
# Show the pairwise G03 frame. `display` is not imported in the visible source;
# presumably the IPython/Jupyter built-in - confirm when running outside a notebook.
display(df_g03_pairs)

Unnamed: 0,First ID,First User Story,Second ID,Second User Story
0,315,"As a Public User, I want to Search for Informa...",316,"As a ProspectiveApplicant, I want to research ..."
1,315,"As a Public User, I want to Search for Informa...",317,"As an Applicant, I want to Request PreApplicat..."
2,315,"As a Public User, I want to Search for Informa...",318,"As a Customer, I want to Create a Customer Por..."
3,315,"As a Public User, I want to Search for Informa...",319,"As an Applicant, I want to Submit Application,..."
4,315,"As a Public User, I want to Search for Informa...",320,"As an Applicant, I want to Submit Supporting D..."
...,...,...,...,...
1591,369,"As a County Staff Member, I want to create a c...",371,"As a Technical Staff member, I want to update ..."
1592,369,"As a County Staff Member, I want to create a c...",372,"As a Staff member, I want to Send or Post Cita..."
1593,370,"As a Staff member, I want to create each condo...",371,"As a Technical Staff member, I want to update ..."
1594,370,"As a Staff member, I want to create each condo...",372,"As a Staff member, I want to Send or Post Cita..."


### Process the User Stories

In [54]:
# Output containers filled in place by process_user_stories:
# parsed model answers and error records for G03.
results_g03: list = []
exceptions_g03: list = []

process_user_stories(df_g03_pairs, results_g03, exceptions_g03)

### Store the results persistently

In [55]:
# Bundle the G03 answers (and the error records, only when there are any) and persist them.
# NOTE(review): the G02 cell saves under the same "benefit" name - confirm that
# save_to_json_persistent merges rather than overwrites.
results_collection: dict = {'#G03#': results_g03}
if exceptions_g03:
    results_collection['#G03#Exceptions'] = exceptions_g03

save_to_json_persistent("benefit", results_collection)