# Analysing the redundancies of User Stories - Annotations but no US

In [1]:
import locale
import os
from itertools import combinations

import pandas as pd
import tiktoken
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils import get_column_letter as utils_get_column_letter
#from openpyxl.worksheet.worksheet import Worksheet

from support_functions.excel_helper import save_to_excel
from support_functions.load_data import load_datasets_with_annotations as loading
from support_functions.request_handler import process_user_stories, process_user_stories_parallel
from support_functions.data_transformations import convert_annotation_dataset
from support_functions.time_recorder import TimeRecorder
from support_functions.json_validator import validation, chat_gpt_schema_with_annotations

In [2]:
def _configure_locale(candidates: tuple = ('de_DE', 'de_DE.UTF-8')) -> None:
    """Activate the first locale from ``candidates`` that the system provides.

    The German locale is used later for thousands grouping in
    ``locale.format_string``. If none of the candidates is installed, the
    current locale is kept and a warning is printed instead of aborting the
    whole notebook with ``locale.Error``.
    """
    for candidate in candidates:
        try:
            locale.setlocale(locale.LC_ALL, candidate)
            return
        except locale.Error:
            continue
    print(f"Warning: none of the locales {candidates} is available; keeping the current locale.")


def _env_flag(name: str, default: bool = False) -> bool:
    """Read a boolean switch from the environment.

    Integer strings keep their previous meaning (``"0"`` is False, any other
    integer is True). Additionally ``true``/``yes``/``on`` (case-insensitive)
    count as True, and an unset or empty variable yields ``default`` instead
    of raising ``TypeError``.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return bool(int(raw))
    except ValueError:
        return raw.strip().lower() in {"true", "yes", "on"}


_configure_locale()
# May be None when MODEL_VERSION is not set; later cells concatenate it into paths.
MODEL_VERSION_NAME = os.getenv('MODEL_VERSION')
THREADING = _env_flag('THREADING')
print(f"Threading is used: {THREADING}")
print(MODEL_VERSION_NAME)

Threading is used: True
gpt-3.5-turbo


In [3]:
# Maps a dataset key to the TimeRecorder of its processing run; filled by the processing cell.
time_recorders: dict = {}
# Recorder of the run currently being processed; replaced by a fresh TimeRecorder per dataset.
time_recorder: TimeRecorder = None

### Load all User Stories from the different packages

We do not need:
- POS

Highlighting words with #

In [4]:
# Load the raw annotated datasets and print the first raw entry of dataset "g02".
not_processed_datasets: dict[str, list] = loading()
print(not_processed_datasets["g02"][0])

{'PID': '#G02#', 'Text': '#G02# As a Data user, I want to have the 12-19-2017 deletions processed.', 'Persona': ['Data user'], 'Action': {'Primary Action': ['have'], 'Secondary Action': ['processed']}, 'Entity': {'Primary Entity': ['12-19-2017 deletions'], 'Secondary Entity': ['']}, 'Benefit': '', 'Triggers': [['Data user', 'have']], 'Targets': [['processed', '12-19-2017 deletions'], ['have', '12-19-2017 deletions']], 'Contains': [], 'Persona POS': {'Persona POS tag': [['NOUN', 'NOUN']], 'Persona POS text': [['Data', 'user']]}, 'Action POS': {'Primary Action POS': {'Primary Action POS tag': [['VERB']], 'Primary Action POS text': [['have']]}, 'Secondary Action POS': {'Secondary Action POS tag': [['VERB']], 'Secondary Action POS text': [['processed']]}}, 'Entity POS': {'Primary Entity POS': {'Primary Entity POS tag': [['NUM', 'NOUN']], 'Primary Entity POS text': [['12-19-2017', 'deletions']]}, 'Secondary Entity POS': {'Secondary Entity POS tag': [[]], 'Secondary Entity POS text': [[]]}}}

### Converting the data to a suitable format

In [5]:
# Returns the converted datasets plus the items that were ignored during conversion.
# NOTE(review): presumably these are stories with incomplete labelling - confirm in convert_annotation_dataset.
datasets, ignored_items = convert_annotation_dataset(not_processed_datasets)

In [6]:
# Inspect one converted entry of dataset "g02".
datasets["g02"][2]

{'PID': '#G02#',
 'USID': '217',
 'Text': 'As a UI designer, I want to report to the Agencies about user testing, so that they are aware of their contributions to making Broker a better UX.',
 'Main Part': 'As a UI designer, I want to report to the Agencies about user testing, ',
 'Benefit': 'they are aware of their contributions to making Broker a better UX',
 'Triggers': {'Main Part': [['UI designer', 'report']], 'Benefit': []},
 'Targets': {'Main Part': [['report', 'user testing']],
  'Benefit': [['aware', 'contributions'], ['making', 'Broker']]},
 'Contains': {'Main Part': [], 'Benefit': []}}

In [7]:
# Inspect the first ignored item recorded under the key "#G02#".
ignored_items["#G02#"][0]

'PID: #G02#; Text: #G02# As a UI designer, I want to redesign the Resources page, so that it matches the new Broker design styles.; Label Type: Contain'

In [8]:
def formatter_time(wb: Workbook, sheet_name: str) -> None:
    """Format the worksheet that lists the ignored items.

    Applies a bold header font, sets the column widths from the header text
    length, left-aligns all cells, enables an auto filter on the header row,
    freezes the header row and wraps the text of all data rows.

    Args:
        wb: Workbook that contains the sheet.
        sheet_name: Name of the sheet to format.
    """
    ws = wb[sheet_name]

    header_font = Font(size=14, bold=True)
    for cell in ws["1:1"]:
        cell.font = header_font

    # Width factor per 1-based column index; all other columns use the default.
    # The original `if / if / else` chain let the `else` branch overwrite the
    # factor chosen for column 1, so that factor was never applied.
    width_factors = {1: 10, 2: 3}
    default_width_factor = 1.5
    for col in ws.iter_cols(min_row=1, max_row=1):
        for cell in col:
            padded_length = len(str(cell.value)) + 2
            factor = width_factors.get(cell.col_idx, default_width_factor)
            ws.column_dimensions[utils_get_column_letter(cell.column)].width = padded_length * factor

    # Base alignment for every cell; the data rows are overridden further down.
    alignment = Alignment(vertical='center', horizontal='left')
    for row in ws.iter_rows():
        for cell in row:
            cell.alignment = alignment

    header_range = f"A1:{utils_get_column_letter(ws.max_column)}1"
    ws.auto_filter.ref = header_range
    ws.freeze_panes = ws['A2']

    wrap_alignment = Alignment(wrap_text=True, vertical='top', horizontal='left')
    for row in ws.iter_rows(min_row=2):
        for cell in row:
            cell.alignment = wrap_alignment


def _parse_ignored_item(item: str) -> dict:
    """Split an ``'Key: value; Key: value'`` string into a dictionary.

    A fragment without ``': '`` is treated as the continuation of the previous
    value, so a text that itself contains ``'; '`` no longer raises. A
    continuation that happens to contain ``': '`` is still read as a new key.

    Raises:
        ValueError: If the very first fragment has no ``': '`` separator.
    """
    record: dict = {}
    last_key = None
    for part in item.split('; '):
        if ': ' in part:
            last_key, value = part.split(': ', 1)
            record[last_key] = value
        elif last_key is not None:
            record[last_key] += '; ' + part
        else:
            raise ValueError(f"Cannot parse ignored item: {item!r}")
    return record


cols: list[str] = ["PID", "Text", "Label Type"]
# One fresh record per item: no values leak from the previous item, and the
# builtin `set` is no longer shadowed by a loop variable.
ignored_records: list[dict] = [
    _parse_ignored_item(item)
    for items_of_dataset in ignored_items.values()
    for item in items_of_dataset
]
# Build the frame once instead of concatenating inside the loop.
df_ignored_items: pd.DataFrame = pd.DataFrame(ignored_records)
# Expected columns first (also present when nothing was ignored), then any others.
extra_cols = [col for col in df_ignored_items.columns if col not in cols]
df_ignored_items = df_ignored_items.reindex(columns=cols + extra_cols)

save_to_excel(df_ignored_items, formatter_time, "Incomplete US labelling", os.getenv("OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS"))
print(df_ignored_items.head())
print(f"Count of ignored Elements: {df_ignored_items.shape[0]}")

1
     PID                                               Text Label Type
0  #G02#  #G02# As a UI designer, I want to redesign the...    Contain
1  #G02#  #G02# As a UI designer, I want to track the is...     Target
2  #G02#  #G02# As a UI designer, I want to track the is...     Target
3  #G02#  #G02# As a UI designer, I want to track the is...     Target
4  #G02#  #G02# As an agency user, I want to submit my d...    Contain
Count of ignored Elements: 191


### Data transformation to pandas DataFrames

In [1]:
def transform_2_dataframe(data: list) -> pd.DataFrame:
    """Flatten converted user-story entries into one DataFrame row per story.

    The nested ``Triggers``/``Targets``/``Contains`` dictionaries are split
    into one column for the main part and one for the benefit.

    Args:
        data: Converted entries of one dataset. Each entry needs the keys
            ``PID``, ``Main Part``, ``Benefit``, ``Triggers``, ``Targets``,
            ``Contains`` and the story text under ``User Story Text`` or ``Text``.

    Returns:
        A DataFrame with ten columns in a fixed order; ``transform_pairwise``
        relies on that order.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    rows = []
    for entry in data:
        # Converted entries store the story under "Text"; entries that already
        # use "User Story Text" keep working.
        story_text = entry["User Story Text"] if "User Story Text" in entry else entry["Text"]
        triggers, targets, contains = entry["Triggers"], entry["Targets"], entry["Contains"]
        rows.append({
            "PID": entry["PID"],
            "User Story Text": story_text,
            "Main Part": entry["Main Part"],
            "Benefit": entry["Benefit"],
            "Triggers Main Part": triggers["Main Part"],
            "Triggers Benefit": triggers["Benefit"],
            "Targets Main Part": targets["Main Part"],
            "Targets Benefit": targets["Benefit"],
            "Contains Main Part": contains["Main Part"],
            "Contains Benefit": contains["Benefit"],
        })
    return pd.DataFrame(rows)

In [2]:
def transform_pairwise(df: pd.DataFrame):
    """Build one row for every unordered pair of rows of ``df``.

    For ``n`` input rows the result has ``n(n-1) / 2`` rows. The first ten
    columns of ``df`` are read by position and copied twice: prefixed with
    ``First`` for the earlier row and with ``Second`` for the later row.

    Args:
        df: Frame with at least ten columns, laid out as produced by
            ``transform_2_dataframe``.

    Returns:
        A DataFrame with twenty columns, or an empty DataFrame when ``df``
        has fewer than two rows.
    """
    field_names = (
        "PID",
        "User Story Text",
        "Main Part",
        "Benefit",
        "Triggers Main Part",
        "Triggers Benefit",
        "Targets Main Part",
        "Targets Benefit",
        "Contains Main Part",
        "Contains Benefit",
    )
    pair_records = []
    for first_pos, second_pos in combinations(range(len(df)), 2):
        record = {}
        for prefix, row_pos in (("First", first_pos), ("Second", second_pos)):
            for col_pos, field in enumerate(field_names):
                record[f"{prefix} {field}"] = df.iloc[row_pos, col_pos]
        pair_records.append(record)
    return pd.DataFrame(pair_records)

NameError: name 'pd' is not defined

In [None]:
# Build the per-story frame for dataset "g02" and preview it.
ID_G02: str = "g02"
data_g02: list = datasets[ID_G02]
df_g02: pd.DataFrame = transform_2_dataframe(data_g02)
print(df_g02.head())
# Type check of a nested cell: column 6 is "Targets Main Part" in transform_2_dataframe's layout.
element = df_g02.iloc[1, 6]
print(type(element))
# NOTE(review): element[0] raises IndexError if this story has no main-part targets.
print(type(element[0]))
# Expand the stories to all unordered pairs and preview the result.
df_g02_pairs = transform_pairwise(df_g02)
print(df_g02_pairs.head())

     PID                                    User Story Text  \
0  #G02#  As a Data user, I want to have the 12-19-2017 ...   
1  #G02#  As a UI designer, I want to redesign the Resou...   
2  #G02#  As a UI designer, I want to report to the Agen...   
3  #G02#  As a UI designer, I want to move on to round 2...   
4  #G02#  As a UI designer, I want to move on to round 3...   

                                           Main Part  \
0  As a Data user, I want to have the 12-19-2017 ...   
1  As a UI designer, I want to redesign the Resou...   
2  As a UI designer, I want to report to the Agen...   
3  As a UI designer, I want to move on to round 2...   
4  As a UI designer, I want to move on to round 3...   

                                             Benefit  \
0                                                      
1            it matches the new Broker design styles   
2  they are aware of their contributions to makin...   
3                I can get approvals from leadership   
4   

In [None]:
# TODO: Convert the data back
# TODO: Add the type hinting
# TODO: Use this function in processing the pairs
# TODO: Processing the pairs has to be done more abstractly to use it
def convert_dataframe_to_json(pairs):
    """Placeholder, not implemented yet: the body is empty and the function returns None.

    NOTE(review): per the TODOs above it is meant to turn the pair frame back
    into JSON for the request processing - confirm the intended output format.
    """
    pass

In [None]:
def template_request_two_user_stories(
    current_message: list[dict],
    idx: int,
    pairs: pd.DataFrame,
):
    """Append the request for the pair at row ``idx`` to ``current_message``.

    Not implemented for the annotated pair layout yet: the function always
    raises and leaves ``current_message`` untouched.

    Args:
        current_message: Chat messages the request should be appended to.
        idx: Row position of the pair inside ``pairs``.
        pairs: Pairwise frame of user stories.

    Raises:
        NotImplementedError: Always. It is a subclass of ``Exception``, so
            callers that caught the previous bare ``Exception`` still work.
    """
    # Previous implementation, kept for reference. Its column positions
    # (0-3) belong to an older pair layout and must be adapted to the
    # annotated layout before it is re-enabled.
    # user_story_one: str = pairs.iat[idx, 1]
    # user_story_two: str = pairs.iat[idx, 3]
    # user_story_one_id: str = str(pairs.iat[idx, 0])
    # user_story_two_id: str = str(pairs.iat[idx, 2])
    # request: dict = {
    #     "role": "user",
    #     "content": "Yes. Please, process the following pair of user stories:\n"
    #     f"id: {user_story_one_id}, describtion:  {user_story_one}\n"
    #     f"id: {user_story_two_id}, describtion: {user_story_two}",
    # }
    # current_message.append(request)
    raise NotImplementedError(
        "template_request_two_user_stories is not implemented for annotated user-story pairs yet."
    )

In [None]:
def sort_threaded_results(to_sort: dict) -> None:
    """Sort every result list of ``to_sort`` in place by its story-id pair.

    Each result needs a ``'relatedStories'`` sequence whose first two items
    can be converted with ``int``. The lists are ordered by the first id and
    then by the second id. The list objects stay the same; only their
    contents are reordered.

    Args:
        to_sort: Mapping whose values are lists of result dictionaries.
    """
    def story_id_pair(result: dict) -> tuple[int, int]:
        related = result['relatedStories']
        return int(related[0]), int(related[1])

    for results in to_sort.values():
        # Slice assignment keeps the identity of the list held by the caller.
        results[:] = sorted(results, key=story_id_pair)

In [None]:
def json_validator(json_data: dict) -> tuple[bool, str]:
    """Validate ``json_data`` against ``chat_gpt_schema_with_annotations``.

    Delegates to ``validation`` and returns its result unchanged.
    NOTE(review): per the annotation this is a (is_valid, message) tuple - confirm in json_validator module.
    """
    return validation(json_data, chat_gpt_schema_with_annotations)

### Prepare prompting

In [None]:
# Natural-language definitions that are interpolated into the prompt messages further below.
# They are sent to the model verbatim, so any wording change alters the prompt.
definition_main_part: str = "The main part of a User Story describes the core action that a persona wishes to accomplish. For example, the main part of the user story 'As a UI designer, I want to begin user testing, so that I can validate stakeholder UI improvement requests' is 'As a UI designer, I want to begin user testing'. This part specifies the main functionality or feature the story aims to deliver."
definition_benefit: str = "A benefit in a User Story refers to the positive outcome or advantage that a user or stakeholder gains from the functionality described in the User Story. It directly addresses user needs and enhances the value of the software product, providing a clear motive for the user action, like easing a task or improving efficiency. We use the example from before which was 'As a UI designer, I want to begin user testing, so that I can validate stakeholder UI improvement requests'. Here we determine the benefit part, which is 'so that I can validate stakeholder UI improvement request'. Hence, a benefit starts with the phrase 'so that'."
definition_general_redundancy: str = "Redundancy in a User Story refers to the unnecessary repetition or duplication of information within the story's description. Such redundancies can cloud the core purpose or value of the feature, leading to potential confusion or misinterpretation by the development team. It's crucial to remove redundancies to maintain clarity and focus in development."
definition_main_part_redundancy: str = "A main part redundancy in User Stories refers to unnecessary duplication in the description of the action or functionality that the persona aims to achieve, excluding persona repetitions and benefits. This type of redundancy occurs when similar actions or functionalities are described in multiple stories without adding distinct value or clarity, potentially leading to confusion or inefficiencies in the development process. Thus, a main part redundancy occurs when a pair of two User Stories have a redundant main part."
definition_benefit_redundancy: str = "Benefit redundancy in User Stories occurs when multiple stories repeatedly emphasize the same positive outcomes of similar software features. This duplication can lead to inefficiencies as the same advantages are stated multiple times, potentially diluting the impact and clarity of user benefits in the product documentation. Thus, a benefit redundancy occurs when a pair of two User Stories have a redundant benefit part."

In [None]:
# Placeholders for the definitions of full and partial redundancy; still empty and not referenced in the visible cells.
full_redundancy: str = ""
partial_redundancy: str = ""

In [None]:
# Hash marks must be set.
# Example of the expected answer format; it is embedded verbatim into the prompt.
# NOTE(review): the template is not valid JSON - arrays contain key/value pairs,
# some keys are unquoted and "pairsOfTriggerRedundancy" appears three times.
# Presumably the repeats stand for the other annotation types - confirm against
# chat_gpt_schema_with_annotations.
definition_json_format_pair_user_stories: str = '''
{
    "relatedStories": [1, 3],
    "redundantMainPart": true, 
    "mainPartRedundancies": [
        partialRedundancy: false,
        fullRedundancy: true,
        redundancies: [
            {
                "pairsOfTriggerRedundancy": {
                    [["User", "application User"], ["User", "application User"]],
                    ...
                },
                "pairsOfTriggerRedundancy": {
                    [["User", "application User"], ["User", "application User"]],
                    ...
                },
                "pairsOfTriggerRedundancy": {
                    [["User", "application User"], ["User", "application User"]],
                    ...
                }
            },
        ],
            
    ],
    "redundantBenefit": true, 
    "benefitRedundancies": [
        // Analog zu oben
    ]
}
'''

### Request definition to ChatGPT

In [None]:
# Collection of messages that precede every pair request. In the future it can
# be used to create a context-aware system.
message: list[dict] = []

# Initial user request that sets the task.
user_inital_message: dict = {
  "role": "user",
  "content": "Act as a Requirements Engineer focused on identifying redundancies. Please review pairs of two User Stories and pinpoint any unnecessary duplications that obscure clarity or add no distinct value."
}
message.append(user_inital_message)

# This answer was generated by ChatGPT.
# NOTE(review): all pre-recorded answers in this cell are sent with the role
# "system", although the original comments call them assistant messages. The
# role is left unchanged to keep results comparable - confirm which is intended.
system_is_r_eng = ("As a requirements engineer in agile development, it is my responsibility to review user stories for redundancies. My goal is to identify and report any overlapping or duplicate requirements."
" By carefully analysing the user stories in depth, I ensure that each requirement is necessary and contributes uniquely, increasing the coherence of the product.")
assistent_initial_message: dict = {"role": "system", "content": system_is_r_eng}
message.append(assistent_initial_message)

# Definitions of the main part and the benefit of a User Story.
user_defining_benefit: dict = {"role": "user", "content": "Please, analyse redundancies in the main part and benefit of a pair of two given User Stories."
    " Note that a User Story may include multiple redundancies in the main part as well as the benefit. The redundancies of the main part and benefit are disjoint sets."
    " Hence, a main part can be redundant while a benefit is not and vice versa. However, in some cases the main part and the benefit can be at the same time redundant, but they do not depend on each other and therefore they are independent redundant."
    " The definition of the main part is: "
    f"\"{definition_main_part}\" and the definition of a benefit is as follows: \"{definition_benefit}\""}
message.append(user_defining_benefit)

# This answer was generated by ChatGPT (see the role note above).
system_is_r_eng_defintion_aware: str = ("I will analyse redundancies in the main parts and benefits of two User Stories. Each story might include multiple redundancies."
" The main part typically describes the desired functionality by the persona, while the benefit details the positive outcomes from the functionality.")
message.append({"role": "system", "content": system_is_r_eng_defintion_aware})

# Definitions of the general, the main part and the benefit redundancy.
user_defining_redundancy: dict = {"role": "user", "content": f"Please analyse the User Stories considering the general definition of redundancy: {definition_general_redundancy} "
    f"Additionally, take into account the specific redundancy in the main part: \"{definition_main_part_redundancy}\" "
    f"and the redundancy in benefits: \"{definition_benefit_redundancy}\" Can you do that?"}
message.append(user_defining_redundancy)

# This answer was generated by ChatGPT (see the role note above).
system_is_r_eng_defintion_aware_redundancy: str = "I'll review the User Stories for redundancies in main parts and benefits, using the specified definitions. Ready to proceed?"
message.append({"role": "system", "content": system_is_r_eng_defintion_aware_redundancy})

# Description of the expected JSON output.
# Fixed compared with the previous wording: the missing separator before item
# 1, the type of 'relatedStories', the spelling of 'redundantBenefit' and the
# main-part sentences that had been copied into the benefit item 5.2.
# NOTE(review): items 3.1, 3.2, 5.1 and 5.2 describe the fields
# 'redundancyDescription' and 'referenceToOriginalText', which do not occur in
# definition_json_format_pair_user_stories - align the two before running.
user_defining_return_json_format: dict = {"role": "user", "content": f"Before we proceed, consider the following JSON output format: {definition_json_format_pair_user_stories}."
                                    +     " This JSON structure organizes information about redundancies in User Stories, focusing on both the main parts and the benefits."
                                    +     " Each section includes descriptions of the redundancies and specific text references to illustrate where these redundancies occur within the stories. \n"
                                    + "1.) The field 'relatedStories' is an array of two integer values. The values are the ids of the User Stories and this field is also mandatory. \n"
                                    + "2.) The field 'redundantMainPart' can be true or false. true indicates that a pair of User Stories has at least one main part redundancy and otherwise false. It is a mandatory field. \n"
                                    + "3.) The array 'mainPartRedundancies' contains no, one or more objects, each representing a specific redundancy found within the main parts of User Stories."
                                    +     " When no main part redundancy is found, it contains no objects and thus it is empty."
                                    +     " In the case only one main part redundancy is found, it contains one entry."
                                    +     " The last case is that multiple main part redundancies are found. Then it contains more than one entry."
                                    +     " This array is mandatory to be filled when the field 'redundantMainPart' is true. \n"
                                    + "3.1.) The field 'redundancyDescription' should contain a brief description, typically a sentence or two, explaining the reason for the main part redundancy. \n"
                                    + "3.2.) The array 'referenceToOriginalText' holds the exact text from the User Story pairs that demonstrates the redundancy.  Thus, the array has two entries."
                                    +     " The first entry is from the first user story and the second entry is from the second user story. \n"
                                    + "4.) The field 'redundantBenefit' can be true or false. true indicates that a pair of User Stories has at least one benefit redundancy and otherwise false. It is a mandatory field. \n"
                                    + "5.) The array 'benefitRedundancies' is similar to 'mainPartRedundancies' but contain not the same content."
                                    +     " The main part and benefit redundancies are disjoint, therefore a benefit redundancy can exist while a main part redundancy does not exist or exists."
                                    +     " This array details redundancies found in the benefits described by the User Story pairs. This array is mandatory to be filled when the field 'redundantBenefit' is true.  \n"
                                    + "5.1) The field 'redundancyDescription' should contain a brief description, typically a sentence or two, explaining the reason for the benefit redundancy. \n"
                                    + "5.2.) The array 'referenceToOriginalText' lists the text from the stories that highlight the redundant benefits, helping to pinpoint where exactly these redundancies occur."
                                    +     " Thus, the array has two entries. The first entry is from the first user story and the second entry is from the second user story."
                                    +     " When no benefit redundancy is found, the array 'benefitRedundancies' contains no objects and thus it is empty."
                                    +     " In the case only one benefit redundancy is found, it contains one entry."
                                    +     " The last case is that multiple benefit redundancies are found. Then it contains more than one entry."
                                    +     " This array is mandatory to be filled when the field 'redundantBenefit' is true. \n"
                                    + "Are you ready to analyse redundancies in User Stories, focusing on main parts and benefits, and return the findings in JSON format?"}
message.append(user_defining_return_json_format)

# Pre-recorded confirmation that the output format was understood (see the role note above).
message.append({"role": "system", "content": "I've noted the JSON output format specified. I am ready to analyse redundancies in User Stories, focusing on the main parts and benefits. Shall we begin?"})


In [None]:
# Print the prompt preamble and estimate its size in tokens.
for prompt_entry in message:
    print(prompt_entry["content"])
message_text: str = "".join(prompt_entry["content"] for prompt_entry in message)

enc = tiktoken.get_encoding('cl100k_base')
# `encode` returns the list of token ids, one entry per token.
token_size = enc.encode(message_text)
print("-" * 3 + "Token" + "-" * 3)
print(token_size)
print("-" * 3 + "Sum of Tokens" + "-" * 3)
# The number of tokens is the length of the id list. Summing the ids, as the
# previous version did, adds up vocabulary indices and is why the figure did
# not match https://platform.openai.com/tokenizer.
# NOTE(review): the chat API presumably adds a few tokens per message on top
# of this count - treat the value as an estimate.
print("The total token sum is: " + locale.format_string("%d", len(token_size), grouping=True))
    

Act as a Requirements Engineer focused on identifying redundancies. Please review pairs of two User Stories and pinpoint any unnecessary duplications that obscure clarity or add no distinct value.
As a requirements engineer in agile development, it is my responsibility to review user stories for redundancies. My goal is to identify and report any overlapping or duplicate requirements. By carefully analysing the user stories in depth, I ensure that each requirement is necessary and contributes uniquely, increasing the coherence of the product.
Please, analyse redundancies in the main part and benefit of a pair of two given User Stories Note that a User Story may include multiple redundancies in the main part as well as the benefit. The redundancies of the main part and benefit are disjoint sets. Hence, a main part can be redundant while a benefit is not and vice versa. However, in some cases the main part and the benefit can be at the same time redundant, but they do not depend on each 

## Data processing for: G02 federal funding

In [None]:
# Build the per-story frame and the pairwise frame for dataset "g02".
# NOTE(review): this repeats the assignments of the earlier preview cell.
ID_G02: str = "g02"
data_g02: list = datasets[ID_G02]
df_g02: pd.DataFrame = transform_2_dataframe(data_g02)
df_g02_pairs = transform_pairwise(df_g02)

In [None]:
# Show the per-story frame of dataset "g02".
display(df_g02)

Unnamed: 0,PID,User Story Text,Main Part,Benefit,Triggers Main Part,Triggers Benefit,Targets Main Part,Targets Benefit,Contains Main Part,Contains Benefit
0,#G02#,"As a Data user, I want to have the 12-19-2017 ...","As a Data user, I want to have the 12-19-2017 ...",,"[[Data user, have]]",[],"[[have, 12-19-2017 deletions]]",[],[],[]
1,#G02#,"As a UI designer, I want to redesign the Resou...","As a UI designer, I want to redesign the Resou...",it matches the new Broker design styles,"[[UI designer, redesign]]",[],"[[redesign, Resources page]]","[[matches, new Broker design styles]]",[],[]
2,#G02#,"As a UI designer, I want to report to the Agen...","As a UI designer, I want to report to the Agen...",they are aware of their contributions to makin...,"[[UI designer, report]]",[],"[[report, user testing]]","[[aware, contributions], [making, Broker]]",[],[]
3,#G02#,"As a UI designer, I want to move on to round 2...","As a UI designer, I want to move on to round 2...",I can get approvals from leadership,"[[UI designer, move on]]",[],"[[move on, round 2]]","[[get, approvals]]",[],[]
4,#G02#,"As a UI designer, I want to move on to round 3...","As a UI designer, I want to move on to round 3...",I can get approvals from leadership,"[[UI designer, move on]]",[],"[[move on, round 3]]","[[get, approvals]]",[],[]
...,...,...,...,...,...,...,...,...,...,...
90,#G02#,"As a FABS user, I want to have my validations ...","As a FABS user, I want to have my validations ...",,"[[FABS user, have]]",[],"[[have, validations]]","[[run, reasonable amount of time]]",[],[]
91,#G02#,"As a FABS user, I want to see correct status l...","As a FABS user, I want to see correct status l...",I can quickly see my submission history,"[[FABS user, see]]",[],"[[see, correct status labels]]","[[quickly see, submission history]]",[],[]
92,#G02#,"As an agency user, I want to know when the sub...","As an agency user, I want to know when the sub...",I know when the submission starts and ends,"[[agency user, know]]",[],"[[know, submission periods start and end]]","[[know, submission starts and ends]]",[],[]
93,#G02#,"As an agency user, I want a landing page to na...","As an agency user, I want a landing page to na...",I can access both sides of the site,"[[agency user, want]]",[],"[[want, landing page]]","[[navigate, FABS], [navigate, DABS pages], [ac...",[],[]


In [None]:
# Show the pairwise frame of dataset "g02".
display(df_g02_pairs)

Unnamed: 0,First PID,First User Story Text,First Main Part,First Benefit,First Triggers Main Part,First Triggers Benefit,First Targets Main Part,First Targets Benefit,First Contains Main Part,First Contains Benefit,Second PID,Second User Story Text,Second Main Part,Second Benefit,Second Triggers Main Part,Second Triggers Benefit,Second Targets Main Part,Second Targets Benefit,Second Contains Main Part,Second Contains Benefit
0,#G02#,"As a Data user, I want to have the 12-19-2017 ...","As a Data user, I want to have the 12-19-2017 ...",,"[[Data user, have]]",[],"[[have, 12-19-2017 deletions]]",[],[],[],#G02#,"As a UI designer, I want to redesign the Resou...","As a UI designer, I want to redesign the Resou...",it matches the new Broker design styles,"[[UI designer, redesign]]",[],"[[redesign, Resources page]]","[[matches, new Broker design styles]]",[],[]
1,#G02#,"As a Data user, I want to have the 12-19-2017 ...","As a Data user, I want to have the 12-19-2017 ...",,"[[Data user, have]]",[],"[[have, 12-19-2017 deletions]]",[],[],[],#G02#,"As a UI designer, I want to report to the Agen...","As a UI designer, I want to report to the Agen...",they are aware of their contributions to makin...,"[[UI designer, report]]",[],"[[report, user testing]]","[[aware, contributions], [making, Broker]]",[],[]
2,#G02#,"As a Data user, I want to have the 12-19-2017 ...","As a Data user, I want to have the 12-19-2017 ...",,"[[Data user, have]]",[],"[[have, 12-19-2017 deletions]]",[],[],[],#G02#,"As a UI designer, I want to move on to round 2...","As a UI designer, I want to move on to round 2...",I can get approvals from leadership,"[[UI designer, move on]]",[],"[[move on, round 2]]","[[get, approvals]]",[],[]
3,#G02#,"As a Data user, I want to have the 12-19-2017 ...","As a Data user, I want to have the 12-19-2017 ...",,"[[Data user, have]]",[],"[[have, 12-19-2017 deletions]]",[],[],[],#G02#,"As a UI designer, I want to move on to round 3...","As a UI designer, I want to move on to round 3...",I can get approvals from leadership,"[[UI designer, move on]]",[],"[[move on, round 3]]","[[get, approvals]]",[],[]
4,#G02#,"As a Data user, I want to have the 12-19-2017 ...","As a Data user, I want to have the 12-19-2017 ...",,"[[Data user, have]]",[],"[[have, 12-19-2017 deletions]]",[],[],[],#G02#,"As a Developer , I want to be able to log bett...","As a Developer , I want to be able to log better",I can troubleshoot issues with particular subm...,"[[Developer, log better]]","[[functions, issues], [particular submissions,...",[],"[[troubleshoot, issues]]",[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4460,#G02#,"As a FABS user, I want to see correct status l...","As a FABS user, I want to see correct status l...",I can quickly see my submission history,"[[FABS user, see]]",[],"[[see, correct status labels]]","[[quickly see, submission history]]",[],[],#G02#,"As an agency user, I want a landing page to na...","As an agency user, I want a landing page to na...",I can access both sides of the site,"[[agency user, want]]",[],"[[want, landing page]]","[[navigate, FABS], [navigate, DABS pages], [ac...",[],[]
4461,#G02#,"As a FABS user, I want to see correct status l...","As a FABS user, I want to see correct status l...",I can quickly see my submission history,"[[FABS user, see]]",[],"[[see, correct status labels]]","[[quickly see, submission history]]",[],[],#G02#,"As an agency user, I want to submit my data el...","As an agency user, I want to submit my data el...",Excel won't strip off leading and trailing zeroes,"[[agency user, submit]]",[],"[[submit, data elements surrounded by quotatio...",[],[],[]
4462,#G02#,"As an agency user, I want to know when the sub...","As an agency user, I want to know when the sub...",I know when the submission starts and ends,"[[agency user, know]]",[],"[[know, submission periods start and end]]","[[know, submission starts and ends]]",[],[],#G02#,"As an agency user, I want a landing page to na...","As an agency user, I want a landing page to na...",I can access both sides of the site,"[[agency user, want]]",[],"[[want, landing page]]","[[navigate, FABS], [navigate, DABS pages], [ac...",[],[]
4463,#G02#,"As an agency user, I want to know when the sub...","As an agency user, I want to know when the sub...",I know when the submission starts and ends,"[[agency user, know]]",[],"[[know, submission periods start and end]]","[[know, submission starts and ends]]",[],[],#G02#,"As an agency user, I want to submit my data el...","As an agency user, I want to submit my data el...",Excel won't strip off leading and trailing zeroes,"[[agency user, submit]]",[],"[[submit, data elements surrounded by quotatio...",[],[],[]


### Process and Store the User Stories persistently

In [None]:
# Create a fresh recorder for this run and register it under the dataset key.
time_recorder = TimeRecorder()
time_recorders[ID_G02] = time_recorder

# NOTE(review): index_usid1=0 and index_usid2=2 address the columns "First PID" and
# "First Main Part" of the pair layout built by transform_pairwise; that frame has no
# user-story id column - confirm what the request handler expects.
# NOTE(review): template_request_two_user_stories currently always raises, so this
# cell cannot complete until it is implemented.
# NOTE(review): the threaded call passes json_schema=..., the sequential call passes
# json_validation=... - confirm both keyword names against request_handler.
if THREADING:
    process_user_stories_parallel(index_usid1=0, index_usid2=2, message=message, pairs=df_g02_pairs, key=ID_G02, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories,
                                  sort_threaded_results=sort_threaded_results, json_schema=chat_gpt_schema_with_annotations, time_recorder=time_recorder)
else:
    process_user_stories(index_usid1=0, index_usid2=2, message=message, pairs=df_g02_pairs, key=ID_G02, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories, 
                         json_validation=json_validator, time_recorder=time_recorder)

## Save total Speeds

In [None]:
# Name of the Excel sheet that stores the time consumption table.
SHEET_NAME = 'Time Consup. Anlys.S.'

In [None]:
def _count_existing_runs(result_dir: str, dataset_key: str) -> int:
    """Return the first index ``NN`` for which ``NN_<dataset_key>.json`` is missing in ``result_dir``.

    Counting stops at the first gap, so it equals the number of consecutively
    numbered result files starting at ``00``.
    """
    run_idx = 0
    while os.path.exists(os.path.join(result_dir, f"{run_idx:02d}_{dataset_key}.json")):
        run_idx += 1
    return run_idx


def _drop_already_recorded(current: pd.DataFrame, previous: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``current`` whose (Dataset, Run Count) pair is absent from ``previous``.

    ``previous`` is read by position: column 0 is the dataset and column 1 the
    run count, matching the layout written by this cell. This covers the case
    that the cell is executed twice for the same data.
    """
    recorded = set(zip(previous.iloc[:, 0], previous.iloc[:, 1]))
    is_unrecorded = pd.Series(
        [
            (dataset, run_count) not in recorded
            for dataset, run_count in zip(current['Dataset'], current['Run Count'])
        ],
        index=current.index,
        dtype=bool,
    )
    return current.loc[is_unrecorded].reset_index(drop=True)


base_path: str = os.getcwd()
path_to_file = os.path.join(base_path, "results", "redundancy-model-" + MODEL_VERSION_NAME)

# Number of result files that already exist per dataset.
count_runs_per_data_set: dict[str, int] = {
    dataset_key: _count_existing_runs(path_to_file, dataset_key)
    for dataset_key in time_recorders
}

columns = ['Dataset', 'Run Count', 'Model Version', 'Threading Enabled',
           'Nanoseconds', 'Milliseconds', 'Seconds', 'Minutes']

# One row per processed dataset, in the order of `columns`.
entries: list[tuple] = [
    (dataset_key, count_runs_per_data_set[dataset_key], MODEL_VERSION_NAME, str(THREADING).lower(),
     recorder.nanoseconds, recorder.milliseconds, recorder.seconds, recorder.minutes)
    for dataset_key, recorder in time_recorders.items()
]
time_consumption_data: pd.DataFrame = pd.DataFrame(entries, columns=columns)

# NOTE(review): the history is read from OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS_MODIFIED,
# while the save cell passes OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS - confirm that both
# refer to the same workbook.
_excel_name = os.getenv('OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS_MODIFIED')
# An unset variable means there is no history file, instead of a TypeError in os.path.join.
_file_path = os.path.join(base_path, _excel_name) if _excel_name else None
check = _file_path is not None and os.path.exists(_file_path)

old_time_consuption_data: pd.DataFrame = None
if check:
    try:
        old_time_consuption_data = pd.read_excel(_file_path, sheet_name=SHEET_NAME)
    except ValueError:
        # The workbook exists but has no timing sheet yet.
        check = False

if check and not old_time_consuption_data.empty:
    # Incomplete rows of the stored table are discarded before merging.
    old_time_consuption_data = old_time_consuption_data.dropna()
    time_consumption_data = pd.concat(
        [old_time_consuption_data, _drop_already_recorded(time_consumption_data, old_time_consuption_data)]
    ).reset_index(drop=True)

In [None]:
def formatter_time(wb: Workbook, sheet_name: str):
    """Format the time consumption sheet of ``wb``.

    Sets a bold 14 pt header font, derives each column width from the length
    of its header text, left-aligns all cells, puts an auto filter on the
    header row, freezes the header row and wraps the text of all data rows.

    Args:
        wb: Workbook that contains the sheet.
        sheet_name: Name of the sheet to format.
    """
    sheet = wb[sheet_name]

    bold_header = Font(size=14, bold=True)
    for header_cell in sheet["1:1"]:
        header_cell.font = bold_header

    # Width per column: header text length plus a padding of 2, scaled by 1.5.
    for (header_cell,) in sheet.iter_cols(min_row=1, max_row=1):
        column_letter = utils_get_column_letter(header_cell.column)
        sheet.column_dimensions[column_letter].width = (len(str(header_cell.value)) + 2) * 1.5

    # Base alignment for every cell; the data rows are overridden below.
    base_alignment = Alignment(vertical='center', horizontal='left')
    for row_cells in sheet.iter_rows():
        for cell in row_cells:
            cell.alignment = base_alignment

    last_column_letter = utils_get_column_letter(sheet.max_column)
    sheet.auto_filter.ref = f"A1:{last_column_letter}1"
    sheet.freeze_panes = sheet['A2']

    wrapped_top_left = Alignment(wrap_text=True, vertical='top', horizontal='left')
    for row_cells in sheet.iter_rows(min_row=2):
        for cell in row_cells:
            cell.alignment = wrapped_top_left

In [None]:
# Write the merged timing table to the sheet SHEET_NAME, formatted by formatter_time.
save_to_excel(time_consumption_data, formatter_time, SHEET_NAME, os.getenv("OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS"))