# Analysing the redundancies of User Stories - Annotations and US Text

In [2]:
import locale
import os, json
from itertools import combinations

import pandas as pd
import tiktoken
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils import get_column_letter as utils_get_column_letter

from support_functions.excel_helper import save_to_excel, formatter_ignored_items
from support_functions.load_data import load_datasets_with_annotations as loading
from support_functions.request_handler import process_user_stories, process_user_stories_parallel
from support_functions.data_transformations import convert_annotation_dataset
from support_functions.time_recorder import TimeRecorder
from support_functions.json_validator import validation, chat_gpt_schema_with_annotations

In [3]:
# Switch the process locale to German so that locale.format_string() below
# groups digits the German way.
locale.setlocale(locale.LC_ALL, 'de_DE')
# Name of the chat model to query; None when MODEL_VERSION is not set.
MODEL_VERSION_NAME = os.getenv('MODEL_VERSION')
# THREADING is read as an integer flag ("0"/"1"). Default to "0" (sequential)
# when the variable is missing instead of failing with an opaque
# "int() argument must be ... not 'NoneType'" TypeError.
THREADING = bool(int(os.getenv('THREADING', '0')))
print(f"Threading is used: {THREADING}")
print(MODEL_VERSION_NAME)

Threading is used: True
gpt-3.5-turbo


In [4]:
# Maps a dataset id (e.g. "g02") to the TimeRecorder of its processing run;
# filled by the processing cells and read by the timing summary at the end.
time_recorders: dict = {}
# Recorder of the dataset that is currently processed; replaced per dataset.
time_recorder: TimeRecorder = None

### Load all User Stories from the different packages

We use the story text, its split into main part and benefit, and the (formatted) annotations:
- Text
- Main Part
- Benefit
- Triggers
- Targets
- Contains

We do not need:
- POS

Highlighting words with #

In [5]:
# Load the raw annotated user stories, keyed by dataset id (e.g. "g02").
not_processed_datasets: dict[str, list] = loading()
# Print the first raw g02 entry to check the loaded structure.
print(not_processed_datasets["g02"][0])

{'PID': '#G02#', 'Text': '#G02# As a Data user, I want to have the 12-19-2017 deletions processed.', 'Persona': ['Data user'], 'Action': {'Primary Action': ['have'], 'Secondary Action': ['processed']}, 'Entity': {'Primary Entity': ['12-19-2017 deletions'], 'Secondary Entity': ['']}, 'Benefit': '', 'Triggers': [['Data user', 'have']], 'Targets': [['processed', '12-19-2017 deletions'], ['have', '12-19-2017 deletions']], 'Contains': [], 'Persona POS': {'Persona POS tag': [['NOUN', 'NOUN']], 'Persona POS text': [['Data', 'user']]}, 'Action POS': {'Primary Action POS': {'Primary Action POS tag': [['VERB']], 'Primary Action POS text': [['have']]}, 'Secondary Action POS': {'Secondary Action POS tag': [['VERB']], 'Secondary Action POS text': [['processed']]}}, 'Entity POS': {'Primary Entity POS': {'Primary Entity POS tag': [['NUM', 'NOUN']], 'Primary Entity POS text': [['12-19-2017', 'deletions']]}, 'Secondary Entity POS': {'Secondary Entity POS tag': [[]], 'Secondary Entity POS text': [[]]}}}

### Converting the data into a suitable format

In [6]:
# Convert the raw entries into the format used in the rest of the notebook.
# `ignored_items` is consumed below as a mapping from PID to description strings
# of the form "PID: ...; Text: ...; Label Type: ...".
datasets, ignored_items = convert_annotation_dataset(not_processed_datasets)

In [7]:
# Inspect one converted g02 story.
datasets["g02"][2]

{'PID': '#G02#',
 'USID': '217',
 'Text': 'As a UI designer, I want to report to the Agencies about user testing, so that they are aware of their contributions to making Broker a better UX.',
 'Main Part': 'As a UI designer, I want to report to the Agencies about user testing, ',
 'Benefit': 'they are aware of their contributions to making Broker a better UX',
 'Triggers': {'Main Part': [['UI designer', 'report']], 'Benefit': []},
 'Targets': {'Main Part': [['report', 'user testing']],
  'Benefit': [['aware', 'contributions'], ['making', 'Broker']]},
 'Contains': {'Main Part': [], 'Benefit': []}}

In [8]:
# Inspect the first ignored-item description recorded under PID "#G02#".
ignored_items["#G02#"][0]

'PID: #G02#; Text: As a UI designer, I want to redesign the Resources page, so that it matches the new Broker design styles.; Label Type: Contain'

In [9]:
# Parse the ignored-item descriptions ("PID: ...; Text: ...; Label Type: ...")
# into a table, store it in the result workbook and preview it.
ignored_item_columns: list[str] = ["PID", "Text", "Label Type"]

ignored_records: list[dict] = []
for pid_items in ignored_items.values():
    for item in pid_items:
        # Fresh dict per item so that no field leaks over from the previous item.
        record: dict = {}
        for part in item.split('; '):
            field, value = part.split(': ', 1)
            record[field] = value
        ignored_records.append(record)

# Build the frame once instead of concatenating row by row inside the loop.
df_ignored_items: pd.DataFrame = pd.DataFrame(ignored_records)
# Guarantee the expected columns (also for an empty result) and keep them first;
# any additional parsed field stays available behind them.
extra_columns = [col for col in df_ignored_items.columns if col not in ignored_item_columns]
df_ignored_items = df_ignored_items.reindex(columns=ignored_item_columns + extra_columns)

save_to_excel(df_ignored_items, formatter_ignored_items, "Incomplete US labelling", os.getenv("OUTPUT_EXCEL_NAME_WITH_TEXT_AND_ANNOTATIONS"))
print(f"Count of ignored Elements: {df_ignored_items.shape[0]}")
df_ignored_items.head()

PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'D:\\_Projects\\_myProjects\\RedundancyAndConflictAnalysis\\src\\output_detections_highlighting_with_annotations.xlsx'

### Data transformation to pandas DataFrames and NumPy arrays

In [None]:
def transform_2_dataframe(data: list):
    """Build a DataFrame with one row per user story.

    Only the keys PID, USID, Text, Main Part, Benefit, Triggers, Targets and
    Contains are copied from each entry, in that column order; other keys are
    dropped. A missing key raises ``KeyError``.

    Args:
        data: List of user story dictionaries.

    Returns:
        pd.DataFrame: One row per entry of ``data``.
    """
    wanted_keys = (
        "PID",
        "USID",
        "Text",
        "Main Part",
        "Benefit",
        "Triggers",
        "Targets",
        "Contains",
    )
    records = [{key: story[key] for key in wanted_keys} for story in data]
    return pd.DataFrame(records)

In [None]:
# For n input rows this yields n(n-1)/2 rows: one per unordered pair.
def transform_pairwise(df: pd.DataFrame):
    """Combine every unordered pair of rows of ``df`` into one wide row.

    Values are read by column position: positions 0-7 are taken as PID, USID,
    Text, Main Part, Benefit, Triggers, Targets and Contains. Each output row
    holds them twice, prefixed with "First " and "Second ".

    Args:
        df: Frame whose first eight columns follow the order above.

    Returns:
        pd.DataFrame: One row per pair, ordered as ``itertools.combinations``
        yields the row positions.
    """
    field_names = (
        "PID",
        "USID",
        "Text",
        "Main Part",
        "Benefit",
        "Triggers",
        "Targets",
        "Contains",
    )
    pair_rows = []
    for first_pos, second_pos in combinations(range(len(df)), 2):
        pair_row = {}
        for prefix, row_pos in (("First", first_pos), ("Second", second_pos)):
            for col_pos, field in enumerate(field_names):
                pair_row[f"{prefix} {field}"] = df.iloc[row_pos, col_pos]
        pair_rows.append(pair_row)
    return pd.DataFrame(pair_rows)

In [None]:
def convert_dataframe_to_json(pair: pd.Series) -> tuple[dict, dict]:
    """Split one pair row into two JSON-compatible user story dictionaries.

    Args:
        pair: Row with "First <field>" and "Second <field>" entries for the
            fields USID, Text, Main Part, Benefit, Triggers, Targets, Contains.

    Returns:
        tuple[dict, dict]: The first and the second story, keyed by the bare
        field names. Both are passed through a JSON dump/load round trip, so
        they contain only JSON-native types.
    """
    fields = ("USID", "Text", "Main Part", "Benefit", "Triggers", "Targets", "Contains")
    values = pair.to_dict()

    # Collect both stories before serialising either of them.
    first_story, second_story = (
        {field: values[f"{prefix} {field}"] for field in fields}
        for prefix in ("First", "Second")
    )

    first_json: dict = json.loads(json.dumps(first_story))
    second_json: dict = json.loads(json.dumps(second_story))
    return first_json, second_json

In [None]:
def template_request_two_user_stories(
    current_message: list[dict],
    idx: int,
    pairs: pd.DataFrame,
) -> None:
    """Append the user request for one pair of user stories to the message list.

    Args:
        current_message: Chat message list; extended in place with one
            ``{"role": "user", "content": ...}`` entry.
        idx: Positional index of the pair row in ``pairs``.
        pairs: Pair frame in the layout expected by ``convert_dataframe_to_json``.

    Returns:
        None. The result is the mutation of ``current_message``.
    """
    json_us_one, json_us_two = convert_dataframe_to_json(pairs.iloc[idx])
    # Single quotes inside the f-strings: reusing the enclosing double quote
    # is only valid syntax from Python 3.12 on.
    request: dict = {
        "role": "user",
        "content": "Yes. Please, process the following pairs of user story annotations:\n"
        f"id: {json_us_one['USID']}, annotations: {json.dumps(json_us_one)};\n"
        f"id: {json_us_two['USID']}, annotations: {json.dumps(json_us_two)}",
    }
    current_message.append(request)

In [None]:
def sort_threaded_results(to_sort: dict) -> None:
    """Sort every result list of ``to_sort`` in place by its story pair.

    Each result must provide ``result['relatedStories']`` with at least two
    entries convertible to ``int``; results are ordered by the first id, then
    by the second. The list objects are kept and only their content is replaced.

    Args:
        to_sort: Mapping whose values are mutable lists of result dictionaries.
    """
    def _story_pair_key(result):
        related = result['relatedStories']
        return (int(related[0]), int(related[1]))

    for results in to_sort.values():
        ordered = sorted(results, key=_story_pair_key)
        # Refill the existing list so references held elsewhere stay valid.
        results.clear()
        results += ordered

In [None]:
def json_validator(json_data: dict) -> tuple[bool, str]:
    """Validate ``json_data`` against ``chat_gpt_schema_with_annotations``.

    Thin adapter that binds the schema, so callers only pass the data. The
    result of ``validation`` is returned unchanged.
    """
    schema = chat_gpt_schema_with_annotations
    return validation(json_data, schema)

### Prepare Prompting

In [12]:
# Placeholders for the prompt definitions of the user story parts and of the
# redundancy kinds. They are initialised to None here and are not assigned a
# text anywhere in the visible notebook.
definition_main_part: str = None 
definition_benefit: str = None
definition_general_redundancy: str = None
definition_main_part_redundancy: str = None
definition_benefit_redundancy: str = None

In [13]:
# Natural-language definition of a user story (persona, main part, optional
# benefit) intended as prompt text; printed for a visual check.
definition_user_story: str = '''A user story is a semi-structured sentence containing the following:
information: the persona involved in the story, the main part containing the actions that the persona will perform on the system and 
the entities involved in the actions, and optionally a benefit that the persona will receive after having completed these actions. 
The benefit may also include actions and entities. Classically, a user story is expressed in the following form: "As a <persona>, I can <Actions over entities>, so that <benefit>."'''
print(definition_user_story)

A user story is a semi-structured sentence containing the following:
information: the persona involved in the story, the main part containing the actions that the persona will perform on the system and 
the entities involved in the actions, and optionally a benefit that the persona will receive after having completed these actions. 
The benefit may also include actions and entities. Classically, a user story is expressed in the following form: "As a <persona>, I can <Actions over entities>, so that <benefit>."


In [14]:
# Sketch of the answer format expected for one pair of user stories.
# NOTE(review): the text is a template, not valid JSON (unquoted keys, "..."
# placeholders, a missing comma after "descriptionOfTargetsRedundancies", a
# "//" comment, lists placed directly inside braces). The key spelling
# "pairsOfTContainsredundancies" also differs from its siblings - confirm
# against chat_gpt_schema_with_annotations before using it in a prompt.
# The "//" comment inside the text is German for "analogous to above".
definition_json_format_pair_user_stories: str = '''
    {
        "relatedStories": [1, 3], 
        "mainPartRedundancies": {
            partialRedundancy: false,
            fullRedundancy: true,
            "descriptionOfTriggersRedundancies":"awd",
            "pairsOfTriggersRedundancies": {
                [["User", "application User"], ["User", "application User"]],
                ...
            },
            "descriptionOfTargetsRedundancies":"awd"
            "pairsOfTargetsRedundancies": {
                [["User", "User"], ["User", "application User"]],
                ...
            },
            "descriptionOfContainsRedundancies":"awd",
            "pairsOfTContainsredundancies": {
                [["User", "application User"], ["User", "application User"]],
                ...
            }
        },
        "benefitRedundancies": {
            // Analog zu oben
        }
    }
'''

In [15]:
# Placeholder for the first prompt example; no text is assigned yet.
example1: str = None

In [16]:
# Placeholder for the second prompt example; no text is assigned yet.
example2: str = None

In [17]:
# Placeholder for the third prompt example; no text is assigned yet.
example3: str = None

In [18]:
# Placeholder for the fourth prompt example; no text is assigned yet.
# NOTE(review): the name is spelled `exampl4`, unlike example1..example5.
exampl4: str = None

In [19]:
# Placeholder for the fifth prompt example; no text is assigned yet.
example5: str = None

In [20]:
# Collection of chat messages ({"role": ..., "content": ...}) that is handed to
# the request processing. It starts empty; in the future it can carry context
# to build a context-aware system.
message: list[dict] = []

In [21]:
# Print the prompt messages and report how many tokens they occupy.
message_text: str = ""
for entry in message:
    message_text += entry["content"]
    print(entry["content"])
enc = tiktoken.get_encoding('cl100k_base')
# encode() returns one integer id per token.
token_ids = enc.encode(message_text)
print("-" * 3 + "Token" + "-" * 3)
print(token_ids)
print("-" * 3 + "Sum of Tokens" + "-" * 3)
# The token count is the NUMBER of ids. Summing the ids themselves (as done
# before) adds up vocabulary indices, which is why the figure did not match
# https://platform.openai.com/tokenizer
print("The total token sum is: " + locale.format_string("%d", len(token_ids), grouping=True))
    

---Token---
[]
---Sum of Tokens---
The total token sum is: 0


## Data processing for: G02 federal funding

In [22]:
# Dataset id of the G02 stories; also used as key for the time recorder and
# passed to the request processing.
ID_G02: str = "g02"
data_g02: list = datasets[ID_G02]
# One row per user story.
df_g02: pd.DataFrame = transform_2_dataframe(data_g02)
# One row per unordered pair of user stories.
df_g02_pairs = transform_pairwise(df_g02)

In [23]:
# Inspect the first entry of the G02 dataset.
datasets[ID_G02][0]

{'USID': 215,
 'PID': '#G02#',
 'Text': '#G02# As a Data user, I want to have the 12-19-2017 deletions processed.',
 'Persona': ['Data user'],
 'Action': {'Primary Action': ['have'], 'Secondary Action': ['processed']},
 'Entity': {'Primary Entity': ['12-19-2017 deletions'],
  'Secondary Entity': ['']},
 'Benefit': '',
 'Triggers': [['Data user', 'have']],
 'Targets': [['processed', '12-19-2017 deletions'],
  ['have', '12-19-2017 deletions']],
 'Contains': [],
 'Persona POS': {'Persona POS tag': [['NOUN', 'NOUN']],
  'Persona POS text': [['Data', 'user']]},
 'Action POS': {'Primary Action POS': {'Primary Action POS tag': [['VERB']],
   'Primary Action POS text': [['have']]},
  'Secondary Action POS': {'Secondary Action POS tag': [['VERB']],
   'Secondary Action POS text': [['processed']]}},
 'Entity POS': {'Primary Entity POS': {'Primary Entity POS tag': [['NUM',
     'NOUN']],
   'Primary Entity POS text': [['12-19-2017', 'deletions']]},
  'Secondary Entity POS': {'Secondary Entity POS

In [19]:
# Show the per-story frame.
# NOTE(review): this renders the whole frame; df_g02.head() may be enough.
display(df_g02)

Unnamed: 0,PID,USID,Text,Action,Entity,Benefit,Triggers,Targets,Contains,Persona POS,Action POS,Entity POS
0,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
1,#G02#,216,"#G02# As a UI designer, I want to redesign the...","{'Primary Action': ['redesign'], 'Secondary Ac...","{'Primary Entity': ['Resources page'], 'Second...",it matches the new Broker design styles,"[[UI designer, redesign]]","[[redesign, Resources page], [matches, new Bro...","[[Resources page, new Broker design styles]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
2,#G02#,217,"#G02# As a UI designer, I want to report to th...","{'Primary Action': ['report'], 'Secondary Acti...","{'Primary Entity': ['user testing'], 'Secondar...",they are aware of their contributions to makin...,"[[UI designer, report]]","[[report, user testing], [aware, contributions...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
3,#G02#,218,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 2'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 2], [get, approvals]]","[[DABS, round 2], [FABS landing page edits, ro...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4,#G02#,220,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 3'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 3], [get, approvals]]","[[Help page edits, round 3]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
...,...,...,...,...,...,...,...,...,...,...,...,...
90,#G02#,308,"#G02# As a FABS user, I want to have my valida...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['validations'], 'Secondary...",,"[[FABS user, have]]","[[have, validations], [run, reasonable amount ...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
91,#G02#,309,"#G02# As a FABS user, I want to see correct st...","{'Primary Action': ['see'], 'Secondary Action'...","{'Primary Entity': ['correct status labels'], ...",I can quickly see my submission history,"[[FABS user, see]]","[[see, correct status labels], [quickly see, s...","[[Submission Dashboard, correct status labels]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
92,#G02#,310,"#G02# As an agency user, I want to know when t...","{'Primary Action': ['know'], 'Secondary Action...",{'Primary Entity': ['submission periods start ...,I know when the submission starts and ends,"[[agency user, know]]","[[know, submission periods start and end], [kn...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
93,#G02#,311,"#G02# As an agency user, I want a landing page...","{'Primary Action': ['want'], 'Secondary Action...","{'Primary Entity': ['landing page'], 'Secondar...",I can access both sides of the site,"[[agency user, want]]","[[want, landing page], [navigate, FABS], [navi...","[[landing page, FABS], [landing page, DABS pag...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...


In [20]:
# Show the pairwise frame.
# NOTE(review): this renders the whole frame; df_g02_pairs.head() may be enough.
display(df_g02_pairs)

Unnamed: 0,First PID,First USID,First Text,First Action,First Entity,First Benefit,First Triggers,First Targets,First Contains,First Persona POS,...,Second Text,Second Action,Second Entity,Second Benefit,Second Triggers,Second Targets,Second Contains,Second Persona POS,Second Action POS,Second Entity POS
0,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to redesign the...","{'Primary Action': ['redesign'], 'Secondary Ac...","{'Primary Entity': ['Resources page'], 'Second...",it matches the new Broker design styles,"[[UI designer, redesign]]","[[redesign, Resources page], [matches, new Bro...","[[Resources page, new Broker design styles]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
1,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to report to th...","{'Primary Action': ['report'], 'Secondary Acti...","{'Primary Entity': ['user testing'], 'Secondar...",they are aware of their contributions to makin...,"[[UI designer, report]]","[[report, user testing], [aware, contributions...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
2,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 2'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 2], [get, approvals]]","[[DABS, round 2], [FABS landing page edits, ro...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
3,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 3'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 3], [get, approvals]]","[[Help page edits, round 3]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a Developer , I want to be able to lo...","{'Primary Action': ['log better'], 'Secondary ...","{'Primary Entity': [''], 'Secondary Entity': [...",I can troubleshoot issues with particular subm...,"[[Developer, log better]]","[[troubleshoot, issues]]","[[functions, issues], [particular submissions,...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4460,#G02#,309,"#G02# As a FABS user, I want to see correct st...","{'Primary Action': ['see'], 'Secondary Action'...","{'Primary Entity': ['correct status labels'], ...",I can quickly see my submission history,"[[FABS user, see]]","[[see, correct status labels], [quickly see, s...","[[Submission Dashboard, correct status labels]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want a landing page...","{'Primary Action': ['want'], 'Secondary Action...","{'Primary Entity': ['landing page'], 'Secondar...",I can access both sides of the site,"[[agency user, want]]","[[want, landing page], [navigate, FABS], [navi...","[[landing page, FABS], [landing page, DABS pag...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4461,#G02#,309,"#G02# As a FABS user, I want to see correct st...","{'Primary Action': ['see'], 'Secondary Action'...","{'Primary Entity': ['correct status labels'], ...",I can quickly see my submission history,"[[FABS user, see]]","[[see, correct status labels], [quickly see, s...","[[Submission Dashboard, correct status labels]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want to submit my d...","{'Primary Action': ['submit'], 'Secondary Acti...",{'Primary Entity': ['data elements surrounded ...,Excel won't strip off leading and trailing zeroes,"[[agency user, submit]]","[[submit, data elements surrounded by quotatio...","[[Excel, data elements surrounded by quotation...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4462,#G02#,310,"#G02# As an agency user, I want to know when t...","{'Primary Action': ['know'], 'Secondary Action...",{'Primary Entity': ['submission periods start ...,I know when the submission starts and ends,"[[agency user, know]]","[[know, submission periods start and end], [kn...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want a landing page...","{'Primary Action': ['want'], 'Secondary Action...","{'Primary Entity': ['landing page'], 'Secondar...",I can access both sides of the site,"[[agency user, want]]","[[want, landing page], [navigate, FABS], [navi...","[[landing page, FABS], [landing page, DABS pag...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4463,#G02#,310,"#G02# As an agency user, I want to know when t...","{'Primary Action': ['know'], 'Secondary Action...",{'Primary Entity': ['submission periods start ...,I know when the submission starts and ends,"[[agency user, know]]","[[know, submission periods start and end], [kn...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want to submit my d...","{'Primary Action': ['submit'], 'Secondary Acti...",{'Primary Entity': ['data elements surrounded ...,Excel won't strip off leading and trailing zeroes,"[[agency user, submit]]","[[submit, data elements surrounded by quotatio...","[[Excel, data elements surrounded by quotation...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...


### Process and Store the User Stories persistently

In [None]:
# Create the recorder for the G02 run and register it under the dataset id so
# the timing summary at the end of the notebook can pick it up.
time_recorder = TimeRecorder()
time_recorders[ID_G02] = time_recorder

# Process the story pairs with the threaded or the sequential helper, depending
# on THREADING.
# NOTE(review): index_usid1/index_usid2 are presumably column positions of the
# two USIDs in `pairs`; in the layout built by transform_pairwise the USID
# columns sit at positions 1 and 9, not 0 and 2 - TODO confirm against
# support_functions.request_handler.
if THREADING:
    process_user_stories_parallel(index_usid1=0, index_usid2=2, message=message, pairs=df_g02_pairs, key=ID_G02, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories,
                                  sort_threaded_results=sort_threaded_results, json_schema=chat_gpt_schema_with_annotations, redundancy_prefix="text-annoations", time_recorder=time_recorder)
else:
    process_user_stories(index_usid1=0, index_usid2=2, message=message, pairs=df_g02_pairs, key=ID_G02, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories, 
                         json_validation=json_validator, redundancy_prefix="text-annoations", time_recorder=time_recorder)

## Save Total Processing Times

In [None]:
# Name of the Excel sheet that holds the time consumption table; used both for
# reading previous timings and for writing the merged table.
SHEET_NAME = 'Time Consup. Anlys.S.'

In [None]:
# Collect the run time of every processed dataset and merge it with the timings
# that are already stored in the result workbook.
base_path: str = os.getcwd()
path_to_file = os.path.join(base_path, "results", "redundancy-model-" + MODEL_VERSION_NAME)

# Per dataset: the first index idx for which "<idx>_<dataset>.json" does not
# exist in the results folder, i.e. the number of consecutively numbered
# result files found there.
count_runs_per_data_set: dict[str, int] = {}
for key in time_recorders.keys():
    idx = 0
    while os.path.exists(os.path.join(path_to_file, f"{idx:02d}_{key}.json")):
        idx += 1
    count_runs_per_data_set[key] = idx

columns = ['Dataset', 'Run Count', 'Model Version', 'Threading Enabled',
           'Nanoseconds', 'Milliseconds', 'Seconds', 'Minutes']

# One tuple per dataset, in the order of `columns`.
entries: list[tuple] = [
    (key, count_runs_per_data_set[key], MODEL_VERSION_NAME, str(THREADING).lower(),
     value.nanoseconds, value.milliseconds, value.seconds, value.minutes)
    for key, value in time_recorders.items()
]
time_consumption_data: pd.DataFrame = pd.DataFrame(entries, columns=columns)

# Read the previous timings from the SAME workbook this notebook writes to.
# Reading OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS_UNMODIFIED (as before) meant the
# re-run check below never saw the rows written by this notebook.
_file_path: str = os.path.join(base_path, os.getenv('OUTPUT_EXCEL_NAME_WITH_TEXT_AND_ANNOTATIONS'))

old_time_consuption_data: pd.DataFrame = None
if os.path.exists(_file_path):
    try:
        old_time_consuption_data = pd.read_excel(_file_path, sheet_name=SHEET_NAME)
    except ValueError:
        # Raised e.g. when the workbook has no timing sheet yet: nothing to merge.
        old_time_consuption_data = None

if old_time_consuption_data is not None and not old_time_consuption_data.empty:
    # Drop new rows whose (dataset, run count) is already stored. This covers
    # the case that this cell is executed twice for the same data.
    old_time_consuption_data = old_time_consuption_data.dropna()
    for idx in range(len(old_time_consuption_data)):
        already_stored = time_consumption_data[
            (time_consumption_data['Dataset'] == old_time_consuption_data.iat[idx, 0]) &
            (time_consumption_data['Run Count'] == old_time_consuption_data.iat[idx, 1])
        ].index
        time_consumption_data = time_consumption_data.drop(already_stored).reset_index(drop=True)
    time_consumption_data = pd.concat([old_time_consuption_data, time_consumption_data]).reset_index(drop=True)

In [None]:
def formatter_time(wb: Workbook, sheet_name: str):
    """Apply the layout of the time consumption sheet.

    Makes the header row bold (size 14), sizes every column from its header
    text, adds an auto filter over the header row, freezes the pane at A2 and
    sets the cell alignment (header: centred vertically; other rows: top
    aligned with text wrapping; all left aligned).

    Args:
        wb: Workbook that contains the sheet.
        sheet_name: Name of the sheet to format.
    """
    sheet = wb[sheet_name]

    bold_header = Font(size=14, bold=True)
    for header_cell in sheet["1:1"]:
        header_cell.font = bold_header

    # The width depends on the header text only: (length + 2) * 1.5.
    for column_cells in sheet.iter_cols(min_row=1, max_row=1):
        for header_cell in column_cells:
            header_length = len(str(header_cell.value))
            column_letter = utils_get_column_letter(header_cell.column)
            sheet.column_dimensions[column_letter].width = (header_length + 2) * 1.5

    header_alignment = Alignment(vertical='center', horizontal='left')
    body_alignment = Alignment(wrap_text=True, vertical='top', horizontal='left')
    for row_number, row_cells in enumerate(sheet.iter_rows(), start=1):
        row_alignment = header_alignment if row_number == 1 else body_alignment
        for cell in row_cells:
            cell.alignment = row_alignment

    last_column_letter = utils_get_column_letter(sheet.max_column)
    sheet.auto_filter.ref = f"A1:{last_column_letter}1"
    sheet.freeze_panes = sheet['A2']

In [None]:
# Write the merged timing table to the SHEET_NAME sheet of the result workbook,
# formatted by formatter_time.
save_to_excel(time_consumption_data, formatter_time, SHEET_NAME, os.getenv("OUTPUT_EXCEL_NAME_WITH_TEXT_AND_ANNOTATIONS"))