In [19]:
import locale
import os
from itertools import combinations

import pandas as pd
import tiktoken
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils import get_column_letter as utils_get_column_letter
#from openpyxl.worksheet.worksheet import Worksheet

from support_functions.excel_helper import save_to_excel
from support_functions.load_data import load_datasets_with_annotations as loading
from support_functions.request_handler import process_user_stories, process_user_stories_parallel
from support_functions.data_transformations import remove_pov_and_add_usid
from support_functions.time_recorder import TimeRecorder

In [20]:
# Prefer the German locale. The bare name 'de_DE' is not installed on every system,
# so the UTF-8 variant is tried as well; if neither exists the notebook keeps the
# current locale and says so instead of aborting with locale.Error.
for _locale_name in ('de_DE', 'de_DE.UTF-8'):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
        break
    except locale.Error:
        continue
else:
    print("Warning: locale 'de_DE' is not available; keeping the current locale.")

# Model identifier taken from the environment (None when MODEL_VERSION is not set).
MODEL_VERSION_NAME = os.getenv('MODEL_VERSION')
# THREADING is an integer flag ("0" / "1"); a missing or empty variable means "disabled"
# instead of failing inside int().
THREADING = bool(int(os.getenv('THREADING') or '0'))
print(f"Threading is used: {THREADING}")
print(MODEL_VERSION_NAME)

Threading is used: False
gpt-3.5-turbo


In [21]:
# Maps a dataset id to the TimeRecorder that measured that dataset's run.
time_recorders: dict[str, TimeRecorder] = {}
# Recorder of the run currently being processed; it is assigned in the processing cell.
time_recorder: TimeRecorder = None

### Load all User Stories from the different packages

We do not need:
- POS

Highlighting words with #

In [22]:
# Load the datasets through the project loader; the result is indexed by dataset id (e.g. "g02").
datasets: dict[str, list] = loading()
# Print the first "g02" entry as a quick check of the loaded structure.
print(datasets["g02"][0])

{'PID': '#G02#', 'Text': '#G02# As a Data user, I want to have the 12-19-2017 deletions processed.', 'Persona': ['Data user'], 'Action': {'Primary Action': ['have'], 'Secondary Action': ['processed']}, 'Entity': {'Primary Entity': ['12-19-2017 deletions'], 'Secondary Entity': ['']}, 'Benefit': '', 'Triggers': [['Data user', 'have']], 'Targets': [['processed', '12-19-2017 deletions'], ['have', '12-19-2017 deletions']], 'Contains': [], 'Persona POS': {'Persona POS tag': [['NOUN', 'NOUN']], 'Persona POS text': [['Data', 'user']]}, 'Action POS': {'Primary Action POS': {'Primary Action POS tag': [['VERB']], 'Primary Action POS text': [['have']]}, 'Secondary Action POS': {'Secondary Action POS tag': [['VERB']], 'Secondary Action POS text': [['processed']]}}, 'Entity POS': {'Primary Entity POS': {'Primary Entity POS tag': [['NUM', 'NOUN']], 'Primary Entity POS text': [['12-19-2017', 'deletions']]}, 'Secondary Entity POS': {'Secondary Entity POS tag': [[]], 'Secondary Entity POS text': [[]]}}}

### Keeping the data untouched. Just remove data load.

In [23]:
# NOTE(review): `datasets` is rebound to the transformed result, so re-running this cell
# feeds already-transformed data into remove_pov_and_add_usid again — confirm the helper
# tolerates that, or restart the kernel before re-running.
datasets = remove_pov_and_add_usid(datasets)
# Show the first "g02" entry after the transformation (the entry now carries a 'usid' key).
datasets["g02"][0]

{'usid': 215,
 'PID': '#G02#',
 'Text': '#G02# As a Data user, I want to have the 12-19-2017 deletions processed.',
 'Persona': ['Data user'],
 'Action': {'Primary Action': ['have'], 'Secondary Action': ['processed']},
 'Entity': {'Primary Entity': ['12-19-2017 deletions'],
  'Secondary Entity': ['']},
 'Benefit': '',
 'Triggers': [['Data user', 'have']],
 'Targets': [['processed', '12-19-2017 deletions'],
  ['have', '12-19-2017 deletions']],
 'Contains': [],
 'Persona POS': {'Persona POS tag': [['NOUN', 'NOUN']],
  'Persona POS text': [['Data', 'user']]},
 'Action POS': {'Primary Action POS': {'Primary Action POS tag': [['VERB']],
   'Primary Action POS text': [['have']]},
  'Secondary Action POS': {'Secondary Action POS tag': [['VERB']],
   'Secondary Action POS text': [['processed']]}},
 'Entity POS': {'Primary Entity POS': {'Primary Entity POS tag': [['NUM',
     'NOUN']],
   'Primary Entity POS text': [['12-19-2017', 'deletions']]},
  'Secondary Entity POS': {'Secondary Entity POS

### Prepare Prompting

### Request definition to ChatGPT

In [28]:
def transform_2_dataframe(data: list) -> pd.DataFrame:
    """Build a DataFrame with one row per user-story entry.

    Args:
        data: List of user-story dictionaries. Every entry must provide the keys
            listed in ``column_sources`` below; a missing key raises ``KeyError``.

    Returns:
        A DataFrame whose columns follow the order of ``column_sources``.
        The entry key ``usid`` is exposed as column ``USID``; keys that are not
        listed (for example ``Persona``) are not copied.
    """
    # Column name in the frame -> key in the story dictionary.
    column_sources = {
        "PID": "PID",
        "USID": "usid",
        "Text": "Text",
        "Action": "Action",
        "Entity": "Entity",
        "Benefit": "Benefit",
        "Triggers": "Triggers",
        "Targets": "Targets",
        "Contains": "Contains",
        "Persona POS": "Persona POS",
        "Action POS": "Action POS",
        "Entity POS": "Entity POS",
    }
    records = [
        {column: story[source_key] for column, source_key in column_sources.items()}
        for story in data
    ]
    return pd.DataFrame(records)

In [29]:
def transform_pairwise(df: pd.DataFrame) -> pd.DataFrame:
    """Build one row for every unordered pair of rows of ``df``.

    The first twelve columns of ``df`` are read by position and copied twice into
    each result row: prefixed with ``First`` for the earlier row of the pair and
    with ``Second`` for the later one.

    Args:
        df: Frame with at least twelve columns in the positional order of ``fields``
            below (the layout produced by ``transform_2_dataframe``).

    Returns:
        A DataFrame with n * (n - 1) / 2 rows for n input rows and the 24
        ``First ...`` / ``Second ...`` columns. With fewer than two input rows the
        result has no rows but still carries those columns.
    """
    # Labels for column positions 0..11 of ``df``.
    fields = (
        "PID", "USID", "Text", "Action", "Entity", "Benefit",
        "Triggers", "Targets", "Contains", "Persona POS", "Action POS", "Entity POS",
    )
    columns = [f"{side} {field}" for side in ("First", "Second") for field in fields]

    rows = []
    for i, j in combinations(range(df.shape[0]), 2):
        row = {f"First {field}": df.iloc[i, pos] for pos, field in enumerate(fields)}
        # Every "Second ..." value comes from row j. The POS columns used to be read
        # from row i, which duplicated the first story's POS data in each pair.
        row.update({f"Second {field}": df.iloc[j, pos] for pos, field in enumerate(fields)})
        rows.append(row)
    return pd.DataFrame(rows, columns=columns)

In [30]:
def convert_dataframe_to_json(pairs):
    """Placeholder: intended to convert the pair data back to JSON; not implemented yet.

    Currently ignores ``pairs`` and returns ``None``.

    Open TODOs:
    - convert the data back
    - add the type hinting
    - use this function when processing the pairs
    - make the pair processing more abstract so that it can use this function
    """
    return None

## Data processing for: G02 federal funding

In [31]:
# Dataset key of the G02 stories; it is also passed as the id when the pairs are processed.
ID_G02: str = "g02"
data_g02: list = datasets[ID_G02]
# One row per user story.
df_g02: pd.DataFrame = transform_2_dataframe(data_g02)
# One row per unordered pair of user stories.
df_g02_pairs = transform_pairwise(df_g02)

In [32]:
# Inspect the per-story frame built from the G02 dataset.
display(df_g02)

Unnamed: 0,PID,USID,Text,Action,Entity,Benefit,Triggers,Targets,Contains,Persona POS,Action POS,Entity POS
0,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
1,#G02#,216,"#G02# As a UI designer, I want to redesign the...","{'Primary Action': ['redesign'], 'Secondary Ac...","{'Primary Entity': ['Resources page'], 'Second...",it matches the new Broker design styles,"[[UI designer, redesign]]","[[redesign, Resources page], [matches, new Bro...","[[Resources page, new Broker design styles]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
2,#G02#,217,"#G02# As a UI designer, I want to report to th...","{'Primary Action': ['report'], 'Secondary Acti...","{'Primary Entity': ['user testing'], 'Secondar...",they are aware of their contributions to makin...,"[[UI designer, report]]","[[report, user testing], [aware, contributions...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
3,#G02#,218,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 2'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 2], [get, approvals]]","[[DABS, round 2], [FABS landing page edits, ro...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4,#G02#,220,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 3'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 3], [get, approvals]]","[[Help page edits, round 3]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
...,...,...,...,...,...,...,...,...,...,...,...,...
90,#G02#,308,"#G02# As a FABS user, I want to have my valida...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['validations'], 'Secondary...",,"[[FABS user, have]]","[[have, validations], [run, reasonable amount ...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
91,#G02#,309,"#G02# As a FABS user, I want to see correct st...","{'Primary Action': ['see'], 'Secondary Action'...","{'Primary Entity': ['correct status labels'], ...",I can quickly see my submission history,"[[FABS user, see]]","[[see, correct status labels], [quickly see, s...","[[Submission Dashboard, correct status labels]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
92,#G02#,310,"#G02# As an agency user, I want to know when t...","{'Primary Action': ['know'], 'Secondary Action...",{'Primary Entity': ['submission periods start ...,I know when the submission starts and ends,"[[agency user, know]]","[[know, submission periods start and end], [kn...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
93,#G02#,311,"#G02# As an agency user, I want a landing page...","{'Primary Action': ['want'], 'Secondary Action...","{'Primary Entity': ['landing page'], 'Secondar...",I can access both sides of the site,"[[agency user, want]]","[[want, landing page], [navigate, FABS], [navi...","[[landing page, FABS], [landing page, DABS pag...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...


In [33]:
# Inspect the pairwise frame built from the G02 stories.
display(df_g02_pairs)

Unnamed: 0,First PID,First USID,First Text,First Action,First Entity,First Benefit,First Triggers,First Targets,First Contains,First Persona POS,...,Second Text,Second Action,Second Entity,Second Benefit,Second Triggers,Second Targets,Second Contains,Second Persona POS,Second Action POS,Second Entity POS
0,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to redesign the...","{'Primary Action': ['redesign'], 'Secondary Ac...","{'Primary Entity': ['Resources page'], 'Second...",it matches the new Broker design styles,"[[UI designer, redesign]]","[[redesign, Resources page], [matches, new Bro...","[[Resources page, new Broker design styles]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
1,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to report to th...","{'Primary Action': ['report'], 'Secondary Acti...","{'Primary Entity': ['user testing'], 'Secondar...",they are aware of their contributions to makin...,"[[UI designer, report]]","[[report, user testing], [aware, contributions...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
2,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 2'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 2], [get, approvals]]","[[DABS, round 2], [FABS landing page edits, ro...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
3,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a UI designer, I want to move on to r...","{'Primary Action': ['move on'], 'Secondary Act...","{'Primary Entity': ['round 3'], 'Secondary Ent...",I can get approvals from leadership,"[[UI designer, move on]]","[[move on, round 3], [get, approvals]]","[[Help page edits, round 3]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4,#G02#,215,"#G02# As a Data user, I want to have the 12-19...","{'Primary Action': ['have'], 'Secondary Action...","{'Primary Entity': ['12-19-2017 deletions'], '...",,"[[Data user, have]]","[[processed, 12-19-2017 deletions], [have, 12-...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As a Developer , I want to be able to lo...","{'Primary Action': ['log better'], 'Secondary ...","{'Primary Entity': [''], 'Secondary Entity': [...",I can troubleshoot issues with particular subm...,"[[Developer, log better]]","[[troubleshoot, issues]]","[[functions, issues], [particular submissions,...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4460,#G02#,309,"#G02# As a FABS user, I want to see correct st...","{'Primary Action': ['see'], 'Secondary Action'...","{'Primary Entity': ['correct status labels'], ...",I can quickly see my submission history,"[[FABS user, see]]","[[see, correct status labels], [quickly see, s...","[[Submission Dashboard, correct status labels]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want a landing page...","{'Primary Action': ['want'], 'Secondary Action...","{'Primary Entity': ['landing page'], 'Secondar...",I can access both sides of the site,"[[agency user, want]]","[[want, landing page], [navigate, FABS], [navi...","[[landing page, FABS], [landing page, DABS pag...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4461,#G02#,309,"#G02# As a FABS user, I want to see correct st...","{'Primary Action': ['see'], 'Secondary Action'...","{'Primary Entity': ['correct status labels'], ...",I can quickly see my submission history,"[[FABS user, see]]","[[see, correct status labels], [quickly see, s...","[[Submission Dashboard, correct status labels]]","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want to submit my d...","{'Primary Action': ['submit'], 'Secondary Acti...",{'Primary Entity': ['data elements surrounded ...,Excel won't strip off leading and trailing zeroes,"[[agency user, submit]]","[[submit, data elements surrounded by quotatio...","[[Excel, data elements surrounded by quotation...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4462,#G02#,310,"#G02# As an agency user, I want to know when t...","{'Primary Action': ['know'], 'Secondary Action...",{'Primary Entity': ['submission periods start ...,I know when the submission starts and ends,"[[agency user, know]]","[[know, submission periods start and end], [kn...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want a landing page...","{'Primary Action': ['want'], 'Secondary Action...","{'Primary Entity': ['landing page'], 'Secondar...",I can access both sides of the site,"[[agency user, want]]","[[want, landing page], [navigate, FABS], [navi...","[[landing page, FABS], [landing page, DABS pag...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...
4463,#G02#,310,"#G02# As an agency user, I want to know when t...","{'Primary Action': ['know'], 'Secondary Action...",{'Primary Entity': ['submission periods start ...,I know when the submission starts and ends,"[[agency user, know]]","[[know, submission periods start and end], [kn...",[],"{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",...,"#G02# As an agency user, I want to submit my d...","{'Primary Action': ['submit'], 'Secondary Acti...",{'Primary Entity': ['data elements surrounded ...,Excel won't strip off leading and trailing zeroes,"[[agency user, submit]]","[[submit, data elements surrounded by quotatio...","[[Excel, data elements surrounded by quotation...","{'Persona POS tag': [['NOUN', 'NOUN']], 'Perso...",{'Primary Action POS': {'Primary Action POS ta...,{'Primary Entity POS': {'Primary Entity POS ta...


### Process and Store the User Stories persistently

In [None]:
# Process all G02 pairs and keep the recorder so the timing summary can read it later.
# NOTE(review): `message` is not defined in any cell shown above — confirm that the cell
# defining the prompt runs before this one; otherwise a fresh kernel raises NameError here.
time_recorder = TimeRecorder()
time_recorders[ID_G02] = time_recorder

# Pick the threaded or the sequential implementation; both receive the same arguments.
run_g02 = process_user_stories_parallel if THREADING else process_user_stories
run_g02(message, df_g02_pairs, ID_G02, MODEL_VERSION_NAME, time_recorder=time_recorder)
if THREADING:
    # Only the threaded run prints its elapsed milliseconds.
    print(time_recorder.milliseconds)

## Save total run times

In [None]:
# Name of the Excel worksheet that holds the run-time measurements.
SHEET_NAME = 'Time Consup. Anlys.S.'

In [None]:
# Directory that holds the numbered result files of the current model version.
base_path: str = os.getcwd()
path_to_file = os.path.join(base_path, "results", "redundancy-model-" + MODEL_VERSION_NAME)

# Run count per dataset = number of consecutively numbered result files
# ("00_<dataset>.json", "01_<dataset>.json", ...) that already exist.
count_runs_per_data_set: dict[str, int] = {}
for key in time_recorders:
    idx = 0
    while os.path.exists(os.path.join(path_to_file, f"{idx:02d}_{key}.json")):
        idx += 1
    count_runs_per_data_set[key] = idx

# One tuple per dataset, in the order of `columns` below:
# (dataset, run count, model version, threading flag, nanoseconds, milliseconds, seconds, minutes).
# The types of the four timing fields are taken over from the previous annotation — TODO confirm
# against TimeRecorder.
entries: list[tuple[str, int, str, str, int, float, float, float]] = []
for key, value in time_recorders.items():
    entry = (
        key,
        count_runs_per_data_set[key],
        MODEL_VERSION_NAME,
        str(THREADING).lower(),
        value.nanoseconds,
        value.milliseconds,
        value.seconds,
        value.minutes,
    )
    entries.append(entry)

columns = ['Dataset', 'Run Count', 'Model Version', 'Threading Enabled',
           'Nanoseconds', 'Milliseconds', 'Seconds', 'Minutes']

# Workbook that may already contain measurements of earlier runs.
_file_path: str = os.path.join(base_path, os.getenv('OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS_UNMODIFIED'))
check = os.path.exists(_file_path)

old_time_consuption_data: pd.DataFrame = None
if check:
    try:
        old_time_consuption_data = pd.read_excel(_file_path, sheet_name=SHEET_NAME)
    except ValueError:
        # Treated as "no earlier measurements" (e.g. the workbook has no such sheet yet).
        check = False

time_consumption_data: pd.DataFrame = pd.DataFrame(entries, columns=columns)

if check and not old_time_consuption_data.empty:
    # The workbook may already contain the current measurements (this cell was executed
    # twice for the same data): drop every new row whose dataset / run count pair is
    # already stored, then append the remaining rows to the old ones.
    old_time_consuption_data = old_time_consuption_data.dropna()
    for idx in range(len(old_time_consuption_data)):
        # The stored columns are read by position: 0 = dataset, 1 = run count.
        already_stored = time_consumption_data[
            (time_consumption_data['Dataset'] == old_time_consuption_data.iat[idx, 0]) &
            (time_consumption_data['Run Count'] == old_time_consuption_data.iat[idx, 1])
        ].index
        time_consumption_data = time_consumption_data.drop(already_stored).reset_index(drop=True)
    time_consumption_data = pd.concat([old_time_consuption_data, time_consumption_data]).reset_index(drop=True)

In [None]:
def formatter_time(wb: Workbook, sheet_name: str):
    """Apply the layout of the timing worksheet in place.

    Steps, in order: bold 14pt font for row 1, column widths derived from the header
    text, left/centre alignment for all cells, an auto-filter over the header row,
    frozen panes at A2, and wrapped top-left alignment for every row from row 2 on.

    Args:
        wb: Workbook that contains the worksheet.
        sheet_name: Name of the worksheet to format.

    Returns:
        None. The worksheet is modified in place.
    """
    sheet = wb[sheet_name]

    # Header row: larger, bold font.
    bold_header = Font(size=14, bold=True)
    for header_cell in sheet["1:1"]:
        header_cell.font = bold_header

    # Column width depends on the header text only: (characters + 2) * 1.5.
    for column_cells in sheet.iter_cols(min_row=1, max_row=1):
        for header_cell in column_cells:
            letter = utils_get_column_letter(header_cell.column)
            sheet.column_dimensions[letter].width = (len(str(header_cell.value)) + 2) * 1.5

    # Base alignment for every cell; the data rows are overridden further down,
    # so it effectively remains on the header row.
    centered_left = Alignment(vertical='center', horizontal='left')
    for cell in (c for row_cells in sheet.iter_rows() for c in row_cells):
        cell.alignment = centered_left

    # Filter drop-downs across the header, and keep the header visible while scrolling.
    last_column_letter = utils_get_column_letter(sheet.max_column)
    sheet.auto_filter.ref = f"A1:{last_column_letter}1"
    sheet.freeze_panes = sheet['A2']

    # Data rows: wrap long text and anchor it at the top-left of the cell.
    wrapped_top_left = Alignment(wrap_text=True, vertical='top', horizontal='left')
    for cell in (c for row_cells in sheet.iter_rows(min_row=2) for c in row_cells):
        cell.alignment = wrapped_top_left

In [None]:
# Write the merged timing table to the workbook the earlier measurements were read from
# (OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS_UNMODIFIED). Writing to a different workbook would
# make the duplicate check against the stored rows meaningless on the next run.
save_to_excel(time_consumption_data, formatter_time, SHEET_NAME, os.getenv('OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS_UNMODIFIED'))