# Analysing the redundancies of User Stories - Annotations but no US Text

In [3]:
import locale
import os, json
from itertools import combinations

import pandas as pd
import tiktoken
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils import get_column_letter as utils_get_column_letter

from prompt_structure.helper_prompt_composition import PromptHelperBuilder
from prompt_structure.prompt_builder import PromptBuilder

from support_functions.excel_helper import save_to_excel, formatter_ignored_items
from support_functions.load_data import load_datasets_with_annotations as loading
from support_functions.request_handler import process_user_stories, process_user_stories_parallel
from support_functions.data_transformations import convert_annotation_dataset
from support_functions.time_recorder import TimeRecorder
from support_functions.json_validator import validation, chat_gpt_schema_with_annotations

In [4]:
# German locale: used further below for the grouped formatting of the token total.
locale.setlocale(locale.LC_ALL, 'de_DE')
# Run configuration is read from environment variables.
MODEL_VERSION_NAME = os.getenv('MODEL_VERSION')
# THREADING has to be set to an integer string such as "0" or "1";
# int() raises if the variable is missing.
THREADING = bool(int(os.getenv('THREADING')))
print(f"Threading is used: {THREADING}")
print(f"Model Version: {MODEL_VERSION_NAME}")
# Single quotes inside the f-strings: reusing the outer quote character within
# the braces is a SyntaxError on Python versions before 3.12.
print(f"Threads: {os.getenv('THREAD_MULTIPLICATOR')}")
print(f"Limit: {os.getenv('LIMIT')}")

Threading is used: True
Model Version: gpt-3.5-turbo
Threads: 4
Limit: -1


In [5]:
# Maps a dataset id (e.g. "g03") to the TimeRecorder created for its processing run.
time_recorders: dict = {}
# Recorder of the dataset currently being processed; re-assigned in every processing cell.
time_recorder: TimeRecorder = None

### Load all User Stories from the different packages

We use here just the annotations (formatted):
- Triggers
- Targets
- Contains

We do not need:
- POS

Ignored files, as they differ from the common structure [Ref](https://github.com/ace-design/nlp-stories/tree/main?tab=readme-ov-file#note):
- g02-federal-funding
- g13-planningpoker
- g17-cask
- g27-culrepo

Ignored files, as they were used as training data:
- g05
- g12

Data sets to analyse:
 - 'g03', 'g04', 'g08', 'g10', 'g11', 'g14', 'g16', 'g18', 'g19', 'g21', 'g22', 'g23', 'g24', 'g25', 'g26', 'g28'

Used Data:
- "nlp" --> "nlp_outputs" --> "individual_backlog" --> "nlp_outputs_original" --> "pos_baseline"

Highlighting words with #

In [6]:
# Load the raw annotated backlogs; the result is keyed by dataset id (e.g. "g02").
not_processed_datasets: dict[str, list] = loading()
# Peek at the first raw entry of one dataset to inspect its structure.
print(not_processed_datasets["g02"][0])

{'PID': '#G02#', 'Text': '#G02# As a Data user, I want to have the 12-19-2017 deletions processed.', 'Persona': ['Data user'], 'Action': {'Primary Action': ['have'], 'Secondary Action': ['processed']}, 'Entity': {'Primary Entity': ['12-19-2017 deletions'], 'Secondary Entity': ['']}, 'Benefit': '', 'Triggers': [['Data user', 'have']], 'Targets': [['processed', '12-19-2017 deletions'], ['have', '12-19-2017 deletions']], 'Contains': [], 'Persona POS': {'Persona POS tag': [['NOUN', 'NOUN']], 'Persona POS text': [['Data', 'user']]}, 'Action POS': {'Primary Action POS': {'Primary Action POS tag': [['VERB']], 'Primary Action POS text': [['have']]}, 'Secondary Action POS': {'Secondary Action POS tag': [['VERB']], 'Secondary Action POS text': [['processed']]}}, 'Entity POS': {'Primary Entity POS': {'Primary Entity POS tag': [['NUM', 'NOUN']], 'Primary Entity POS text': [['12-19-2017', 'deletions']]}, 'Secondary Entity POS': {'Secondary Entity POS tag': [[]], 'Secondary Entity POS text': [[]]}}}

### Converting the data to a suitable format

In [7]:
# Convert the raw entries into the structure used in the rest of the notebook.
# `ignored_items` collects, per project id, descriptions of the entries that were
# left out (presumably stories with incomplete labelling - confirm in the helper).
datasets, ignored_items = convert_annotation_dataset(not_processed_datasets)

In [8]:
datasets.keys()

dict_keys(['g02', 'g03', 'g04', 'g05', 'g08', 'g10', 'g11', 'g12', 'g13', 'g14', 'g16', 'g17', 'g18', 'g19', 'g21', 'g22', 'g23', 'g24', 'g25', 'g26', 'g27', 'g28'])

In [9]:
datasets["g28"]

[{'PID': '#G28#',
  'USID': '1733',
  'Text': 'As a user, I want to search and discover music using content identification or fingerprinting.',
  'Main Part': '#G28# As a user, I want to search and discover music using content identification or fingerprinting.',
  'Benefit': '',
  'Triggers': {'Main Part': [['user', 'search'], ['user', 'discover']],
   'Benefit': []},
  'Targets': {'Main Part': [['search', 'music'], ['discover', 'music']],
   'Benefit': [['using', 'content identification'],
    ['using', 'fingerprinting']]},
  'Contains': {'Main Part': [], 'Benefit': []}},
 {'PID': '#G28#',
  'USID': '1734',
  'Text': 'As a user, I want to search and discover music based on my profile.',
  'Main Part': '#G28# As a user, I want to search and discover music based on my profile.',
  'Benefit': '',
  'Triggers': {'Main Part': [['user', 'search'], ['user', 'discover']],
   'Benefit': []},
  'Targets': {'Main Part': [['search', 'music'], ['discover', 'music']],
   'Benefit': []},
  'Contains

In [10]:
ignored_items["#G02#"][0]

'PID: #G02#; Text: As a UI designer, I want to redesign the Resources page, so that it matches the new Broker design styles.; Label Type: Contain'

In [11]:
# Flatten the ignored items into one table and persist it as an Excel sheet.
# Every item is a string of the form "PID: ...; Text: ...; Label Type: ...".
# NOTE(review): a story text that itself contains "; " would break this split -
# the current data parses cleanly, confirm if the input format changes.
ignored_columns: list[str] = ["PID", "Text", "Label Type"]
ignored_records: list[dict] = []
# Distinct loop names: the previous version bound the builtin name `set` (and
# re-bound `key` in the inner loop) in the notebook's global namespace.
for ignored_group in ignored_items.values():
    for item in ignored_group:
        # Fresh dict per item so that no field can leak over from the previous item.
        record: dict = {}
        for part in item.split('; '):
            field, value = part.split(': ', 1)
            record[field] = value
        ignored_records.append(record)

# Build the frame once instead of concatenating row by row (quadratic copying);
# `columns` also guarantees the expected header when there are no ignored items.
df_ignored_items: pd.DataFrame = pd.DataFrame(ignored_records, columns=ignored_columns)
save_to_excel(df_ignored_items, formatter_ignored_items, "Incomplete US labelling", os.getenv("OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS"))
print(f"Count of ignored Elements: {df_ignored_items.shape[0]}")
df_ignored_items.head()

Count of ignored Elements: 195


Unnamed: 0,PID,Text,Label Type
0,#G02#,"As a UI designer, I want to redesign the Resou...",Contain
1,#G02#,"As a UI designer, I want to track the issues t...",Target
2,#G02#,"As a UI designer, I want to track the issues t...",Target
3,#G02#,"As a UI designer, I want to track the issues t...",Target
4,#G02#,"As an agency user, I want to submit my data el...",Contain


### Data transformation to pandas DataFrames and NumPy arrays

In [12]:
def transform_2_dataframe(data: list):
    """Build a DataFrame with one row per user story entry.

    Only the keys PID, USID, Text, Main Part, Benefit, Triggers, Targets and
    Contains are copied, in exactly this column order. Any further keys of an
    entry are dropped; a missing key raises ``KeyError``.
    """
    wanted = ("PID", "USID", "Text", "Main Part", "Benefit", "Triggers", "Targets", "Contains")
    records = [{name: story[name] for name in wanted} for story in data]
    return pd.DataFrame(records)

In [13]:
# Every unordered pair of stories appears exactly once: n * (n - 1) / 2 rows.
def transform_pairwise(df: pd.DataFrame):
    """Combine the rows of ``df`` into one row per pair of user stories.

    The first eight columns of ``df`` are read by position and written twice
    into each result row, once with the prefix "First" and once with the
    prefix "Second". Pairs follow the order of ``itertools.combinations`` over
    the row positions.
    """
    labels = ("PID", "USID", "Text", "Main Part", "Benefit", "Triggers", "Targets", "Contains")
    pair_rows = []
    for first, second in combinations(range(df.shape[0]), 2):
        pair = {}
        for prefix, position in (("First", first), ("Second", second)):
            for column, label in enumerate(labels):
                pair[f"{prefix} {label}"] = df.iloc[position, column]
        pair_rows.append(pair)
    return pd.DataFrame(pair_rows)

In [14]:
def convert_dataframe_to_json(pair: pd.Series) -> tuple[dict, dict]:
    """Extract the two annotation-only story payloads from one pair row.

    For each story only USID, Triggers, Targets and Contains are kept; the
    story texts are left out. Both payloads go through a JSON round trip, so
    the returned dicts contain plain JSON-compatible values only.
    """
    fields = pair.to_dict()
    kept = ("USID", "Triggers", "Targets", "Contains")

    def pick(prefix: str) -> dict:
        return {name: fields[f"{prefix} {name}"] for name in kept}

    first_story = pick("First")
    second_story = pick("Second")

    first_json: dict = json.loads(json.dumps(first_story))
    second_json: dict = json.loads(json.dumps(second_story))
    return first_json, second_json

In [15]:
def template_request_two_user_stories(
    current_message: list[dict],
    idx: int,
    pairs: pd.DataFrame,
) -> None:
    """Append the request for the story pair at position ``idx`` to ``current_message``.

    The row is reduced to its two annotation payloads, which the
    PromptHelperBuilder turns into a single request entry. The list is
    modified in place; nothing is returned.
    """
    first_story, second_story = convert_dataframe_to_json(pairs.iloc[idx])
    helper = PromptHelperBuilder.get_instance()
    current_message.append(helper.parsing_to_pair_requests(first_story, second_story))

In [16]:
def sort_threaded_results(to_sort: dict) -> None:
    """Order every result list in ``to_sort`` in place by its related story ids.

    The two entries of each result's 'relatedStories' are compared as integers
    (first id, then second id). The list objects themselves are kept; only
    their content is replaced by the sorted sequence.
    """
    def story_ids(result) -> tuple:
        return int(result['relatedStories'][0]), int(result['relatedStories'][1])

    for results in to_sort.values():
        ordered = sorted(results, key=story_ids)
        results.clear()
        results += ordered

In [17]:
def json_validator(json_data: dict) -> tuple[bool, str]:
    """Check ``json_data`` against the annotation schema of this notebook.

    Delegates to ``validation`` with ``chat_gpt_schema_with_annotations`` and
    passes its result through unchanged.
    """
    outcome = validation(json_data, chat_gpt_schema_with_annotations)
    return outcome

### Prepare Prompting

In [18]:
# Shared PromptBuilder instance that supplies the individual prompt parts.
builder: PromptBuilder = PromptBuilder.get_instance()

# Ordered collection of chat messages forming the common prompt;
# the following cells append to it and the processing cells pass it on.
message: list[dict] = []

In [19]:
# Actor role, followed by its `system_simulation` counterpart
# (presumably the simulated reply to that prompt part - confirm in PromptBuilder).
message.append(builder.get_actor_role())
message.append(builder.get_system_simulation_actor_role())

In [20]:
# User story definition, followed by its `system_simulation` counterpart.
message.append(builder.get_user_story_definition())
message.append(builder.get_system_simulation_user_story_definition())

In [21]:
# Task and focus definition, followed by its `system_simulation` counterpart.
message.append(builder.get_task_focus_definition())
message.append(builder.get_system_simulation_task_focus_definition())

In [22]:
# Definition of full and partial redundancy for a user story pair,
# followed by its `system_simulation` counterpart.
message.append(builder.get_redundancy_full_partial_definition())
message.append(builder.get_system_simulation_redundancy_full_partial_definition())

In [23]:
# Definition of the expected JSON output, followed by its `system_simulation` counterpart.
message.append(builder.get_json_format_defintion())
message.append(builder.get_system_simulation_json_format_defintion())

In [24]:
# Input/output examples restricted to the annotation fields - the same fields
# that convert_dataframe_to_json keeps for each story (no story text).
message.append(builder.get_input_output_examples(["USID", "Triggers", "Targets", "Contains"]))
message.append(builder.get_system_simulation_example_consideration())

In [25]:
# Print the assembled prompt and report how many tokens its text occupies.
for part in message:
    print(part["content"])
message_text: str = "".join(part["content"] for part in message)

enc = tiktoken.get_encoding('cl100k_base')
# Name kept for compatibility: `token_size` holds the list of token ids.
token_size = enc.encode(message_text)
# The size of the prompt is the NUMBER of token ids. The previous version summed
# the ids themselves, which is why its total never matched the online tokenizer.
token_count: int = len(token_size)
print("-" * 3 + "Token" + "-" * 3)
print(token_size)
print("-" * 3 + "Sum of Tokens" + "-" * 3)
# NOTE(review): only the concatenated message contents are counted; the chat
# format presumably adds a few tokens per message, so the billed total can be
# slightly higher - confirm against the API usage data.
print("The total token sum is: " + locale.format_string("%d", token_count, grouping=True))
    

Act as a Requirements Engineer focused on identifying redundancies. Please review pairs of two User Stories and pinpoint any unnecessary duplications that obscure clarity or add no distinct value.
As a requirements engineer in agile development, it is my responsibility to review user stories for redundancies. My goal is to identify and report any overlapping or duplicate requirements. By carefully analysing the user stories in depth, I ensure that each requirement is necessary and contributes uniquely, increasing the coherence of the product.
A user story is a semi-structured sentence containing the following information:
(1) the persona involved in the story,
(2) the main part containing the actions that the persona will perform on the system and the entities involved in the actions, and optionally
(3) a benefit that the persona will receive after having completed these actions. The benefit may also include actions and entities.
Classically, a user story is expressed in the following 

## Data processing for: G03

In [24]:
# Select the dataset, tabulate its stories and build every pair of stories.
ID_G03: str = "g03"
data_g03: list = datasets[ID_G03]
df_g03: pd.DataFrame = transform_2_dataframe(data_g03)
df_g03_pairs = transform_pairwise(df_g03)

In [25]:
datasets[ID_G03][0]

{'PID': '#G03#',
 'USID': '315',
 'Text': 'As a Public User, I want to Search for Information, so that I can obtain publicly available information concerning properties, County services, processes and other general information.',
 'Main Part': 'As a Public User, I want to Search for Information',
 'Benefit': 'I can obtain publicly available information concerning properties, County services, processes and other general information',
 'Triggers': {'Main Part': [['Public User', 'Search']], 'Benefit': []},
 'Targets': {'Main Part': [['Search', 'Information']],
  'Benefit': [['obtain', 'publicly available information']]},
 'Contains': {'Main Part': [],
  'Benefit': [['publicly available information', 'properties'],
   ['publicly available information', 'County services'],
   ['publicly available information', 'processes'],
   ['publicly available information', 'other general information']]}}

In [26]:
display(df_g03.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G03#,315,"As a Public User, I want to Search for Informa...","As a Public User, I want to Search for Informa...",I can obtain publicly available information co...,"{'Main Part': [['Public User', 'Search']], 'Be...","{'Main Part': [['Search', 'Information']], 'Be...","{'Main Part': [], 'Benefit': [['publicly avail..."
1,#G03#,316,"As a ProspectiveApplicant, I want to research ...","As a ProspectiveApplicant, I want to research ...",I can find the relevant service and/or applica...,"{'Main Part': [['ProspectiveApplicant', 'resea...","{'Main Part': [['research', 'requirements'], [...","{'Main Part': [], 'Benefit': [['online portal'..."
2,#G03#,317,"As an Applicant, I want to Request PreApplicat...","As an Applicant, I want to Request PreApplicat...",I can receive a response to a request for a me...,"{'Main Part': [['Applicant', 'Request']], 'Ben...","{'Main Part': [['Request', 'PreApplication Ass...","{'Main Part': [], 'Benefit': [['request for a ..."
3,#G03#,318,"As a Customer, I want to Create a Customer Por...","As a Customer, I want to Create a Customer Por...",I can log on to the Customer Portal and perfor...,"{'Main Part': [['Customer', 'Create']], 'Benef...","{'Main Part': [['Create', 'Customer Portal Use...","{'Main Part': [['Customer Portal', 'Customer P..."
4,#G03#,319,"As an Applicant, I want to Submit Application,...","As an Applicant, I want to Submit Application","I can provide my information, plans and/or doc...","{'Main Part': [['Applicant', 'Submit']], 'Bene...","{'Main Part': [['Submit', 'Application']], 'Be...","{'Main Part': [], 'Benefit': []}"


In [27]:
display(df_g03_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G03#,315,"As a Public User, I want to Search for Informa...","As a Public User, I want to Search for Informa...",I can obtain publicly available information co...,"{'Main Part': [['Public User', 'Search']], 'Be...","{'Main Part': [['Search', 'Information']], 'Be...","{'Main Part': [], 'Benefit': [['publicly avail...",#G03#,316,"As a ProspectiveApplicant, I want to research ...","As a ProspectiveApplicant, I want to research ...",I can find the relevant service and/or applica...,"{'Main Part': [['ProspectiveApplicant', 'resea...","{'Main Part': [['research', 'requirements'], [...","{'Main Part': [], 'Benefit': [['online portal'..."
1,#G03#,315,"As a Public User, I want to Search for Informa...","As a Public User, I want to Search for Informa...",I can obtain publicly available information co...,"{'Main Part': [['Public User', 'Search']], 'Be...","{'Main Part': [['Search', 'Information']], 'Be...","{'Main Part': [], 'Benefit': [['publicly avail...",#G03#,317,"As an Applicant, I want to Request PreApplicat...","As an Applicant, I want to Request PreApplicat...",I can receive a response to a request for a me...,"{'Main Part': [['Applicant', 'Request']], 'Ben...","{'Main Part': [['Request', 'PreApplication Ass...","{'Main Part': [], 'Benefit': [['request for a ..."
2,#G03#,315,"As a Public User, I want to Search for Informa...","As a Public User, I want to Search for Informa...",I can obtain publicly available information co...,"{'Main Part': [['Public User', 'Search']], 'Be...","{'Main Part': [['Search', 'Information']], 'Be...","{'Main Part': [], 'Benefit': [['publicly avail...",#G03#,318,"As a Customer, I want to Create a Customer Por...","As a Customer, I want to Create a Customer Por...",I can log on to the Customer Portal and perfor...,"{'Main Part': [['Customer', 'Create']], 'Benef...","{'Main Part': [['Create', 'Customer Portal Use...","{'Main Part': [['Customer Portal', 'Customer P..."
3,#G03#,315,"As a Public User, I want to Search for Informa...","As a Public User, I want to Search for Informa...",I can obtain publicly available information co...,"{'Main Part': [['Public User', 'Search']], 'Be...","{'Main Part': [['Search', 'Information']], 'Be...","{'Main Part': [], 'Benefit': [['publicly avail...",#G03#,319,"As an Applicant, I want to Submit Application,...","As an Applicant, I want to Submit Application","I can provide my information, plans and/or doc...","{'Main Part': [['Applicant', 'Submit']], 'Bene...","{'Main Part': [['Submit', 'Application']], 'Be...","{'Main Part': [], 'Benefit': []}"
4,#G03#,315,"As a Public User, I want to Search for Informa...","As a Public User, I want to Search for Informa...",I can obtain publicly available information co...,"{'Main Part': [['Public User', 'Search']], 'Be...","{'Main Part': [['Search', 'Information']], 'Be...","{'Main Part': [], 'Benefit': [['publicly avail...",#G03#,320,"As an Applicant, I want to Submit Supporting D...","As an Applicant, I want to Submit Supporting D...",I can satisfy documentation requirements for m...,"{'Main Part': [['Applicant', 'Submit']], 'Bene...","{'Main Part': [['Submit', 'Supporting Document...","{'Main Part': [], 'Benefit': [['application', ..."


### Process and Store the User Stories persistently

In [28]:
# Fresh recorder for this dataset, registered under the dataset id.
time_recorder = TimeRecorder()
time_recorders[ID_G03] = time_recorder

# index_usid1=1 / index_usid2=9 are the positions of the "First USID" and
# "Second USID" columns in the frame built by transform_pairwise.
# NOTE(review): redundancy_prefix="annoations" looks like a misspelling of
# "annotations"; it is a runtime value (presumably part of output names), so
# confirm the consumers before changing it.
if THREADING:
    process_user_stories_parallel(index_usid1=1, index_usid2=9, message=message, pairs=df_g03_pairs, key=ID_G03, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories,
                                  sort_threaded_results=sort_threaded_results, json_schema=chat_gpt_schema_with_annotations, redundancy_prefix="annoations", time_recorder=time_recorder)
else:
    process_user_stories(index_usid1=1, index_usid2=9, message=message, pairs=df_g03_pairs, key=ID_G03, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories, 
                         json_validation=json_validator, redundancy_prefix="annoations", time_recorder=time_recorder)

## Data processing for: G04

In [29]:
# Select the dataset, tabulate its stories and build every pair of stories.
ID_G04: str = "g04"
data_g04: list = datasets[ID_G04]
df_g04: pd.DataFrame = transform_2_dataframe(data_g04)
df_g04_pairs = transform_pairwise(df_g04)

In [30]:
datasets[ID_G04][0]

{'PID': '#G04#',
 'USID': '164',
 'Text': 'As a user, I want to click on the address, so that it takes me to a new tab with Google Maps.',
 'Main Part': 'As a user, I want to click on the address',
 'Benefit': 'it takes me to a new tab with Google Maps',
 'Triggers': {'Main Part': [['user', 'click']], 'Benefit': []},
 'Targets': {'Main Part': [['click', 'address']], 'Benefit': []},
 'Contains': {'Main Part': [], 'Benefit': [['new tab', 'Google Maps']]}}

In [31]:
display(df_g04.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G04#,164,"As a user, I want to click on the address, so ...","As a user, I want to click on the address",it takes me to a new tab with Google Maps,"{'Main Part': [['user', 'click']], 'Benefit': []}","{'Main Part': [['click', 'address']], 'Benefit...","{'Main Part': [], 'Benefit': [['new tab', 'Goo..."
1,#G04#,165,"As a user, I want to be able to anonymously vi...","As a user, I want to be able to anonymously vi...",I know about recycling centers near me before ...,"{'Main Part': [['user', 'anonymously view']], ...","{'Main Part': [['anonymously view', 'public in...","{'Main Part': [], 'Benefit': []}"
2,#G04#,166,"As a user, I want to be able to enter my zip c...","As a user, I want to be able to enter my zip c...",I can determine which ones I should consider,"{'Main Part': [['user', 'enter'], ['user', 'ge...","{'Main Part': [['enter', 'zip code'], ['get', ...","{'Main Part': [['list', 'nearby recycling faci..."
3,#G04#,167,"As a user, I want to be able to get the hours ...","As a user, I want to be able to get the hours ...",I can arrange drop-offs on my off days or duri...,"{'Main Part': [['user', 'get']], 'Benefit': []}","{'Main Part': [['get', 'hours']], 'Benefit': [...","{'Main Part': [['each recycling facility', 'ho..."
4,#G04#,168,"As a user, I want to have a flexible pick up t...","As a user, I want to have a flexible pick up time",I can more conveniently use the website,"{'Main Part': [['user', 'have']], 'Benefit': []}","{'Main Part': [['have', 'flexible pick up time...","{'Main Part': [], 'Benefit': []}"


In [32]:
display(df_g04_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G04#,164,"As a user, I want to click on the address, so ...","As a user, I want to click on the address",it takes me to a new tab with Google Maps,"{'Main Part': [['user', 'click']], 'Benefit': []}","{'Main Part': [['click', 'address']], 'Benefit...","{'Main Part': [], 'Benefit': [['new tab', 'Goo...",#G04#,165,"As a user, I want to be able to anonymously vi...","As a user, I want to be able to anonymously vi...",I know about recycling centers near me before ...,"{'Main Part': [['user', 'anonymously view']], ...","{'Main Part': [['anonymously view', 'public in...","{'Main Part': [], 'Benefit': []}"
1,#G04#,164,"As a user, I want to click on the address, so ...","As a user, I want to click on the address",it takes me to a new tab with Google Maps,"{'Main Part': [['user', 'click']], 'Benefit': []}","{'Main Part': [['click', 'address']], 'Benefit...","{'Main Part': [], 'Benefit': [['new tab', 'Goo...",#G04#,166,"As a user, I want to be able to enter my zip c...","As a user, I want to be able to enter my zip c...",I can determine which ones I should consider,"{'Main Part': [['user', 'enter'], ['user', 'ge...","{'Main Part': [['enter', 'zip code'], ['get', ...","{'Main Part': [['list', 'nearby recycling faci..."
2,#G04#,164,"As a user, I want to click on the address, so ...","As a user, I want to click on the address",it takes me to a new tab with Google Maps,"{'Main Part': [['user', 'click']], 'Benefit': []}","{'Main Part': [['click', 'address']], 'Benefit...","{'Main Part': [], 'Benefit': [['new tab', 'Goo...",#G04#,167,"As a user, I want to be able to get the hours ...","As a user, I want to be able to get the hours ...",I can arrange drop-offs on my off days or duri...,"{'Main Part': [['user', 'get']], 'Benefit': []}","{'Main Part': [['get', 'hours']], 'Benefit': [...","{'Main Part': [['each recycling facility', 'ho..."
3,#G04#,164,"As a user, I want to click on the address, so ...","As a user, I want to click on the address",it takes me to a new tab with Google Maps,"{'Main Part': [['user', 'click']], 'Benefit': []}","{'Main Part': [['click', 'address']], 'Benefit...","{'Main Part': [], 'Benefit': [['new tab', 'Goo...",#G04#,168,"As a user, I want to have a flexible pick up t...","As a user, I want to have a flexible pick up time",I can more conveniently use the website,"{'Main Part': [['user', 'have']], 'Benefit': []}","{'Main Part': [['have', 'flexible pick up time...","{'Main Part': [], 'Benefit': []}"
4,#G04#,164,"As a user, I want to click on the address, so ...","As a user, I want to click on the address",it takes me to a new tab with Google Maps,"{'Main Part': [['user', 'click']], 'Benefit': []}","{'Main Part': [['click', 'address']], 'Benefit...","{'Main Part': [], 'Benefit': [['new tab', 'Goo...",#G04#,169,"As a user, I want to be able to select differe...","As a user, I want to be able to select differe...",I have and get a list of facilities that accep...,"{'Main Part': [['user', 'select']], 'Benefit':...","{'Main Part': [['select', 'different types of ...","{'Main Part': [], 'Benefit': [['list', 'facili..."


### Process and Store the User Stories persistently

In [33]:
# Fresh recorder for this dataset, registered under the dataset id.
time_recorder = TimeRecorder()
time_recorders[ID_G04] = time_recorder

# index_usid1=1 / index_usid2=9 are the positions of the "First USID" and
# "Second USID" columns in the frame built by transform_pairwise.
# NOTE(review): redundancy_prefix="annoations" looks like a misspelling of
# "annotations"; it is a runtime value (presumably part of output names), so
# confirm the consumers before changing it.
if THREADING:
    process_user_stories_parallel(index_usid1=1, index_usid2=9, message=message, pairs=df_g04_pairs, key=ID_G04, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories,
                                  sort_threaded_results=sort_threaded_results, json_schema=chat_gpt_schema_with_annotations, redundancy_prefix="annoations", time_recorder=time_recorder)
else:
    process_user_stories(index_usid1=1, index_usid2=9, message=message, pairs=df_g04_pairs, key=ID_G04, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories, 
                         json_validation=json_validator, redundancy_prefix="annoations", time_recorder=time_recorder)

## Data processing for: G08

In [34]:
# Select the dataset, tabulate its stories and build every pair of stories.
ID_G08: str = "g08"
data_g08: list = datasets[ID_G08]
df_g08: pd.DataFrame = transform_2_dataframe(data_g08)
df_g08_pairs = transform_pairwise(df_g08)

In [35]:
datasets[ID_G08][0]

{'PID': '#G08#',
 'USID': '426',
 'Text': 'As a Developer, I want to get a Data Package into Node, so that I can start using the data for doing analysis and visualizations.',
 'Main Part': 'As a Developer, I want to get a Data Package into Node',
 'Benefit': 'I can start using the data for doing analysis and visualizations',
 'Triggers': {'Main Part': [['Developer', 'get']], 'Benefit': []},
 'Targets': {'Main Part': [['get', 'Data Package']],
  'Benefit': [['doing', 'analysis'],
   ['doing', 'visualizations'],
   ['start using', 'data']]},
 'Contains': {'Main Part': [['Node', 'Data Package']], 'Benefit': []}}

In [36]:
display(df_g08.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G08#,426,"As a Developer, I want to get a Data Package i...","As a Developer, I want to get a Data Package i...",I can start using the data for doing analysis ...,"{'Main Part': [['Developer', 'get']], 'Benefit...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Node', 'Data Package']], 'Ben..."
1,#G08#,427,"As a Researcher, I want to get a Data Package ...","As a Researcher, I want to get a Data Package ...",I can start using the data for doing analysis ...,"{'Main Part': [['Researcher', 'get']], 'Benefi...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Julia', 'Data Package']], 'Be..."
2,#G08#,428,"As a Publisher, I want to add type information...","As a Publisher, I want to add type information...",it is more useful to others and can be used be...,"{'Main Part': [['Publisher', 'add']], 'Benefit...","{'Main Part': [['add', 'type information']], '...","{'Main Part': [['data', 'type information']], ..."
3,#G08#,429,"As a Publisher, I want to be able to provide a...","As a Publisher, I want to be able to provide a...",I can provide my analysis and show my work to ...,"{'Main Part': [['Publisher', 'provide']], 'Ben...","{'Main Part': [['provide', 'visualization']], ...","{'Main Part': [['visualization', 'data']], 'Be..."
4,#G08#,430,"As a Researcher, I want to be able to save new...","As a Researcher, I want to be able to save new...",I can share them with others or include them i...,"{'Main Part': [['Researcher', 'save']], 'Benef...","{'Main Part': [['save', 'new visualizations']]...","{'Main Part': [], 'Benefit': []}"


In [37]:
display(df_g08_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G08#,426,"As a Developer, I want to get a Data Package i...","As a Developer, I want to get a Data Package i...",I can start using the data for doing analysis ...,"{'Main Part': [['Developer', 'get']], 'Benefit...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Node', 'Data Package']], 'Ben...",#G08#,427,"As a Researcher, I want to get a Data Package ...","As a Researcher, I want to get a Data Package ...",I can start using the data for doing analysis ...,"{'Main Part': [['Researcher', 'get']], 'Benefi...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Julia', 'Data Package']], 'Be..."
1,#G08#,426,"As a Developer, I want to get a Data Package i...","As a Developer, I want to get a Data Package i...",I can start using the data for doing analysis ...,"{'Main Part': [['Developer', 'get']], 'Benefit...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Node', 'Data Package']], 'Ben...",#G08#,428,"As a Publisher, I want to add type information...","As a Publisher, I want to add type information...",it is more useful to others and can be used be...,"{'Main Part': [['Publisher', 'add']], 'Benefit...","{'Main Part': [['add', 'type information']], '...","{'Main Part': [['data', 'type information']], ..."
2,#G08#,426,"As a Developer, I want to get a Data Package i...","As a Developer, I want to get a Data Package i...",I can start using the data for doing analysis ...,"{'Main Part': [['Developer', 'get']], 'Benefit...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Node', 'Data Package']], 'Ben...",#G08#,429,"As a Publisher, I want to be able to provide a...","As a Publisher, I want to be able to provide a...",I can provide my analysis and show my work to ...,"{'Main Part': [['Publisher', 'provide']], 'Ben...","{'Main Part': [['provide', 'visualization']], ...","{'Main Part': [['visualization', 'data']], 'Be..."
3,#G08#,426,"As a Developer, I want to get a Data Package i...","As a Developer, I want to get a Data Package i...",I can start using the data for doing analysis ...,"{'Main Part': [['Developer', 'get']], 'Benefit...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Node', 'Data Package']], 'Ben...",#G08#,430,"As a Researcher, I want to be able to save new...","As a Researcher, I want to be able to save new...",I can share them with others or include them i...,"{'Main Part': [['Researcher', 'save']], 'Benef...","{'Main Part': [['save', 'new visualizations']]...","{'Main Part': [], 'Benefit': []}"
4,#G08#,426,"As a Developer, I want to get a Data Package i...","As a Developer, I want to get a Data Package i...",I can start using the data for doing analysis ...,"{'Main Part': [['Developer', 'get']], 'Benefit...","{'Main Part': [['get', 'Data Package']], 'Bene...","{'Main Part': [['Node', 'Data Package']], 'Ben...",#G08#,431,"As a ResearcherPublisher, I want to know that ...","As a ResearcherPublisher, I want to know that ...",I can feel trust in the validity and usefulnes...,"{'Main Part': [['ResearcherPublisher', 'know']...","{'Main Part': [['know', 'data']], 'Benefit': [...","{'Main Part': [], 'Benefit': [['data.', 'usefu..."


### Process and Store the User Stories persistently

In [38]:
# Fresh recorder for this dataset, registered under the dataset id.
time_recorder = TimeRecorder()
time_recorders[ID_G08] = time_recorder

# index_usid1=1 / index_usid2=9 are the positions of the "First USID" and
# "Second USID" columns in the frame built by transform_pairwise.
# NOTE(review): redundancy_prefix="annoations" looks like a misspelling of
# "annotations"; it is a runtime value (presumably part of output names), so
# confirm the consumers before changing it.
if THREADING:
    process_user_stories_parallel(index_usid1=1, index_usid2=9, message=message, pairs=df_g08_pairs, key=ID_G08, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories,
                                  sort_threaded_results=sort_threaded_results, json_schema=chat_gpt_schema_with_annotations, redundancy_prefix="annoations", time_recorder=time_recorder)
else:
    process_user_stories(index_usid1=1, index_usid2=9, message=message, pairs=df_g08_pairs, key=ID_G08, model_version_name=MODEL_VERSION_NAME, template_request_two_user_stories=template_request_two_user_stories, 
                         json_validation=json_validator, redundancy_prefix="annoations", time_recorder=time_recorder)

## Data processing for: G10

In [39]:
# Key under which this project's stories are stored in `datasets`.
ID_G10: str = "g10"
# Raw annotated records of the data set (one dict per user story, see the sample below).
data_g10: list = datasets[ID_G10]
# Tabular view of the records: one row per user story.
df_g10: pd.DataFrame = transform_2_dataframe(data_g10)
# Pairwise view with "First ..." / "Second ..." columns; this is the input of the processing
# step. Presumably one row per story combination — verify against transform_pairwise.
df_g10_pairs = transform_pairwise(df_g10)

In [40]:
# Peek at the first raw record to check the annotation structure (Triggers / Targets / Contains).
datasets[ID_G10][0]

{'PID': '#G10#',
 'USID': '493',
 'Text': 'As a site member, I want to describe myself on my own page in a semi-structured way, so that others can learn about me.',
 'Main Part': 'As a site member, I want to describe myself on my own page in a semi-structured way',
 'Benefit': 'others can learn about me',
 'Triggers': {'Main Part': [['site member', 'describe']], 'Benefit': []},
 'Targets': {'Main Part': [['describe', 'myself']], 'Benefit': []},
 'Contains': {'Main Part': [],
  'Benefit': [['own page', 'semi-structured way']]}}

In [41]:
# Preview the first five user stories of the data set as a table.
display(df_g10.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G10#,493,"As a site member, I want to describe myself on...","As a site member, I want to describe myself on...",others can learn about me,"{'Main Part': [['site member', 'describe']], '...","{'Main Part': [['describe', 'myself']], 'Benef...","{'Main Part': [], 'Benefit': [['own page', 'se..."
1,#G10#,494,"As a site member, I want to fill out an applic...","As a site member, I want to fill out an applic...",I can earn that designation,"{'Main Part': [['site member', 'fill out']], '...","{'Main Part': [['fill out', 'application']], '...",{'Main Part': [['Certified Scrum Practitioner'...
2,#G10#,495,"As a Practitioner, I want to include additiona...","As a Practitioner, I want to include additiona...",I can showcase my experience,"{'Main Part': [['Practitioner', 'include']], '...","{'Main Part': [['include', 'additional details...","{'Main Part': [['profile page', 'additional de..."
3,#G10#,496,"As a site member, I want to fill out an applic...","As a site member, I want to fill out an applic...",I can teach CSM and CSPO courses and certify o...,"{'Main Part': [['site member', 'fill out']], '...","{'Main Part': [['fill out', 'application']], '...","{'Main Part': [['Certified Scrum Trainer', 'ap..."
4,#G10#,497,"As a Trainer, I want to include additional det...","As a Trainer, I want to include additional det...",others can learn about me and decide if I am t...,"{'Main Part': [['Trainer', 'include']], 'Benef...","{'Main Part': [['include', 'additional details...","{'Main Part': [['profile page about me', 'addi..."


In [42]:
# Preview the first story pairs (head() defaults to five rows).
display(df_g10_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G10#,493,"As a site member, I want to describe myself on...","As a site member, I want to describe myself on...",others can learn about me,"{'Main Part': [['site member', 'describe']], '...","{'Main Part': [['describe', 'myself']], 'Benef...","{'Main Part': [], 'Benefit': [['own page', 'se...",#G10#,494,"As a site member, I want to fill out an applic...","As a site member, I want to fill out an applic...",I can earn that designation,"{'Main Part': [['site member', 'fill out']], '...","{'Main Part': [['fill out', 'application']], '...",{'Main Part': [['Certified Scrum Practitioner'...
1,#G10#,493,"As a site member, I want to describe myself on...","As a site member, I want to describe myself on...",others can learn about me,"{'Main Part': [['site member', 'describe']], '...","{'Main Part': [['describe', 'myself']], 'Benef...","{'Main Part': [], 'Benefit': [['own page', 'se...",#G10#,495,"As a Practitioner, I want to include additiona...","As a Practitioner, I want to include additiona...",I can showcase my experience,"{'Main Part': [['Practitioner', 'include']], '...","{'Main Part': [['include', 'additional details...","{'Main Part': [['profile page', 'additional de..."
2,#G10#,493,"As a site member, I want to describe myself on...","As a site member, I want to describe myself on...",others can learn about me,"{'Main Part': [['site member', 'describe']], '...","{'Main Part': [['describe', 'myself']], 'Benef...","{'Main Part': [], 'Benefit': [['own page', 'se...",#G10#,496,"As a site member, I want to fill out an applic...","As a site member, I want to fill out an applic...",I can teach CSM and CSPO courses and certify o...,"{'Main Part': [['site member', 'fill out']], '...","{'Main Part': [['fill out', 'application']], '...","{'Main Part': [['Certified Scrum Trainer', 'ap..."
3,#G10#,493,"As a site member, I want to describe myself on...","As a site member, I want to describe myself on...",others can learn about me,"{'Main Part': [['site member', 'describe']], '...","{'Main Part': [['describe', 'myself']], 'Benef...","{'Main Part': [], 'Benefit': [['own page', 'se...",#G10#,497,"As a Trainer, I want to include additional det...","As a Trainer, I want to include additional det...",others can learn about me and decide if I am t...,"{'Main Part': [['Trainer', 'include']], 'Benef...","{'Main Part': [['include', 'additional details...","{'Main Part': [['profile page about me', 'addi..."
4,#G10#,493,"As a site member, I want to describe myself on...","As a site member, I want to describe myself on...",others can learn about me,"{'Main Part': [['site member', 'describe']], '...","{'Main Part': [['describe', 'myself']], 'Benef...","{'Main Part': [], 'Benefit': [['own page', 'se...",#G10#,498,"As a Practitioner, I want to write an article ...","As a Practitioner, I want to write an article ...",others know my certifications when reading,"{'Main Part': [['Practitioner', 'write']], 'Be...","{'Main Part': [['write', 'article']], 'Benefit...","{'Main Part': [], 'Benefit': [['site', 'articl..."


### Process and Store the User Stories persistently

In [43]:
# Create a new TimeRecorder for this data set and register it under the data set key.
time_recorder = time_recorders[ID_G10] = TimeRecorder()

# index_usid1 / index_usid2: 1 and 9 correspond to the positions of the "First USID" and
# "Second USID" columns in the pairs frame — presumably positional column indices;
# confirm in support_functions.request_handler.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept as-is because the
# prefix is a runtime value that stored results may depend on.
# NOTE(review): the two branches hand over different validation arguments
# (json_schema vs. json_validation) — confirm this is intended.
if THREADING:
    process_user_stories_parallel(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g10_pairs,
        key=ID_G10,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g10_pairs,
        key=ID_G10,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G11

In [44]:
# Key under which this project's stories are stored in `datasets`.
ID_G11: str = "g11"
# Raw annotated records of the data set (one dict per user story, see the sample below).
data_g11: list = datasets[ID_G11]
# Tabular view of the records: one row per user story.
df_g11: pd.DataFrame = transform_2_dataframe(data_g11)
# Pairwise view with "First ..." / "Second ..." columns; this is the input of the processing
# step. Presumably one row per story combination — verify against transform_pairwise.
df_g11_pairs = transform_pairwise(df_g11)

In [45]:
# Peek at the first raw record to check the annotation structure (Triggers / Targets / Contains).
# NOTE(review): in the stored output this record's 'Main Part' starts with the PID marker
# ('#G11# ...') and 'Benefit' is empty — check whether the loader should strip that prefix.
datasets[ID_G11][0]

{'PID': '#G11#',
 'USID': '590',
 'Text': 'As a team member, I want to see the first iteration of beta up on cloud.gov.',
 'Main Part': '#G11# As a team member, I want to see the first iteration of beta up on cloud.gov.',
 'Benefit': '',
 'Triggers': {'Main Part': [['team member', 'see']], 'Benefit': []},
 'Targets': {'Main Part': [['see', 'first iteration']], 'Benefit': []},
 'Contains': {'Main Part': [['beta', 'first iteration']],
  'Benefit': [['cloud.gov', 'beta']]}}

In [46]:
# Preview the first five user stories of the data set as a table.
display(df_g11.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G11#,590,"As a team member, I want to see the first iter...","#G11# As a team member, I want to see the firs...",,"{'Main Part': [['team member', 'see']], 'Benef...","{'Main Part': [['see', 'first iteration']], 'B...","{'Main Part': [['beta', 'first iteration']], '..."
1,#G11#,591,"As a developer, I want to have the subdomain b...","As a developer, I want to have the subdomain b...",I can deploy a beta site to it,"{'Main Part': [['developer', 'have']], 'Benefi...","{'Main Part': [['have', 'subdomain beta.nsf.go...","{'Main Part': [], 'Benefit': []}"
2,#G11#,592,"As a stakeholder, I want to have a decision on...","#G11# As a stakeholder, I want to have a decis...",,"{'Main Part': [['stakeholder', 'have']], 'Bene...","{'Main Part': [['have', 'decision']], 'Benefit...","{'Main Part': [], 'Benefit': [['beta', 'blog a..."
3,#G11#,593,"As an NSF employee, I want to understand the p...","#G11# As an NSF employee, I want to understand...",,"{'Main Part': [['NSF employee', 'understand']]...","{'Main Part': [['understand', 'process']], 'Be...","{'Main Part': [], 'Benefit': [['cloud.gov host..."
4,#G11#,594,"As a stakeholder, I want to see the results of...","As a stakeholder, I want to see the results of...",I can determine if the CMS is right for me,"{'Main Part': [['stakeholder', 'see']], 'Benef...","{'Main Part': [['see', 'results']], 'Benefit':...","{'Main Part': [['user research', 'results']], ..."


In [47]:
# Preview the first story pairs (head() defaults to five rows).
display(df_g11_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G11#,590,"As a team member, I want to see the first iter...","#G11# As a team member, I want to see the firs...",,"{'Main Part': [['team member', 'see']], 'Benef...","{'Main Part': [['see', 'first iteration']], 'B...","{'Main Part': [['beta', 'first iteration']], '...",#G11#,591,"As a developer, I want to have the subdomain b...","As a developer, I want to have the subdomain b...",I can deploy a beta site to it,"{'Main Part': [['developer', 'have']], 'Benefi...","{'Main Part': [['have', 'subdomain beta.nsf.go...","{'Main Part': [], 'Benefit': []}"
1,#G11#,590,"As a team member, I want to see the first iter...","#G11# As a team member, I want to see the firs...",,"{'Main Part': [['team member', 'see']], 'Benef...","{'Main Part': [['see', 'first iteration']], 'B...","{'Main Part': [['beta', 'first iteration']], '...",#G11#,592,"As a stakeholder, I want to have a decision on...","#G11# As a stakeholder, I want to have a decis...",,"{'Main Part': [['stakeholder', 'have']], 'Bene...","{'Main Part': [['have', 'decision']], 'Benefit...","{'Main Part': [], 'Benefit': [['beta', 'blog a..."
2,#G11#,590,"As a team member, I want to see the first iter...","#G11# As a team member, I want to see the firs...",,"{'Main Part': [['team member', 'see']], 'Benef...","{'Main Part': [['see', 'first iteration']], 'B...","{'Main Part': [['beta', 'first iteration']], '...",#G11#,593,"As an NSF employee, I want to understand the p...","#G11# As an NSF employee, I want to understand...",,"{'Main Part': [['NSF employee', 'understand']]...","{'Main Part': [['understand', 'process']], 'Be...","{'Main Part': [], 'Benefit': [['cloud.gov host..."
3,#G11#,590,"As a team member, I want to see the first iter...","#G11# As a team member, I want to see the firs...",,"{'Main Part': [['team member', 'see']], 'Benef...","{'Main Part': [['see', 'first iteration']], 'B...","{'Main Part': [['beta', 'first iteration']], '...",#G11#,594,"As a stakeholder, I want to see the results of...","As a stakeholder, I want to see the results of...",I can determine if the CMS is right for me,"{'Main Part': [['stakeholder', 'see']], 'Benef...","{'Main Part': [['see', 'results']], 'Benefit':...","{'Main Part': [['user research', 'results']], ..."
4,#G11#,590,"As a team member, I want to see the first iter...","#G11# As a team member, I want to see the firs...",,"{'Main Part': [['team member', 'see']], 'Benef...","{'Main Part': [['see', 'first iteration']], 'B...","{'Main Part': [['beta', 'first iteration']], '...",#G11#,595,"As a designer, I want to have a new social med...","As a designer, I want to have a new social med...",I can begin implementing design,"{'Main Part': [['designer', 'have']], 'Benefit...","{'Main Part': [['have', 'new social media wire...","{'Main Part': [['new social media wireframe', ..."


### Process and Store the User Stories persistently

In [48]:
# Create a new TimeRecorder for this data set and register it under the data set key.
time_recorder = time_recorders[ID_G11] = TimeRecorder()

# index_usid1 / index_usid2: 1 and 9 correspond to the positions of the "First USID" and
# "Second USID" columns in the pairs frame — presumably positional column indices;
# confirm in support_functions.request_handler.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept as-is because the
# prefix is a runtime value that stored results may depend on.
# NOTE(review): the two branches hand over different validation arguments
# (json_schema vs. json_validation) — confirm this is intended.
if THREADING:
    process_user_stories_parallel(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g11_pairs,
        key=ID_G11,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g11_pairs,
        key=ID_G11,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G14

In [49]:
# Key under which this project's stories are stored in `datasets`.
ID_G14: str = "g14"
# Raw annotated records of the data set (one dict per user story, see the sample below).
data_g14: list = datasets[ID_G14]
# Tabular view of the records: one row per user story.
df_g14: pd.DataFrame = transform_2_dataframe(data_g14)
# Pairwise view with "First ..." / "Second ..." columns; this is the input of the processing
# step. Presumably one row per story combination — verify against transform_pairwise.
df_g14_pairs = transform_pairwise(df_g14)

In [50]:
# Peek at the first raw record to check the annotation structure (Triggers / Targets / Contains).
datasets[ID_G14][0]

{'PID': '#G14#',
 'USID': '717',
 'Text': 'As a Publisher, I want to publish a dataset, so that I can view just the dataset with a few people.',
 'Main Part': 'As a Publisher, I want to publish a dataset',
 'Benefit': 'I can view just the dataset with a few people',
 'Triggers': {'Main Part': [['Publisher', 'publish']], 'Benefit': []},
 'Targets': {'Main Part': [['publish', 'dataset']],
  'Benefit': [['view', 'dataset']]},
 'Contains': {'Main Part': [], 'Benefit': []}}

In [51]:
# Preview the first five user stories of the data set as a table.
display(df_g14.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G14#,717,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can view just the dataset with a few people,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': []}"
1,#G14#,718,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can share the dataset publicly with everyone,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': [['everyone', 'da..."
2,#G14#,719,"As a Publisher, I want to sign up for an accou...","As a Publisher, I want to sign up for an account",I can publish my data package to the registry ...,"{'Main Part': [['Publisher', 'sign up']], 'Ben...","{'Main Part': [['sign up', 'account']], 'Benef...","{'Main Part': [], 'Benefit': [['registry', 'da..."
3,#G14#,720,"As a Visitor, I want to sign up via github or ...","As a Visitor, I want to sign up via github or ...",I don't have to enter lots of information and ...,"{'Main Part': [['Visitor', 'sign up via']], 'B...","{'Main Part': [['sign up via', 'github'], ['si...","{'Main Part': [], 'Benefit': [['another websit..."
4,#G14#,721,"As a Publisher, I want to know what do next af...","As a Publisher, I want to know what do next af...",I can get going quickly,"{'Main Part': [['Publisher', 'know']], 'Benefi...","{'Main Part': [['know', 'what'], ['do next', '...","{'Main Part': [], 'Benefit': []}"


In [52]:
# Preview the first story pairs (head() defaults to five rows).
display(df_g14_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G14#,717,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can view just the dataset with a few people,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': []}",#G14#,718,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can share the dataset publicly with everyone,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': [['everyone', 'da..."
1,#G14#,717,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can view just the dataset with a few people,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': []}",#G14#,719,"As a Publisher, I want to sign up for an accou...","As a Publisher, I want to sign up for an account",I can publish my data package to the registry ...,"{'Main Part': [['Publisher', 'sign up']], 'Ben...","{'Main Part': [['sign up', 'account']], 'Benef...","{'Main Part': [], 'Benefit': [['registry', 'da..."
2,#G14#,717,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can view just the dataset with a few people,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': []}",#G14#,720,"As a Visitor, I want to sign up via github or ...","As a Visitor, I want to sign up via github or ...",I don't have to enter lots of information and ...,"{'Main Part': [['Visitor', 'sign up via']], 'B...","{'Main Part': [['sign up via', 'github'], ['si...","{'Main Part': [], 'Benefit': [['another websit..."
3,#G14#,717,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can view just the dataset with a few people,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': []}",#G14#,721,"As a Publisher, I want to know what do next af...","As a Publisher, I want to know what do next af...",I can get going quickly,"{'Main Part': [['Publisher', 'know']], 'Benefi...","{'Main Part': [['know', 'what'], ['do next', '...","{'Main Part': [], 'Benefit': []}"
4,#G14#,717,"As a Publisher, I want to publish a dataset, s...","As a Publisher, I want to publish a dataset",I can view just the dataset with a few people,"{'Main Part': [['Publisher', 'publish']], 'Ben...","{'Main Part': [['publish', 'dataset']], 'Benef...","{'Main Part': [], 'Benefit': []}",#G14#,722,"As an Admin, I want to invite someone to join ...","As an Admin, I want to invite someone to join ...",they can start contributing or using data,"{'Main Part': [['Admin', 'invite']], 'Benefit'...","{'Main Part': [['invite', 'someone']], 'Benefi...","{'Main Part': [], 'Benefit': [['platform', 'da..."


### Process and Store the User Stories persistently

In [53]:
# Create a new TimeRecorder for this data set and register it under the data set key.
time_recorder = time_recorders[ID_G14] = TimeRecorder()

# index_usid1 / index_usid2: 1 and 9 correspond to the positions of the "First USID" and
# "Second USID" columns in the pairs frame — presumably positional column indices;
# confirm in support_functions.request_handler.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept as-is because the
# prefix is a runtime value that stored results may depend on.
# NOTE(review): the two branches hand over different validation arguments
# (json_schema vs. json_validation) — confirm this is intended.
if THREADING:
    process_user_stories_parallel(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g14_pairs,
        key=ID_G14,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g14_pairs,
        key=ID_G14,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G16

In [54]:
# Key under which this project's stories are stored in `datasets`.
ID_G16: str = "g16"
# Raw annotated records of the data set (one dict per user story, see the sample below).
data_g16: list = datasets[ID_G16]
# Tabular view of the records: one row per user story.
df_g16: pd.DataFrame = transform_2_dataframe(data_g16)
# Pairwise view with "First ..." / "Second ..." columns; this is the input of the processing
# step. Presumably one row per story combination — verify against transform_pairwise.
df_g16_pairs = transform_pairwise(df_g16)

In [55]:
# Peek at the first raw record to check the annotation structure (Triggers / Targets / Contains).
# NOTE(review): in the stored output this record's 'Main Part' starts with the PID marker
# ('#G16# ...') and 'Benefit' is empty — check whether the loader should strip that prefix.
datasets[ID_G16][0]

{'PID': '#G16#',
 'USID': '784',
 'Text': 'As a collection curator, I want to set a date after which data will expire and be deleted or hidden then deleted.',
 'Main Part': '#G16# As a collection curator, I want to set a date after which data will expire and be deleted or hidden then deleted.',
 'Benefit': '',
 'Triggers': {'Main Part': [['collection curator', 'set']], 'Benefit': []},
 'Targets': {'Main Part': [['set', 'date']],
  'Benefit': [['deleted', 'data'], ['hidden', 'data'], ['deleted', 'data']]},
 'Contains': {'Main Part': [], 'Benefit': []}}

In [56]:
# Preview the first five user stories of the data set as a table.
display(df_g16.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G16#,784,"As a collection curator, I want to set a date ...","#G16# As a collection curator, I want to set a...",,"{'Main Part': [['collection curator', 'set']],...","{'Main Part': [['set', 'date']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}"
1,#G16#,785,"As a collection curator, I want to see the sch...","#G16# As a collection curator, I want to see t...",,"{'Main Part': [['collection curator', 'see']],...","{'Main Part': [['see', 'scheduled records dele...","{'Main Part': [['item', 'scheduled records del..."
2,#G16#,786,"As a repository manager, I want to track who i...","#G16# As a repository manager, I want to track...",,"{'Main Part': [['repository manager', 'track']...","{'Main Part': [['track', 'who'], ['track', 'wh...","{'Main Part': [], 'Benefit': [['repository', '..."
3,#G16#,787,"As a repository manager, I want to be able t r...","#G16# As a repository manager, I want to be ab...",,"{'Main Part': [['repository manager', 'run']],...","{'Main Part': [['run', 'report']], 'Benefit': ...","{'Main Part': [], 'Benefit': []}"
4,#G16#,788,"As a collection curator, I want to map an item...","As a collection curator, I want to map an item...",an item may administratively belong to one col...,"{'Main Part': [['collection curator', 'map']],...","{'Main Part': [['map', 'item'], ['map', 'items...","{'Main Part': [['collection', 'items']], 'Bene..."


In [57]:
# Preview the first story pairs (head() defaults to five rows).
display(df_g16_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G16#,784,"As a collection curator, I want to set a date ...","#G16# As a collection curator, I want to set a...",,"{'Main Part': [['collection curator', 'set']],...","{'Main Part': [['set', 'date']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}",#G16#,785,"As a collection curator, I want to see the sch...","#G16# As a collection curator, I want to see t...",,"{'Main Part': [['collection curator', 'see']],...","{'Main Part': [['see', 'scheduled records dele...","{'Main Part': [['item', 'scheduled records del..."
1,#G16#,784,"As a collection curator, I want to set a date ...","#G16# As a collection curator, I want to set a...",,"{'Main Part': [['collection curator', 'set']],...","{'Main Part': [['set', 'date']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}",#G16#,786,"As a repository manager, I want to track who i...","#G16# As a repository manager, I want to track...",,"{'Main Part': [['repository manager', 'track']...","{'Main Part': [['track', 'who'], ['track', 'wh...","{'Main Part': [], 'Benefit': [['repository', '..."
2,#G16#,784,"As a collection curator, I want to set a date ...","#G16# As a collection curator, I want to set a...",,"{'Main Part': [['collection curator', 'set']],...","{'Main Part': [['set', 'date']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}",#G16#,787,"As a repository manager, I want to be able t r...","#G16# As a repository manager, I want to be ab...",,"{'Main Part': [['repository manager', 'run']],...","{'Main Part': [['run', 'report']], 'Benefit': ...","{'Main Part': [], 'Benefit': []}"
3,#G16#,784,"As a collection curator, I want to set a date ...","#G16# As a collection curator, I want to set a...",,"{'Main Part': [['collection curator', 'set']],...","{'Main Part': [['set', 'date']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}",#G16#,788,"As a collection curator, I want to map an item...","As a collection curator, I want to map an item...",an item may administratively belong to one col...,"{'Main Part': [['collection curator', 'map']],...","{'Main Part': [['map', 'item'], ['map', 'items...","{'Main Part': [['collection', 'items']], 'Bene..."
4,#G16#,784,"As a collection curator, I want to set a date ...","#G16# As a collection curator, I want to set a...",,"{'Main Part': [['collection curator', 'set']],...","{'Main Part': [['set', 'date']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}",#G16#,789,"As a faculty member, I want to add a date deli...","As a faculty member, I want to add a date deli...",I can limit a search to return only results fr...,"{'Main Part': [['faculty member', 'add']], 'Be...","{'Main Part': [['add', 'date delimiter']], 'Be...","{'Main Part': [['search string', 'date delimit..."


### Process and Store the User Stories persistently

In [58]:
# Create a new TimeRecorder for this data set and register it under the data set key.
time_recorder = time_recorders[ID_G16] = TimeRecorder()

# index_usid1 / index_usid2: 1 and 9 correspond to the positions of the "First USID" and
# "Second USID" columns in the pairs frame — presumably positional column indices;
# confirm in support_functions.request_handler.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept as-is because the
# prefix is a runtime value that stored results may depend on.
# NOTE(review): the two branches hand over different validation arguments
# (json_schema vs. json_validation) — confirm this is intended.
if THREADING:
    process_user_stories_parallel(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g16_pairs,
        key=ID_G16,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g16_pairs,
        key=ID_G16,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G18

In [59]:
# Key under which this project's stories are stored in `datasets`.
ID_G18: str = "g18"
# Raw annotated records of the data set (one dict per user story, see the sample below).
data_g18: list = datasets[ID_G18]
# Tabular view of the records: one row per user story.
df_g18: pd.DataFrame = transform_2_dataframe(data_g18)
# Pairwise view with "First ..." / "Second ..." columns; this is the input of the processing
# step. Presumably one row per story combination — verify against transform_pairwise.
df_g18_pairs = transform_pairwise(df_g18)

In [60]:
# Peek at the first raw record to check the annotation structure (Triggers / Targets / Contains).
# NOTE(review): in the stored output this record's 'Main Part' starts with the PID marker
# ('#G18# ...') and 'Benefit' is empty — check whether the loader should strip that prefix.
datasets[ID_G18][0]

{'PID': '#G18#',
 'USID': '916',
 'Text': 'As a system administrator, I want to run a script that installs the Neurohub node onto a virgin Ubuntu operating system.',
 'Main Part': '#G18# As a system administrator, I want to run a script that installs the Neurohub node onto a virgin Ubuntu operating system.',
 'Benefit': '',
 'Triggers': {'Main Part': [['system administrator,', 'run']], 'Benefit': []},
 'Targets': {'Main Part': [['run', 'script']],
  'Benefit': [['installs', 'Neurohub node']]},
 'Contains': {'Main Part': [],
  'Benefit': [['virgin Ubuntu operating system', 'Neurohub node']]}}

In [61]:
# Preview the first five user stories of the data set as a table.
display(df_g18.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G18#,916,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator,', 'run'...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': [['virgin Ubuntu ..."
1,#G18#,917,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator', 'run']...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': []}"
2,#G18#,918,"As a release engineer, I want to call a script...","#G18# As a release engineer, I want to call a ...",,"{'Main Part': [['release engineer', 'call'], [...","{'Main Part': [['call', 'script'], ['report ba...","{'Main Part': [], 'Benefit': []}"
3,#G18#,919,"As an investigator, I want to view the accepta...","As an investigator, I want to view the accepta...",I can understand how feature-complete the Neur...,"{'Main Part': [['investigator', 'view']], 'Ben...","{'Main Part': [['view', 'acceptance test resul...","{'Main Part': [], 'Benefit': []}"
4,#G18#,920,"As an MRI operator, I want to digitally record...","#G18# As an MRI operator, I want to digitally ...",,"{'Main Part': [['MRI operator', 'digitally rec...","{'Main Part': [['digitally record', 'responses...","{'Main Part': [['subjects', 'responses']], 'Be..."


In [62]:
# Preview the first story pairs (head() defaults to five rows).
display(df_g18_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G18#,916,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator,', 'run'...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': [['virgin Ubuntu ...",#G18#,917,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator', 'run']...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': []}"
1,#G18#,916,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator,', 'run'...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': [['virgin Ubuntu ...",#G18#,918,"As a release engineer, I want to call a script...","#G18# As a release engineer, I want to call a ...",,"{'Main Part': [['release engineer', 'call'], [...","{'Main Part': [['call', 'script'], ['report ba...","{'Main Part': [], 'Benefit': []}"
2,#G18#,916,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator,', 'run'...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': [['virgin Ubuntu ...",#G18#,919,"As an investigator, I want to view the accepta...","As an investigator, I want to view the accepta...",I can understand how feature-complete the Neur...,"{'Main Part': [['investigator', 'view']], 'Ben...","{'Main Part': [['view', 'acceptance test resul...","{'Main Part': [], 'Benefit': []}"
3,#G18#,916,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator,', 'run'...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': [['virgin Ubuntu ...",#G18#,920,"As an MRI operator, I want to digitally record...","#G18# As an MRI operator, I want to digitally ...",,"{'Main Part': [['MRI operator', 'digitally rec...","{'Main Part': [['digitally record', 'responses...","{'Main Part': [['subjects', 'responses']], 'Be..."
4,#G18#,916,"As a system administrator, I want to run a scr...","#G18# As a system administrator, I want to run...",,"{'Main Part': [['system administrator,', 'run'...","{'Main Part': [['run', 'script']], 'Benefit': ...","{'Main Part': [], 'Benefit': [['virgin Ubuntu ...",#G18#,921,"As a user, I want to navigate forwards and bac...","#G18# As a user, I want to navigate forwards a...",,"{'Main Part': [['user', 'navigate forwards and...",{'Main Part': [['navigate forwards and backwar...,"{'Main Part': [], 'Benefit': []}"


### Process and Store the User Stories persistently

In [63]:
# Create a new TimeRecorder for this data set and register it under the data set key.
time_recorder = time_recorders[ID_G18] = TimeRecorder()

# index_usid1 / index_usid2: 1 and 9 correspond to the positions of the "First USID" and
# "Second USID" columns in the pairs frame — presumably positional column indices;
# confirm in support_functions.request_handler.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept as-is because the
# prefix is a runtime value that stored results may depend on.
# NOTE(review): the two branches hand over different validation arguments
# (json_schema vs. json_validation) — confirm this is intended.
if THREADING:
    process_user_stories_parallel(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g18_pairs,
        key=ID_G18,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g18_pairs,
        key=ID_G18,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G19

In [64]:
# Key under which this project's stories are stored in `datasets`.
ID_G19: str = "g19"
# Raw annotated records of the data set (one dict per user story, see the sample below).
data_g19: list = datasets[ID_G19]
# Tabular view of the records: one row per user story.
df_g19: pd.DataFrame = transform_2_dataframe(data_g19)
# Pairwise view with "First ..." / "Second ..." columns; this is the input of the processing
# step. Presumably one row per story combination — verify against transform_pairwise.
df_g19_pairs = transform_pairwise(df_g19)

In [65]:
# Peek at the first raw record to check the annotation structure (Triggers / Targets / Contains).
# NOTE(review): in the stored output this record's 'Main Part' starts with the PID marker
# ('#G19# ...') and 'Benefit' is empty — check whether the loader should strip that prefix.
datasets[ID_G19][0]

{'PID': '#G19#',
 'USID': '1018',
 'Text': 'As an OlderPerson, I want to know exactly what ALFRED does with my personal data, and share it only on my specific permission.',
 'Main Part': '#G19# As an OlderPerson, I want to know exactly what ALFRED does with my personal data, and share it only on my specific permission.',
 'Benefit': '',
 'Triggers': {'Main Part': [['OlderPerson', 'know exactly']], 'Benefit': []},
 'Targets': {'Main Part': [['know exactly', 'what']],
  'Benefit': [['share', 'personal data']]},
 'Contains': {'Main Part': [], 'Benefit': [['ALFRED', 'personal data']]}}

In [66]:
# Preview the first five rows of the per-story frame.
display(df_g19.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G19#,1018,"As an OlderPerson, I want to know exactly what...","#G19# As an OlderPerson, I want to know exactl...",,"{'Main Part': [['OlderPerson', 'know exactly']...","{'Main Part': [['know exactly', 'what']], 'Ben...","{'Main Part': [], 'Benefit': [['ALFRED', 'pers..."
1,#G19#,1019,"As an OlderPerson, I want to receive informati...","#G19# As an OlderPerson, I want to receive inf...",,"{'Main Part': [['OlderPerson', 'receive']], 'B...","{'Main Part': [['receive', 'information']], 'B...","{'Main Part': [['ALFRED', 'information']], 'Be..."
2,#G19#,1020,"As an OlderPerson, I want to use only well-vis...","#G19# As an OlderPerson, I want to use only we...",,"{'Main Part': [['OlderPerson', 'use only']], '...","{'Main Part': [['use only', 'well-visible butt...","{'Main Part': [], 'Benefit': []}"
3,#G19#,1021,"As an OlderPerson, I want to use ALFRED as muc...","#G19# As an OlderPerson, I want to use ALFRED ...",,"{'Main Part': [['OlderPerson', 'use']], 'Benef...","{'Main Part': [['use', 'ALFRED']], 'Benefit': []}","{'Main Part': [['ALFRED', 'speech interaction'..."
4,#G19#,1022,"As an OlderPerson, I want to always keep contr...","#G19# As an OlderPerson, I want to always keep...",,"{'Main Part': [['OlderPerson', 'always keep co...","{'Main Part': [['always keep control', 'activi...","{'Main Part': [['ALFRED', 'activities']], 'Ben..."


In [67]:
# Preview the pairwise frame (head() without an argument shows five rows here).
display(df_g19_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G19#,1018,"As an OlderPerson, I want to know exactly what...","#G19# As an OlderPerson, I want to know exactl...",,"{'Main Part': [['OlderPerson', 'know exactly']...","{'Main Part': [['know exactly', 'what']], 'Ben...","{'Main Part': [], 'Benefit': [['ALFRED', 'pers...",#G19#,1019,"As an OlderPerson, I want to receive informati...","#G19# As an OlderPerson, I want to receive inf...",,"{'Main Part': [['OlderPerson', 'receive']], 'B...","{'Main Part': [['receive', 'information']], 'B...","{'Main Part': [['ALFRED', 'information']], 'Be..."
1,#G19#,1018,"As an OlderPerson, I want to know exactly what...","#G19# As an OlderPerson, I want to know exactl...",,"{'Main Part': [['OlderPerson', 'know exactly']...","{'Main Part': [['know exactly', 'what']], 'Ben...","{'Main Part': [], 'Benefit': [['ALFRED', 'pers...",#G19#,1020,"As an OlderPerson, I want to use only well-vis...","#G19# As an OlderPerson, I want to use only we...",,"{'Main Part': [['OlderPerson', 'use only']], '...","{'Main Part': [['use only', 'well-visible butt...","{'Main Part': [], 'Benefit': []}"
2,#G19#,1018,"As an OlderPerson, I want to know exactly what...","#G19# As an OlderPerson, I want to know exactl...",,"{'Main Part': [['OlderPerson', 'know exactly']...","{'Main Part': [['know exactly', 'what']], 'Ben...","{'Main Part': [], 'Benefit': [['ALFRED', 'pers...",#G19#,1021,"As an OlderPerson, I want to use ALFRED as muc...","#G19# As an OlderPerson, I want to use ALFRED ...",,"{'Main Part': [['OlderPerson', 'use']], 'Benef...","{'Main Part': [['use', 'ALFRED']], 'Benefit': []}","{'Main Part': [['ALFRED', 'speech interaction'..."
3,#G19#,1018,"As an OlderPerson, I want to know exactly what...","#G19# As an OlderPerson, I want to know exactl...",,"{'Main Part': [['OlderPerson', 'know exactly']...","{'Main Part': [['know exactly', 'what']], 'Ben...","{'Main Part': [], 'Benefit': [['ALFRED', 'pers...",#G19#,1022,"As an OlderPerson, I want to always keep contr...","#G19# As an OlderPerson, I want to always keep...",,"{'Main Part': [['OlderPerson', 'always keep co...","{'Main Part': [['always keep control', 'activi...","{'Main Part': [['ALFRED', 'activities']], 'Ben..."
4,#G19#,1018,"As an OlderPerson, I want to know exactly what...","#G19# As an OlderPerson, I want to know exactl...",,"{'Main Part': [['OlderPerson', 'know exactly']...","{'Main Part': [['know exactly', 'what']], 'Ben...","{'Main Part': [], 'Benefit': [['ALFRED', 'pers...",#G19#,1023,"As an OlderPerson, I want to be able to manage...","#G19# As an OlderPerson, I want to be able to ...",,"{'Main Part': [['OlderPerson', 'manage']], 'Be...","{'Main Part': [['manage', 'privacy'], ['manage...","{'Main Part': [], 'Benefit': []}"


### Process and Store the User Stories persistently

In [68]:
# Start a fresh timer for this dataset and register it under the dataset key.
time_recorder = time_recorders[ID_G19] = TimeRecorder()

# THREADING selects process_user_stories_parallel, otherwise process_user_stories.
if THREADING:
    process_user_stories_parallel(
        # 1 and 9 are the positions of "First USID" / "Second USID" in the pairs frame
        # (presumably how the helper locates the two story ids — confirm).
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g19_pairs,
        key=ID_G19,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        # NOTE(review): "annoations" looks like a misspelling of "annotations"; kept
        # byte-for-byte because the helper receives it verbatim — confirm before renaming.
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g19_pairs,
        key=ID_G19,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G21

In [69]:
# Select the G21 dataset and derive the two frames used by the following cells.
ID_G21: str = "g21"  # key into `datasets`; reused as the key for timing and processing
data_g21: list = datasets[ID_G21]
df_g21: pd.DataFrame = transform_2_dataframe(data_g21)
# Pairwise frame: the preview shows "First ..." / "Second ..." columns per row.
df_g21_pairs = transform_pairwise(df_g21)

In [70]:
# Show the first raw G21 record as a sanity check of the annotation structure.
datasets[ID_G21][0]

{'PID': '#G21#',
 'USID': '1156',
 'Text': 'As a anonymoususer, I want to view a list of sponsors, so that I can thank all the awesome sponsors.',
 'Main Part': 'As a anonymoususer, I want to view a list of sponsors',
 'Benefit': 'I can thank all the awesome sponsors',
 'Triggers': {'Main Part': [['anonymoususer', 'view']], 'Benefit': []},
 'Targets': {'Main Part': [['view', 'list']],
  'Benefit': [['thank', 'awesome sponsors.']]},
 'Contains': {'Main Part': [['list', 'sponsors']], 'Benefit': []}}

In [71]:
# Preview the first five rows of the per-story frame.
display(df_g21.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G21#,1156,"As a anonymoususer, I want to view a list of s...","As a anonymoususer, I want to view a list of s...",I can thank all the awesome sponsors,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'sponsors']], 'Benefit..."
1,#G21#,1157,"As a anonymoususer, I want to view a list of u...","As a anonymoususer, I want to view a list of u...",I know who is attending the conference,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'user profiles']], 'Be..."
2,#G21#,1158,"As a trainingcoordinator, I want to email all ...","As a trainingcoordinator, I want to email all ...",It is easier to use,"{'Main Part': [['trainingcoordinator', 'email'...","{'Main Part': [['email', 'trainers']], 'Benefi...","{'Main Part': [], 'Benefit': []}"
3,#G21#,1159,"As a attendee, I want to have a very clear map...","As a attendee, I want to have a very clear map...",I can make it to class on time,"{'Main Part': [['attendee', 'have']], 'Benefit...","{'Main Part': [['have', 'very clear map']], 'B...","{'Main Part': [['very clear map', 'buildings']..."
4,#G21#,1160,"As a trainer, I want to edit my training node ...","As a trainer, I want to edit my training node ...",the training coordinators don't have to and li...,"{'Main Part': [['trainer', 'edit']], 'Benefit'...","{'Main Part': [['edit', 'training node']], 'Be...","{'Main Part': [], 'Benefit': []}"


In [72]:
# Preview the pairwise frame (head() without an argument shows five rows here).
display(df_g21_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G21#,1156,"As a anonymoususer, I want to view a list of s...","As a anonymoususer, I want to view a list of s...",I can thank all the awesome sponsors,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'sponsors']], 'Benefit...",#G21#,1157,"As a anonymoususer, I want to view a list of u...","As a anonymoususer, I want to view a list of u...",I know who is attending the conference,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'user profiles']], 'Be..."
1,#G21#,1156,"As a anonymoususer, I want to view a list of s...","As a anonymoususer, I want to view a list of s...",I can thank all the awesome sponsors,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'sponsors']], 'Benefit...",#G21#,1158,"As a trainingcoordinator, I want to email all ...","As a trainingcoordinator, I want to email all ...",It is easier to use,"{'Main Part': [['trainingcoordinator', 'email'...","{'Main Part': [['email', 'trainers']], 'Benefi...","{'Main Part': [], 'Benefit': []}"
2,#G21#,1156,"As a anonymoususer, I want to view a list of s...","As a anonymoususer, I want to view a list of s...",I can thank all the awesome sponsors,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'sponsors']], 'Benefit...",#G21#,1159,"As a attendee, I want to have a very clear map...","As a attendee, I want to have a very clear map...",I can make it to class on time,"{'Main Part': [['attendee', 'have']], 'Benefit...","{'Main Part': [['have', 'very clear map']], 'B...","{'Main Part': [['very clear map', 'buildings']..."
3,#G21#,1156,"As a anonymoususer, I want to view a list of s...","As a anonymoususer, I want to view a list of s...",I can thank all the awesome sponsors,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'sponsors']], 'Benefit...",#G21#,1160,"As a trainer, I want to edit my training node ...","As a trainer, I want to edit my training node ...",the training coordinators don't have to and li...,"{'Main Part': [['trainer', 'edit']], 'Benefit'...","{'Main Part': [['edit', 'training node']], 'Be...","{'Main Part': [], 'Benefit': []}"
4,#G21#,1156,"As a anonymoususer, I want to view a list of s...","As a anonymoususer, I want to view a list of s...",I can thank all the awesome sponsors,"{'Main Part': [['anonymoususer', 'view']], 'Be...","{'Main Part': [['view', 'list']], 'Benefit': [...","{'Main Part': [['list', 'sponsors']], 'Benefit...",#G21#,1161,"As a trainee, I want to limit to one registrat...","As a trainee, I want to limit to one registrat...",we don't have duplicate spaces,"{'Main Part': [['trainee', 'limit']], 'Benefit...","{'Main Part': [['limit', 'one registration']],...","{'Main Part': [['day', 'one registration'], ['..."


### Process and Store the User Stories persistently

In [73]:
# Start a fresh timer for this dataset and register it under the dataset key.
time_recorder = time_recorders[ID_G21] = TimeRecorder()

# THREADING selects process_user_stories_parallel, otherwise process_user_stories.
if THREADING:
    process_user_stories_parallel(
        # 1 and 9 are the positions of "First USID" / "Second USID" in the pairs frame
        # (presumably how the helper locates the two story ids — confirm).
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g21_pairs,
        key=ID_G21,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        # NOTE(review): "annoations" looks like a misspelling of "annotations"; kept
        # byte-for-byte because the helper receives it verbatim — confirm before renaming.
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g21_pairs,
        key=ID_G21,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G22

In [74]:
# Select the G22 dataset and derive the two frames used by the following cells.
ID_G22: str = "g22"  # key into `datasets`; reused as the key for timing and processing
data_g22: list = datasets[ID_G22]
df_g22: pd.DataFrame = transform_2_dataframe(data_g22)
# Pairwise frame: the preview shows "First ..." / "Second ..." columns per row.
df_g22_pairs = transform_pairwise(df_g22)

In [75]:
# Show the first raw G22 record as a sanity check of the annotation structure.
datasets[ID_G22][0]

{'PID': '#G22#',
 'USID': '1225',
 'Text': 'As a PI, I want to properly record all metadata, so that I can ensure proper running of the project in case of staff changes.',
 'Main Part': 'As a PI, I want to properly record all metadata',
 'Benefit': 'I can ensure proper running of the project in case of staff changes',
 'Triggers': {'Main Part': [['PI', 'properly record']], 'Benefit': []},
 'Targets': {'Main Part': [['properly record', 'all metadata']],
  'Benefit': [['ensure', 'proper running'], ['changes', 'staff']]},
 'Contains': {'Main Part': [], 'Benefit': []}}

In [76]:
# Preview the first five rows of the per-story frame.
display(df_g22.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G22#,1225,"As a PI, I want to properly record all metadat...","As a PI, I want to properly record all metadata",I can ensure proper running of the project in ...,"{'Main Part': [['PI', 'properly record']], 'Be...","{'Main Part': [['properly record', 'all metada...","{'Main Part': [], 'Benefit': []}"
1,#G22#,1226,"As a researcher, I want to indicate where my d...","As a researcher, I want to indicate where my d...",it remains accessible during all stages of the...,"{'Main Part': [['researcher', 'indicate']], 'B...","{'Main Part': [['indicate', 'where'], ['indica...","{'Main Part': [], 'Benefit': [['where', 'data'..."
2,#G22#,1227,"As a researcher, I want to import metadata tha...","As a researcher, I want to import metadata tha...",I do not have to capture it again in a DMP,"{'Main Part': [['researcher', 'import']], 'Ben...","{'Main Part': [['import', 'metadata']], 'Benef...","{'Main Part': [], 'Benefit': []}"
3,#G22#,1228,"As a data manager, I want to know how the data...","As a data manager, I want to know how the data...",I can develop more detailed usage and support ...,"{'Main Part': [['data manager', 'know']], 'Ben...","{'Main Part': [['know', 'how']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}"
4,#G22#,1229,"As a repository owner, I want to be able to ch...","As a repository owner, I want to be able to ch...",I can calculate necessary submission fees and ...,"{'Main Part': [['repository owner', 'check']],...","{'Main Part': [['check', 'planned provenance']...","{'Main Part': [['data', 'planned provenance']]..."


In [77]:
# Preview the pairwise frame (head() without an argument shows five rows here).
display(df_g22_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G22#,1225,"As a PI, I want to properly record all metadat...","As a PI, I want to properly record all metadata",I can ensure proper running of the project in ...,"{'Main Part': [['PI', 'properly record']], 'Be...","{'Main Part': [['properly record', 'all metada...","{'Main Part': [], 'Benefit': []}",#G22#,1226,"As a researcher, I want to indicate where my d...","As a researcher, I want to indicate where my d...",it remains accessible during all stages of the...,"{'Main Part': [['researcher', 'indicate']], 'B...","{'Main Part': [['indicate', 'where'], ['indica...","{'Main Part': [], 'Benefit': [['where', 'data'..."
1,#G22#,1225,"As a PI, I want to properly record all metadat...","As a PI, I want to properly record all metadata",I can ensure proper running of the project in ...,"{'Main Part': [['PI', 'properly record']], 'Be...","{'Main Part': [['properly record', 'all metada...","{'Main Part': [], 'Benefit': []}",#G22#,1227,"As a researcher, I want to import metadata tha...","As a researcher, I want to import metadata tha...",I do not have to capture it again in a DMP,"{'Main Part': [['researcher', 'import']], 'Ben...","{'Main Part': [['import', 'metadata']], 'Benef...","{'Main Part': [], 'Benefit': []}"
2,#G22#,1225,"As a PI, I want to properly record all metadat...","As a PI, I want to properly record all metadata",I can ensure proper running of the project in ...,"{'Main Part': [['PI', 'properly record']], 'Be...","{'Main Part': [['properly record', 'all metada...","{'Main Part': [], 'Benefit': []}",#G22#,1228,"As a data manager, I want to know how the data...","As a data manager, I want to know how the data...",I can develop more detailed usage and support ...,"{'Main Part': [['data manager', 'know']], 'Ben...","{'Main Part': [['know', 'how']], 'Benefit': [[...","{'Main Part': [], 'Benefit': []}"
3,#G22#,1225,"As a PI, I want to properly record all metadat...","As a PI, I want to properly record all metadata",I can ensure proper running of the project in ...,"{'Main Part': [['PI', 'properly record']], 'Be...","{'Main Part': [['properly record', 'all metada...","{'Main Part': [], 'Benefit': []}",#G22#,1229,"As a repository owner, I want to be able to ch...","As a repository owner, I want to be able to ch...",I can calculate necessary submission fees and ...,"{'Main Part': [['repository owner', 'check']],...","{'Main Part': [['check', 'planned provenance']...","{'Main Part': [['data', 'planned provenance']]..."
4,#G22#,1225,"As a PI, I want to properly record all metadat...","As a PI, I want to properly record all metadata",I can ensure proper running of the project in ...,"{'Main Part': [['PI', 'properly record']], 'Be...","{'Main Part': [['properly record', 'all metada...","{'Main Part': [], 'Benefit': []}",#G22#,1230,"As an IT manager, I want to know about IT reso...","As an IT manager, I want to know about IT reso...",I can enable resource acquisition planning,"{'Main Part': [['IT manager', 'know']], 'Benef...","{'Main Part': [['know', 'IT resource requireme...","{'Main Part': [['project lifecycle', 'IT resou..."


### Process and Store the User Stories persistently

In [78]:
# Start a fresh timer for this dataset and register it under the dataset key.
time_recorder = time_recorders[ID_G22] = TimeRecorder()

# THREADING selects process_user_stories_parallel, otherwise process_user_stories.
if THREADING:
    process_user_stories_parallel(
        # 1 and 9 are the positions of "First USID" / "Second USID" in the pairs frame
        # (presumably how the helper locates the two story ids — confirm).
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g22_pairs,
        key=ID_G22,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        # NOTE(review): "annoations" looks like a misspelling of "annotations"; kept
        # byte-for-byte because the helper receives it verbatim — confirm before renaming.
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g22_pairs,
        key=ID_G22,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G23

In [79]:
# Select the G23 dataset and derive the two frames used by the following cells.
ID_G23: str = "g23"  # key into `datasets`; reused as the key for timing and processing
data_g23: list = datasets[ID_G23]
df_g23: pd.DataFrame = transform_2_dataframe(data_g23)
# Pairwise frame: the preview shows "First ..." / "Second ..." columns per row.
df_g23_pairs = transform_pairwise(df_g23)

In [80]:
# Show the first raw G23 record as a sanity check of the annotation structure.
datasets[ID_G23][0]

{'PID': '#G23#',
 'USID': '1308',
 'Text': 'As a researcher, I want to be able to view a finding aid for a particular collection/Resource.',
 'Main Part': '#G23# As a researcher, I want to be able to view a finding aid for a particular collection/Resource.',
 'Benefit': '',
 'Triggers': {'Main Part': [['researcher', 'view']], 'Benefit': []},
 'Targets': {'Main Part': [['view', 'finding aid']], 'Benefit': []},
 'Contains': {'Main Part': [['particular collection', 'finding aid'],
   ['Resource', 'finding aid']],
  'Benefit': []}}

In [81]:
# Preview the first five rows of the per-story frame.
display(df_g23.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G23#,1308,"As a researcher, I want to be able to view a f...","#G23# As a researcher, I want to be able to vi...",,"{'Main Part': [['researcher', 'view']], 'Benef...","{'Main Part': [['view', 'finding aid']], 'Bene...","{'Main Part': [['particular collection', 'find..."
1,#G23#,1309,"As an Application Administrator, I want to be ...","#G23# As an Application Administrator, I want ...",,"{'Main Part': [['Application Administrator', '...","{'Main Part': [['reset', 'User's password']], ...","{'Main Part': [], 'Benefit': []}"
2,#G23#,1310,"As a user, I want backend changes for managing...","#G23# As a user, I want backend changes for ma...",,"{'Main Part': [['user', 'want']], 'Benefit': []}","{'Main Part': [['want', 'backend changes']], '...","{'Main Part': [], 'Benefit': []}"
3,#G23#,1311,"As an Archivist, I want to import EAD files th...","#G23# As an Archivist, I want to import EAD fi...",,"{'Main Part': [['Archivist', 'import']], 'Bene...","{'Main Part': [['import', 'EAD files']], 'Bene...","{'Main Part': [], 'Benefit': []}"
4,#G23#,1312,"As an Archivist, I want to assign a component ...","#G23# As an Archivist, I want to assign a comp...",,"{'Main Part': [['Archivist', 'assign']], 'Bene...","{'Main Part': [['assign', 'component unique id...","{'Main Part': [['Resource Component', 'compone..."


In [82]:
# Preview the pairwise frame (head() without an argument shows five rows here).
display(df_g23_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G23#,1308,"As a researcher, I want to be able to view a f...","#G23# As a researcher, I want to be able to vi...",,"{'Main Part': [['researcher', 'view']], 'Benef...","{'Main Part': [['view', 'finding aid']], 'Bene...","{'Main Part': [['particular collection', 'find...",#G23#,1309,"As an Application Administrator, I want to be ...","#G23# As an Application Administrator, I want ...",,"{'Main Part': [['Application Administrator', '...","{'Main Part': [['reset', 'User's password']], ...","{'Main Part': [], 'Benefit': []}"
1,#G23#,1308,"As a researcher, I want to be able to view a f...","#G23# As a researcher, I want to be able to vi...",,"{'Main Part': [['researcher', 'view']], 'Benef...","{'Main Part': [['view', 'finding aid']], 'Bene...","{'Main Part': [['particular collection', 'find...",#G23#,1310,"As a user, I want backend changes for managing...","#G23# As a user, I want backend changes for ma...",,"{'Main Part': [['user', 'want']], 'Benefit': []}","{'Main Part': [['want', 'backend changes']], '...","{'Main Part': [], 'Benefit': []}"
2,#G23#,1308,"As a researcher, I want to be able to view a f...","#G23# As a researcher, I want to be able to vi...",,"{'Main Part': [['researcher', 'view']], 'Benef...","{'Main Part': [['view', 'finding aid']], 'Bene...","{'Main Part': [['particular collection', 'find...",#G23#,1311,"As an Archivist, I want to import EAD files th...","#G23# As an Archivist, I want to import EAD fi...",,"{'Main Part': [['Archivist', 'import']], 'Bene...","{'Main Part': [['import', 'EAD files']], 'Bene...","{'Main Part': [], 'Benefit': []}"
3,#G23#,1308,"As a researcher, I want to be able to view a f...","#G23# As a researcher, I want to be able to vi...",,"{'Main Part': [['researcher', 'view']], 'Benef...","{'Main Part': [['view', 'finding aid']], 'Bene...","{'Main Part': [['particular collection', 'find...",#G23#,1312,"As an Archivist, I want to assign a component ...","#G23# As an Archivist, I want to assign a comp...",,"{'Main Part': [['Archivist', 'assign']], 'Bene...","{'Main Part': [['assign', 'component unique id...","{'Main Part': [['Resource Component', 'compone..."
4,#G23#,1308,"As a researcher, I want to be able to view a f...","#G23# As a researcher, I want to be able to vi...",,"{'Main Part': [['researcher', 'view']], 'Benef...","{'Main Part': [['view', 'finding aid']], 'Bene...","{'Main Part': [['particular collection', 'find...",#G23#,1313,"As a User , I want to authenticate using LDAP.","#G23# As a User , I want to authenticate using...",,"{'Main Part': [['User', 'authenticate']], 'Ben...","{'Main Part': [], 'Benefit': [['using', 'LDAP']]}","{'Main Part': [], 'Benefit': []}"


### Process and Store the User Stories persistently

In [83]:
# Start a fresh timer for this dataset and register it under the dataset key.
time_recorder = time_recorders[ID_G23] = TimeRecorder()

# THREADING selects process_user_stories_parallel, otherwise process_user_stories.
if THREADING:
    process_user_stories_parallel(
        # 1 and 9 are the positions of "First USID" / "Second USID" in the pairs frame
        # (presumably how the helper locates the two story ids — confirm).
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g23_pairs,
        key=ID_G23,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        # NOTE(review): "annoations" looks like a misspelling of "annotations"; kept
        # byte-for-byte because the helper receives it verbatim — confirm before renaming.
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g23_pairs,
        key=ID_G23,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G24

In [84]:
# Select the G24 dataset and derive the two frames used by the following cells.
ID_G24: str = "g24"  # key into `datasets`; reused as the key for timing and processing
data_g24: list = datasets[ID_G24]
df_g24: pd.DataFrame = transform_2_dataframe(data_g24)
# Pairwise frame: the preview shows "First ..." / "Second ..." columns per row.
df_g24_pairs = transform_pairwise(df_g24)

In [85]:
# Show the first raw G24 record as a sanity check of the annotation structure.
datasets[ID_G24][0]

{'PID': '#G24#',
 'USID': '1365',
 'Text': "As a depositor, I want to deposit and maintain datasets through a simple web interface, so that I don't need to install and learn new software to deposit.",
 'Main Part': 'As a depositor, I want to deposit and maintain datasets through a simple web interface',
 'Benefit': "I don't need to install and learn new software to deposit",
 'Triggers': {'Main Part': [['depositor', 'deposit'],
   ['depositor', 'maintain']],
  'Benefit': []},
 'Targets': {'Main Part': [['deposit', 'datasets'], ['maintain', 'datasets']],
  'Benefit': []},
 'Contains': {'Main Part': [['simple web interface', 'datasets']],
  'Benefit': []}}

In [86]:
# Preview the first five rows of the per-story frame.
display(df_g24.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G24#,1365,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I don't need to install and learn new software...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['simple web interface', 'datas..."
1,#G24#,1366,"As a depositor, I want to have a user interfac...","As a depositor, I want to have a user interfac...",I feel like all the University systems are joi...,"{'Main Part': [['depositor', 'have']], 'Benefi...","{'Main Part': [['have', 'user interface']], 'B...","{'Main Part': [], 'Benefit': []}"
2,#G24#,1367,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I have a single onestop shop for managing my r...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['Pure', 'datasets']], 'Benefit..."
3,#G24#,1368,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I can continue to work with tools with which I...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['maintain', 'datasets'], ['dep...",{'Main Part': [['Virtual Research Environments...
4,#G24#,1369,"As a depositor, I want to deposit the files th...","As a depositor, I want to deposit the files th...",I don't have to spend a lot of time finding th...,"{'Main Part': [['depositor', 'deposit']], 'Ben...","{'Main Part': [['deposit', 'files']], 'Benefit...","{'Main Part': [], 'Benefit': []}"


In [87]:
# Preview the pairwise frame (head() without an argument shows five rows here).
display(df_g24_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G24#,1365,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I don't need to install and learn new software...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['simple web interface', 'datas...",#G24#,1366,"As a depositor, I want to have a user interfac...","As a depositor, I want to have a user interfac...",I feel like all the University systems are joi...,"{'Main Part': [['depositor', 'have']], 'Benefi...","{'Main Part': [['have', 'user interface']], 'B...","{'Main Part': [], 'Benefit': []}"
1,#G24#,1365,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I don't need to install and learn new software...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['simple web interface', 'datas...",#G24#,1367,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I have a single onestop shop for managing my r...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['Pure', 'datasets']], 'Benefit..."
2,#G24#,1365,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I don't need to install and learn new software...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['simple web interface', 'datas...",#G24#,1368,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I can continue to work with tools with which I...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['maintain', 'datasets'], ['dep...",{'Main Part': [['Virtual Research Environments...
3,#G24#,1365,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I don't need to install and learn new software...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['simple web interface', 'datas...",#G24#,1369,"As a depositor, I want to deposit the files th...","As a depositor, I want to deposit the files th...",I don't have to spend a lot of time finding th...,"{'Main Part': [['depositor', 'deposit']], 'Ben...","{'Main Part': [['deposit', 'files']], 'Benefit...","{'Main Part': [], 'Benefit': []}"
4,#G24#,1365,"As a depositor, I want to deposit and maintain...","As a depositor, I want to deposit and maintain...",I don't need to install and learn new software...,"{'Main Part': [['depositor', 'deposit'], ['dep...","{'Main Part': [['deposit', 'datasets'], ['main...","{'Main Part': [['simple web interface', 'datas...",#G24#,1370,"As a depositor, I want to place data under an ...","As a depositor, I want to place data under an ...","my right of first use is protected, and I can ...","{'Main Part': [['depositor', 'place']], 'Benef...","{'Main Part': [['place', 'data']], 'Benefit': ...","{'Main Part': [['data', 'embargo']], 'Benefit'..."


### Process and Store the User Stories persistently

In [88]:
# Start a fresh timer for this dataset and register it under the dataset key.
time_recorder = time_recorders[ID_G24] = TimeRecorder()

# THREADING selects process_user_stories_parallel, otherwise process_user_stories.
if THREADING:
    process_user_stories_parallel(
        # 1 and 9 are the positions of "First USID" / "Second USID" in the pairs frame
        # (presumably how the helper locates the two story ids — confirm).
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g24_pairs,
        key=ID_G24,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        # NOTE(review): "annoations" looks like a misspelling of "annotations"; kept
        # byte-for-byte because the helper receives it verbatim — confirm before renaming.
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )
else:
    process_user_stories(
        index_usid1=1,
        index_usid2=9,
        message=message,
        pairs=df_g24_pairs,
        key=ID_G24,
        model_version_name=MODEL_VERSION_NAME,
        template_request_two_user_stories=template_request_two_user_stories,
        json_validation=json_validator,
        redundancy_prefix="annoations",
        time_recorder=time_recorder,
    )

## Data processing for: G25

In [89]:
# Select the G25 dataset and derive the per-story frame and the pairwise frame from it.
ID_G25: str = "g25"  # key into `datasets`
data_g25: list = datasets[ID_G25]
df_g25: pd.DataFrame = transform_2_dataframe(data_g25)
df_g25_pairs = transform_pairwise(df_g25)

In [90]:
# Inspect the first raw G25 record; the bare expression is rendered as the cell output.
datasets[ID_G25][0]

{'PID': '#G25#',
 'USID': '1418',
 'Text': 'As a repository manager, I want to know all the collections and objects in the DAMS for which I have custodial responsibility.',
 'Main Part': '#G25# As a repository manager, I want to know all the collections and objects in the DAMS for which I have custodial responsibility.',
 'Benefit': '',
 'Triggers': {'Main Part': [['repository manager', 'know']], 'Benefit': []},
 'Targets': {'Main Part': [['know', 'collections'], ['know', 'objects']],
  'Benefit': []},
 'Contains': {'Main Part': [['DAMS', 'objects'], ['DAMS', 'collections']],
  'Benefit': []}}

In [91]:
# Preview the first five G25 user stories as a table.
display(df_g25.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G25#,1418,"As a repository manager, I want to know all th...","#G25# As a repository manager, I want to know ...",,"{'Main Part': [['repository manager', 'know']]...","{'Main Part': [['know', 'collections'], ['know...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '..."
1,#G25#,1419,"As a repository manager, I want to be able to ...","#G25# As a repository manager, I want to be ab...",,"{'Main Part': [['repository manager', 'update'...","{'Main Part': [['update', 'repository informat...","{'Main Part': [['one place', 'repository infor..."
2,#G25#,1420,"As a repository manager, I want to constrain c...","#G25# As a repository manager, I want to const...",,"{'Main Part': [['repository manager', 'constra...","{'Main Part': [['constrain', 'curatorial acces...","{'Main Part': [['units assets', 'curatorial ac..."
3,#G25#,1421,"As a user, I want to list all the collections ...","#G25# As a user, I want to list all the collec...",,"{'Main Part': [['user', 'list']], 'Benefit': []}","{'Main Part': [['list', 'collections'], ['list...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '..."
4,#G25#,1422,"As a user, I want to be able to contact reposi...","#G25# As a user, I want to be able to contact ...",,"{'Main Part': [['user', 'contact']], 'Benefit'...","{'Main Part': [['contact', 'repositories']], '...","{'Main Part': [], 'Benefit': []}"


In [92]:
# Preview the first G25 pairs; each row carries a "First ..." and a "Second ..." user story.
display(df_g25_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G25#,1418,"As a repository manager, I want to know all th...","#G25# As a repository manager, I want to know ...",,"{'Main Part': [['repository manager', 'know']]...","{'Main Part': [['know', 'collections'], ['know...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '...",#G25#,1419,"As a repository manager, I want to be able to ...","#G25# As a repository manager, I want to be ab...",,"{'Main Part': [['repository manager', 'update'...","{'Main Part': [['update', 'repository informat...","{'Main Part': [['one place', 'repository infor..."
1,#G25#,1418,"As a repository manager, I want to know all th...","#G25# As a repository manager, I want to know ...",,"{'Main Part': [['repository manager', 'know']]...","{'Main Part': [['know', 'collections'], ['know...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '...",#G25#,1420,"As a repository manager, I want to constrain c...","#G25# As a repository manager, I want to const...",,"{'Main Part': [['repository manager', 'constra...","{'Main Part': [['constrain', 'curatorial acces...","{'Main Part': [['units assets', 'curatorial ac..."
2,#G25#,1418,"As a repository manager, I want to know all th...","#G25# As a repository manager, I want to know ...",,"{'Main Part': [['repository manager', 'know']]...","{'Main Part': [['know', 'collections'], ['know...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '...",#G25#,1421,"As a user, I want to list all the collections ...","#G25# As a user, I want to list all the collec...",,"{'Main Part': [['user', 'list']], 'Benefit': []}","{'Main Part': [['list', 'collections'], ['list...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '..."
3,#G25#,1418,"As a repository manager, I want to know all th...","#G25# As a repository manager, I want to know ...",,"{'Main Part': [['repository manager', 'know']]...","{'Main Part': [['know', 'collections'], ['know...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '...",#G25#,1422,"As a user, I want to be able to contact reposi...","#G25# As a user, I want to be able to contact ...",,"{'Main Part': [['user', 'contact']], 'Benefit'...","{'Main Part': [['contact', 'repositories']], '...","{'Main Part': [], 'Benefit': []}"
4,#G25#,1418,"As a repository manager, I want to know all th...","#G25# As a repository manager, I want to know ...",,"{'Main Part': [['repository manager', 'know']]...","{'Main Part': [['know', 'collections'], ['know...","{'Main Part': [['DAMS', 'objects'], ['DAMS', '...",#G25#,1423,"As the DAMS manager, I want to be able to list...","#G25# As the DAMS manager, I want to be able t...",,"{'Main Part': [['DAMS manager', 'list']], 'Ben...","{'Main Part': [['browsing', 'all formal collec...","{'Main Part': [['DAMS', 'all formal collection..."


### Process and Store the User Stories persistently

In [93]:
# Fresh recorder for this data set, registered under its id so the summary
# cell at the end of the notebook can read the timings.
time_recorder = TimeRecorder()
time_recorders[ID_G25] = time_recorder

# Keyword arguments that the threaded and the sequential handler both receive.
# 1 and 9 are presumably the positions of "First USID" and "Second USID" in the pair frame.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept
# unchanged because the handler receives it as a runtime value.
shared_request_kwargs = dict(
    index_usid1=1,
    index_usid2=9,
    message=message,
    pairs=df_g25_pairs,
    key=ID_G25,
    model_version_name=MODEL_VERSION_NAME,
    template_request_two_user_stories=template_request_two_user_stories,
    redundancy_prefix="annoations",
    time_recorder=time_recorder,
)

if not THREADING:
    # NOTE(review): json_validator is not among the imports in the first cell;
    # confirm it is defined in an earlier cell.
    process_user_stories(json_validation=json_validator, **shared_request_kwargs)
else:
    process_user_stories_parallel(
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        **shared_request_kwargs,
    )

## Data processing for: G26

In [94]:
# Select the G26 data set and derive the two frames used in the following cells:
# df_g26 holds one row per user story, df_g26_pairs one row per pair of user stories.
ID_G26: str = "g26"
data_g26: list = datasets[ID_G26]
df_g26: pd.DataFrame = transform_2_dataframe(data_g26)
df_g26_pairs = transform_pairwise(df_g26)

In [95]:
# Inspect the first raw G26 record; the bare expression is rendered as the cell output.
datasets[ID_G26][0]

{'PID': '#G26#',
 'USID': '1518',
 'Text': 'As an archivist, I want to apply a license or rights statement, so that I know what I can do with a file.',
 'Main Part': 'As an archivist, I want to apply a license or rights statement',
 'Benefit': 'I know what I can do with a file',
 'Triggers': {'Main Part': [['archivist', 'apply']], 'Benefit': []},
 'Targets': {'Main Part': [['apply', 'license'],
   ['apply', 'rights statement']],
  'Benefit': [['know', 'what'], ['do', 'file']]},
 'Contains': {'Main Part': [], 'Benefit': []}}

In [96]:
# Preview the first five G26 user stories as a table.
display(df_g26.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G26#,1518,"As an archivist, I want to apply a license or ...","As an archivist, I want to apply a license or ...",I know what I can do with a file,"{'Main Part': [['archivist', 'apply']], 'Benef...","{'Main Part': [['apply', 'license'], ['apply',...","{'Main Part': [], 'Benefit': []}"
1,#G26#,1519,"As an archivist, I want to mark individual fil...","As an archivist, I want to mark individual fil...",they don't get shared by mistake,"{'Main Part': [['archivist', 'mark']], 'Benefi...","{'Main Part': [['mark', 'individual files']], ...","{'Main Part': [], 'Benefit': []}"
2,#G26#,1520,"As an archivist, I want to mark groups of file...","As an archivist, I want to mark groups of file...",they don't get shared by mistake,"{'Main Part': [['archivist', 'mark']], 'Benefi...","{'Main Part': [['mark', 'groups']], 'Benefit':...","{'Main Part': [['groups', 'files']], 'Benefit'..."
3,#G26#,1521,"As an archivist, I want to restrict a file fro...","#G26# As an archivist, I want to restrict a fi...",,"{'Main Part': [['archivist', 'restrict']], 'Be...","{'Main Part': [['restrict', 'file']], 'Benefit...","{'Main Part': [], 'Benefit': []}"
4,#G26#,1522,"As an archivist, I want to restrict a group of...","#G26# As an archivist, I want to restrict a gr...",,"{'Main Part': [['archivist', 'restrict']], 'Be...","{'Main Part': [['restrict', 'group']], 'Benefi...","{'Main Part': [['group', 'files']], 'Benefit':..."


In [97]:
# Preview the first G26 pairs; each row carries a "First ..." and a "Second ..." user story.
display(df_g26_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G26#,1518,"As an archivist, I want to apply a license or ...","As an archivist, I want to apply a license or ...",I know what I can do with a file,"{'Main Part': [['archivist', 'apply']], 'Benef...","{'Main Part': [['apply', 'license'], ['apply',...","{'Main Part': [], 'Benefit': []}",#G26#,1519,"As an archivist, I want to mark individual fil...","As an archivist, I want to mark individual fil...",they don't get shared by mistake,"{'Main Part': [['archivist', 'mark']], 'Benefi...","{'Main Part': [['mark', 'individual files']], ...","{'Main Part': [], 'Benefit': []}"
1,#G26#,1518,"As an archivist, I want to apply a license or ...","As an archivist, I want to apply a license or ...",I know what I can do with a file,"{'Main Part': [['archivist', 'apply']], 'Benef...","{'Main Part': [['apply', 'license'], ['apply',...","{'Main Part': [], 'Benefit': []}",#G26#,1520,"As an archivist, I want to mark groups of file...","As an archivist, I want to mark groups of file...",they don't get shared by mistake,"{'Main Part': [['archivist', 'mark']], 'Benefi...","{'Main Part': [['mark', 'groups']], 'Benefit':...","{'Main Part': [['groups', 'files']], 'Benefit'..."
2,#G26#,1518,"As an archivist, I want to apply a license or ...","As an archivist, I want to apply a license or ...",I know what I can do with a file,"{'Main Part': [['archivist', 'apply']], 'Benef...","{'Main Part': [['apply', 'license'], ['apply',...","{'Main Part': [], 'Benefit': []}",#G26#,1521,"As an archivist, I want to restrict a file fro...","#G26# As an archivist, I want to restrict a fi...",,"{'Main Part': [['archivist', 'restrict']], 'Be...","{'Main Part': [['restrict', 'file']], 'Benefit...","{'Main Part': [], 'Benefit': []}"
3,#G26#,1518,"As an archivist, I want to apply a license or ...","As an archivist, I want to apply a license or ...",I know what I can do with a file,"{'Main Part': [['archivist', 'apply']], 'Benef...","{'Main Part': [['apply', 'license'], ['apply',...","{'Main Part': [], 'Benefit': []}",#G26#,1522,"As an archivist, I want to restrict a group of...","#G26# As an archivist, I want to restrict a gr...",,"{'Main Part': [['archivist', 'restrict']], 'Be...","{'Main Part': [['restrict', 'group']], 'Benefi...","{'Main Part': [['group', 'files']], 'Benefit':..."
4,#G26#,1518,"As an archivist, I want to apply a license or ...","As an archivist, I want to apply a license or ...",I know what I can do with a file,"{'Main Part': [['archivist', 'apply']], 'Benef...","{'Main Part': [['apply', 'license'], ['apply',...","{'Main Part': [], 'Benefit': []}",#G26#,1523,"As an archivist, I want to set a date on which...","As an archivist, I want to set a date on which...",I don't have to update this by hand,"{'Main Part': [['archivist', 'set']], 'Benefit...","{'Main Part': [['set', 'date']], 'Benefit': [[...","{'Main Part': [], 'Benefit': [['file', 'restri..."


### Process and Store the User Stories persistently

In [98]:
# Fresh recorder for this data set, registered under its id so the summary
# cell at the end of the notebook can read the timings.
time_recorder = TimeRecorder()
time_recorders[ID_G26] = time_recorder

# Keyword arguments that the threaded and the sequential handler both receive.
# 1 and 9 are presumably the positions of "First USID" and "Second USID" in the pair frame.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept
# unchanged because the handler receives it as a runtime value.
shared_request_kwargs = dict(
    index_usid1=1,
    index_usid2=9,
    message=message,
    pairs=df_g26_pairs,
    key=ID_G26,
    model_version_name=MODEL_VERSION_NAME,
    template_request_two_user_stories=template_request_two_user_stories,
    redundancy_prefix="annoations",
    time_recorder=time_recorder,
)

if not THREADING:
    # NOTE(review): json_validator is not among the imports in the first cell;
    # confirm it is defined in an earlier cell.
    process_user_stories(json_validation=json_validator, **shared_request_kwargs)
else:
    process_user_stories_parallel(
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        **shared_request_kwargs,
    )

## Data processing for: G27

In [99]:
# Select the G27 data set and derive the two frames used in the following cells:
# df_g27 holds one row per user story, df_g27_pairs one row per pair of user stories.
# NOTE(review): the notebook introduction lists "g27-culrepo" among the ignored files
# and does not name g27 under "Data sets to analyse" — confirm whether G27 should be
# processed here.
ID_G27: str = "g27"
data_g27: list = datasets[ID_G27]
df_g27: pd.DataFrame = transform_2_dataframe(data_g27)
df_g27_pairs = transform_pairwise(df_g27)

In [100]:
# Inspect the first raw G27 record; the bare expression is rendered as the cell output.
datasets[ID_G27][0]

{'PID': '#G27#',
 'USID': '1618',
 'Text': 'As a faculty member, I want to access a collection within the repository, so that I can obtain research materials.',
 'Main Part': 'As a faculty member, I want to access a collection within the repository',
 'Benefit': 'I can obtain research materials',
 'Triggers': {'Main Part': [['faculty member', 'access']], 'Benefit': []},
 'Targets': {'Main Part': [['access', 'collection']],
  'Benefit': [['obtain', 'research materials']]},
 'Contains': {'Main Part': [['repository', 'collection']], 'Benefit': []}}

In [101]:
# Preview the first five G27 user stories as a table.
display(df_g27.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G27#,1618,"As a faculty member, I want to access a collec...","As a faculty member, I want to access a collec...",I can obtain research materials,"{'Main Part': [['faculty member', 'access']], ...","{'Main Part': [['access', 'collection']], 'Ben...","{'Main Part': [['repository', 'collection']], ..."
1,#G27#,1619,"As a faculty member, I want to direct students...","As a faculty member, I want to direct students...",I can facilitate my teaching (i.e. desires abi...,"{'Main Part': [['faculty member', 'direct']], ...","{'Main Part': [['direct', 'students']], 'Benef...","{'Main Part': [['students', 'item']], 'Benefit..."
2,#G27#,1620,"As a Cornell faculty member, I want to upload ...","As a Cornell faculty member, I want to upload ...",I receive attribution for my data as well as m...,"{'Main Part': [['Cornell faculty member', 'upl...","{'Main Part': [['upload', 'data'], ['associate...","{'Main Part': [], 'Benefit': [['repository', '..."
3,#G27#,1621,"As a patron, I want to know which reports/data...","As a patron, I want to know which reports/data...",I can access latest report/dataset,"{'Main Part': [['patron', 'know']], 'Benefit':...","{'Main Part': [['know', 'reports'], ['know', '...","{'Main Part': [], 'Benefit': []}"
4,#G27#,1622,"As a patron, I want to select/deselect reports...","#G27# As a patron, I want to select/deselect r...",,"{'Main Part': [['patron', 'select'], ['patron'...","{'Main Part': [['select', 'reports'], ['select...","{'Main Part': [['reports', 'new versions'], ['..."


In [102]:
# Preview the first G27 pairs; each row carries a "First ..." and a "Second ..." user story.
display(df_g27_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G27#,1618,"As a faculty member, I want to access a collec...","As a faculty member, I want to access a collec...",I can obtain research materials,"{'Main Part': [['faculty member', 'access']], ...","{'Main Part': [['access', 'collection']], 'Ben...","{'Main Part': [['repository', 'collection']], ...",#G27#,1619,"As a faculty member, I want to direct students...","As a faculty member, I want to direct students...",I can facilitate my teaching (i.e. desires abi...,"{'Main Part': [['faculty member', 'direct']], ...","{'Main Part': [['direct', 'students']], 'Benef...","{'Main Part': [['students', 'item']], 'Benefit..."
1,#G27#,1618,"As a faculty member, I want to access a collec...","As a faculty member, I want to access a collec...",I can obtain research materials,"{'Main Part': [['faculty member', 'access']], ...","{'Main Part': [['access', 'collection']], 'Ben...","{'Main Part': [['repository', 'collection']], ...",#G27#,1620,"As a Cornell faculty member, I want to upload ...","As a Cornell faculty member, I want to upload ...",I receive attribution for my data as well as m...,"{'Main Part': [['Cornell faculty member', 'upl...","{'Main Part': [['upload', 'data'], ['associate...","{'Main Part': [], 'Benefit': [['repository', '..."
2,#G27#,1618,"As a faculty member, I want to access a collec...","As a faculty member, I want to access a collec...",I can obtain research materials,"{'Main Part': [['faculty member', 'access']], ...","{'Main Part': [['access', 'collection']], 'Ben...","{'Main Part': [['repository', 'collection']], ...",#G27#,1621,"As a patron, I want to know which reports/data...","As a patron, I want to know which reports/data...",I can access latest report/dataset,"{'Main Part': [['patron', 'know']], 'Benefit':...","{'Main Part': [['know', 'reports'], ['know', '...","{'Main Part': [], 'Benefit': []}"
3,#G27#,1618,"As a faculty member, I want to access a collec...","As a faculty member, I want to access a collec...",I can obtain research materials,"{'Main Part': [['faculty member', 'access']], ...","{'Main Part': [['access', 'collection']], 'Ben...","{'Main Part': [['repository', 'collection']], ...",#G27#,1622,"As a patron, I want to select/deselect reports...","#G27# As a patron, I want to select/deselect r...",,"{'Main Part': [['patron', 'select'], ['patron'...","{'Main Part': [['select', 'reports'], ['select...","{'Main Part': [['reports', 'new versions'], ['..."
4,#G27#,1618,"As a faculty member, I want to access a collec...","As a faculty member, I want to access a collec...",I can obtain research materials,"{'Main Part': [['faculty member', 'access']], ...","{'Main Part': [['access', 'collection']], 'Ben...","{'Main Part': [['repository', 'collection']], ...",#G27#,1623,"As a DB/IR administrator, I want to mark repor...","#G27# As a DB/IR administrator, I want to mark...",,"{'Main Part': [['DB/IR administrator', 'mark']...","{'Main Part': [['mark', 'report'], ['mark', 'd...","{'Main Part': [['report', 'error'], ['dataset'..."


### Process and Store the User Stories persistently

In [103]:
# Fresh recorder for this data set, registered under its id so the summary
# cell at the end of the notebook can read the timings.
time_recorder = TimeRecorder()
time_recorders[ID_G27] = time_recorder

# Keyword arguments that the threaded and the sequential handler both receive.
# 1 and 9 are presumably the positions of "First USID" and "Second USID" in the pair frame.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept
# unchanged because the handler receives it as a runtime value.
shared_request_kwargs = dict(
    index_usid1=1,
    index_usid2=9,
    message=message,
    pairs=df_g27_pairs,
    key=ID_G27,
    model_version_name=MODEL_VERSION_NAME,
    template_request_two_user_stories=template_request_two_user_stories,
    redundancy_prefix="annoations",
    time_recorder=time_recorder,
)

if not THREADING:
    # NOTE(review): json_validator is not among the imports in the first cell;
    # confirm it is defined in an earlier cell.
    process_user_stories(json_validation=json_validator, **shared_request_kwargs)
else:
    process_user_stories_parallel(
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        **shared_request_kwargs,
    )

## Data processing for: G28

In [104]:
# Select the G28 data set and derive the two frames used in the following cells:
# df_g28 holds one row per user story, df_g28_pairs one row per pair of user stories.
ID_G28: str = "g28"
data_g28: list = datasets[ID_G28]
df_g28: pd.DataFrame = transform_2_dataframe(data_g28)
df_g28_pairs = transform_pairwise(df_g28)

In [105]:
# Inspect the first raw G28 record; the bare expression is rendered as the cell output.
datasets[ID_G28][0]

{'PID': '#G28#',
 'USID': '1733',
 'Text': 'As a user, I want to search and discover music using content identification or fingerprinting.',
 'Main Part': '#G28# As a user, I want to search and discover music using content identification or fingerprinting.',
 'Benefit': '',
 'Triggers': {'Main Part': [['user', 'search'], ['user', 'discover']],
  'Benefit': []},
 'Targets': {'Main Part': [['search', 'music'], ['discover', 'music']],
  'Benefit': [['using', 'content identification'],
   ['using', 'fingerprinting']]},
 'Contains': {'Main Part': [], 'Benefit': []}}

In [106]:
# Preview the first five G28 user stories as a table.
display(df_g28.head(5))

Unnamed: 0,PID,USID,Text,Main Part,Benefit,Triggers,Targets,Contains
0,#G28#,1733,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}"
1,#G28#,1734,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}"
2,#G28#,1735,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['discover', 'music'], ['search...",{'Main Part': [['other users similar to myself...
3,#G28#,1736,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['discover', 'music'], ['discov...","{'Main Part': [], 'Benefit': [['given music vi..."
4,#G28#,1737,"As a user, I want to search and discover music...","As a user, I want to search and discover music...","I find related artists, new genre, or other re...","{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}"


In [107]:
# Preview the first G28 pairs; each row carries a "First ..." and a "Second ..." user story.
display(df_g28_pairs.head())

Unnamed: 0,First PID,First USID,First Text,First Main Part,First Benefit,First Triggers,First Targets,First Contains,Second PID,Second USID,Second Text,Second Main Part,Second Benefit,Second Triggers,Second Targets,Second Contains
0,#G28#,1733,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}",#G28#,1734,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}"
1,#G28#,1733,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}",#G28#,1735,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['discover', 'music'], ['search...",{'Main Part': [['other users similar to myself...
2,#G28#,1733,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}",#G28#,1736,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['discover', 'music'], ['discov...","{'Main Part': [], 'Benefit': [['given music vi..."
3,#G28#,1733,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}",#G28#,1737,"As a user, I want to search and discover music...","As a user, I want to search and discover music...","I find related artists, new genre, or other re...","{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}"
4,#G28#,1733,"As a user, I want to search and discover music...","#G28# As a user, I want to search and discover...",,"{'Main Part': [['user', 'search'], ['user', 'd...","{'Main Part': [['search', 'music'], ['discover...","{'Main Part': [], 'Benefit': []}",#G28#,1738,"As a user, I want to upload tagged videos and ...","#G28# As a user, I want to upload tagged video...",,"{'Main Part': [['user', 'upload'], ['user', 'h...","{'Main Part': [['upload', 'tagged videos'], ['...",{'Main Part': [['all further processing steps'...


### Process and Store the User Stories persistently

In [108]:
# Fresh recorder for this data set, registered under its id so the summary
# cell at the end of the notebook can read the timings.
time_recorder = TimeRecorder()
time_recorders[ID_G28] = time_recorder

# Keyword arguments that the threaded and the sequential handler both receive.
# 1 and 9 are presumably the positions of "First USID" and "Second USID" in the pair frame.
# NOTE(review): "annoations" looks like a typo of "annotations"; it is kept
# unchanged because the handler receives it as a runtime value.
shared_request_kwargs = dict(
    index_usid1=1,
    index_usid2=9,
    message=message,
    pairs=df_g28_pairs,
    key=ID_G28,
    model_version_name=MODEL_VERSION_NAME,
    template_request_two_user_stories=template_request_two_user_stories,
    redundancy_prefix="annoations",
    time_recorder=time_recorder,
)

if not THREADING:
    # NOTE(review): json_validator is not among the imports in the first cell;
    # confirm it is defined in an earlier cell.
    process_user_stories(json_validation=json_validator, **shared_request_kwargs)
else:
    process_user_stories_parallel(
        sort_threaded_results=sort_threaded_results,
        json_schema=chat_gpt_schema_with_annotations,
        **shared_request_kwargs,
    )

## Save the total processing times

In [109]:
# Name of the worksheet with the time-consumption records; the following cells
# read this sheet from the existing workbook and pass the name to save_to_excel.
SHEET_NAME = 'Time Consup. Anlys.S.'

In [110]:
# Build `time_consumption_data`: one row per processed data set with its run
# count, the model/threading configuration and the recorded durations, merged
# with the rows already stored in the output workbook.

base_path: str = os.getcwd()
# Directory that is searched for the numbered per-run result files of this model.
path_to_file = os.path.join(base_path, "results", "redundancy-model-" + MODEL_VERSION_NAME)

# Per data set, count the consecutively numbered result files
# ("00_<key>.json", "01_<key>.json", ...) that already exist.
count_runs_per_data_set: dict[str, int] = {}
for key in time_recorders:
    run_count = 0
    while os.path.exists(os.path.join(path_to_file, f"{run_count:02d}_{key}.json")):
        run_count += 1
    count_runs_per_data_set[key] = run_count

columns = ['Dataset', 'Run Count', 'Model Version', 'Threading Enabled',
           'Nanoseconds', 'Milliseconds', 'Seconds', 'Minutes']

# One record per data set, with the fields in the order of `columns`.
entries: list[tuple] = [
    (key, count_runs_per_data_set[key], MODEL_VERSION_NAME, str(THREADING).lower(),
     recorder.nanoseconds, recorder.milliseconds, recorder.seconds, recorder.minutes)
    for key, recorder in time_recorders.items()
]
time_consumption_data: pd.DataFrame = pd.DataFrame(entries, columns=columns)

_file_path: str = os.path.join(base_path, os.getenv('OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS'))
check = os.path.exists(_file_path)

old_time_consuption_data: pd.DataFrame | None = None
if check:
    try:
        old_time_consuption_data = pd.read_excel(_file_path, sheet_name=SHEET_NAME)
    except ValueError:
        # Presumably the workbook exists but has no time-consumption sheet yet;
        # continue as if there were no earlier records.
        check = False

if check and not old_time_consuption_data.empty:
    # This cell may be executed twice for the same data: a new row is only added
    # when its (data set, run count) pair is not already stored in the workbook.
    old_time_consuption_data = old_time_consuption_data.dropna()
    stored_runs = set(zip(old_time_consuption_data.iloc[:, 0],
                          old_time_consuption_data.iloc[:, 1]))
    is_new_run = pd.Series(
        [run not in stored_runs
         for run in zip(time_consumption_data['Dataset'], time_consumption_data['Run Count'])],
        index=time_consumption_data.index,
        dtype=bool,
    )
    time_consumption_data = pd.concat(
        [old_time_consuption_data, time_consumption_data.loc[is_new_run]]
    ).reset_index(drop=True)

In [111]:
def formatter_time(wb: Workbook, sheet_name: str): 
    """Format the worksheet ``sheet_name`` of ``wb`` in place.

    The header row (row 1) gets a bold 14 pt font, left / vertically centred
    alignment, and sets the width of every column. All rows below the header
    are aligned top-left with text wrapping. An auto-filter is placed on the
    header row and the header row is frozen.

    Args:
        wb: Workbook that contains the worksheet to format.
        sheet_name: Name of the worksheet inside ``wb``.

    Raises:
        KeyError: If ``wb`` has no worksheet named ``sheet_name``.
    """
    ws = wb[sheet_name]

    header_font = Font(size=14, bold=True)
    header_alignment = Alignment(vertical='center', horizontal='left')
    for cell in ws["1:1"]:
        cell.font = header_font
        cell.alignment = header_alignment
        # The column width depends on the header text only:
        # (header length + 2 characters of padding) scaled by 1.5.
        header_length = len(str(cell.value))
        column_letter = utils_get_column_letter(cell.column)
        ws.column_dimensions[column_letter].width = (header_length + 2) * 1.5

    # Filter drop-downs on the header row, which stays visible while scrolling.
    ws.auto_filter.ref = f"A1:{utils_get_column_letter(ws.max_column)}1"
    ws.freeze_panes = ws['A2']

    # Data rows wrap their text and are anchored at the top-left of the cell.
    body_alignment = Alignment(wrap_text=True, vertical='top', horizontal='left')
    for row in ws.iter_rows(min_row=2):
        for cell in row:
            cell.alignment = body_alignment

In [112]:
# Hand the merged timing table, its formatter and the sheet name to save_to_excel;
# the target workbook name is read from OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS.
save_to_excel(time_consumption_data, formatter_time, SHEET_NAME, os.getenv("OUTPUT_EXCEL_NAME_WITH_ANNOTATIONS"))