# With ChatModel

In [None]:
import json
import helper
import pandas as pd

from pprint import pprint

from datetime import datetime

from langchain.chat_models import ChatOpenAI
from langchain.prompts import load_prompt
from langchain.schema import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser

In [None]:
def save_result(algorithm, output, prompt_type, model, init, best_practice, format_prompt, output_parser, ticketUri, ticket, reruns=0, evalMode=False):
    """Write one model result, plus its provenance, to a timestamped JSON file.

    Args:
        algorithm: Prompting strategy label; only used as a path segment
            when ``evalMode`` is true.
        output: Parsed model output, stored under ``'output'``.
        prompt_type: Task name; used as a path segment and forwarded to
            ``helper.annotateResult``.
        model: Model name; stored in the result and used as a path segment
            when ``evalMode`` is true.
        init: Stored as ``prompts.init_uri``.
        best_practice: Stored as ``prompts.best_practice_uri``.
        format_prompt: Stored as ``prompts.formatter_uri``.
        output_parser: Stored under ``'output_parser'``.
        ticketUri: Stored as ``input_data.ticket_uri``.
        ticket: Mapping providing the ``'Jira'``, ``'IssueId'`` and
            ``'EvoId'`` keys.
        reruns: Stored under ``'reruns'``.
        evalMode: When true the file goes to
            ``evaluation/<prompt_type>/<model>/<algorithm>/``, otherwise to
            ``results/<prompt_type>/``.

    Raises:
        KeyError: If ``ticket`` lacks one of the required keys.
        TypeError: If the result cannot be serialized to JSON.
    """
    import os  # local import: only needed to create the output directory

    now = datetime.now()
    date = now.strftime("%Y-%m-%d")
    time_of_day = now.strftime("%H:%M:%S")

    result = {
        'model': model,
        'creation_timestamp': date + ' ' + time_of_day,
        'prompts': {
            'init_uri': init,
            'best_practice_uri': best_practice,
            'formatter_uri': format_prompt},
        'output_parser': output_parser,
        'input_data': {
            'ticket_uri': ticketUri,
            'jira': ticket['Jira'],
            'id': ticket['IssueId'],
            'evolution': ticket['EvoId']},
        'reruns': reruns,
        'output': output,
    }

    # The return value is ignored, so the helper presumably annotates
    # ``result`` in place -- TODO confirm against helper.annotateResult.
    helper.annotateResult(result, ticket, prompt_type)

    # Serialize before touching the file system so that a non-serializable
    # result does not leave a truncated JSON file behind.
    payload = json.dumps(result, indent=4)

    output_name = 'output_' + date + '_' + time_of_day + '.json'
    if evalMode:
        target_dir = 'evaluation/' + prompt_type + '/' + model + '/' + algorithm
    else:
        target_dir = 'results/' + prompt_type

    # A fresh model/algorithm combination has no folder yet; create it
    # instead of failing with FileNotFoundError.
    os.makedirs(target_dir, exist_ok=True)
    with open(target_dir + '/' + output_name, 'w') as f:
        f.write(payload)

    if evalMode:
        print('Evaluation mode: result saved in evaluation folder')

def prepareConsensusTask(sample):
    """Render the candidate summaries of ``sample`` as one numbered text block.

    Every row yielded by ``sample.iterrows()`` contributes one line built
    from its ``'prompt'`` and ``'summary_new'`` values. Numbering starts at
    1 and follows row order, independent of the frame's index. An empty
    frame yields an empty string.
    """
    lines = [
        f"Summary {position} from prompt type '{row['prompt']}': {row['summary_new']}\n"
        for position, (_, row) in enumerate(sample.iterrows(), start=1)
    ]
    return "".join(lines)

def saveSelection(algorithm, output, prompt_type, model, consensus_prompt, format_prompt, output_parser, ticketUri, ticket, reruns=0, evalMode=False):
    """Write one consensus-selection result to a timestamped JSON file.

    Args:
        algorithm: Label used as a path segment when ``evalMode`` is true.
        output: Parsed model output, stored under ``'output'``.
        prompt_type: Task name, used as a path segment.
        model: Model name; stored in the result and used as a path segment
            when ``evalMode`` is true.
        consensus_prompt: Stored as ``prompts.consensus_uri``.
        format_prompt: Stored as ``prompts.formatter_uri``.
        output_parser: Stored under ``'output_parser'``.
        ticketUri: Stored as ``input_data.ticket_uri``.
        ticket: Mapping providing the ``'Jira'``, ``'IssueId'`` and
            ``'EvoId'`` keys.
        reruns: Stored under ``'reruns'``.
        evalMode: When true the file goes to
            ``evaluation/<prompt_type>/<model>/<algorithm>/``, otherwise to
            ``results/<prompt_type>/``.

    Raises:
        KeyError: If ``ticket`` lacks one of the required keys.
        TypeError: If the result cannot be serialized to JSON.
    """
    import os  # local import: only needed to create the output directory

    now = datetime.now()
    date = now.strftime("%Y-%m-%d")
    time_of_day = now.strftime("%H:%M:%S")

    result = {
        'model': model,
        'creation_timestamp': date + ' ' + time_of_day,
        'prompts': {
            'consensus_uri': consensus_prompt,
            'formatter_uri': format_prompt},
        'output_parser': output_parser,
        'input_data': {
            'ticket_uri': ticketUri,
            'jira': ticket['Jira'],
            'id': ticket['IssueId'],
            'evolution': ticket['EvoId']},
        'reruns': reruns,
        'output': output,
    }

    # Serialize before touching the file system so that a non-serializable
    # result does not leave a truncated JSON file behind.
    payload = json.dumps(result, indent=4)

    output_name = 'output_' + date + '_' + time_of_day + '.json'
    if evalMode:
        target_dir = 'evaluation/' + prompt_type + '/' + model + '/' + algorithm
    else:
        target_dir = 'results/' + prompt_type

    # A fresh model/algorithm combination has no folder yet; create it
    # instead of failing with FileNotFoundError.
    os.makedirs(target_dir, exist_ok=True)
    with open(target_dir + '/' + output_name, 'w') as f:
        f.write(payload)

    if evalMode:
        print('Evaluation mode: result saved in evaluation folder')

In [None]:
# Name of the chat model used by every chain in this notebook; also recorded
# in each saved result.
MODELNAME = 'gpt-4-0125-preview'

# Read the key through a context manager so the file handle is closed, and
# strip surrounding whitespace so a trailing newline in api.txt does not
# become part of the key.
with open('api.txt', 'r') as _key_file:
    _api_key = _key_file.read().strip()

model = ChatOpenAI(
    model=MODELNAME,
    api_key=_api_key,
    temperature=0
)

# File name (under prompts/init/) of the initial prompt loaded by the run* functions.
INITPROMPT = 'initPrompt_V2.1.0.json'

## Summary Length

In [None]:
async def runDetectionOfSummaryLength(prompt_type, summary_prompt, format_prompt, ticket_uri, ticket, reruns=0, evalMode=False):
    """Run the two-stage summary-length chain for one ticket and save the result.

    Args:
        prompt_type: Prompting strategy label; forwarded to ``save_result``
            as its ``algorithm`` argument.
        summary_prompt: File name of the best-practice prompt under
            ``prompts/summary/``.
        format_prompt: File name of the formatter prompt under
            ``prompts/summary/``.
        ticket_uri: Path of the ticket file; only recorded in the result.
        ticket: Loaded ticket; inserted into the best-practice prompt and
            used by ``save_result``.
        reruns: Recorded in the saved result.
        evalMode: Forwarded to ``save_result``.
    """
    ### Set variables ###
    minChars = 39
    maxChars = 70

    init_prompt = load_prompt('prompts/init/' + INITPROMPT)
    bestPractice_prompt = load_prompt('prompts/summary/' + summary_prompt).format(min=minChars, max=maxChars, ticket=ticket)
    # Keep the loaded template under its own name: ``format_prompt`` must
    # remain the file name, because it is written to the result JSON below.
    # Rebinding it to the template object stored a non-serializable value
    # as ``formatter_uri`` and lost the file name.
    formatter = load_prompt('prompts/summary/' + format_prompt)

    ### Create multi chain ###
    # Stage 1 produces free text; stage 2 feeds it to the formatter prompt
    # as ``revised_ticket`` and parses the model's answer as JSON.
    chain1 = init_prompt | model | StrOutputParser()
    chain2 = (
        {"revised_ticket": chain1}
        | formatter
        | model
        | JsonOutputParser()
    )

    ### Run chains ###
    output = await chain2.ainvoke({"role":"Software Engineer", "best_practice":bestPractice_prompt})

    ### Save result ###
    save_result(prompt_type, output, 'summary', MODELNAME, INITPROMPT, summary_prompt, format_prompt, 'JsonOutputParser', ticket_uri, ticket, reruns, evalMode)


async def runAnswerComparison(ticket, ticket_uri, consensus_temp, format_temp, answers, run, evalMode=False):
    """Run the consensus chain over candidate answers for one ticket and save the result.

    ``consensus_temp`` and ``format_temp`` are prompt file names under
    ``prompts/summary/``. ``answers`` is the text block of candidate
    summaries passed to the consensus prompt. ``run`` is recorded as the
    rerun number; ``evalMode`` is forwarded to ``saveSelection``.
    """
    # Character limits handed to the consensus prompt as ``min`` / ``max``.
    lower_bound, upper_bound = 39, 70

    prompt_dir = 'prompts/summary/'
    selection_prompt = load_prompt(prompt_dir + consensus_temp)
    selection_formatter = load_prompt(prompt_dir + format_temp)

    # Stage 1 yields free text; stage 2 receives it as ``answer`` and
    # parses the formatted reply as JSON.
    selection_chain = selection_prompt | model | StrOutputParser()
    pipeline = {"answer": selection_chain} | selection_formatter | model | JsonOutputParser()

    chain_input = {
        "ticket": ticket,
        "role": "Software Engineer",
        "min": lower_bound,
        "max": upper_bound,
        "answers": answers,
    }
    output = await pipeline.ainvoke(chain_input)

    saveSelection(
        'Consensus', output, 'summary', MODELNAME,
        consensus_temp, format_temp, 'JsonOutputParser',
        ticket_uri, ticket, run, evalMode,
    )

#### 0-Shot

In [None]:
# Prompt files (under prompts/summary/) for the 0-shot summary-length run.
summaryPrompt = "summaryLengthPrompt_V3.2.0.json"
formatPrompt = "summaryLengthFormatPrompt_V1.5.0.json"

In [None]:
annotatedDataset = pd.read_csv('data/summary/summaryDataset.csv')

amount = annotatedDataset.shape[0]
num = 1
errorCounter = 0

for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/summary/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runDetectionOfSummaryLength('0Shot', summaryPrompt, formatPrompt, ticketUri, ticket, 0, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

#### Few-Shot

In [None]:
# Prompt files (under prompts/summary/) for the few-shot summary-length run.
summaryPrompt = "summaryLengthPrompt_FewShot_V2.3.0.json"
formatPrompt = "summaryLengthFormatPrompt_V1.5.0.json"

In [None]:
annotatedDataset = pd.read_csv('data/summary/summaryDataset.csv')

amount = annotatedDataset.shape[0]
num = 1
errorCounter = 0

for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/summary/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runDetectionOfSummaryLength('FewShot', summaryPrompt, formatPrompt, ticketUri, ticket, 0, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

#### 0-Shot CoT

In [None]:
# Prompt files (under prompts/summary/) for the 0-shot chain-of-thought run.
summaryPrompt = "summaryLengthPrompt_0ShotCoT_V2.1.0.json"
formatPrompt = "summaryLengthFormatPrompt_V1.5.0.json"

In [None]:
annotatedDataset = pd.read_csv('data/summary/summaryDataset.csv')

amount = annotatedDataset.shape[0]
num = 1
errorCounter = 0

for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/summary/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runDetectionOfSummaryLength('0ShotCoT', summaryPrompt, formatPrompt, ticketUri, ticket, 0, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

#### Few-Shot CoT

In [None]:
# Prompt files (under prompts/summary/) for the few-shot chain-of-thought run.
summaryPrompt = "summaryLengthPrompt_CoT_V1.2.0.json"
formatPrompt = "summaryLengthFormatPrompt_V1.5.0.json"

In [None]:
annotatedDataset = pd.read_csv('data/summary/summaryDataset.csv')

amount = annotatedDataset.shape[0]
num = 1
errorCounter = 0

for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/summary/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runDetectionOfSummaryLength('FewShotCoT', summaryPrompt, formatPrompt, ticketUri, ticket, 0, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

#### Consensus

In [None]:
# Prompt files (under prompts/summary/) for the consensus run.
consensusPrompt = "summaryLengthConsensusPrompt_V1.0.0.json"
consensusFormatPrompt = "summaryLengthConsensusFormatPrompt_V1.2.0.json"

In [None]:
### preparing dataset for consensus task ###
consensusDataset = pd.read_csv('./evaluation/summary/gpt-4-0125-preview/totalSmells_1Run.csv')

reduced_df = consensusDataset[['jira', 'ticketId', 'ticket_uri', 'reruns']].drop_duplicates().sort_values(by=['jira'])

### Set variables ###
amount = reduced_df.shape[0]
num = 1
errorCounter = 0
run = 0

### Run consensus task ###
for index, row in reduced_df.iterrows():
    ticketUri = row['ticket_uri']
    run = row['reruns']
    sample = consensusDataset[((consensusDataset['jira'] == row['jira']) & (consensusDataset['ticketId'] == row['ticketId']))]
    answers = prepareConsensusTask(sample)
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runAnswerComparison(ticket, ticketUri, consensusPrompt, consensusFormatPrompt, answers, run, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

## Bug Report Structure

In [None]:
# Formatter prompt file (under prompts/bugReportStructure/) shared by the bug-report runs below.
formatPrompt = "bugReportStructureFormatPrompt_V2.2.1.json"

In [None]:
async def runBugReportDescriptionStructuring(prompt_type, bugreport_prompt, formatter_prompt, ticket_uri, ticket, reruns= 0, evalMode=False):
    """Run the two-stage bug-report-structure chain for one ticket and save the result.

    ``bugreport_prompt`` and ``formatter_prompt`` are prompt file names
    under ``prompts/bugReportStructure/``. ``ticket`` is inserted into the
    best-practice prompt as ``bug_report``. ``prompt_type`` is forwarded to
    ``save_result`` as its ``algorithm`` argument.
    """
    prompt_dir = 'prompts/bugReportStructure/'

    opening_prompt = load_prompt('prompts/init/' + INITPROMPT)
    best_practice_text = load_prompt(prompt_dir + bugreport_prompt).format(bug_report=ticket)
    formatting_prompt = load_prompt(prompt_dir + formatter_prompt)

    # Stage 1 yields free text; stage 2 receives it as ``revised_bug_report``
    # and parses the formatted reply as JSON.
    revision_chain = opening_prompt | model | StrOutputParser()
    pipeline = {"revised_bug_report": revision_chain} | formatting_prompt | model | JsonOutputParser()

    chain_input = {"role": "Software Engineer", "best_practice": best_practice_text}
    output = await pipeline.ainvoke(chain_input)

    save_result(
        prompt_type, output, 'bugreportStructure', MODELNAME, INITPROMPT,
        bugreport_prompt, formatter_prompt, 'JsonOutputParser',
        ticket_uri, ticket, reruns, evalMode,
    )

async def runFewShotBugReportDescriptionStructuring(prompt_type, bugreport_prompt, formatter_prompt, example_prompt, ticket_uri, ticket, reruns= 0, evalMode=False):
    """Run the few-shot bug-report-structure chain for one ticket and save the result.

    All three prompt arguments are file names under
    ``prompts/bugReportStructure/``. The examples prompt is formatted with
    ``ticket`` as ``bug_report``, and its text is inserted into the
    best-practice prompt as ``examples``. Both prompt file names are
    recorded in the result, joined by ``" ; "``.
    """
    prompt_dir = 'prompts/bugReportStructure/'

    opening_prompt = load_prompt('prompts/init/' + INITPROMPT)
    examples_text = load_prompt(prompt_dir + example_prompt).format(bug_report=ticket)
    best_practice_text = load_prompt(prompt_dir + bugreport_prompt).format(examples=examples_text)
    formatting_prompt = load_prompt(prompt_dir + formatter_prompt)

    # Stage 1 yields free text; stage 2 receives it as ``revised_bug_report``
    # and parses the formatted reply as JSON.
    revision_chain = opening_prompt | model | StrOutputParser()
    pipeline = {"revised_bug_report": revision_chain} | formatting_prompt | model | JsonOutputParser()

    chain_input = {"role": "Software Engineer", "best_practice": best_practice_text}
    output = await pipeline.ainvoke(chain_input)

    used_prompts = " ; ".join([bugreport_prompt, example_prompt])
    save_result(
        prompt_type, output, 'bugreportStructure', MODELNAME, INITPROMPT,
        used_prompts, formatter_prompt, 'JsonOutputParser',
        ticket_uri, ticket, reruns, evalMode,
    )

#### 0-Shot

In [None]:
# Best-practice prompt file (under prompts/bugReportStructure/) for the 0-shot run.
bugReportPrompt = "bugReportStructurePrompt_V2.1.0.json"

In [None]:
# Load the bug-report dataset and reset the progress counters used by the
# loop in the next cell.
annotatedDataset = pd.read_csv('data/bugreportStructure/bugreportStructureDataset.csv')

amount = len(annotatedDataset)  # total number of tickets
num, errorCounter = 1, 0        # current position (1-based), failures so far

In [None]:
for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/bugreportStructure/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runBugReportDescriptionStructuring('0Shot', bugReportPrompt, formatPrompt, ticketUri, ticket, 1, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

#### 0-Shot CoT

In [None]:
# Best-practice prompt file (under prompts/bugReportStructure/) for the 0-shot chain-of-thought run.
bugReportPrompt = "bugReportStructurePrompt_0ShotCoT_V1.0.0.json"

In [None]:
# Load the bug-report dataset and reset the progress counters used by the
# loop in the next cell.
annotatedDataset = pd.read_csv('data/bugreportStructure/bugreportStructureDataset.csv')

amount = len(annotatedDataset)  # total number of tickets
num, errorCounter = 1, 0        # current position (1-based), failures so far

In [None]:
for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/bugreportStructure/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runBugReportDescriptionStructuring('0ShotCoT', bugReportPrompt, formatPrompt, ticketUri, ticket, 1, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

#### Few-Shot

In [None]:
# Prompt files (under prompts/bugReportStructure/) for the few-shot run.
bugReportPrompt = "bugReportStructurePrompt_FewShot_V2.9.0.json"
examplePrompt = "bugReportStructurePrompt_FewShot_Examples_V1.2.0.json"

In [None]:
# Load the bug-report dataset and reset the progress counters used by the
# loop in the next cell.
annotatedDataset = pd.read_csv('data/bugreportStructure/bugreportStructureDataset.csv')

amount = len(annotatedDataset)  # total number of tickets
num, errorCounter = 1, 0        # current position (1-based), failures so far

In [None]:
for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/bugreportStructure/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runFewShotBugReportDescriptionStructuring('FewShot', bugReportPrompt, formatPrompt, examplePrompt, ticketUri, ticket, 1, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

#### Few-Shot CoT

In [None]:
# Prompt files (under prompts/bugReportStructure/) for the few-shot chain-of-thought run.
bugReportPrompt = "bugReportStructurePrompt_FewShotCoT_V1.1.0.json"
examplePrompt = "bugReportStructurePrompt_FewShotCoT_Examples_V1.1.0.json"

In [None]:
# Load the bug-report dataset and reset the progress counters used by the
# loop in the next cell.
annotatedDataset = pd.read_csv('data/bugreportStructure/bugreportStructureDataset.csv')

# NOTE(review): only the rows whose 'Jira' value is 'Spring' are processed
# in this section, unlike the other runs -- confirm this is intended.
cut = annotatedDataset.loc[annotatedDataset['Jira'] == 'Spring']

amount = len(cut)         # number of selected tickets
num, errorCounter = 1, 0  # current position (1-based), failures so far

In [None]:
for index, row in cut.iterrows():
    ticketUri = "./data/bugreportStructure/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(errorCounter) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runFewShotBugReportDescriptionStructuring('FewShotCoT', bugReportPrompt, formatPrompt, examplePrompt, ticketUri, ticket, 0, True)
        print("     ")
    except:
        print('Error with ticket: ' + ticketUri)
        errorCounter += 1
        continue
    finally:
        num += 1

## Arbitrary Structure

In [None]:
def runDetectionOfArbitraryStructure(init_prompt_name, bestPractice_prompt_name, format_prompt_name, ticketUri, ticket, issue_type, desired_structure, structure_desc, reruns= 0, evalMode=False):
    """Run the two-stage arbitrary-structure chain for one ticket (synchronously) and save the result.

    Args:
        init_prompt_name: File name of the initial prompt under ``prompts/init/``.
        bestPractice_prompt_name: File name of the best-practice prompt
            under ``prompts/arbitraryStructure/``.
        format_prompt_name: File name of the formatter prompt under
            ``prompts/arbitraryStructure/``.
        ticketUri: Path of the ticket file; only recorded in the result.
        ticket: Loaded ticket, passed to the chain as ``ticket``.
        issue_type: Inserted into the best-practice prompt as ``issue_type``.
        desired_structure: Inserted into the best-practice prompt as ``structure``.
        structure_desc: Inserted into the best-practice prompt as ``structure_desc``.
        reruns: Recorded in the saved result.
        evalMode: Forwarded to ``save_result``.
    """
    ### Set Prompts ###
    init_prompt = load_prompt('prompts/init/'+init_prompt_name)
    bestPractice_prompt = load_prompt('prompts/arbitraryStructure/'+bestPractice_prompt_name).format(issue_type=issue_type, structure_desc=structure_desc, structure=desired_structure)
    format_prompt = load_prompt('prompts/arbitraryStructure/'+format_prompt_name)

    ### Create multi chain ###
    # Stage 1 yields free text; stage 2 receives it as ``revised_ticket``
    # and parses the formatted reply as JSON.
    chain1 = init_prompt | model | StrOutputParser()
    chain2 = (
        {"revised_ticket": chain1}
        | format_prompt
        | model
        | JsonOutputParser()
    )

    ### Run chains ###
    output = chain2.invoke({"role":"Software Engineer", "best_practice":bestPractice_prompt, "ticket":ticket})

    ### Save result ###
    # save_result takes the algorithm label as its first argument. The
    # previous call omitted it, which shifted every argument by one and
    # handed ``reruns`` to the ``ticket`` parameter.
    # NOTE(review): '0Shot' is an assumed label -- confirm. The prompt type
    # 'summary' is kept as it was, although this task reads from the
    # arbitraryStructure folders -- confirm the intended value.
    save_result('0Shot', output, 'summary', MODELNAME, init_prompt_name, bestPractice_prompt_name, format_prompt_name, 'JsonOutputParser', ticketUri, ticket, reruns, evalMode)

In [None]:
# Inputs for the arbitrary-structure prompt: the issue type, the sentence
# template a ticket should follow, and a description of each template slot.
issue_type = 'User Story'
desired_structure = 'As a <Role>, I want/want to/need/can/would <Task>, so that <Goal>'
structure_desc = "\n".join([
    "Role: abstract behavior of actors in the system context; describes who uses the system.",
    "Task: specific things that must be done to achieve goals; solution or function of the problem.",
    "Goal: a condition or a circumstance desired by stakeholders or actors; describes the problem domain or the impact of solving the problem.",
])
# Alternative description that additionally defines a "Capability" slot (currently disabled):
# structure_desc = """Role: abstract behavior of actors in the system context; describes who uses the system.
# Task: specific things that must be done to achieve goals; solution or function of the problem.
# Goal: a condition or a circumstance desired by stakeholders or actors; describes the problem domain or the impact of solving the problem.
# Capability: the ability of actors to achieve goals based on certain conditions and events."""

In [None]:
annotatedDataset = pd.read_csv('data/arbitraryStructure/arbitraryStructureDataset.csv')

for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/arbitraryStructure/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"

    # Loading happens outside the try block, so a missing or malformed
    # ticket file still aborts the whole loop.
    with open(ticketUri) as f:
        json_sample = json.load(f)

    try:
        runDetectionOfArbitraryStructure('initPrompt_V1.3.0.json', 'arbitraryStructurePrompt_V1.0.0.json', 'arbitraryStructureFormatPrompt_V1.0.0.json', ticketUri, json_sample, issue_type, desired_structure, structure_desc)
    except Exception as e:
        # Catch Exception instead of using a bare except so that
        # KeyboardInterrupt can still stop the run, and print the cause
        # instead of hiding it.
        print("Error with ticket: " + ticketUri)
        print(e)



## Field Update

In [None]:
# Prompt files (under prompts/update/) for the field-update run.
UPDATEPROMPT = "updatePrompt_V1.26.0.json"
UPDATEFORMATPROMPT = "updateFormatPrompt_V1.11.0.json"

In [None]:
async def runDetectionOfOutdatedFields(ticket, ticketUri, reruns= 0, evalMode=False):
    """Run the two-stage field-update chain for one ticket and save the result.

    Args:
        ticket: Loaded ticket, passed to the chain as ``ticket`` and used by
            ``save_result``.
        ticketUri: Path of the ticket file; only recorded in the result.
        reruns: Recorded in the saved result.
        evalMode: Forwarded to ``save_result``.
    """
    ### Set variables ###
    init_prompt = load_prompt('prompts/init/' + INITPROMPT)
    bestPractice_prompt = load_prompt('prompts/update/' + UPDATEPROMPT).format()
    format_prompt = load_prompt('prompts/update/' + UPDATEFORMATPROMPT)

    ### Create multi chain ###
    # Stage 1 yields free text; stage 2 receives it as ``revised_ticket``
    # and parses the formatted reply as JSON.
    chain1 = init_prompt | model | StrOutputParser()
    chain2 = (
        {"revised_ticket": chain1}
        | format_prompt
        | model
        | JsonOutputParser()
    )

    ### Run chains ###
    output = await chain2.ainvoke({"role":"Software Engineer", "best_practice":bestPractice_prompt, "ticket":ticket})

    ### Save result ###
    # save_result takes the algorithm label as its first argument. The
    # previous call omitted it, which shifted every argument by one and
    # handed ``reruns`` to the ``ticket`` parameter.
    # NOTE(review): '0Shot' is an assumed label; in evaluation mode it
    # becomes part of the output path -- confirm the intended value.
    save_result('0Shot', output, 'update', MODELNAME, INITPROMPT, UPDATEPROMPT, UPDATEFORMATPROMPT, 'JsonOutputParser', ticketUri, ticket, reruns, evalMode)

In [None]:
annotatedDataset = pd.read_csv('data/update/updateDataset.csv')

amount = annotatedDataset.shape[0]
num = 1
for index, row in annotatedDataset.iterrows():
    ticketUri = "./data/update/" + row['Jira'] + "_" + str(row['IssueId']) + "_" + str(row['EvoId']) + ".json"
    
    with open(ticketUri) as f:
        ticket = json.load(f)
    try:
        print("######## (" + str(num) + "/" + str(amount) + ") Running ticket: " + ticketUri + " ########")
        print("     ")
        await runDetectionOfOutdatedFields(ticket, ticketUri, 1, True)
        print("     ")
    except Exception as e:
        print('Error with ticket: ' + ticketUri)
        print(e)
        continue
    finally:
        num += 1

## Toxic Speech Detection

In [None]:
# Load the single sample ticket used for the toxic-speech experiment.
sample_name = 'last_row_toxic.json'
with open(f'./data/json/last_row/{sample_name}') as sample_file:
    json_sample = json.load(sample_file)

In [None]:
# Prompt file names for the toxic-speech experiment.
init_prompt_name = "initPrompt_V1.3.0.json"
bestPractice_prompt_name = "toxicSpeechPrompt_V1.5.0.json"
format_prompt_name = "toxicSpeechFormatPrompt_V1.2.0.json"

# Load the templates; the best-practice prompt is rendered to text
# immediately (format() is called without arguments).
init_prompt = load_prompt(f'prompts/init/{init_prompt_name}')
bestPractice_prompt = load_prompt(f'prompts/toxicSpeech/{bestPractice_prompt_name}').format()
format_prompt = load_prompt(f'prompts/toxicSpeech/{format_prompt_name}')

In [None]:
# Stage 1 yields free text; stage 2 receives it as ``revised_ticket`` and
# parses the formatted reply as JSON.
chain1 = init_prompt | model | StrOutputParser()
chain2 = {"revised_ticket": chain1} | format_prompt | model | JsonOutputParser()

In [None]:
output = await chain2.ainvoke({"role":"Content Moderator", "best_practice":bestPractice_prompt, "ticket":json_sample})
output

In [None]:
# save_result requires the algorithm label as first argument and the loaded
# ticket after the ticket URI; the previous call passed neither and raised
# TypeError (missing positional arguments).
# NOTE(review): '0Shot' is an assumed label, and json_sample is assumed to
# provide the 'Jira', 'IssueId' and 'EvoId' keys that save_result reads --
# confirm both.
save_result('0Shot', output, 'toxicSpeech', MODELNAME, init_prompt_name, bestPractice_prompt_name, format_prompt_name, 'JsonOutputParser', sample_name, json_sample)

## Internationalization

In [None]:
# Load the single sample ticket used for the internationalization experiment.
sample_name = 'last_row_german.json'
with open(f'./data/json/last_row/{sample_name}') as sample_file:
    json_sample = json.load(sample_file)

In [None]:
# Prompt file names for the internationalization experiment.
init_prompt_name = "initPrompt_V1.3.0.json"
bestPractice_prompt_name = "internationalizationPrompt_V1.5.0.json"
format_prompt_name = "internationalizationFormatPrompt_V1.2.0.json"

# Load the templates; the best-practice prompt is rendered to text
# immediately (format() is called without arguments).
init_prompt = load_prompt(f'prompts/init/{init_prompt_name}')
bestPractice_prompt = load_prompt(f'prompts/internationalization/{bestPractice_prompt_name}').format()
format_prompt = load_prompt(f'prompts/internationalization/{format_prompt_name}')

In [None]:
# Stage 1 yields free text; stage 2 receives it as ``revised_ticket`` and
# parses the formatted reply as JSON.
chain1 = init_prompt | model | StrOutputParser()
chain2 = {"revised_ticket": chain1} | format_prompt | model | JsonOutputParser()

In [None]:
output = await chain2.ainvoke({"role":"Software Engineer", "best_practice":bestPractice_prompt, "ticket":json_sample})
output

In [None]:
# save_result requires the algorithm label as first argument and the loaded
# ticket after the ticket URI; the previous call passed neither and raised
# TypeError (missing positional arguments).
# NOTE(review): '0Shot' is an assumed label, and json_sample is assumed to
# provide the 'Jira', 'IssueId' and 'EvoId' keys that save_result reads --
# confirm both.
save_result('0Shot', output, 'internationalization', MODELNAME, init_prompt_name, bestPractice_prompt_name, format_prompt_name, 'JsonOutputParser', sample_name, json_sample)