In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

import json
import os
from datetime import datetime
from pprint import pprint

import database
import helper
import utils

In [2]:
# Notebook-wide helpers. Both classes come from the `utils` module imported
# above; PICKLE_LIB is pointed at ./data and is handed LOG as its logger.
LOG = utils.CustomLogger("CustomLogger", log_level= "info", display_loglevel= False, display_datetime= False)
PICKLE_LIB = utils.PickleLib(data_path="./data", logger= LOG)
# Name of the Jira repository under study. It is reused below as the database
# collection name, as the key into issueTypeMapping.json, as part of the
# evolution pickle file name and as the `jira` filter on the evolution frame.
JIRA = "Jira" 

# Data Search

In [3]:
# Connect through the `database` module, select the `JiraRepos` database and
# the collection named after JIRA. `collection` is the handle every
# aggregation query in this notebook runs against.
client = database.connect()
db = client.JiraRepos
collection = db[JIRA]

#### Summary

In [None]:
def getInRange():
    """Sample 5 random issues whose summary is 39 to 70 characters long.

    Only documents whose ``fields.summary`` exists and is a string are
    considered. Both bounds are inclusive so that, together with
    ``getShorter`` (length < 39) and ``getLonger`` (length > 70), every
    summary length falls into exactly one bucket. With exclusive bounds,
    summaries of exactly 39 or 70 characters were matched by none of the
    three samplers.

    Returns:
        Whatever ``collection.aggregate`` returns (presumably an iterable
        cursor over the sampled documents -- confirm against the driver).
    """
    # Length is measured in code points, matching Python's len() on str.
    summary_length = {"$strLenCP": "$fields.summary"}
    results = collection.aggregate([
        {
            "$match": {
                "fields.summary": {
                    "$exists": True,
                    "$type": "string"
                },
                "$expr": {
                    "$and": [
                        {"$lte": [summary_length, 70]},
                        {"$gte": [summary_length, 39]}
                    ]
                }
            }
        },
        {
            "$sample": {
                "size": 5
            }
        }
    ])
    return results

def getShorter():
    """Sample 5 random issues whose string summary has fewer than 39 code points.

    Returns the result of ``collection.aggregate`` unchanged.
    """
    has_text_summary = {"$exists": True, "$type": "string"}
    below_minimum = {"$lt": [{"$strLenCP": "$fields.summary"}, 39]}

    match_stage = {
        "$match": {
            "fields.summary": has_text_summary,
            "$expr": {"$and": [below_minimum]},
        }
    }
    sample_stage = {"$sample": {"size": 5}}

    return collection.aggregate([match_stage, sample_stage])

def getLonger():
    """Sample 10 random issues whose string summary has more than 70 code points.

    Returns the result of ``collection.aggregate`` unchanged.
    """
    has_text_summary = {"$exists": True, "$type": "string"}
    above_maximum = {"$gt": [{"$strLenCP": "$fields.summary"}, 70]}

    match_stage = {
        "$match": {
            "fields.summary": has_text_summary,
            "$expr": {"$and": [above_maximum]},
        }
    }
    sample_stage = {"$sample": {"size": 10}}

    return collection.aggregate([match_stage, sample_stage])

In [None]:
summary = []

# Exactly one sampler is active; the other two are kept for quick switching.
# results = getInRange()
# results = getShorter()
results = getLonger()

# Materialise the sampled documents first, then list id, length and text of
# every summary.
summary.extend(results)
for document in summary:
    text = document['fields']['summary']
    print(f"Id: {document['id']} Length: {len(text)}: {text}")

#### Description Structure

In [4]:
def findKeysByCode(data, target_code):
    """Return the keys of ``data`` whose entry has ``'code'`` equal to ``target_code``.

    Args:
        data: Mapping from a key to an entry that may contain a ``'code'`` item.
        target_code: The code value to look for.

    Returns:
        A list of the matching keys, in the iteration order of ``data``.
        Entries without a ``'code'`` item are ignored.
    """
    return [
        name
        for name, entry in data.items()
        if 'code' in entry and entry['code'] == target_code
    ]

In [5]:
# Load the issue-type mapping. It is indexed by Jira name below, and each entry
# maps an issue-type name to a record with a 'code' item.
# JSON text is UTF-8 by specification, so the encoding is pinned instead of
# relying on the platform default (which is not UTF-8 on every system).
with open('issueTypeMapping.json', encoding='utf-8') as f:
    mappedIssueTypes = json.load(f)

In [6]:
# Issue-type names of the selected Jira that the mapping file assigns to the
# 'Bug Report' code; used below to filter issues on `fields.issuetype.name`.
mappedTypes = findKeysByCode(mappedIssueTypes[JIRA], 'Bug Report')
mappedTypes

['Bug', 'Public Security Vulnerability']

In [8]:
tickets = []

# Only issues that have a string description and whose issue type is one of
# the bug-report types qualify; 30 of them are drawn at random.
bug_report_filter = {
    "fields.description": {"$exists": True, "$type": "string"},
    "fields.issuetype.name": {"$in": mappedTypes},
}
results = collection.aggregate([
    {"$match": bug_report_filter},
    {"$sample": {"size": 30}},
])

# Keep the sampled documents for the ticket-creation cells and list their ids.
tickets.extend(results)
for document in tickets:
    print("Id", document['id'])


Id 65965
Id 582506
Id 165721
Id 63755
Id 635687
Id 1535408
Id 269371
Id 255582
Id 277164
Id 236098
Id 1145690
Id 836565
Id 1672680
Id 98031
Id 582289
Id 447359
Id 24331
Id 801611
Id 1390634
Id 320748
Id 15118
Id 234599
Id 1376759
Id 1777804
Id 190571
Id 334252
Id 129731
Id 389414
Id 1385944
Id 697384


# Ticket Creation

In [9]:
### Globals ###
# NOTE(review): LOG.reset() presumably clears logger state left over from the
# cells above -- confirm against utils.CustomLogger.
LOG.reset()
# Passed to helper.saveTicket and used as the sub-folder of data/ that holds
# the annotated dataset CSV (data/<FOLDERNAME>/<FOLDERNAME>Dataset.csv).
FOLDERNAME = "examples"

In [10]:
# Load the evolution dataframe of the selected Jira from a gzip-compressed
# pickle (the recorded run read about 956 MB in roughly 11 seconds).
# NOTE(review): unpickling can execute arbitrary code -- only load files that
# were produced by this project.
evo_df = PICKLE_LIB.pickle_load("./jiraEvolutions/load_evolution_dataframe(jiras=[_"+JIRA+"_])", 'gzip')
# Alternative input, presumably a 10,000-row sample of the evolution data -- confirm before use.
# evo_df = PICKLE_LIB.pickle_load("./jiraEvolutions/load_evolution_dataframe(sample_data_n=10000)", 'gzip')

[Start] 🥒 Loading data from Pickle: "./jiraEvolutions/load_evolution_dataframe(jiras=[_Jira_]).pgzip"


	 Data: 100%|#####################################################| 956M/956M [00:09<00:00, 105MB/s]

[ End ] Duration: 00:00:10.9274





In [18]:
# Alternative selection taken directly from the evolution dataframe (disabled):
# tickets = evo_df[evo_df["jira"]== JIRA]
# bugReports = tickets[tickets["data_to"].isin(mappedTypes)]
# sample_id = bugReports['issue_id'].values[3]
# Pick one of the sampled bug reports by position. `tickets` is filled from a
# `$sample` stage, so index 7 refers to a different issue after every re-query
# and raises IndexError when fewer than 8 documents were returned.
sample_id = tickets[7]['id']
# All evolution rows recorded for that issue id ...
samples = evo_df[evo_df["issue_id"] == sample_id]
# ... restricted to the selected Jira.
sample = samples[samples["jira"]== JIRA]
sample

Unnamed: 0,jira,issue_id,history_order,field,field_evo_order,field_evo_first,field_evo_last,data_from,data_to,history_author,...,last_creator,last_reporter,last_assignee,last_commenter,last_evolver,prev_creators,prev_reporters,prev_assignees,prev_commenters,prev_evolvers
231626,Jira,255582,0,Summary,0,True,True,,Right-clicking an issue in plan mode does not work as described,Jeison,...,,,,,,[],[],[],[],[]
231627,Jira,255582,0,Description,0,True,True,,"According to this documentation: https://confluence.atlassian.com/display/GH/Ranking+an+Issue#RankinganIssue-Tipsandnotes:, right-clicking an issue in plan mode should make a menu with a few actions to pop up, but only a default browser menu is shown, no matter which browser/OS.",Jeison,...,,,,,,[],[],[],[],[]
231628,Jira,255582,0,Labels,0,True,False,,Triaged,Jeison,...,,,,,,[],[],[],[],[]
231629,Jira,255582,0,IssueType,0,True,True,,Bug,Jeison,...,,,,,,[],[],[],[],[]
231630,Jira,255582,0,Project,0,True,True,,Jira Software Server and Data Center,Jeison,...,,,,,,[],[],[],[],[]
231631,Jira,255582,0,CreatedDate,0,True,True,,2013-02-01T19:07:55.000+0000,Jeison,...,,,,,,[],[],[],[],[]
231632,Jira,255582,0,ResolvedDate,0,True,True,,2013-02-10T22:59:17.000+0000,Jeison,...,,,,,,[],[],[],[],[]
231633,Jira,255582,0,Status,0,True,False,,Open,Jeison,...,,,,,,[],[],[],[],[]
231634,Jira,255582,0,Priority,0,True,True,,Low,Jeison,...,,,,,,[],[],[],[],[]
231635,Jira,255582,0,Creator,0,True,True,,Jeison,Jeison,...,,,,,,[],[],[],[],[]


In [19]:
# Build the ticket frame for the chosen evolution step with the project helper.
# NOTE(review): step 0 presumably denotes the issue as first recorded --
# confirm in helper.createTicket.
evolutionStep = 0
ticket = helper.createTicket(sample, evolutionStep)
ticket

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row.loc[:, 'Comments'] = [comment_history]


Unnamed: 0,Jira,IssueId,EvoId,Summary,Description,VersionsAffected,IssueType,Project,Components,CreatedDate,...,Priority,Creator,Reporter,Resolution,IssueLinks,Labels,VersionsFixed,Assignee,TimeSpent,Comments
0,Jira,255582,0,Right-clicking an issue in plan mode does not work as described,"According to this documentation: https://confluence.atlassian.com/display/GH/Ranking+an+Issue#RankinganIssue-Tipsandnotes:, right-clicking an issue in plan mode should make a menu with a few actions to pop up, but only a default browser menu is shown, no matter which browser/OS.",,Bug,Jira Software Server and Data Center,,2013-02-01T19:07:55.000+0000,...,Low,Jeison,Jeison,,,Triaged,,,,"Empty DataFrame Columns: [Author, Created, Comment] Index: []"


In [20]:
# Print the description of the first row as plain text instead of the table rendering.
print(ticket['Description'].values[0])

According to this documentation: https://confluence.atlassian.com/display/GH/Ranking+an+Issue#RankinganIssue-Tipsandnotes:, right-clicking an issue in plan mode should make a menu with a few actions to pop up, but only a default browser menu is shown, no matter which browser/OS.


# Ticket PreProcessing

In [21]:
def preprocessTickets(ticket):
    """Normalise the date columns and the issue id of a ticket frame in place.

    ``CreatedDate`` and ``ResolvedDate`` are read from the first row, parsed
    from the ``%Y-%m-%dT%H:%M:%S.%f%z`` layout and written back to the whole
    column as ``%Y-%m-%d %H:%M:%S`` strings. The UTC offset is parsed but not
    part of the output and no time-zone conversion is applied. ``IssueId`` is
    read from the first row and written back as an ``int``.

    A missing date (``None``/``NaN``) is left untouched instead of raising,
    and a date that already has the target layout is accepted as is, so
    running the cell a second time on the same ticket is harmless.

    Args:
        ticket: DataFrame with ``CreatedDate``, ``ResolvedDate`` and
            ``IssueId`` columns; only the first row is inspected.

    Returns:
        None. ``ticket`` is modified in place.

    Raises:
        ValueError: If a present date matches neither layout, or ``IssueId``
            cannot be converted to ``int``.
    """
    source_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    target_format = '%Y-%m-%d %H:%M:%S'

    def _reformat(raw):
        # Convert one timestamp string; pass through already converted values.
        try:
            return datetime.strptime(raw, source_format).strftime(target_format)
        except ValueError:
            # Re-validate against the target layout so that genuinely
            # malformed input still raises instead of slipping through.
            datetime.strptime(raw, target_format)
            return raw

    ### Convert CreatedDate and ResolvedDate to datetime strings
    for column in ('CreatedDate', 'ResolvedDate'):
        raw = ticket[column].values[0]
        if pd.isna(raw):
            # No value recorded for this date: nothing to convert.
            continue
        ticket[column] = _reformat(raw)

    ### Convert IssueId to Int
    ticket['IssueId'] = int(ticket['IssueId'].values[0])

# Normalise the ticket built above; the frame is modified in place and nothing is returned.
preprocessTickets(ticket)

In [22]:
# Persist the pre-processed ticket through the project helper.
# NOTE(review): location and format are decided inside helper.saveTicket; the
# recorded output reports that a JSON file was written.
helper.saveTicket(FOLDERNAME, ticket, evolutionStep, JIRA, sample_id)

The JSON was successfully saved!


# Annotate Ticket

#### Update

In [None]:
def annotateTicket(ticket, annotation, reason, column_prefix="Violation"):
    """Store a manual annotation and its justification on ``ticket``.

    The values are written by item assignment to ``<column_prefix>Actual``
    and ``<column_prefix>Reason`` (``ViolationActual`` / ``ViolationReason``
    by default). The outcome is reported on stdout; ordinary errors are
    caught and printed together with their cause instead of being raised.

    Args:
        ticket: Object supporting item assignment, e.g. a DataFrame or dict.
        annotation: Value for the ``...Actual`` column.
        reason: Free-text justification for the ``...Reason`` column.
        column_prefix: Prefix of the two target column names.

    Returns:
        None.
    """
    try:
        ticket[column_prefix + 'Actual'] = annotation
        ticket[column_prefix + 'Reason'] = reason
    except Exception as error:
        # Catch Exception only, so Ctrl-C / kernel interrupts still propagate,
        # and show the cause rather than hiding it.
        print("Annotation failed.", repr(error))
    else:
        print("Annotation successful.")

# The reason is assembled from adjacent string literals so that the source
# indentation of the second line does not end up inside the stored text.
annotateTicket(
    ticket,
    "TRUE",
    "1. Resolution has to be set from 'None' to 'Low Priority'.\n"
    "2. Status has to be set from 'Open' to 'Resolved'.",
)

In [None]:
# Location of the annotated dataset: data/<FOLDERNAME>/<FOLDERNAME>Dataset.csv.
# The path is built once so that the existence check, the read and the write
# cannot drift apart.
dataset_dir = os.path.join("data", FOLDERNAME)
dataset_path = os.path.join(dataset_dir, FOLDERNAME + "Dataset.csv")

if os.path.isfile(dataset_path):
    dataset = pd.read_csv(dataset_path)
    print("The dataset already exists.")
else:
    # Make sure the target folder exists before the CSV is written below.
    os.makedirs(dataset_dir, exist_ok=True)
    dataset = pd.DataFrame(columns=[
        'Jira', 'IssueId', 'EvoId', 'Summary', 'Description',
        'VersionsAffected', 'IssueType', 'Project', 'Components',
        'CreatedDate', 'ResolvedDate', 'Status', 'Priority', 'Creator',
        'Reporter', 'Resolution', 'IssueLinks', 'Labels', 'VersionsFixed',
        'Assignee', 'TimeSpent', 'Comments', 'ViolationActual',
        'ViolationReason', 'FieldCount', 'Fields',
    ])
    print("The dataset was created successfully.")

# Append the annotated ticket as a new row.
# NOTE(review): re-running this cell appends the same ticket again.
dataset = pd.concat([dataset, ticket], ignore_index=True)

dataset.to_csv(dataset_path, index=False)
print("The ticket was inserted into the dataset successfully!")


In [None]:
# Read the CSV back from disk to inspect what was actually persisted.
dataset = pd.read_csv('data/' + FOLDERNAME + '/' + FOLDERNAME + 'Dataset.csv')
dataset

#### Bug Report Structure

In [None]:
def annotateTicket(ticket, annotation, reason, column_prefix="Smell"):
    """Store a manual annotation and its justification on ``ticket``.

    The values are written by item assignment to ``<column_prefix>Actual``
    and ``<column_prefix>Reason`` (``SmellActual`` / ``SmellReason`` by
    default). The outcome is reported on stdout; ordinary errors are caught
    and printed together with their cause instead of being raised.

    Args:
        ticket: Object supporting item assignment, e.g. a DataFrame or dict.
        annotation: Value for the ``...Actual`` column.
        reason: Free-text justification for the ``...Reason`` column.
        column_prefix: Prefix of the two target column names.

    Returns:
        None.
    """
    try:
        ticket[column_prefix + 'Actual'] = annotation
        ticket[column_prefix + 'Reason'] = reason
    except Exception as error:
        # Catch Exception only, so Ctrl-C / kernel interrupts still propagate,
        # and show the cause rather than hiding it.
        print("Annotation failed.", repr(error))
    else:
        print("Annotation successful.")

# Rate the structure of the ticket description on the scale "1" to "3" spelled
# out in the reasons below; keep exactly one of the three calls active.
#annotateTicket(ticket, "3", """No previous structure. The model has to generate a new structure from scratch.""")
annotateTicket(ticket, "2", """The structure contains some parts, but is not "complete".""")
#annotateTicket(ticket, "1", """The structure contains all important parts or was only been slightly modified.""")

In [None]:
# Location of the annotated dataset: data/<FOLDERNAME>/<FOLDERNAME>Dataset.csv.
# The path is built once so that the existence check, the read and the write
# cannot drift apart.
dataset_dir = os.path.join("data", FOLDERNAME)
dataset_path = os.path.join(dataset_dir, FOLDERNAME + "Dataset.csv")

if os.path.isfile(dataset_path):
    dataset = pd.read_csv(dataset_path)
    print("The dataset already exists.")
else:
    # Make sure the target folder exists before the CSV is written below.
    os.makedirs(dataset_dir, exist_ok=True)
    dataset = pd.DataFrame(columns=[
        'Jira', 'IssueId', 'EvoId', 'Summary', 'Description',
        'VersionsAffected', 'IssueType', 'Project', 'Components',
        'CreatedDate', 'ResolvedDate', 'Status', 'Priority', 'Creator',
        'Reporter', 'Resolution', 'IssueLinks', 'Labels', 'VersionsFixed',
        'Assignee', 'TimeSpent', 'Comments', 'ViolationActual',
        'ViolationReason', 'FieldCount', 'Fields',
    ])
    print("The dataset was created successfully.")

# Append the annotated ticket as a new row; columns the dataset does not have
# yet are added by the concatenation.
# NOTE(review): re-running this cell appends the same ticket again.
dataset = pd.concat([dataset, ticket], ignore_index=True)

dataset.to_csv(dataset_path, index=False)
print("The ticket was inserted into the dataset successfully!")

In [None]:
# Read the CSV back from disk to inspect what was actually persisted.
dataset = pd.read_csv('data/' + FOLDERNAME + '/' + FOLDERNAME + 'Dataset.csv')
dataset