In [2]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

import json
import os
from datetime import datetime
from pprint import pprint

import database
import helper
import utils

In [3]:
# Shared logger at "info" level; the log-level and datetime display flags are switched off.
LOG = utils.CustomLogger(
    "CustomLogger",
    log_level="info",
    display_loglevel=False,
    display_datetime=False,
)

# Pickle helper configured with ./data as its data path; it reports through LOG.
PICKLE_LIB = utils.PickleLib(data_path="./data", logger=LOG)

# Jira repository under inspection: used below as the collection name,
# as the key into the issue-type mapping and as the `jira` filter value.
JIRA = "JiraEcosystem"

# Data Search

In [None]:
# Open the connection through the project-local `database` module.
client = database.connect()
# Select the `JiraRepos` database, then the collection named after the chosen Jira.
db = client.JiraRepos
collection = db[JIRA]

#### Summary

In [None]:
def getInRange():
    """Sample up to five issues whose summary length lies in the range 39..70.

    Only documents whose `fields.summary` exists and is a string are
    considered; the length is measured in code points (`$strLenCP`).

    Both bounds are inclusive. `getShorter` selects lengths below 39 and
    `getLonger` selects lengths above 70, so inclusive bounds here make the
    three queries cover every length; with exclusive bounds, summaries of
    exactly 39 or 70 code points were returned by none of them.

    Returns:
        Whatever `collection.aggregate` returns for the pipeline (an iterable
        of matching documents, randomly sampled by the `$sample` stage).
    """
    summary_length = {"$strLenCP": "$fields.summary"}
    results = collection.aggregate([
        {
            "$match": {
                "fields.summary": {
                    "$exists": True,
                    "$type": "string"
                },
                "$expr": {
                    "$and": [
                        {"$gte": [summary_length, 39]},
                        {"$lte": [summary_length, 70]}
                    ]
                }
            }
        },
        {
            "$sample": {
                "size": 5
            }
        }
    ])
    return results

def getShorter():
    """Sample up to five issues whose summary is shorter than 39 code points.

    Only documents whose `fields.summary` exists and is a string are matched.

    Returns:
        Whatever `collection.aggregate` returns for the pipeline.
    """
    is_text_summary = {"$exists": True, "$type": "string"}
    below_minimum = {"$lt": [{"$strLenCP": "$fields.summary"}, 39]}

    match_stage = {
        "$match": {
            "fields.summary": is_text_summary,
            "$expr": {"$and": [below_minimum]},
        }
    }
    sample_stage = {"$sample": {"size": 5}}

    return collection.aggregate([match_stage, sample_stage])

def getLonger():
    """Sample up to ten issues whose summary is longer than 70 code points.

    Only documents whose `fields.summary` exists and is a string are matched.

    Returns:
        Whatever `collection.aggregate` returns for the pipeline.
    """
    is_text_summary = {"$exists": True, "$type": "string"}
    above_maximum = {"$gt": [{"$strLenCP": "$fields.summary"}, 70]}

    match_stage = {
        "$match": {
            "fields.summary": is_text_summary,
            "$expr": {"$and": [above_maximum]},
        }
    }
    sample_stage = {"$sample": {"size": 10}}

    return collection.aggregate([match_stage, sample_stage])

In [None]:
# Pick exactly one of the three samplers; the other two stay commented out.
# results = getInRange()
# results = getShorter()
results = getLonger()

# Keep every sampled document and print its id, summary length and summary text.
summary = []
for document in results:
    summary.append(document)
    issue_id = str(document['id'])
    summary_text = document['fields']['summary']
    print("Id: " + issue_id + " Length: " + str(len(summary_text)) + ": " + summary_text)

#### Description Structure

In [None]:
def findKeysByCode(data, target_code):
    """Return the keys of `data` whose value has a 'code' equal to `target_code`.

    Args:
        data: Mapping from a key to a container that may hold a 'code' entry.
        target_code: The code value to look for.

    Returns:
        A list of the matching keys, in the iteration order of `data`.
        Entries without a 'code' are skipped.
    """
    return [
        name
        for name, entry in data.items()
        if 'code' in entry and entry['code'] == target_code
    ]

In [None]:
# Load the issue-type mapping; the path is relative to the current working directory.
# The encoding is pinned to UTF-8 so that reading does not depend on the
# platform's default text encoding.
with open('issueTypeMapping.json', encoding='utf-8') as f:
    mappedIssueTypes = json.load(f)

In [None]:
# Keys of the selected Jira's mapping whose entry has the code 'Bug Report';
# they are used below as the allowed `fields.issuetype.name` values.
mappedTypes = findKeysByCode(mappedIssueTypes[JIRA], 'Bug Report')
# Bare expression so the list is shown as the cell output.
mappedTypes

In [None]:
# Sample up to 30 issues of the mapped bug-report types that have a string description.
has_text_description = {"$exists": True, "$type": "string"}
bug_report_pipeline = [
    {
        "$match": {
            "fields.description": has_text_description,
            "fields.issuetype.name": {"$in": mappedTypes},
        }
    },
    {"$sample": {"size": 30}},
]
results = collection.aggregate(bug_report_pipeline)

# Keep the sampled documents and list their ids.
tickets = []
for document in results:
    tickets.append(document)
    print("Id", document['id'])


# Ticket Creation

In [4]:
### Globals ###
# Reset the shared logger before the ticket-creation steps
# (what exactly is reset is defined by utils.CustomLogger).
LOG.reset()
# Folder and dataset name: the dataset cells below read and write
# data/<FOLDERNAME>/<FOLDERNAME>Dataset.csv.
FOLDERNAME = "examples"

In [5]:
# Load the stored evolution dataframe for the selected Jira; 'gzip' is passed
# as the second argument of pickle_load.
# NOTE(review): presumably this unpickles the file — unpickling can execute
# arbitrary code, so only load files from a trusted source.
evo_df = PICKLE_LIB.pickle_load("./jiraEvolutions/load_evolution_dataframe(jiras=[_"+JIRA+"_])", 'gzip')
# Alternative input (a 10000-row sample), kept for reference:
# evo_df = PICKLE_LIB.pickle_load("./jiraEvolutions/load_evolution_dataframe(sample_data_n=10000)", 'gzip')

[Start] 🥒 Loading data from Pickle: "./jiraEvolutions/load_evolution_dataframe(jiras=[_JiraEcosystem_]).pgzip"


	 Data: 100%|#####################################################| 163M/163M [00:01<00:00, 113MB/s]

[ End ] Duration: 00:00:01.7841





In [7]:
# Alternative ways of choosing the issue id, kept for reference:
# tickets = evo_df[evo_df["jira"]== JIRA]
# bugReports = tickets[tickets["data_to"].isin(mappedTypes)]
# sample_id = bugReports['issue_id'].values[3]
# sample_id = tickets[7]['id']
sample_id = "180305"

# Narrow the evolution rows first to the chosen issue id, then to the selected Jira.
matches_issue = evo_df["issue_id"] == sample_id
samples = evo_df[matches_issue]
matches_jira = samples["jira"] == JIRA
sample = samples[matches_jira]
sample

Unnamed: 0,jira,issue_id,history_order,field,field_evo_order,field_evo_first,field_evo_last,data_from,data_to,history_author,...,last_creator,last_reporter,last_assignee,last_commenter,last_evolver,prev_creators,prev_reporters,prev_assignees,prev_commenters,prev_evolvers
324470,JiraEcosystem,180305,0,Summary,0,True,True,,Uninstalling a theme add-on does not reset the color scheme,RichardS,...,,,,,,[],[],[],[],[]
324471,JiraEcosystem,180305,0,Description,0,True,True,,As a customer I would like the color scheme selection to revert to the default color scheme if I uninstall the currently activated global theme add-on.\r\n\r\nSteps to reproduce:\r\n\r\n1. Install a theme add-on\r\n2. Select the theme globally\r\n3. Uninstall the add-on\r\n\r\nExpected results:\r\nAll spaces are back to the look they had previously\r\n\r\nActual results:\r\nTheme's default color scheme is still applied and color scheme settings page looks like this\r\n\r\n !Screen Shot 2016-11-14 at 16.51.59.png|thumbnail!,RichardS,...,,,,,,[],[],[],[],[]
324472,JiraEcosystem,180305,0,Labels,0,True,False,,refinedwiki,RichardS,...,,,,,,[],[],[],[],[]
324473,JiraEcosystem,180305,0,IssueType,0,True,True,,Bug,RichardS,...,,,,,,[],[],[],[],[]
324474,JiraEcosystem,180305,0,Project,0,True,True,,Confluence Ecosystem (Moved: go.atlassian.com/ce-move),RichardS,...,,,,,,[],[],[],[],[]
324475,JiraEcosystem,180305,0,CreatedDate,0,True,True,,2016-11-14T09:57:24.511-0600,RichardS,...,,,,,,[],[],[],[],[]
324476,JiraEcosystem,180305,0,ResolvedDate,0,True,True,,2017-03-19T23:51:05.921-0500,RichardS,...,,,,,,[],[],[],[],[]
324477,JiraEcosystem,180305,0,Status,0,True,False,,Triage,RichardS,...,,,,,,[],[],[],[],[]
324478,JiraEcosystem,180305,0,Priority,0,True,True,,Major,RichardS,...,,,,,,[],[],[],[],[]
324479,JiraEcosystem,180305,0,Creator,0,True,True,,RichardS,RichardS,...,,,,,,[],[],[],[],[]


In [None]:
# Build the ticket for the chosen evolution step from the sampled rows via
# the project helper.
# NOTE(review): step 0 presumably denotes the ticket's initial state — confirm in helper.createTicket.
evolutionStep = 0
ticket = helper.createTicket(sample, evolutionStep)
# Bare expression so the ticket is shown as the cell output.
ticket

In [None]:
# Print the first row's description as plain text instead of its table rendering.
print(ticket['Description'].values[0])

# Ticket PreProcessing

In [None]:
def preprocessTickets(ticket):
    """Normalize the date columns and the issue id of a ticket, in place.

    `CreatedDate` and `ResolvedDate` are parsed from the ISO-like format
    `%Y-%m-%dT%H:%M:%S.%f%z` (e.g. `2016-11-14T09:57:24.511-0600`) and
    rewritten as `%Y-%m-%d %H:%M:%S`. The wall-clock time is kept as given;
    the UTC offset and the fractional seconds are dropped. `IssueId` is
    converted to `int`.

    Each value is read from the first row and the converted value is assigned
    to the whole column.

    A missing date (`None`/NaN, e.g. a ticket that has not been resolved) is
    left untouched instead of raising a `TypeError`.

    Args:
        ticket: DataFrame with the columns `CreatedDate`, `ResolvedDate` and
            `IssueId`. It is modified in place.

    Returns:
        None.

    Raises:
        ValueError: If a present date does not match the expected input
            format, or `IssueId` cannot be converted to `int`.
    """
    source_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    target_format = '%Y-%m-%d %H:%M:%S'

    ### Convert CreatedDate and ResolvedDate to datetime
    for column in ('CreatedDate', 'ResolvedDate'):
        raw_date = ticket[column].values[0]
        if pd.isna(raw_date):
            # Nothing to convert; keep the missing marker as it is.
            continue
        parsed_date = datetime.strptime(raw_date, source_format)
        ticket[column] = parsed_date.strftime(target_format)

    ### Convert IssueId to Int
    issue_id = ticket['IssueId'].values[0]
    ticket['IssueId'] = int(issue_id)

# Normalize the ticket created above; the function modifies `ticket` in place and returns None.
preprocessTickets(ticket)

In [None]:
# Hand the preprocessed ticket to the project helper together with the folder
# name, evolution step, Jira name and issue id.
# NOTE(review): where and in which format it is stored is defined in helper.saveTicket.
helper.saveTicket(FOLDERNAME, ticket, evolutionStep, JIRA, sample_id)

# Annotate Ticket

#### Update

In [None]:
def annotateTicket(ticket, annotation, reason):
    """Store a violation annotation and its justification on the ticket.

    Writes `annotation` to `ticket['ViolationActual']` and `reason` to
    `ticket['ViolationReason']`, then prints whether this worked.

    The annotation stays best effort: ordinary errors are reported on stdout
    instead of being raised. Unlike a bare `except:`, this does not swallow
    `KeyboardInterrupt` or `SystemExit`, and the cause of a failure is shown.

    Args:
        ticket: Object supporting item assignment (e.g. a DataFrame or dict);
            modified in place.
        annotation: Value to store under 'ViolationActual'.
        reason: Value to store under 'ViolationReason'.

    Returns:
        None.
    """
    try:
        ticket['ViolationActual'] = annotation
        ticket['ViolationReason'] = reason
    except Exception as error:
        print("Annotation failed. (" + type(error).__name__ + ": " + str(error) + ")")
    else:
        print("Annotation successful.")

# Mark the ticket as violating ("TRUE") and record the reasons.
# NOTE(review): the second line of the triple-quoted reason keeps its leading
# spaces, so they become part of the stored text.
annotateTicket(ticket, "TRUE", """1. Resolution has to be set from 'None' to 'Low Priority'.
                           2. Status has to be set from 'Open' to 'Resolved'.""")

In [None]:
# Append the annotated ticket to the folder's CSV dataset, creating the
# dataset on first use. The path is built once so that the existence check,
# the read and the write always refer to the same file.
# Note: every run of this cell appends the current ticket again.
dataset_dir = "data/" + FOLDERNAME
dataset_path = dataset_dir + "/" + FOLDERNAME + "Dataset.csv"

if os.path.isfile(dataset_path):
    dataset = pd.read_csv(dataset_path)
    print("The dataset already exists.")
else:
    dataset = pd.DataFrame(columns=['Jira', 'IssueId', 'EvoId', 'Summary', 'Description', 'VersionsAffected', 'IssueType', 'Project', 'Components', 'CreatedDate', 'ResolvedDate', 'Status', 'Priority', 'Creator', 'Reporter', 'Resolution', 'IssueLinks', 'Labels','VersionsFixed', 'Assignee', 'TimeSpent', 'Comments', 'ViolationActual', 'ViolationReason', 'FieldCount', 'Fields'])
    print("The dataset was created successfully.")

dataset = pd.concat([dataset, ticket], ignore_index=True)

# Create the target folder if it is missing so that the write cannot fail on it.
os.makedirs(dataset_dir, exist_ok=True)
dataset.to_csv(dataset_path, index=False)
print("The ticket was inserted into the dataset successfully!")


In [None]:
# Read the dataset back from disk and show it as the cell output.
dataset = pd.read_csv('data/' + FOLDERNAME + '/' + FOLDERNAME + 'Dataset.csv')
dataset

#### Bug Report Structure

In [None]:
def annotateTicket(ticket, annotation, reason):
    """Store a bug-report-structure (smell) annotation and its justification.

    Writes `annotation` to `ticket['SmellActual']` and `reason` to
    `ticket['SmellReason']`, then prints whether this worked.

    This definition reuses the name of the violation annotator defined
    earlier in the notebook and replaces it from this cell onwards.

    The annotation stays best effort: ordinary errors are reported on stdout
    instead of being raised. Unlike a bare `except:`, this does not swallow
    `KeyboardInterrupt` or `SystemExit`, and the cause of a failure is shown.

    Args:
        ticket: Object supporting item assignment (e.g. a DataFrame or dict);
            modified in place.
        annotation: Value to store under 'SmellActual'.
        reason: Value to store under 'SmellReason'.

    Returns:
        None.
    """
    try:
        ticket['SmellActual'] = annotation
        ticket['SmellReason'] = reason
    except Exception as error:
        print("Annotation failed. (" + type(error).__name__ + ": " + str(error) + ")")
    else:
        print("Annotation successful.")

# Three prepared annotations ("3", "2", "1") with their reasons; exactly one
# call is active and the other two stay commented out.
#annotateTicket(ticket, "3", """No previous structure. The model has to generate a new structure from scratch.""")
annotateTicket(ticket, "2", """The structure contains some parts, but is not "complete".""")
#annotateTicket(ticket, "1", """The structure contains all important parts or was only been slightly modified.""")

In [None]:
# Append the annotated ticket to the folder's CSV dataset, creating the
# dataset on first use. The path is built once so that the existence check,
# the read and the write always refer to the same file.
# Note: every run of this cell appends the current ticket again.
dataset_dir = "data/" + FOLDERNAME
dataset_path = dataset_dir + "/" + FOLDERNAME + "Dataset.csv"

if os.path.isfile(dataset_path):
    dataset = pd.read_csv(dataset_path)
    print("The dataset already exists.")
else:
    dataset = pd.DataFrame(columns=['Jira', 'IssueId', 'EvoId', 'Summary', 'Description', 'VersionsAffected', 'IssueType', 'Project', 'Components', 'CreatedDate', 'ResolvedDate', 'Status', 'Priority', 'Creator', 'Reporter', 'Resolution', 'IssueLinks', 'Labels','VersionsFixed', 'Assignee', 'TimeSpent', 'Comments', 'ViolationActual', 'ViolationReason', 'FieldCount', 'Fields'])
    print("The dataset was created successfully.")

dataset = pd.concat([dataset, ticket], ignore_index=True)

# Create the target folder if it is missing so that the write cannot fail on it.
os.makedirs(dataset_dir, exist_ok=True)
dataset.to_csv(dataset_path, index=False)
print("The ticket was inserted into the dataset successfully!")

In [None]:
# Read the dataset back from disk and show it as the cell output.
dataset = pd.read_csv('data/' + FOLDERNAME + '/' + FOLDERNAME + 'Dataset.csv')
dataset