In [55]:
import json
import pickle
import pprint
from pathlib import Path

import pandas as pd
from jira import JIRA

pp = pprint.PrettyPrinter(depth=6)

In [25]:
# Client for the SonarSource JIRA server at this URL; no credentials are
# supplied in this call. Every later search in this notebook goes through it.
jira = JIRA('https://jira.sonarsource.com/')

In [27]:
# Names of the attributes read from each issue's `fields` object (via getattr)
# when the rows of the issues DataFrame are built; each name also becomes a
# column of that frame, in this order.
# NOTE(review): the list appears to have been copied from the keys of
# `issues[0].fields.__dict__` printed further down in this notebook — confirm.
issue_fields = ['issuetype',
 'customfield_12130',
 'customfield_11041',
 'customfield_12132',
 'customfield_12131',
 'project',
 'customfield_12134',
 'customfield_12530',
 'fixVersions',
 'customfield_12133',
 'customfield_12136',
 'customfield_12532',
 'resolution',
 'customfield_10431',
 'customfield_12135',
 'customfield_12531',
 'customfield_12138',
 'customfield_10432',
 'customfield_12534',
 'customfield_12137',
 'customfield_10433',
 'customfield_12533',
 'customfield_11832',
 'customfield_11831',
 'customfield_11833',
 'resolutiondate',
 'workratio',
 'lastViewed',
 'watches',
 'created',
 'customfield_11032',
 'customfield_11033',
 'priority',
 'customfield_11630',
 'customfield_11233',
 'customfield_11036',
 'customfield_11830',
 'labels',
 'customfield_11631',
 'customfield_11038',
 'customfield_10930',
 'customfield_11347',
 'customfield_10931',
 'customfield_10932',
 'versions',
 'issuelinks',
 'assignee',
 'updated',
 'status',
 'components',
 'customfield_12031',
 'customfield_11140',
 'customfield_12030',
 'customfield_12033',
 'customfield_11141',
 'description',
 'customfield_12032',
 'customfield_12034',
 'customfield_12430',
 'customfield_11344',
 'customfield_11343',
 'customfield_10530',
 'customfield_11345',
 'customfield_10730',
 'customfield_11337',
 'customfield_11930',
 'customfield_11139',
 'customfield_11338',
 'summary',
 'creator',
 'subtasks',
 'customfield_11130',
 'customfield_11131',
 'reporter',
 'customfield_11132',
 'customfield_11133',
 'customfield_10243',
 'customfield_11335',
 'customfield_11334',
 'customfield_11730',
 'customfield_10434',
 'customfield_12536',
 'customfield_12535',
 'customfield_12139',
 'environment',
 'customfield_12538',
 'customfield_10437',
 'customfield_12537',
 'customfield_10438',
 'duedate',
 'votes',
 # Deliberately disabled entries: presumably Python object internals from the
 # same attribute dump rather than JIRA fields, so they are not extracted.
 #'__module__',
 #'__dict__',
 #'__weakref__',
 #'__doc__'
       ]

#download all issues
# Page through every issue of the SONAR project, requesting `size` issues per
# call, until the server returns an empty page.
size = 100          # requested page size (maxResults)
initial = 0         # number of non-empty pages fetched so far
issue_tuples = []   # rows for the DataFrame, filled by the next step
all_issues = []     # every issue object returned by the server
start = 0           # offset (startAt) of the next page
while True:
    issues = jira.search_issues('project=SONAR', startAt=start, maxResults=size)
    if len(issues) == 0:
        break
    # extend() appends in place instead of rebuilding the whole list per page.
    all_issues.extend(issues)
    # Advance by what was actually returned, not by `size`: if the server caps
    # the page below the requested size, a fixed stride would skip issues.
    start += len(issues)
    initial += 1

#create tuples for dataframe creation
# One row per issue: its key, its id, then the value of every attribute named
# in issue_fields (same order as the list), read from issue.fields.
for issue in all_issues:
    values = [issue.key, issue.id]
    values.extend(getattr(issue.fields, name) for name in issue_fields)
    issue_tuples.append(values)

In [63]:
# Column names mirror the row layout: key, id, then the issue_fields entries.
issues_df = pd.DataFrame(
    issue_tuples,
    columns=["issue_key", "issue_id", *issue_fields],
)
issues_df

Unnamed: 0,issue_key,issue_id,issuetype,customfield_12130,customfield_11041,customfield_12132,customfield_12131,project,customfield_12134,customfield_12530,...,customfield_12536,customfield_12535,customfield_12139,environment,customfield_12538,customfield_10437,customfield_12537,customfield_10438,duedate,votes
0,SONAR-13913,97898,Task,P3-Medium,0|i0dkhb:,No root cause identified,,SONAR,Not captured yet,,...,New,,,,,,,[],,0
1,SONAR-13912,97868,Task,P3-Medium,0|i0dkav:,No root cause identified,,SONAR,Not captured yet,,...,New,,,,,,,[],,0
2,SONAR-13910,97864,Improvement,P3-Medium,0|i0dka7:,No root cause identified,,SONAR,Not captured yet,,...,New,,,,,,,[],2020-10-05,0
3,SONAR-13909,97858,New Feature,P3-Medium,0|i0dk8v:,No root cause identified,,SONAR,Not captured yet,,...,New,,,,,,,[],2020-10-05,0
4,SONAR-13907,97855,Improvement,P3-Medium,0|i0dk87:,No root cause identified,,SONAR,Not captured yet,,...,New,,,,,,,[],2020-10-05,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11502,SONAR-5,30985,New Feature,,0|i0387z:,,,SONAR,,,...,,,,,,,,[],,0
11503,SONAR-4,29449,Bug,,0|i02zzj:,,,SONAR,,,...,,,,,,,,[],,0
11504,SONAR-3,32314,New Feature,,0|i03fqn:,,,SONAR,,,...,,,,,,,,[],,0
11505,SONAR-2,29663,New Feature,,0|i0312n:,,,SONAR,,,...,,,,,,,,[],,0


In [64]:
def extract_features(df, field, fields_to_extract):
    """Flatten attributes of the objects held in one column into new columns.

    For each name in ``fields_to_extract`` a column ``"<field>_<name>"`` is
    added to ``df``. Its values are ``getattr(value, name)`` for every value of
    ``df[field]``; falsy values (such as ``None``) produce ``None``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to read from and to add the new columns to (modified in place).
    field : str
        Name of the column whose values carry the attributes.
    fields_to_extract : iterable of str
        Attribute names to pull out of each value.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for convenience.
    """
    # Read from the frame that was passed in, not from a notebook-level global,
    # so the function works for any DataFrame.
    source = df[field]
    for fte in fields_to_extract:
        df[f"{field}_{fte}"] = source.apply(lambda x: getattr(x, fte) if x else None)
    return df

# Attributes to pull out of the objects stored in each column; every pair adds
# "<column>_<attribute>" columns to issues_df, in the order listed here.
nested_attributes = (
    ("issuetype", ["id", "name"]),
    ("priority", ["id", "name"]),
    ("assignee", ["key", "name"]),
    ("reporter", ["key", "name"]),
    ("creator", ["key", "name"]),
    ("status", ["id", "name"]),
    ("votes", ["votes"]),
)
for column, attributes in nested_attributes:
    extract_features(issues_df, column, attributes)

# Convert the date columns to UTC datetimes.
for date_column in ('created', 'resolutiondate', 'updated', 'duedate'):
    issues_df[date_column] = pd.to_datetime(issues_df[date_column], utc=True)

In [65]:
# Cache guard: write the CSV snapshot from the in-memory frame only when it
# does not exist yet, so the cell runs on a fresh checkout without editing
# commented-out code. Delete issues.csv to force a refresh after a new download.
if not Path("issues.csv").exists():
    issues_df.to_csv("issues.csv")
# Always load from disk so later cells see the same CSV round-tripped values
# whether or not the snapshot was just written.
issues_df = pd.read_csv("issues.csv", index_col=0)
issues_df

In [82]:
def extract_versions(all_issues, version_field, prefix):
    """Build a long-format table with one row per (issue, version) pair.

    ``version_field`` names the attribute of ``issue.fields`` that holds the
    iterable of version objects. Each version contributes a row with the
    issue id, the version name and the version id. The resulting columns are
    ``issue_id``, ``<prefix>_name`` and ``<prefix>_id``.
    """
    rows = [
        (issue.id, version.name, version.id)
        for issue in all_issues
        for version in getattr(issue.fields, version_field)
    ]
    column_names = ["issue_id", f"{prefix}_name", f"{prefix}_id"]
    return pd.DataFrame(rows, columns=column_names)

In [86]:
# Cache guard: build and save the fix-version table only when no snapshot
# exists yet; delete issues_fixversions.csv to force a rebuild.
if not Path("issues_fixversions.csv").exists():
    extract_versions(all_issues, "fixVersions", "fixVersion").to_csv("issues_fixversions.csv")
# Always load from disk so the result is the same on cached and fresh runs.
issues_fixversions = pd.read_csv("issues_fixversions.csv", index_col=0)
issues_fixversions

Unnamed: 0,issue_id,fixVersion_name,fixVersion_id
0,97864,8.5,16342
1,97858,8.5,16342
2,97855,8.5,16342
3,97786,8.6,16435
4,97773,8.5,16342
...,...,...,...
8514,30985,1.1,11537
8515,29449,1.1,11537
8516,32314,1.1,11537
8517,29663,1.1,11537


In [87]:
# Cache guard: build and save the affected-version table only when no snapshot
# exists yet; delete issues_versions.csv to force a rebuild.
if not Path("issues_versions.csv").exists():
    extract_versions(all_issues, "versions", "version").to_csv("issues_versions.csv")
# Always load from disk so the result is the same on cached and fresh runs.
issues_versions = pd.read_csv("issues_versions.csv", index_col=0)
issues_versions

Unnamed: 0,issue_id,version_name,version_id
0,97777,8.4.2,16404
1,97773,8.4.2,16404
2,97525,8.4.2,16404
3,97521,8.4,15833
4,97347,8.4.2,16404
...,...,...,...
3330,29492,1.0.2,11536
3331,29496,1.0.2,11536
3332,29480,1.1,11537
3333,29473,1.5,11545


In [31]:
# List the attribute names available on an issue's `fields` object.
# Uses all_issues rather than `issues`: after the download loop `issues` is the
# final, empty page, so indexing it fails on a top-to-bottom run. `pp` is the
# PrettyPrinter created in the import cell, so nothing is re-imported here.
pp.pprint(list(vars(all_issues[0].fields).keys()))

['issuetype',
 'customfield_12130',
 'customfield_11041',
 'customfield_12132',
 'customfield_12131',
 'project',
 'customfield_12134',
 'customfield_12530',
 'fixVersions',
 'customfield_12133',
 'customfield_12136',
 'customfield_12532',
 'resolution',
 'customfield_10431',
 'customfield_12135',
 'customfield_12531',
 'customfield_12138',
 'customfield_10432',
 'customfield_12534',
 'customfield_12137',
 'customfield_10433',
 'customfield_12533',
 'customfield_11832',
 'customfield_11831',
 'customfield_11833',
 'resolutiondate',
 'workratio',
 'lastViewed',
 'watches',
 'created',
 'customfield_11032',
 'customfield_11033',
 'priority',
 'customfield_11630',
 'customfield_11233',
 'customfield_11036',
 'customfield_11830',
 'labels',
 'customfield_11631',
 'customfield_11038',
 'customfield_10930',
 'customfield_11347',
 'customfield_10931',
 'customfield_10932',
 'versions',
 'issuelinks',
 'assignee',
 'updated',
 'status',
 'components',
 'customfield_12031',
 'customfield_11140'