In [1]:
import pymongo
# Connect to the MongoDB server on this machine (default port 27017).
MONGO_URI = "mongodb://localhost:27017/"
myclient = pymongo.MongoClient(MONGO_URI)

# The collection names of the "JiraRepos" database drive the per-collection
# CSV loading further down, so keep them in `collist` and display them.
mydb = myclient["JiraRepos"]
collist = mydb.list_collection_names()
collist

['Spring',
 'RedHat',
 'Sakai',
 'JiraEcosystem',
 'Jira',
 'Hyperledger',
 'Apache',
 'SecondLife',
 'MariaDB',
 'MongoDB',
 'Mojang',
 'Qt',
 'JFrog',
 'IntelDAOS',
 'Mindville',
 'Sonatype']

In [2]:
import pandas as pd
import os
# Directory the per-collection CSV files are read from.
csv_dir = 'only_useful_cols'
# Create dir if not exists
os.makedirs(csv_dir, exist_ok=True)

# Load one CSV per collection name; collections without a readable CSV are
# reported and skipped so the remaining ones still end up in the result.
all_dfs = []
for collection in collist:
    try:
        # Only the read itself is guarded, so a bug in the code below is not
        # silently reported as a "file" problem.
        project = pd.read_csv(f'{csv_dir}/{collection}.csv')
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        # Missing/unreadable/malformed file: best effort, move on to the next one.
        print(f'Error  {e} in {collection}')
        continue

    # Make a new column where the rows are the collection name
    project['collection'] = collection
    all_dfs.append(project)
    print(f'Processed {collection}')

# Stop here with an explicit message when nothing could be loaded; previously
# the cell printed a note and then failed with a NameError on `df`.
if not all_dfs:
    raise RuntimeError('No CSV files were processed.')

# Stack all collections into one frame with a fresh 0..n-1 index.
df = pd.concat(all_dfs, ignore_index=True)
df

Processed Spring
Processed RedHat
Processed Sakai
Processed JiraEcosystem
Processed Jira
Processed Hyperledger
Processed Apache
Processed SecondLife
Error  [Errno 2] No such file or directory: 'only_useful_cols/MariaDB.csv' in MariaDB
Processed MongoDB


  project = pd.read_csv(f'only_useful_cols/{collection}.csv')


Processed Mojang
Processed Qt
Processed JFrog
Processed IntelDAOS
Processed Mindville
Processed Sonatype


Unnamed: 0,fields.priority.name,fields.description,fields.project.name,fields.labels,fields.issuetype.name,collection
0,Blocker,We tried upgrading from Spring Boot 2.0.6 to S...,Spring XD,[],Bug,Spring
1,Major,The jobs that appear under Executions section ...,Spring XD,[],Bug,Spring
2,Trivial,Working with Spring-XD version 1.3.2.RELEASE\n...,Spring XD,[],Bug,Spring
3,Major,My project 7 node cluster and in that 2 node a...,Spring XD,"['Spring', 'xd']",Bug,Spring
4,Minor,See https://github.com/spring-projects/spring-...,Spring XD,[],Story,Spring
...,...,...,...,...,...,...
2655048,Major,it is very beautiful.,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2655049,Major,library,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2655050,Major,What is reactive-gremlin\r\n\r\nreactive-greml...,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2655051,Major,"Android view for a swipeable, weekly calendar.",Community Support - Open Source Project Reposi...,[],New Project,Sonatype


In [3]:
# Persist the combined frame to disk; the row index is not written.
combined_csv_path = 'all_projects.csv'
df.to_csv(combined_csv_path, index=False)

In [9]:
# Shorten the flattened Jira field names to plain column names.
# Reassigning (instead of inplace=True) leaves the previously displayed frame
# object untouched and makes the cell safe to re-run: keys that are no longer
# present are simply ignored by rename.
column_names = {
    'fields.priority.name': 'priority',
    'fields.description': 'description',
    'fields.project.name': 'project',
    'fields.issuetype.name': 'issuetype',
    'fields.labels': 'labels',
}
df = df.rename(columns=column_names)
df

Unnamed: 0,priority,description,project,labels,issuetype,collection
0,Blocker,We tried upgrading from Spring Boot 2.0.6 to S...,Spring XD,[],Bug,Spring
1,Major,The jobs that appear under Executions section ...,Spring XD,[],Bug,Spring
2,Trivial,Working with Spring-XD version 1.3.2.RELEASE\n...,Spring XD,[],Bug,Spring
3,Major,My project 7 node cluster and in that 2 node a...,Spring XD,"['Spring', 'xd']",Bug,Spring
4,Minor,See https://github.com/spring-projects/spring-...,Spring XD,[],Story,Spring
...,...,...,...,...,...,...
2018905,Major,it is very beautiful.,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2018906,Major,library,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2018907,Major,What is reactive-gremlin\r\n\r\nreactive-greml...,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2018908,Major,"Android view for a swipeable, weekly calendar.",Community Support - Open Source Project Reposi...,[],New Project,Sonatype


In [5]:
# Discard issues whose priority is NaN, then renumber the rows from 0
# (the old index is dropped rather than kept as a column).
df = df.dropna(subset=['priority']).reset_index(drop=True)
df

Unnamed: 0,priority,description,project,fields.labels,issuetype,collection
0,Blocker,We tried upgrading from Spring Boot 2.0.6 to S...,Spring XD,[],Bug,Spring
1,Major,The jobs that appear under Executions section ...,Spring XD,[],Bug,Spring
2,Trivial,Working with Spring-XD version 1.3.2.RELEASE\n...,Spring XD,[],Bug,Spring
3,Major,My project 7 node cluster and in that 2 node a...,Spring XD,"['Spring', 'xd']",Bug,Spring
4,Minor,See https://github.com/spring-projects/spring-...,Spring XD,[],Story,Spring
...,...,...,...,...,...,...
2071438,Major,it is very beautiful.,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2071439,Major,library,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2071440,Major,What is reactive-gremlin\r\n\r\nreactive-greml...,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2071441,Major,"Android view for a swipeable, weekly calendar.",Community Support - Open Source Project Reposi...,[],New Project,Sonatype


In [6]:
# Number of issues per priority label, most frequent first; show at most 50 labels.
priority_counts = df['priority'].value_counts().to_frame()
priority_counts.head(50)


Unnamed: 0_level_0,count
priority,Unnamed: 1_level_1
Major,1080149
Minor,287654
Major - P3,109573
Low,86032
Critical,77997
Medium,72888
Blocker,57974
P2: Important,46926
Not Evaluated,42453
Trivial,34313


In [7]:
# Remove issues with no priority level set.
# These labels are treated as "no priority assigned" rather than as real levels.
unset_priority_labels = [
    'Unset',
    'TBD',
    'Undefined',
    'Unprioritized',
    'Not Evaluated',
    'Unknown',
]

# A single membership mask replaces six successive full-frame filters.
# Rows whose priority is NaN are kept, exactly as the chained `!=` tests did.
is_placeholder = df['priority'].isin(unset_priority_labels)

# Drop the placeholder rows and reset the index to 0..n-1.
df = df[~is_placeholder].reset_index(drop=True)
df

Unnamed: 0,priority,description,project,fields.labels,issuetype,collection
0,Blocker,We tried upgrading from Spring Boot 2.0.6 to S...,Spring XD,[],Bug,Spring
1,Major,The jobs that appear under Executions section ...,Spring XD,[],Bug,Spring
2,Trivial,Working with Spring-XD version 1.3.2.RELEASE\n...,Spring XD,[],Bug,Spring
3,Major,My project 7 node cluster and in that 2 node a...,Spring XD,"['Spring', 'xd']",Bug,Spring
4,Minor,See https://github.com/spring-projects/spring-...,Spring XD,[],Story,Spring
...,...,...,...,...,...,...
2018905,Major,it is very beautiful.,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2018906,Major,library,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2018907,Major,What is reactive-gremlin\r\n\r\nreactive-greml...,Community Support - Open Source Project Reposi...,[],New Project,Sonatype
2018908,Major,"Android view for a swipeable, weekly calendar.",Community Support - Open Source Project Reposi...,[],New Project,Sonatype


In [8]:

# Re-check the label frequencies now that the placeholder priorities are gone (top 50).
remaining_priority_counts = df['priority'].value_counts().to_frame()
remaining_priority_counts.iloc[:50]

Unnamed: 0_level_0,count
priority,Unnamed: 1_level_1
Major,1080149
Minor,287654
Major - P3,109573
Low,86032
Critical,77997
Medium,72888
Blocker,57974
P2: Important,46926
Trivial,34313
P3: Somewhat important,28075


## Definition of each priority level according to the Atlassian documentation
* Lowest - Trivial problem with little or no impact on progress. Color: Light grey.
* Low - Minor problem or easily worked around. Color: Dark grey.
* Medium - Has the potential to affect progress. Color: Yellow.
* High - Serious problem that could block progress. Color: Orange.
* Highest - The problem will block progress. Color: A dark red.

#### Compared to GitHub
* High = High and Highest
* Medium = Medium
* Low = Low and Lowest

# Ranking of priority levels
These are the most commonly used priority schemes in this dataset.
Projects use between 4 and 6 priority levels; 5 levels is the most common.
Each list below is ordered from lowest to highest priority.
## 5 priority levels
### Sonatype, MongoDB, Apache, RedHat, Spring, Sakai (no Trivial issues), JiraEcosystem
* 4: Trivial
* 3: Minor
* 2: Major
* 1: Critical
* 0: Blocker
### Apache
* P4
* P3
* P2
* P1
* P0

### Hyperledger, Mindville
* Lowest
* Low 
* Medium
* High
* Highest

### RedHat
* Low
* Normal
* Medium
* High
* Urgent

### IntelDAOS
* Trivial
* Low
* Medium
* High
* Urgent

### SecondLife
* Trivial
* Minor
* Major
* Severe
* Showstopper

### Mojang
* Low
* Normal
* Important
* Critical
* Blocker

## 6 levels
### Qt
* P5: Not important
* P4: Low
* P3: Somewhat important
* P2: Important
* P1: Critical
* P0: Blocker

### JFrog
* Trivial
* Minor
* Normal
* High
* Critical
* Blocker


## 4 levels
### Apache
* Low
* Normal
* High
* Urgent

### Jira (the org)
* Low
* Medium
* High
* Highest
### Mindville (few issues; can be ignored)
* Level 4
* Level 3
* Level 2
* Level 1